You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I want to use a Mage pipeline to load data from a partitioned object in GCS and export it to BigQuery. The object is named other_data and is partitioned by year, month, and day. However, I get an error suggesting the object name is wrong when I use the name 'other_data/*/*/*/*'. Loading from a specific path such as 'other_data/year=2024/month=10/day=2/40949e85e0734000910e2c0179278e00-0.parquet' works, so the wildcard pattern appears to be the problem.
#558
Open
phuccodetrau opened this issue
Oct 29, 2024
· 0 comments
Data loader:
from mage_ai.settings.repo import get_repo_path
from mage_ai.io.config import ConfigFileLoader
from mage_ai.io.google_cloud_storage import GoogleCloudStorage
from os import path
if 'data_loader' not in globals():
from mage_ai.data_preparation.decorators import data_loader
if 'test' not in globals():
from mage_ai.data_preparation.decorators import test
@data_loader
def load_from_google_cloud_storage(*args, **kwargs):
"""
Template for loading data from a Google Cloud Storage bucket.
Specify your configuration settings in 'io_config.yaml'.
Data exporter:
from mage_ai.settings.repo import get_repo_path
from mage_ai.io.bigquery import BigQuery
from mage_ai.io.config import ConfigFileLoader
from pandas import DataFrame
from os import path
if 'data_exporter' not in globals():
from mage_ai.data_preparation.decorators import data_exporter
@data_exporter
def export_data_to_big_query(df: DataFrame, **kwargs) -> None:
"""
Template for exporting data to a BigQuery warehouse.
Specify your configuration settings in 'io_config.yaml'.
File /usr/local/lib/python3.10/site-packages/google/api_core/future/polling.py:261, in PollingFuture.result(self, timeout, retry, polling)
256 self._blocking_poll(timeout=timeout, retry=retry, polling=polling)
258 if self._exception is not None:
259 # pylint: disable=raising-bad-type
260 # Pylint doesn't recognize that this is valid in this case.
--> 261 raise self._exception
263 return self._result
BadRequest: 400 Table test_parquet_eec7e159_f7d0_4966_81c2_355a00ea9287_source does not have a schema.
The text was updated successfully, but these errors were encountered:
Data loader:
from mage_ai.settings.repo import get_repo_path
from mage_ai.io.config import ConfigFileLoader
from mage_ai.io.google_cloud_storage import GoogleCloudStorage
from os import path
if 'data_loader' not in globals():
from mage_ai.data_preparation.decorators import data_loader
if 'test' not in globals():
from mage_ai.data_preparation.decorators import test
@data_loader
def load_from_google_cloud_storage(*args, **kwargs):
"""
Template for loading data from a Google Cloud Storage bucket.
Specify your configuration settings in 'io_config.yaml'.
Data exporter:
from mage_ai.settings.repo import get_repo_path
from mage_ai.io.bigquery import BigQuery
from mage_ai.io.config import ConfigFileLoader
from pandas import DataFrame
from os import path
if 'data_exporter' not in globals():
from mage_ai.data_preparation.decorators import data_exporter
@data_exporter
def export_data_to_big_query(df: DataFrame, **kwargs) -> None:
"""
Template for exporting data to a BigQuery warehouse.
Specify your configuration settings in 'io_config.yaml'.
Issue:
GoogleCloudStorage initialized
└─ Loading data frame from bucket 'weather_bigdata_20241' at key 'other_data'...
DONE
BigQuery initialized
├─ Connecting to BigQuery warehouse...DONE
└─ Exporting data to table 'strong-ward-437213-j6.bigdata_20241.test_parquet'...
BadRequest Traceback (most recent call last)
File /home/src/magic-zoomcamp/data_exporters/weather_bq.py:23, in export_data_to_big_query(df, **kwargs)
---> 23 BigQuery.with_config(ConfigFileLoader(config_path, config_profile)).export(
File /usr/local/lib/python3.10/site-packages/mage_ai/io/bigquery.py:354, in BigQuery.export(self, df, table_id, database, if_exists, overwrite_types, query_string, verbose, unique_conflict_method, unique_constraints, write_disposition, create_dataset, **configuration_params)
--> 354 __process(database=database, write_disposition=write_disposition)
File /usr/local/lib/python3.10/site-packages/mage_ai/io/bigquery.py:343, in BigQuery.export.&lt;locals&gt;.__process(database, write_disposition)
--> 343 self.__write_table(
File /usr/local/lib/python3.10/site-packages/mage_ai/io/bigquery.py:403, in BigQuery.__write_table(self, df, table_id, overwrite_types, create_dataset, **configuration_params)
--> 403 return self.client.load_table_from_dataframe(df, table_id, job_config=config).result()
File /usr/local/lib/python3.10/site-packages/google/cloud/bigquery/job/base.py:952, in _AsyncJob.result(self, retry, timeout)
--> 952 return super(_AsyncJob, self).result(timeout=timeout, **kwargs)
File /usr/local/lib/python3.10/site-packages/google/api_core/future/polling.py:261, in PollingFuture.result(self, timeout, retry, polling)
--> 261 raise self._exception
BadRequest: 400 Table test_parquet_eec7e159_f7d0_4966_81c2_355a00ea9287_source does not have a schema.
The text was updated successfully, but these errors were encountered: