Skip to content

Commit

Permalink
[FEATURE] Replace get_batch_list_from_batch_request with get_batch and get_batch_identifiers_list (great-expectations#10295)
Browse files Browse the repository at this point in the history

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Bill Dirks <bill@greatexpectations.io>
  • Loading branch information
3 people authored Sep 6, 2024
1 parent e8c484f commit 126e308
Show file tree
Hide file tree
Showing 59 changed files with 752 additions and 762 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,23 +46,20 @@
# Python
# <snippet name="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/data_assets/organize_batches_in_pandas_filesystem_datasource.py my_batch_list">
my_batch_request = my_asset.build_batch_request()
batches = my_asset.get_batch_list_from_batch_request(my_batch_request)
batch = my_asset.get_batch(my_batch_request)
# </snippet>

assert my_batch_request.datasource_name == "my_datasource"
assert my_batch_request.data_asset_name == "my_taxi_data_asset"
assert my_batch_request.options == {}

assert len(batches) == 3

for batch in batches:
batch_spec = batch.batch_spec
assert batch_spec.reader_method == "read_csv"
assert batch_spec.reader_options == {}
assert batch.data.dataframe.shape == (10000, 18)
batch_spec = batch.batch_spec
assert batch_spec.reader_method == "read_csv"
assert batch_spec.reader_options == {}
assert batch.data.dataframe.shape == (10000, 18)

# Python
# <snippet name="docs/docusaurus/docs/oss/guides/connecting_to_your_data/fluent/data_assets/organize_batches_in_pandas_filesystem_datasource.py print_batch_spec">
for batch in batches:
print(batch.batch_spec)
print(batch.batch_spec)
# </snippet>
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,8 @@
assert asset

my_batch_request = asset.build_batch_request()
batches = asset.get_batch_list_from_batch_request(my_batch_request)
assert len(batches) == 1
assert set(batches[0].columns()) == {
batch = asset.get_batch(my_batch_request)
assert set(batch.columns()) == {
"vendor_id",
"pickup_datetime",
"dropoff_datetime",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,8 @@
assert my_asset

my_batch_request = my_asset.build_batch_request()
batches = my_asset.get_batch_list_from_batch_request(my_batch_request)
assert len(batches) == 1
assert set(batches[0].columns()) == {
batch = my_asset.get_batch(my_batch_request)
assert set(batch.columns()) == {
"vendor_id",
"pickup_datetime",
"dropoff_datetime",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,8 @@
assert my_asset

my_batch_request = my_asset.build_batch_request()
batches = my_asset.get_batch_list_from_batch_request(my_batch_request)
assert len(batches) == 1
assert set(batches[0].columns()) == {
batch = my_asset.get_batch(my_batch_request)
assert set(batch.columns()) == {
"passenger_count",
"total_amount",
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,8 @@
assert datasource.get_asset_names() == {"my_taxi_data_asset"}

my_batch_request = data_asset.build_batch_request({"year": "2019", "month": "03"})
batches = data_asset.get_batch_list_from_batch_request(my_batch_request)
assert len(batches) == 1
assert set(batches[0].columns()) == {
batch = data_asset.get_batch(my_batch_request)
assert set(batch.columns()) == {
"vendor_id",
"pickup_datetime",
"dropoff_datetime",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,8 @@
my_batch_request = my_batch_definition.build_batch_request(
{"year": "2019", "month": "03"}
)
batches = data_asset.get_batch_list_from_batch_request(my_batch_request)
assert len(batches) == 1
assert set(batches[0].columns()) == {
batch = data_asset.get_batch(my_batch_request)
assert set(batch.columns()) == {
"vendor_id",
"pickup_datetime",
"dropoff_datetime",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,8 @@
assert datasource.get_asset_names() == {"my_taxi_data_asset"}

my_batch_request = data_asset.build_batch_request({"year": "2019", "month": "03"})
batches = data_asset.get_batch_list_from_batch_request(my_batch_request)
assert len(batches) == 1
assert set(batches[0].columns()) == {
batch = data_asset.get_batch(my_batch_request)
assert set(batch.columns()) == {
"vendor_id",
"pickup_datetime",
"dropoff_datetime",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,8 @@
assert datasource.get_asset_names() == {"my_taxi_data_asset"}

my_batch_request = data_asset.build_batch_request({"year": "2019", "month": "03"})
batches = data_asset.get_batch_list_from_batch_request(my_batch_request)
assert len(batches) == 1
assert set(batches[0].columns()) == {
batch = data_asset.get_batch(my_batch_request)
assert set(batch.columns()) == {
"vendor_id",
"pickup_datetime",
"dropoff_datetime",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,8 @@
assert my_asset

my_batch_request = my_asset.build_batch_request({"year": "2019", "month": "03"})
batches = my_asset.get_batch_list_from_batch_request(my_batch_request)
assert len(batches) == 1
assert set(batches[0].columns()) == {
batch = my_asset.get_batch(my_batch_request)
assert set(batch.columns()) == {
"vendor_id",
"pickup_datetime",
"dropoff_datetime",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,8 @@
my_batch_request = my_batch_definition.build_batch_request(
batch_parameters={"year": "2019", "month": "03"}
)
batches = my_asset.get_batch_list_from_batch_request(my_batch_request)
assert len(batches) == 1
assert set(batches[0].columns()) == {
batch = my_asset.get_batch(my_batch_request)
assert set(batch.columns()) == {
"vendor_id",
"pickup_datetime",
"dropoff_datetime",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,8 @@
assert my_batch_request.data_asset_name == "taxi_dataframe"
assert my_batch_request.options == {}

batches = data_asset.get_batch_list_from_batch_request(my_batch_request)
assert len(batches) == 1
assert set(batches[0].columns()) == {
batch = data_asset.get_batch(my_batch_request)
assert set(batch.columns()) == {
"VendorID",
"tpep_pickup_datetime",
"tpep_dropoff_datetime",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,5 @@
assert my_batch_request.data_asset_name == "my_df_asset"
assert my_batch_request.options == {}

batches = data_asset.get_batch_list_from_batch_request(my_batch_request)
assert len(batches) == 1
assert set(batches[0].columns()) == {"a", "b", "c"}
batch = data_asset.get_batch(my_batch_request)
assert set(batch.columns()) == {"a", "b", "c"}

This file was deleted.

2 changes: 1 addition & 1 deletion docs/docusaurus/docs/snippets/aws_cloud_storage_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@


# <snippet name="docs/docusaurus/docs/snippets/aws_cloud_storage_spark.py get_batch_list">
batches = asset.get_batch_list_from_batch_request(request)
batch = asset.get_batch(request)
# </snippet>

config = context.fluent_datasources["s3_datasource"].yaml()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,19 +72,15 @@

# Python
# <snippet name="docs/docusaurus/docs/snippets/get_existing_data_asset_from_existing_datasource_pandas_filesystem_example.py my_batch_list">
batches = my_asset.get_batch_list_from_batch_request(my_batch_request)
batch = my_asset.get_batch(my_batch_request)
# </snippet>

assert len(batches) == 3

for batch in batches:
batch_spec = batch.batch_spec
assert batch_spec.reader_method == "read_csv"
assert batch_spec.reader_options == {}
assert batch.data.dataframe.shape == (10000, 18)
batch_spec = batch.batch_spec
assert batch_spec.reader_method == "read_csv"
assert batch_spec.reader_options == {}
assert batch.data.dataframe.shape == (10000, 18)

# Python
# <snippet name="docs/docusaurus/docs/snippets/get_existing_data_asset_from_existing_datasource_pandas_filesystem_example.py print_batch_spec">
for batch in batches:
print(batch.batch_spec)
print(batch.batch_spec)
# </snippet>
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,8 @@
assert my_asset

my_batch_request = my_asset.build_batch_request()
batches = my_asset.get_batch_list_from_batch_request(my_batch_request)
assert len(batches) == 1
assert set(batches[0].columns()) == {
batch = my_asset.get_batch(my_batch_request)
assert set(batch.columns()) == {
"vendor_id",
"pickup_datetime",
"dropoff_datetime",
Expand Down Expand Up @@ -88,4 +87,4 @@
)
# </snippet>

batches = my_asset.get_batch_list_from_batch_request(my_batch_request)
batch = my_asset.get_batch(my_batch_request)
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,8 @@
# </snippet>

my_batch_request = my_table_asset.build_batch_request(partitioner=partitioner)
batches = my_table_asset.get_batch_list_from_batch_request(my_batch_request)
batch = my_table_asset.get_batch(my_batch_request)

assert len(batches) == 12

assert my_table_asset.get_batch_parameters_keys(partitioner=partitioner) == (
"year",
Expand All @@ -55,17 +54,14 @@
# Python
# <snippet name="docs/docusaurus/docs/snippets/organize_batches_in_sqlite_datasource.py my_batch_list">
my_batch_request = my_table_asset.build_batch_request(partitioner=partitioner)
batches = my_table_asset.get_batch_list_from_batch_request(my_batch_request)
batch = my_table_asset.get_batch(my_batch_request)
# </snippet>

assert my_batch_request.datasource_name == "my_datasource"
assert my_batch_request.data_asset_name == "my_table_asset"
assert my_batch_request.options == {}

assert len(batches) == 12

# Python
# <snippet name="docs/docusaurus/docs/snippets/organize_batches_in_sqlite_datasource.py print_batch_spec">
for batch in batches:
print(batch.batch_spec)
print(batch.batch_spec)
# </snippet>
7 changes: 0 additions & 7 deletions docs/sphinx_api_docs_source/public_api_missing_threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,14 @@
"File: great_expectations/datasource/fluent/config.py Name: pop",
"File: great_expectations/datasource/fluent/config.py Name: yaml",
"File: great_expectations/datasource/fluent/config_str.py Name: ConfigStr",
"File: great_expectations/datasource/fluent/file_path_data_asset.py Name: get_batch_list_from_batch_request",
"File: great_expectations/datasource/fluent/fluent_base_model.py Name: dict",
"File: great_expectations/datasource/fluent/fluent_base_model.py Name: yaml",
"File: great_expectations/datasource/fluent/invalid_datasource.py Name: build_batch_request",
"File: great_expectations/datasource/fluent/invalid_datasource.py Name: get_asset",
"File: great_expectations/datasource/fluent/invalid_datasource.py Name: get_batch_list_from_batch_request",
"File: great_expectations/datasource/fluent/invalid_datasource.py Name: get_batch_parameters_keys",
"File: great_expectations/datasource/fluent/pandas_datasource.py Name: dict",
"File: great_expectations/datasource/fluent/pandas_datasource.py Name: get_batch_list_from_batch_request",
"File: great_expectations/datasource/fluent/sources.py Name: add_datasource",
"File: great_expectations/datasource/fluent/sources.py Name: delete_datasource",
"File: great_expectations/datasource/fluent/spark_datasource.py Name: get_batch_list_from_batch_request",
"File: great_expectations/datasource/fluent/sql_datasource.py Name: get_batch_list_from_batch_request",
"File: great_expectations/datasource/new_datasource.py Name: get_batch_list_from_batch_request",
"File: great_expectations/exceptions/exceptions.py Name: InvalidExpectationConfigurationError",
"File: great_expectations/expectations/expectation.py Name: validate_configuration",
"File: great_expectations/expectations/expectation_configuration.py Name: to_domain_obj",
Expand All @@ -68,7 +62,6 @@
"File: great_expectations/expectations/set_based_column_map_expectation.py Name: register_metric",
"File: great_expectations/expectations/set_based_column_map_expectation.py Name: validate_configuration",
"File: great_expectations/experimental/datasource/fabric.py Name: build_batch_request",
"File: great_expectations/experimental/datasource/fabric.py Name: get_batch_list_from_batch_request",
"File: great_expectations/experimental/metric_repository/metric_retriever.py Name: get_validator",
"File: great_expectations/experimental/rule_based_profiler/helpers/util.py Name: build_batch_request",
"File: great_expectations/experimental/rule_based_profiler/rule_based_profiler.py Name: run",
Expand Down
10 changes: 2 additions & 8 deletions great_expectations/core/batch_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,8 @@ def get_batch(self, batch_parameters: Optional[BatchParameters] = None) -> Batch
Returns:
A Batch of data.
"""
batch_list = self.data_asset.get_batch_list_from_batch_request(
self.build_batch_request(batch_parameters=batch_parameters)
)

if len(batch_list) == 0:
raise ValueError("No batch found") # noqa: TRY003

return batch_list[-1]
batch_request = self.build_batch_request(batch_parameters=batch_parameters)
return self.data_asset.get_batch(batch_request)

def is_added(self) -> BatchDefinitionAddedDiagnostics:
return BatchDefinitionAddedDiagnostics(
Expand Down
Loading

0 comments on commit 126e308

Please sign in to comment.