From c534b646d5ac75eeed1e8dccfa8d7ed42a4225d2 Mon Sep 17 00:00:00 2001
From: Joshua Stauffer <66793731+joshua-stauffer@users.noreply.github.com>
Date: Fri, 20 Dec 2024 13:10:20 +0100
Subject: [PATCH] [FEATURE] Implement suite factory add_or_update (#10796)

---
 .../core/factory/suite_factory.py        |  44 ++-
 tests/core/factory/test_suite_factory.py | 270 +++++++++++++++++-
 2 files changed, 310 insertions(+), 4 deletions(-)

diff --git a/great_expectations/core/factory/suite_factory.py b/great_expectations/core/factory/suite_factory.py
index e17e666b3bc0..1c0a85859a42 100644
--- a/great_expectations/core/factory/suite_factory.py
+++ b/great_expectations/core/factory/suite_factory.py
@@ -33,7 +33,7 @@ def _include_rendered_content(self) -> bool:
     def add(self, suite: ExpectationSuite) -> ExpectationSuite:
         """Add an ExpectationSuite to the collection.
 
-        Parameters:
+        Args:
             suite: ExpectationSuite to add
 
         Raises:
@@ -62,7 +62,7 @@ def add(self, suite: ExpectationSuite) -> ExpectationSuite:
     def delete(self, name: str) -> None:
         """Delete an ExpectationSuite from the collection.
 
-        Parameters:
+        Args:
             name: The name of the ExpectationSuite to delete
 
         Raises:
@@ -89,7 +89,7 @@ def delete(self, name: str) -> None:
     def get(self, name: str) -> ExpectationSuite:
         """Get an ExpectationSuite from the collection by name.
 
-        Parameters:
+        Args:
             name: Name of ExpectationSuite to get
 
         Raises:
@@ -125,3 +125,41 @@ def all(self) -> Iterable[ExpectationSuite]:
                 self._store.submit_all_deserialization_event(e)
                 raise
         return deserializable_suites
+
+    @public_api
+    def add_or_update(self, suite: ExpectationSuite) -> ExpectationSuite:
+        """Add or update an ExpectationSuite by name.
+
+        If an ExpectationSuite with the same name exists, overwrite it, otherwise
+        create a new ExpectationSuite. On update, Expectations in the Suite which
+        match a previously existing Expectation maintain a stable ID, and
+        Expectations which have changed receive a new ID.
+
+        Args:
+            suite: ExpectationSuite to add or update. On update it is modified
+                in place: matching Expectations and the suite itself take on
+                the IDs of their stored counterparts before it is saved.
+
+        Returns:
+            The value returned by `add` when no suite with this name is found,
+            otherwise the same `suite` object after it has been saved.
+        """
+        try:
+            existing_suite = self.get(name=suite.name)
+        except DataContextError:
+            return self.add(suite=suite)
+
+        # Carry IDs over to expectations that haven't changed. Each existing
+        # expectation is consumed once matched, so equal expectations in
+        # `suite` can never end up sharing a single ID.
+        unmatched = list(existing_suite.expectations)
+        for expectation in suite.expectations:
+            try:
+                match = unmatched.pop(unmatched.index(expectation))
+            except ValueError:
+                continue  # expectation is new or updated
+            expectation.id = match.id
+
+        suite.id = existing_suite.id
+        suite.save()
+        return suite
diff --git a/tests/core/factory/test_suite_factory.py b/tests/core/factory/test_suite_factory.py
index 0b7179eac0fa..393f2209259c 100644
--- a/tests/core/factory/test_suite_factory.py
+++ b/tests/core/factory/test_suite_factory.py
@@ -1,8 +1,12 @@
 import re
+from copy import copy
 from typing import Dict
 from unittest import mock
+from unittest.mock import (
+    ANY,
+    Mock,  # noqa: TID251
+)
 from unittest.mock import ANY as ANY_TEST_ARG
-from unittest.mock import Mock  # noqa: TID251
 
 import pytest
 from pytest_mock import MockerFixture
@@ -19,6 +23,10 @@
 from great_expectations.data_context.data_context.context_factory import set_context
 from great_expectations.data_context.store import ExpectationsStore
 from great_expectations.exceptions import DataContextError
+from great_expectations.expectations import (
+    ExpectColumnDistinctValuesToContainSet,
+    ExpectColumnSumToBeBetween,
+)
 from great_expectations.types import SerializableDictDot
 
 
@@ -325,6 +333,266 @@ def test_suite_factory_all_with_bad_pydantic_config(
     assert re.match("pydantic.*ValidationError", analytics_submit_args.error_type)
 
 
+class TestSuiteFactoryAddOrUpdate:
+    """Exercise `context.suites.add_or_update` on several context fixtures.
+
+    Each scenario lives in a private `_test_*` helper and is run once per
+    context fixture (filesystem, cloud, ephemeral) by three thin wrappers.
+    """
+
+    @pytest.mark.filesystem
+    def test_add_empty_new_suite__filesystem(self, empty_data_context):
+        self._test_add_empty_new_suite(empty_data_context)
+
+    @pytest.mark.cloud
+    def test_add_empty_new_suite__cloud(self, empty_cloud_context_fluent):
+        self._test_add_empty_new_suite(empty_cloud_context_fluent)
+
+    @pytest.mark.unit
+    def test_add_empty_new_suite__ephemeral(self, ephemeral_context_with_defaults):
+        self._test_add_empty_new_suite(ephemeral_context_with_defaults)
+
+    def _test_add_empty_new_suite(self, context: AbstractDataContext):
+        # arrange
+        suite_name = "suite A"
+        suite = ExpectationSuite(name=suite_name)
+
+        # act
+        created_suite = context.suites.add_or_update(suite=suite)
+
+        # assert
+        assert created_suite.id
+        context.suites.get(suite_name)
+
+    @pytest.mark.filesystem
+    def test_add_new_suite_with_expectations__filesystem(self, empty_data_context):
+        self._test_add_new_suite_with_expectations(empty_data_context)
+
+    @pytest.mark.cloud
+    def test_add_new_suite_with_expectations__cloud(self, empty_cloud_context_fluent):
+        self._test_add_new_suite_with_expectations(empty_cloud_context_fluent)
+
+    @pytest.mark.unit
+    def test_add_new_suite_with_expectations__ephemeral(self, ephemeral_context_with_defaults):
+        self._test_add_new_suite_with_expectations(ephemeral_context_with_defaults)
+
+    def _test_add_new_suite_with_expectations(self, context: AbstractDataContext):
+        # arrange
+        suite_name = "suite A"
+        expectations = [
+            ExpectColumnSumToBeBetween(
+                column="col A",
+                min_value=0,
+                max_value=10,
+            ),
+            ExpectColumnDistinctValuesToContainSet(
+                column="col B",
+                value_set=["a", "b", "c"],
+            ),
+        ]
+        suite = ExpectationSuite(
+            name=suite_name,
+            expectations=[copy(exp) for exp in expectations],
+        )
+
+        # act
+        created_suite = context.suites.add_or_update(suite=suite)
+
+        # assert
+        assert created_suite.id
+        context.suites.get(suite_name)
+        for exp, created_exp in zip(expectations, created_suite.expectations):
+            assert created_exp.id
+            exp.id = ANY
+            assert exp == created_exp
+
+    @pytest.mark.filesystem
+    def test_update_existing_suite_adds_expectations__filesystem(self, empty_data_context):
+        self._test_update_existing_suite_adds_expectations(empty_data_context)
+
+    @pytest.mark.cloud
+    def test_update_existing_suite_adds_expectations__cloud(self, empty_cloud_context_fluent):
+        self._test_update_existing_suite_adds_expectations(empty_cloud_context_fluent)
+
+    @pytest.mark.unit
+    def test_update_existing_suite_adds_expectations__ephemeral(
+        self, ephemeral_context_with_defaults
+    ):
+        self._test_update_existing_suite_adds_expectations(ephemeral_context_with_defaults)
+
+    def _test_update_existing_suite_adds_expectations(self, context: AbstractDataContext):
+        # arrange
+        suite_name = "suite A"
+        expectations = [
+            ExpectColumnSumToBeBetween(
+                column="col A",
+                min_value=0,
+                max_value=10,
+            ),
+            ExpectColumnDistinctValuesToContainSet(
+                column="col B",
+                value_set=["a", "b", "c"],
+            ),
+        ]
+        suite = ExpectationSuite(
+            name=suite_name,
+            expectations=[copy(exp) for exp in expectations],
+        )
+        existing_suite = context.suites.add(suite=ExpectationSuite(name=suite_name))
+
+        # act
+        updated_suite = context.suites.add_or_update(suite=suite)
+
+        # assert
+        assert updated_suite.id == existing_suite.id
+        for exp, created_exp in zip(expectations, updated_suite.expectations):
+            assert created_exp.id
+            exp.id = ANY
+            assert exp == created_exp
+
+    @pytest.mark.filesystem
+    def test_update_existing_suite_updates_expectations__filesystem(self, empty_data_context):
+        self._test_update_existing_suite_updates_expectations(empty_data_context)
+
+    @pytest.mark.cloud
+    def test_update_existing_suite_updates_expectations__cloud(self, empty_cloud_context_fluent):
+        self._test_update_existing_suite_updates_expectations(empty_cloud_context_fluent)
+
+    @pytest.mark.unit
+    def test_update_existing_suite_updates_expectations__ephemeral(
+        self, ephemeral_context_with_defaults
+    ):
+        self._test_update_existing_suite_updates_expectations(ephemeral_context_with_defaults)
+
+    def _test_update_existing_suite_updates_expectations(self, context: AbstractDataContext):
+        # arrange
+        suite_name = "suite A"
+        expectations = [
+            ExpectColumnSumToBeBetween(
+                column="col A",
+                min_value=0,
+                max_value=10,
+            ),
+            ExpectColumnDistinctValuesToContainSet(
+                column="col B",
+                value_set=["a", "b", "c"],
+            ),
+        ]
+        existing_suite = context.suites.add(
+            suite=ExpectationSuite(
+                name=suite_name,
+                expectations=[copy(exp) for exp in expectations],
+            )
+        )
+        new_col_name = "col C"
+        for exp in expectations:
+            exp.column = new_col_name
+        suite = ExpectationSuite(
+            name=suite_name,
+            expectations=[copy(exp) for exp in expectations],
+        )
+
+        # act
+        updated_suite = context.suites.add_or_update(suite=suite)
+
+        # assert
+        assert updated_suite.id == existing_suite.id
+        for exp, created_exp in zip(expectations, updated_suite.expectations):
+            assert created_exp.id
+            exp.id = ANY
+            assert exp == created_exp
+            assert created_exp.column == new_col_name  # type: ignore[attr-defined] # column exists
+
+        for old_exp, new_exp in zip(existing_suite.expectations, updated_suite.expectations):
+            # expectations have been deleted and re added, not updated
+            assert old_exp.id != new_exp.id
+
+    @pytest.mark.filesystem
+    def test_update_existing_suite_deletes_expectations__filesystem(self, empty_data_context):
+        self._test_update_existing_suite_deletes_expectations(empty_data_context)
+
+    @pytest.mark.cloud
+    def test_update_existing_suite_deletes_expectations__cloud(self, empty_cloud_context_fluent):
+        self._test_update_existing_suite_deletes_expectations(empty_cloud_context_fluent)
+
+    @pytest.mark.unit
+    def test_update_existing_suite_deletes_expectations__ephemeral(
+        self, ephemeral_context_with_defaults
+    ):
+        self._test_update_existing_suite_deletes_expectations(ephemeral_context_with_defaults)
+
+    def _test_update_existing_suite_deletes_expectations(self, context: AbstractDataContext):
+        # arrange
+        suite_name = "suite A"
+        expectations = [
+            ExpectColumnSumToBeBetween(
+                column="col A",
+                min_value=0,
+                max_value=10,
+            ),
+            ExpectColumnDistinctValuesToContainSet(
+                column="col B",
+                value_set=["a", "b", "c"],
+            ),
+        ]
+        existing_suite = context.suites.add(
+            suite=ExpectationSuite(
+                name=suite_name,
+                expectations=[copy(exp) for exp in expectations],
+            )
+        )
+        suite = ExpectationSuite(
+            name=suite_name,
+            expectations=[],
+        )
+
+        # act
+        updated_suite = context.suites.add_or_update(suite=suite)
+
+        # assert
+        assert updated_suite.id == existing_suite.id
+        assert updated_suite.expectations == []
+
+    @pytest.mark.filesystem
+    def test_add_or_update_is_idempotent__filesystem(self, empty_data_context):
+        self._test_add_or_update_is_idempotent(empty_data_context)
+
+    @pytest.mark.cloud
+    def test_add_or_update_is_idempotent__cloud(self, empty_cloud_context_fluent):
+        self._test_add_or_update_is_idempotent(empty_cloud_context_fluent)
+
+    @pytest.mark.unit
+    def test_add_or_update_is_idempotent__ephemeral(self, ephemeral_context_with_defaults):
+        self._test_add_or_update_is_idempotent(ephemeral_context_with_defaults)
+
+    def _test_add_or_update_is_idempotent(self, context: AbstractDataContext):
+        # arrange
+        suite_name = "suite A"
+        expectations = [
+            ExpectColumnSumToBeBetween(
+                column="col A",
+                min_value=0,
+                max_value=10,
+            ),
+            ExpectColumnDistinctValuesToContainSet(
+                column="col B",
+                value_set=["a", "b", "c"],
+            ),
+        ]
+        suite = ExpectationSuite(
+            name=suite_name,
+            expectations=[copy(exp) for exp in expectations],
+        )
+
+        # act
+        suite_1 = context.suites.add_or_update(suite=suite)
+        suite_2 = context.suites.add_or_update(suite=suite)
+        suite_3 = context.suites.add_or_update(suite=suite)
+
+        # assert
+        assert suite_1 == suite_2 == suite_3
+
+
 class TestSuiteFactoryAnalytics:
     @pytest.mark.filesystem
     def test_suite_factory_add_emits_event_filesystem(self, empty_data_context):