From c534b646d5ac75eeed1e8dccfa8d7ed42a4225d2 Mon Sep 17 00:00:00 2001
From: Joshua Stauffer <66793731+joshua-stauffer@users.noreply.github.com>
Date: Fri, 20 Dec 2024 13:10:20 +0100
Subject: [PATCH] [FEATURE] Implement suite factory add_or_update (#10796)

---
 .../core/factory/suite_factory.py             |  36 ++-
 tests/core/factory/test_suite_factory.py      | 267 +++++++++++++++++-
 2 files changed, 299 insertions(+), 4 deletions(-)

diff --git a/great_expectations/core/factory/suite_factory.py b/great_expectations/core/factory/suite_factory.py
index e17e666b3bc0..1c0a85859a42 100644
--- a/great_expectations/core/factory/suite_factory.py
+++ b/great_expectations/core/factory/suite_factory.py
@@ -33,7 +33,7 @@ def _include_rendered_content(self) -> bool:
     def add(self, suite: ExpectationSuite) -> ExpectationSuite:
         """Add an ExpectationSuite to the collection.
 
-        Parameters:
+        Args:
             suite: ExpectationSuite to add
 
         Raises:
@@ -62,7 +62,7 @@ def add(self, suite: ExpectationSuite) -> ExpectationSuite:
     def delete(self, name: str) -> None:
         """Delete an ExpectationSuite from the collection.
 
-        Parameters:
+        Args:
             name: The name of the ExpectationSuite to delete
 
         Raises:
@@ -89,7 +89,7 @@ def delete(self, name: str) -> None:
     def get(self, name: str) -> ExpectationSuite:
         """Get an ExpectationSuite from the collection by name.
 
-        Parameters:
+        Args:
             name: Name of ExpectationSuite to get
 
         Raises:
@@ -125,3 +125,33 @@ def all(self) -> Iterable[ExpectationSuite]:
                 self._store.submit_all_deserialization_event(e)
                 raise
         return deserializable_suites
+
+    @public_api
+    def add_or_update(self, suite: ExpectationSuite) -> ExpectationSuite:
+        """Add or update an ExpectationSuite by name.
+
+        If an ExpectationSuite with the same name exists, overwrite it, otherwise create a new
+        ExpectationSuite. On update, Expectations in the Suite which match a previously existing
+        Expectation maintain a stable ID, and Expectations which have changed receive a new ID.
+
+        Args:
+            suite: ExpectationSuite to add or update
+
+        Returns:
+            The added or updated ExpectationSuite.
+        """
+        try:
+            existing_suite = self.get(name=suite.name)
+        except DataContextError:
+            return self.add(suite=suite)  # get() failed: treat the suite as not stored yet
+
+        existing = existing_suite.expectations
+        for expectation in suite.expectations:
+            try:
+                expectation.id = existing[existing.index(expectation)].id  # unchanged: keep ID
+            except ValueError:
+                expectation.id = None  # new or changed: clear any stale ID
+
+        suite.id = existing_suite.id
+        suite.save()
+        return suite
diff --git a/tests/core/factory/test_suite_factory.py b/tests/core/factory/test_suite_factory.py
index 0b7179eac0fa..393f2209259c 100644
--- a/tests/core/factory/test_suite_factory.py
+++ b/tests/core/factory/test_suite_factory.py
@@ -1,8 +1,12 @@
 import re
+from copy import copy
 from typing import Dict
 from unittest import mock
+from unittest.mock import (
+    ANY,
+    Mock,  # noqa: TID251
+)
 from unittest.mock import ANY as ANY_TEST_ARG
-from unittest.mock import Mock  # noqa: TID251
 
 import pytest
 from pytest_mock import MockerFixture
@@ -19,6 +23,10 @@
 from great_expectations.data_context.data_context.context_factory import set_context
 from great_expectations.data_context.store import ExpectationsStore
 from great_expectations.exceptions import DataContextError
+from great_expectations.expectations import (
+    ExpectColumnDistinctValuesToContainSet,
+    ExpectColumnSumToBeBetween,
+)
 from great_expectations.types import SerializableDictDot
 
 
@@ -325,6 +333,263 @@ def test_suite_factory_all_with_bad_pydantic_config(
     assert re.match("pydantic.*ValidationError", analytics_submit_args.error_type)
 
 
+class TestSuiteFactoryAddOrUpdate:
+    """Exercise `context.suites.add_or_update` on filesystem, cloud and ephemeral contexts."""
+
+    @pytest.mark.filesystem
+    def test_add_empty_new_suite__filesystem(self, empty_data_context):
+        self._test_add_empty_new_suite(empty_data_context)
+
+    @pytest.mark.cloud
+    def test_add_empty_new_suite__cloud(self, empty_cloud_context_fluent):
+        self._test_add_empty_new_suite(empty_cloud_context_fluent)
+
+    @pytest.mark.unit
+    def test_add_empty_new_suite__ephemeral(self, ephemeral_context_with_defaults):
+        self._test_add_empty_new_suite(ephemeral_context_with_defaults)
+
+    def _test_add_empty_new_suite(self, context: AbstractDataContext):
+        # arrange
+        suite_name = "suite A"
+        suite = ExpectationSuite(name=suite_name)
+
+        # act
+        created_suite = context.suites.add_or_update(suite=suite)
+
+        # assert
+        assert created_suite.id
+        context.suites.get(suite_name)  # lookup by name must succeed for the new suite
+
+    @pytest.mark.filesystem
+    def test_add_new_suite_with_expectations__filesystem(self, empty_data_context):
+        self._test_add_new_suite_with_expectations(empty_data_context)
+
+    @pytest.mark.cloud
+    def test_add_new_suite_with_expectations__cloud(self, empty_cloud_context_fluent):
+        self._test_add_new_suite_with_expectations(empty_cloud_context_fluent)
+
+    @pytest.mark.unit
+    def test_add_new_suite_with_expectations__ephemeral(self, ephemeral_context_with_defaults):
+        self._test_add_new_suite_with_expectations(ephemeral_context_with_defaults)
+
+    def _test_add_new_suite_with_expectations(self, context: AbstractDataContext):
+        # arrange
+        suite_name = "suite A"
+        expectations = [
+            ExpectColumnSumToBeBetween(
+                column="col A",
+                min_value=0,
+                max_value=10,
+            ),
+            ExpectColumnDistinctValuesToContainSet(
+                column="col B",
+                value_set=["a", "b", "c"],
+            ),
+        ]
+        suite = ExpectationSuite(
+            name=suite_name,
+            expectations=[copy(exp) for exp in expectations],
+        )
+
+        # act
+        created_suite = context.suites.add_or_update(suite=suite)
+
+        # assert
+        assert created_suite.id
+        context.suites.get(suite_name)  # lookup by name must succeed for the new suite
+        assert len(created_suite.expectations) == len(expectations)  # zip() truncates silently
+        for exp, created_exp in zip(expectations, created_suite.expectations):
+            assert created_exp.id
+            exp.id = ANY  # the ID is not known up front, so let the comparison accept any value
+            assert exp == created_exp
+
+    @pytest.mark.filesystem
+    def test_update_existing_suite_adds_expectations__filesystem(self, empty_data_context):
+        self._test_update_existing_suite_adds_expectations(empty_data_context)
+
+    @pytest.mark.cloud
+    def test_update_existing_suite_adds_expectations__cloud(self, empty_cloud_context_fluent):
+        self._test_update_existing_suite_adds_expectations(empty_cloud_context_fluent)
+
+    @pytest.mark.unit
+    def test_update_existing_suite_adds_expectations__ephemeral(
+        self, ephemeral_context_with_defaults
+    ):
+        self._test_update_existing_suite_adds_expectations(ephemeral_context_with_defaults)
+
+    def _test_update_existing_suite_adds_expectations(self, context: AbstractDataContext):
+        # arrange: an empty suite is already stored under the name the new suite uses
+        suite_name = "suite A"
+        expectations = [
+            ExpectColumnSumToBeBetween(
+                column="col A",
+                min_value=0,
+                max_value=10,
+            ),
+            ExpectColumnDistinctValuesToContainSet(
+                column="col B",
+                value_set=["a", "b", "c"],
+            ),
+        ]
+        suite = ExpectationSuite(
+            name=suite_name,
+            expectations=[copy(exp) for exp in expectations],
+        )
+        existing_suite = context.suites.add(suite=ExpectationSuite(name=suite_name))
+
+        # act
+        updated_suite = context.suites.add_or_update(suite=suite)
+
+        # assert
+        assert updated_suite.id == existing_suite.id
+        assert len(updated_suite.expectations) == len(expectations)  # zip() truncates silently
+        for exp, created_exp in zip(expectations, updated_suite.expectations):
+            assert created_exp.id
+            exp.id = ANY  # the ID is not known up front, so let the comparison accept any value
+            assert exp == created_exp
+
+    @pytest.mark.filesystem
+    def test_update_existing_suite_updates_expectations__filesystem(self, empty_data_context):
+        self._test_update_existing_suite_updates_expectations(empty_data_context)
+
+    @pytest.mark.cloud
+    def test_update_existing_suite_updates_expectations__cloud(self, empty_cloud_context_fluent):
+        self._test_update_existing_suite_updates_expectations(empty_cloud_context_fluent)
+
+    @pytest.mark.unit
+    def test_update_existing_suite_updates_expectations__ephemeral(
+        self, ephemeral_context_with_defaults
+    ):
+        self._test_update_existing_suite_updates_expectations(ephemeral_context_with_defaults)
+
+    def _test_update_existing_suite_updates_expectations(self, context: AbstractDataContext):
+        # arrange: store a suite, then build a same-named suite whose expectations differ
+        suite_name = "suite A"
+        expectations = [
+            ExpectColumnSumToBeBetween(
+                column="col A",
+                min_value=0,
+                max_value=10,
+            ),
+            ExpectColumnDistinctValuesToContainSet(
+                column="col B",
+                value_set=["a", "b", "c"],
+            ),
+        ]
+        existing_suite = context.suites.add(
+            suite=ExpectationSuite(
+                name=suite_name,
+                expectations=[copy(exp) for exp in expectations],
+            )
+        )
+        new_col_name = "col C"
+        for exp in expectations:
+            exp.column = new_col_name
+        suite = ExpectationSuite(
+            name=suite_name,
+            expectations=[copy(exp) for exp in expectations],
+        )
+
+        # act
+        updated_suite = context.suites.add_or_update(suite=suite)
+
+        # assert
+        assert updated_suite.id == existing_suite.id
+        assert len(updated_suite.expectations) == len(expectations)  # zip() truncates silently
+        for exp, created_exp in zip(expectations, updated_suite.expectations):
+            assert created_exp.id
+            exp.id = ANY  # the ID is not known up front, so let the comparison accept any value
+            assert exp == created_exp
+            assert created_exp.column == new_col_name  # type: ignore[attr-defined]  # column exists
+
+        for old_exp, new_exp in zip(existing_suite.expectations, updated_suite.expectations):
+            # expectations have been deleted and re-added, not updated
+            assert old_exp.id != new_exp.id
+
+    @pytest.mark.filesystem
+    def test_update_existing_suite_deletes_expectations__filesystem(self, empty_data_context):
+        self._test_update_existing_suite_deletes_expectations(empty_data_context)
+
+    @pytest.mark.cloud
+    def test_update_existing_suite_deletes_expectations__cloud(self, empty_cloud_context_fluent):
+        self._test_update_existing_suite_deletes_expectations(empty_cloud_context_fluent)
+
+    @pytest.mark.unit
+    def test_update_existing_suite_deletes_expectations__ephemeral(
+        self, ephemeral_context_with_defaults
+    ):
+        self._test_update_existing_suite_deletes_expectations(ephemeral_context_with_defaults)
+
+    def _test_update_existing_suite_deletes_expectations(self, context: AbstractDataContext):
+        # arrange
+        suite_name = "suite A"
+        expectations = [
+            ExpectColumnSumToBeBetween(
+                column="col A",
+                min_value=0,
+                max_value=10,
+            ),
+            ExpectColumnDistinctValuesToContainSet(
+                column="col B",
+                value_set=["a", "b", "c"],
+            ),
+        ]
+        existing_suite = context.suites.add(
+            suite=ExpectationSuite(
+                name=suite_name,
+                expectations=[copy(exp) for exp in expectations],
+            )
+        )
+        # the replacement suite is empty, so the update should leave no expectations behind
+        suite = ExpectationSuite(name=suite_name, expectations=[])
+
+        # act
+        updated_suite = context.suites.add_or_update(suite=suite)
+
+        # assert
+        assert updated_suite.id == existing_suite.id
+        assert updated_suite.expectations == []
+
+    @pytest.mark.filesystem
+    def test_add_or_update_is_idempotent__filesystem(self, empty_data_context):
+        self._test_add_or_update_is_idempotent(empty_data_context)
+
+    @pytest.mark.cloud
+    def test_add_or_update_is_idempotent__cloud(self, empty_cloud_context_fluent):
+        self._test_add_or_update_is_idempotent(empty_cloud_context_fluent)
+
+    @pytest.mark.unit
+    def test_add_or_update_is_idempotent__ephemeral(self, ephemeral_context_with_defaults):
+        self._test_add_or_update_is_idempotent(ephemeral_context_with_defaults)
+
+    def _test_add_or_update_is_idempotent(self, context: AbstractDataContext):
+        # arrange
+        suite_name = "suite A"
+        expectations = [
+            ExpectColumnSumToBeBetween(
+                column="col A",
+                min_value=0,
+                max_value=10,
+            ),
+            ExpectColumnDistinctValuesToContainSet(
+                column="col B",
+                value_set=["a", "b", "c"],
+            ),
+        ]
+        suite = ExpectationSuite(
+            name=suite_name,
+            expectations=[copy(exp) for exp in expectations],
+        )
+
+        # act
+        suite_1 = context.suites.add_or_update(suite=suite)
+        suite_2 = context.suites.add_or_update(suite=suite)
+        suite_3 = context.suites.add_or_update(suite=suite)
+
+        # assert (NOTE(review): the update path returns its argument, so these may be one object)
+        assert suite_1 == suite_2 == suite_3
+
+
 class TestSuiteFactoryAnalytics:
     @pytest.mark.filesystem
     def test_suite_factory_add_emits_event_filesystem(self, empty_data_context):