Skip to content

Commit

Permalink
[FEATURE] Implement suite factory add_or_update (#10796)
Browse files Browse the repository at this point in the history
  • Loading branch information
joshua-stauffer authored Dec 20, 2024
1 parent 6bff223 commit c534b64
Show file tree
Hide file tree
Showing 2 changed files with 299 additions and 4 deletions.
36 changes: 33 additions & 3 deletions great_expectations/core/factory/suite_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def _include_rendered_content(self) -> bool:
def add(self, suite: ExpectationSuite) -> ExpectationSuite:
"""Add an ExpectationSuite to the collection.
Parameters:
Args:
suite: ExpectationSuite to add
Raises:
Expand Down Expand Up @@ -62,7 +62,7 @@ def add(self, suite: ExpectationSuite) -> ExpectationSuite:
def delete(self, name: str) -> None:
"""Delete an ExpectationSuite from the collection.
Parameters:
Args:
name: The name of the ExpectationSuite to delete
Raises:
Expand All @@ -89,7 +89,7 @@ def delete(self, name: str) -> None:
def get(self, name: str) -> ExpectationSuite:
"""Get an ExpectationSuite from the collection by name.
Parameters:
Args:
name: Name of ExpectationSuite to get
Raises:
Expand Down Expand Up @@ -125,3 +125,33 @@ def all(self) -> Iterable[ExpectationSuite]:
self._store.submit_all_deserialization_event(e)
raise
return deserializable_suites

@public_api
def add_or_update(self, suite: ExpectationSuite) -> ExpectationSuite:
    """Create the given ExpectationSuite, or overwrite the stored one sharing its name.

    When a suite with the same name can be retrieved, the incoming suite takes
    over its ID and is saved in its place. Expectations in the incoming suite
    that compare equal to an expectation of the stored suite keep that
    expectation's ID; all others are left as passed in, so changed or new
    Expectations receive a new ID.

    Args:
        suite: ExpectationSuite to add or update

    Returns:
        The result of ``add`` when no suite with that name could be retrieved,
        otherwise the same ``suite`` object after it has been saved.
    """
    try:
        stored_suite = self.get(name=suite.name)
    except DataContextError:
        # Any DataContextError from the lookup is treated as "not stored yet".
        return self.add(suite=suite)

    # Carry stable IDs over to expectations that have not changed.
    stored_expectations = stored_suite.expectations
    for incoming in suite.expectations:
        try:
            position = stored_expectations.index(incoming)
            incoming.id = stored_expectations[position].id
        except ValueError:
            # No equal expectation is stored: this one is new or updated.
            continue

    suite.id = stored_suite.id
    suite.save()
    return suite
267 changes: 266 additions & 1 deletion tests/core/factory/test_suite_factory.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import re
from copy import copy
from typing import Dict
from unittest import mock
from unittest.mock import (
ANY,
Mock, # noqa: TID251
)
from unittest.mock import ANY as ANY_TEST_ARG
from unittest.mock import Mock # noqa: TID251

import pytest
from pytest_mock import MockerFixture
Expand All @@ -19,6 +23,10 @@
from great_expectations.data_context.data_context.context_factory import set_context
from great_expectations.data_context.store import ExpectationsStore
from great_expectations.exceptions import DataContextError
from great_expectations.expectations import (
ExpectColumnDistinctValuesToContainSet,
ExpectColumnSumToBeBetween,
)
from great_expectations.types import SerializableDictDot


Expand Down Expand Up @@ -325,6 +333,263 @@ def test_suite_factory_all_with_bad_pydantic_config(
assert re.match("pydantic.*ValidationError", analytics_submit_args.error_type)


class TestSuiteFactoryAddOrUpdate:
    """Scenario tests for ``context.suites.add_or_update``.

    Every scenario is implemented once in a private ``_test_*`` method and is
    run against a filesystem, a cloud and an ephemeral context through thin
    wrappers that only carry the pytest marker and the context fixture.
    """

    @staticmethod
    def _make_expectations() -> list:
        """Return a fresh list holding the two expectations shared by all scenarios."""
        return [
            ExpectColumnSumToBeBetween(
                column="col A",
                min_value=0,
                max_value=10,
            ),
            ExpectColumnDistinctValuesToContainSet(
                column="col B",
                value_set=["a", "b", "c"],
            ),
        ]

    @staticmethod
    def _assert_expectations_match(expected: list, actual: list) -> None:
        """Assert that ``actual`` holds the ``expected`` expectations, each with an ID.

        Note: sets ``id = ANY`` on every item of ``expected`` so that the
        equality check does not depend on the concrete ID that was assigned.
        """
        # zip() stops at the shorter input, so without this check a suite that
        # lost its expectations would pass the loop below vacuously.
        assert len(actual) == len(expected)
        for exp, created_exp in zip(expected, actual):
            assert created_exp.id
            exp.id = ANY
            assert exp == created_exp

    @pytest.mark.filesystem
    def test_add_empty_new_suite__filesystem(self, empty_data_context):
        self._test_add_empty_new_suite(empty_data_context)

    @pytest.mark.cloud
    def test_add_empty_new_suite__cloud(self, empty_cloud_context_fluent):
        self._test_add_empty_new_suite(empty_cloud_context_fluent)

    @pytest.mark.unit
    def test_add_empty_new_suite__ephemeral(self, ephemeral_context_with_defaults):
        self._test_add_empty_new_suite(ephemeral_context_with_defaults)

    def _test_add_empty_new_suite(self, context: AbstractDataContext):
        """A suite without expectations that is not stored yet gets created."""
        # arrange
        suite_name = "suite A"
        suite = ExpectationSuite(name=suite_name)

        # act
        created_suite = context.suites.add_or_update(suite=suite)

        # assert
        assert created_suite.id
        # the lookup doubles as the assertion: the suite must be retrievable by name
        context.suites.get(suite_name)

    @pytest.mark.filesystem
    def test_add_new_suite_with_expectations_filesystem(self, empty_data_context):
        self._test_add_new_suite_with_expectations(empty_data_context)

    @pytest.mark.cloud
    def test_add_new_suite_with_expectations__cloud(self, empty_cloud_context_fluent):
        self._test_add_new_suite_with_expectations(empty_cloud_context_fluent)

    @pytest.mark.unit
    def test_add_new_suite_with_expectations__ephemeral(self, ephemeral_context_with_defaults):
        self._test_add_new_suite_with_expectations(ephemeral_context_with_defaults)

    def _test_add_new_suite_with_expectations(self, context: AbstractDataContext):
        """A new suite is created together with its expectations, which all get IDs."""
        # arrange
        suite_name = "suite A"
        expectations = self._make_expectations()
        suite = ExpectationSuite(
            name=suite_name,
            # copies keep the local list usable as the expected value afterwards
            expectations=[copy(exp) for exp in expectations],
        )

        # act
        created_suite = context.suites.add_or_update(suite=suite)

        # assert
        assert created_suite.id
        context.suites.get(suite_name)
        self._assert_expectations_match(expectations, created_suite.expectations)

    @pytest.mark.filesystem
    def test_update_existing_suite_adds_expectations__filesystem(self, empty_data_context):
        self._test_update_existing_suite_adds_expectations(empty_data_context)

    @pytest.mark.cloud
    def test_update_existing_suite_adds_expectations__cloud(self, empty_cloud_context_fluent):
        self._test_update_existing_suite_adds_expectations(empty_cloud_context_fluent)

    @pytest.mark.unit
    def test_update_existing_suite_adds_expectations__ephemeral(
        self, ephemeral_context_with_defaults
    ):
        self._test_update_existing_suite_adds_expectations(ephemeral_context_with_defaults)

    def _test_update_existing_suite_adds_expectations(self, context: AbstractDataContext):
        """Updating an existing empty suite keeps its ID and adds the new expectations."""
        # arrange
        suite_name = "suite A"
        expectations = self._make_expectations()
        suite = ExpectationSuite(
            name=suite_name,
            expectations=[copy(exp) for exp in expectations],
        )
        existing_suite = context.suites.add(suite=ExpectationSuite(name=suite_name))

        # act
        updated_suite = context.suites.add_or_update(suite=suite)

        # assert
        assert updated_suite.id == existing_suite.id
        self._assert_expectations_match(expectations, updated_suite.expectations)

    @pytest.mark.filesystem
    def test_update_existing_suite_updates_expectations__filesystem(self, empty_data_context):
        self._test_update_existing_suite_updates_expectations(empty_data_context)

    @pytest.mark.cloud
    def test_update_existing_suite_updates_expectations__cloud(self, empty_cloud_context_fluent):
        self._test_update_existing_suite_updates_expectations(empty_cloud_context_fluent)

    @pytest.mark.unit
    def test_update_existing_suite_updates_expectations__ephemeral(
        self, ephemeral_context_with_defaults
    ):
        self._test_update_existing_suite_updates_expectations(ephemeral_context_with_defaults)

    def _test_update_existing_suite_updates_expectations(self, context: AbstractDataContext):
        """Changed expectations replace the stored ones and receive new IDs."""
        # arrange
        suite_name = "suite A"
        expectations = self._make_expectations()
        existing_suite = context.suites.add(
            suite=ExpectationSuite(
                name=suite_name,
                expectations=[copy(exp) for exp in expectations],
            )
        )
        new_col_name = "col C"
        for exp in expectations:
            exp.column = new_col_name
        suite = ExpectationSuite(
            name=suite_name,
            expectations=[copy(exp) for exp in expectations],
        )

        # act
        updated_suite = context.suites.add_or_update(suite=suite)

        # assert
        assert updated_suite.id == existing_suite.id
        self._assert_expectations_match(expectations, updated_suite.expectations)
        for created_exp in updated_suite.expectations:
            assert created_exp.column == new_col_name  # type: ignore[attr-defined] # column exists

        # guard against zip() silently comparing fewer pairs than intended
        assert len(existing_suite.expectations) == len(updated_suite.expectations)
        for old_exp, new_exp in zip(existing_suite.expectations, updated_suite.expectations):
            # expectations have been deleted and re-added, not updated
            assert old_exp.id != new_exp.id

    @pytest.mark.filesystem
    def test_update_existing_suite_deletes_expectations__filesystem(self, empty_data_context):
        self._test_update_existing_suite_deletes_expectations(empty_data_context)

    @pytest.mark.cloud
    def test_update_existing_suite_deletes_expectations__cloud(self, empty_cloud_context_fluent):
        self._test_update_existing_suite_deletes_expectations(empty_cloud_context_fluent)

    @pytest.mark.unit
    def test_update_existing_suite_deletes_expectations__ephemeral(
        self, ephemeral_context_with_defaults
    ):
        self._test_update_existing_suite_deletes_expectations(ephemeral_context_with_defaults)

    def _test_update_existing_suite_deletes_expectations(self, context: AbstractDataContext):
        """Updating with an empty suite keeps the suite ID and drops all expectations."""
        # arrange
        suite_name = "suite A"
        existing_suite = context.suites.add(
            suite=ExpectationSuite(
                name=suite_name,
                expectations=self._make_expectations(),
            )
        )
        suite = ExpectationSuite(
            name=suite_name,
            expectations=[],
        )

        # act
        updated_suite = context.suites.add_or_update(suite=suite)

        # assert
        assert updated_suite.id == existing_suite.id
        assert updated_suite.expectations == []

    @pytest.mark.filesystem
    def test_add_or_update_is_idempotent__filesystem(self, empty_data_context):
        self._test_add_or_update_is_idempotent(empty_data_context)

    @pytest.mark.cloud
    def test_add_or_update_is_idempotent__cloud(self, empty_cloud_context_fluent):
        self._test_add_or_update_is_idempotent(empty_cloud_context_fluent)

    @pytest.mark.unit
    def test_add_or_update_is_idempotent__ephemeral(self, ephemeral_context_with_defaults):
        self._test_add_or_update_is_idempotent(ephemeral_context_with_defaults)

    def _test_add_or_update_is_idempotent(self, context: AbstractDataContext):
        """Calling add_or_update repeatedly with the same suite yields equal results."""
        # arrange
        suite_name = "suite A"
        suite = ExpectationSuite(
            name=suite_name,
            expectations=self._make_expectations(),
        )

        # act
        suite_1 = context.suites.add_or_update(suite=suite)
        suite_2 = context.suites.add_or_update(suite=suite)
        suite_3 = context.suites.add_or_update(suite=suite)

        # assert
        assert suite_1 == suite_2 == suite_3


class TestSuiteFactoryAnalytics:
@pytest.mark.filesystem
def test_suite_factory_add_emits_event_filesystem(self, empty_data_context):
Expand Down

0 comments on commit c534b64

Please sign in to comment.