From 67eae2791b62bafb7751b32c676facb4b59fccab Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 30 Sep 2024 11:08:54 -0400 Subject: [PATCH 01/10] Make SnapshotConfig.unique_key an Optional[Union[str, List[str]]] --- core/dbt/artifacts/resources/v1/snapshot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/artifacts/resources/v1/snapshot.py b/core/dbt/artifacts/resources/v1/snapshot.py index 464d94bae69..db275984ea4 100644 --- a/core/dbt/artifacts/resources/v1/snapshot.py +++ b/core/dbt/artifacts/resources/v1/snapshot.py @@ -19,7 +19,7 @@ class SnapshotMetaColumnNames(dbtClassMixin): class SnapshotConfig(NodeConfig): materialized: str = "snapshot" strategy: Optional[str] = None - unique_key: Optional[str] = None + unique_key: Optional[Union[str, List[str]]] = None target_schema: Optional[str] = None target_database: Optional[str] = None updated_at: Optional[str] = None From 9d0cc3c2947f327333581da5ee48434e3b30e882 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 30 Sep 2024 11:10:54 -0400 Subject: [PATCH 02/10] Update dev-requirements.txt --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 20605e632b8..7d08c73ccd4 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/dbt-labs/dbt-adapters.git@main +git+https://github.com/dbt-labs/dbt-adapters.git@multiple-snapshot-unique-keys git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter git+https://github.com/dbt-labs/dbt-common.git@main git+https://github.com/dbt-labs/dbt-postgres.git@main From 8ee3f63a15208d9ef743c5150e580dd78e5604c9 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 1 Oct 2024 13:42:39 -0400 Subject: [PATCH 03/10] Changie, update test --- .../unreleased/Features-20241001-134051.yaml | 6 + schemas/dbt/manifest/v12.json | 108 ++++++++++++++++++ .../snapshots/test_basic_snapshot.py | 7 ++ 3 files changed, 121 insertions(+) create mode 100644 .changes/unreleased/Features-20241001-134051.yaml diff --git a/.changes/unreleased/Features-20241001-134051.yaml b/.changes/unreleased/Features-20241001-134051.yaml new file mode 100644 index 00000000000..60ada51ece3 --- /dev/null +++ b/.changes/unreleased/Features-20241001-134051.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Enable use of multi-column unique key in snapshots +time: 2024-10-01T13:40:51.297529-04:00 +custom: + Author: gshank + Issue: "9992" diff --git a/schemas/dbt/manifest/v12.json b/schemas/dbt/manifest/v12.json index d1246d526ce..285da79ae98 100644 --- a/schemas/dbt/manifest/v12.json +++ b/schemas/dbt/manifest/v12.json @@ -706,6 +706,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -1739,6 +1745,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -2387,6 +2399,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -3172,6 +3190,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -3976,6 +4000,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -5331,6 +5361,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -5979,6 +6015,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -6498,6 +6540,12 @@ { "type": "string" }, + { + "type": "array", + "items": { + "type": "string" + } + }, { "type": "null" } @@ -6931,6 +6979,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -10510,6 +10564,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -11543,6 +11603,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -12191,6 +12257,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -12976,6 +13048,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -13780,6 +13858,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -15135,6 +15219,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -15783,6 +15873,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -16302,6 +16398,12 @@ { "type": "string" }, + { + "type": "array", + "items": { + "type": "string" + } + }, { "type": "null" } @@ -16735,6 +16837,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { diff --git a/tests/functional/snapshots/test_basic_snapshot.py b/tests/functional/snapshots/test_basic_snapshot.py index b5a508b04a9..7e2e6429ed7 100644 --- a/tests/functional/snapshots/test_basic_snapshot.py +++ b/tests/functional/snapshots/test_basic_snapshot.py @@ -3,6 +3,7 @@ import pytest import pytz +from dbt.artifacts.resources.types import NodeType from dbt.tests.util import ( check_relations_equal, @@ -73,6 +74,12 @@ def snapshot_setup(project, num_snapshot_models=1): path = os.path.join(project.test_data_dir, "seed_pg.sql") project.run_sql_file(path) + manifest = run_dbt(["parse"]) + for node in manifest.nodes.values(): + if node.resource_type != NodeType.Snapshot: + continue + print(f"--- {node.unique_id} => {node.config.unique_key}, {type(node.config.unique_key).__name__}") + results = run_dbt(["snapshot"]) assert len(results) == num_snapshot_models From 8cf171987a59ca0596087c5e18d19956aa9312d1 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 1 Oct 2024 14:28:58 -0400 Subject: [PATCH 04/10] Remove instrumentation from test --- tests/functional/snapshots/test_basic_snapshot.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/functional/snapshots/test_basic_snapshot.py b/tests/functional/snapshots/test_basic_snapshot.py index 7e2e6429ed7..755f6d46e99 100644 --- a/tests/functional/snapshots/test_basic_snapshot.py +++ b/tests/functional/snapshots/test_basic_snapshot.py @@ -74,12 +74,6 @@ def snapshot_setup(project, num_snapshot_models=1): path = os.path.join(project.test_data_dir, "seed_pg.sql") project.run_sql_file(path) - manifest = run_dbt(["parse"]) - for node in manifest.nodes.values(): - if node.resource_type != NodeType.Snapshot: - continue - print(f"--- {node.unique_id} => {node.config.unique_key}, {type(node.config.unique_key).__name__}") - results = run_dbt(["snapshot"]) assert len(results) == num_snapshot_models From 2fd03f0a167ae445f3c16c59af741247f02cc58d Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 1 Oct 2024 15:03:24 -0400 Subject: [PATCH 05/10] Use dbt-common branch with is_list filter --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 7d08c73ccd4..2be59703511 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,6 +1,6 @@ git+https://github.com/dbt-labs/dbt-adapters.git@multiple-snapshot-unique-keys git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter -git+https://github.com/dbt-labs/dbt-common.git@main +git+https://github.com/dbt-labs/dbt-common.git@add_jinja_is_list_filter git+https://github.com/dbt-labs/dbt-postgres.git@main # black must match what's in .pre-commit-config.yaml to be sure local env matches CI black==24.3.0 From bef8ea2c96da2b7d13c4b76c7bebb24ea138e4ac Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 1 Oct 2024 17:19:57 -0400 Subject: [PATCH 06/10] Remove unneeded import --- tests/functional/snapshots/test_basic_snapshot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/functional/snapshots/test_basic_snapshot.py b/tests/functional/snapshots/test_basic_snapshot.py index 755f6d46e99..b5a508b04a9 100644 --- a/tests/functional/snapshots/test_basic_snapshot.py +++ b/tests/functional/snapshots/test_basic_snapshot.py @@ -3,7 +3,6 @@ import pytest import pytz -from dbt.artifacts.resources.types import NodeType from dbt.tests.util import ( check_relations_equal, From 46f2e6ab0b925db854172488c3581710e99dba19 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 22 Oct 2024 12:00:20 -0400 Subject: [PATCH 07/10] Remove branches from dev-requirements.txt --- dev-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 2be59703511..20605e632b8 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,6 +1,6 @@ -git+https://github.com/dbt-labs/dbt-adapters.git@multiple-snapshot-unique-keys +git+https://github.com/dbt-labs/dbt-adapters.git@main git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter -git+https://github.com/dbt-labs/dbt-common.git@add_jinja_is_list_filter +git+https://github.com/dbt-labs/dbt-common.git@main git+https://github.com/dbt-labs/dbt-postgres.git@main # black must match what's in .pre-commit-config.yaml to be sure local env matches CI black==24.3.0 From 6e30d29c7fddd3691380df9b1bb8653f2cbf8218 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 22 Oct 2024 13:37:17 -0400 Subject: [PATCH 08/10] Add test case --- .../adapter/simple_snapshot/fixtures.py | 430 ++++++++++++++++++ .../simple_snapshot/test_various_configs.py | 278 +++++++++++ 2 files changed, 708 insertions(+) create mode 100644 tests/functional/adapter/simple_snapshot/fixtures.py create mode 100644 tests/functional/adapter/simple_snapshot/test_various_configs.py diff --git a/tests/functional/adapter/simple_snapshot/fixtures.py b/tests/functional/adapter/simple_snapshot/fixtures.py new file mode 100644 index 00000000000..cec28a7d64d --- /dev/null +++ b/tests/functional/adapter/simple_snapshot/fixtures.py @@ -0,0 +1,430 @@ +create_seed_sql = """ +create table {schema}.seed ( + id INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), + updated_at TIMESTAMP +); +""" + +create_snapshot_expected_sql = """ +create table {schema}.snapshot_expected ( + id INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), + + -- snapshotting fields + updated_at TIMESTAMP, + test_valid_from TIMESTAMP, + test_valid_to TIMESTAMP, + test_scd_id TEXT, + test_updated_at TIMESTAMP +); +""" + + +seed_insert_sql = """ +-- seed inserts +-- use the same email for two users to verify that duplicated check_cols values +-- are handled appropriately +insert into {schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values +(1, 'Judith', 'Kennedy', '(not provided)', 'Female', '54.60.24.128', '2015-12-24 12:19:28'), +(2, 'Arthur', 'Kelly', '(not provided)', 'Male', '62.56.24.215', '2015-10-28 16:22:15'), +(3, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'), +(4, 'Ralph', 'Turner', 'rturner3@hp.com', 'Male', '157.83.76.114', '2016-08-08 00:06:51'), +(5, 'Laura', 'Gonzales', 'lgonzales4@howstuffworks.com', 'Female', '30.54.105.168', '2016-09-01 08:25:38'), +(6, 'Katherine', 'Lopez', 'klopez5@yahoo.co.jp', 'Female', '169.138.46.89', '2016-08-30 18:52:11'), +(7, 'Jeremy', 'Hamilton', 'jhamilton6@mozilla.org', 'Male', '231.189.13.133', '2016-07-17 02:09:46'), +(8, 'Heather', 'Rose', 'hrose7@goodreads.com', 'Female', '87.165.201.65', '2015-12-29 22:03:56'), +(9, 'Gregory', 'Kelly', 'gkelly8@trellian.com', 'Male', '154.209.99.7', '2016-03-24 21:18:16'), +(10, 'Rachel', 'Lopez', 'rlopez9@themeforest.net', 'Female', '237.165.82.71', '2016-08-20 15:44:49'), +(11, 'Donna', 'Welch', 'dwelcha@shutterfly.com', 'Female', '103.33.110.138', '2016-02-27 01:41:48'), +(12, 'Russell', 'Lawrence', 'rlawrenceb@qq.com', 'Male', '189.115.73.4', '2016-06-11 03:07:09'), +(13, 'Michelle', 'Montgomery', 'mmontgomeryc@scientificamerican.com', 'Female', '243.220.95.82', '2016-06-18 16:27:19'), +(14, 'Walter', 'Castillo', 'wcastillod@pagesperso-orange.fr', 'Male', '71.159.238.196', '2016-10-06 01:55:44'), +(15, 'Robin', 'Mills', 'rmillse@vkontakte.ru', 'Female', '172.190.5.50', '2016-10-31 11:41:21'), +(16, 'Raymond', 'Holmes', 'rholmesf@usgs.gov', 'Male', '148.153.166.95', '2016-10-03 08:16:38'), +(17, 'Gary', 'Bishop', 'gbishopg@plala.or.jp', 'Male', '161.108.182.13', '2016-08-29 19:35:20'), +(18, 'Anna', 'Riley', 'arileyh@nasa.gov', 'Female', '253.31.108.22', '2015-12-11 04:34:27'), +(19, 'Sarah', 'Knight', 'sknighti@foxnews.com', 'Female', '222.220.3.177', '2016-09-26 00:49:06'), +(20, 'Phyllis', 'Fox', null, 'Female', '163.191.232.95', '2016-08-21 10:35:19'); +""" + + +populate_snapshot_expected_sql = """ +-- populate snapshot table +insert into {schema}.snapshot_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + test_valid_from, + test_valid_to, + test_updated_at, + test_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by snapshotting + updated_at as test_valid_from, + null::timestamp as test_valid_to, + updated_at as test_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id +from {schema}.seed; +""" + +populate_snapshot_expected_valid_to_current_sql = """ +-- populate snapshot table +insert into {schema}.snapshot_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + test_valid_from, + test_valid_to, + test_updated_at, + test_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by snapshotting + updated_at as test_valid_from, + date('2099-12-31') as test_valid_to, + updated_at as test_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id +from {schema}.seed; +""" + +snapshot_actual_sql = """ +{% snapshot snapshot_actual %} + + {{ + config( + unique_key='id || ' ~ "'-'" ~ ' || first_name', + ) + }} + + select * from {{target.database}}.{{target.schema}}.seed + +{% endsnapshot %} +""" + +snapshots_yml = """ +snapshots: + - name: snapshot_actual + config: + strategy: timestamp + updated_at: updated_at + snapshot_meta_column_names: + dbt_valid_to: test_valid_to + dbt_valid_from: test_valid_from + dbt_scd_id: test_scd_id + dbt_updated_at: test_updated_at +""" + +snapshots_no_column_names_yml = """ +snapshots: + - name: snapshot_actual + config: + strategy: timestamp + updated_at: updated_at +""" + +ref_snapshot_sql = """ +select * from {{ ref('snapshot_actual') }} +""" + + +invalidate_sql = """ +-- update records 11 - 21. Change email and updated_at field +update {schema}.seed set + updated_at = updated_at + interval '1 hour', + email = case when id = 20 then 'pfoxj@creativecommons.org' else 'new_' || email end +where id >= 10 and id <= 20; + + +-- invalidate records 11 - 21 +update {schema}.snapshot_expected set + test_valid_to = updated_at + interval '1 hour' +where id >= 10 and id <= 20; + +""" + +update_sql = """ +-- insert v2 of the 11 - 21 records + +insert into {schema}.snapshot_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + test_valid_from, + test_valid_to, + test_updated_at, + test_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by snapshotting + updated_at as test_valid_from, + null::timestamp as test_valid_to, + updated_at as test_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id +from {schema}.seed +where id >= 10 and id <= 20; +""" + +# valid_to_current fixtures + +snapshots_valid_to_current_yml = """ +snapshots: + - name: snapshot_actual + config: + strategy: timestamp + updated_at: updated_at + dbt_valid_to_current: "date('2099-12-31')" + snapshot_meta_column_names: + dbt_valid_to: test_valid_to + dbt_valid_from: test_valid_from + dbt_scd_id: test_scd_id + dbt_updated_at: test_updated_at +""" + +update_with_current_sql = """ +-- insert v2 of the 11 - 21 records + +insert into {schema}.snapshot_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + test_valid_from, + test_valid_to, + test_updated_at, + test_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by snapshotting + updated_at as test_valid_from, + date('2099-12-31') as test_valid_to, + updated_at as test_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id +from {schema}.seed +where id >= 10 and id <= 20; +""" + + +# multi-key snapshot fixtures + +create_multi_key_seed_sql = """ +create table {schema}.seed ( + id1 INTEGER, + id2 INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), + updated_at TIMESTAMP +); +""" + + +create_multi_key_snapshot_expected_sql = """ +create table {schema}.snapshot_expected ( + id1 INTEGER, + id2 INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), + + -- snapshotting fields + updated_at TIMESTAMP, + test_valid_from TIMESTAMP, + test_valid_to TIMESTAMP, + test_scd_id TEXT, + test_updated_at TIMESTAMP +); +""" + +seed_multi_key_insert_sql = """ +-- seed inserts +-- use the same email for two users to verify that duplicated check_cols values +-- are handled appropriately +insert into {schema}.seed (id1, id2, first_name, last_name, email, gender, ip_address, updated_at) values +(1, 100, 'Judith', 'Kennedy', '(not provided)', 'Female', '54.60.24.128', '2015-12-24 12:19:28'), +(2, 200, 'Arthur', 'Kelly', '(not provided)', 'Male', '62.56.24.215', '2015-10-28 16:22:15'), +(3, 300, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'), +(4, 400, 'Ralph', 'Turner', 'rturner3@hp.com', 'Male', '157.83.76.114', '2016-08-08 00:06:51'), +(5, 500, 'Laura', 'Gonzales', 'lgonzales4@howstuffworks.com', 'Female', '30.54.105.168', '2016-09-01 08:25:38'), +(6, 600, 'Katherine', 'Lopez', 'klopez5@yahoo.co.jp', 'Female', '169.138.46.89', '2016-08-30 18:52:11'), +(7, 700, 'Jeremy', 'Hamilton', 'jhamilton6@mozilla.org', 'Male', '231.189.13.133', '2016-07-17 02:09:46'), +(8, 800, 'Heather', 'Rose', 'hrose7@goodreads.com', 'Female', '87.165.201.65', '2015-12-29 22:03:56'), +(9, 900, 'Gregory', 'Kelly', 'gkelly8@trellian.com', 'Male', '154.209.99.7', '2016-03-24 21:18:16'), +(10, 1000, 'Rachel', 'Lopez', 'rlopez9@themeforest.net', 'Female', '237.165.82.71', '2016-08-20 15:44:49'), +(11, 1100, 'Donna', 'Welch', 'dwelcha@shutterfly.com', 'Female', '103.33.110.138', '2016-02-27 01:41:48'), +(12, 1200, 'Russell', 'Lawrence', 'rlawrenceb@qq.com', 'Male', '189.115.73.4', '2016-06-11 03:07:09'), +(13, 1300, 'Michelle', 'Montgomery', 'mmontgomeryc@scientificamerican.com', 'Female', '243.220.95.82', '2016-06-18 16:27:19'), +(14, 1400, 'Walter', 'Castillo', 'wcastillod@pagesperso-orange.fr', 'Male', '71.159.238.196', '2016-10-06 01:55:44'), +(15, 1500, 'Robin', 'Mills', 'rmillse@vkontakte.ru', 'Female', '172.190.5.50', '2016-10-31 11:41:21'), +(16, 1600, 'Raymond', 'Holmes', 'rholmesf@usgs.gov', 'Male', '148.153.166.95', '2016-10-03 08:16:38'), +(17, 1700, 'Gary', 'Bishop', 'gbishopg@plala.or.jp', 'Male', '161.108.182.13', '2016-08-29 19:35:20'), +(18, 1800, 'Anna', 'Riley', 'arileyh@nasa.gov', 'Female', '253.31.108.22', '2015-12-11 04:34:27'), +(19, 1900, 'Sarah', 'Knight', 'sknighti@foxnews.com', 'Female', '222.220.3.177', '2016-09-26 00:49:06'), +(20, 2000, 'Phyllis', 'Fox', null, 'Female', '163.191.232.95', '2016-08-21 10:35:19'); +""" + +populate_multi_key_snapshot_expected_sql = """ +-- populate snapshot table +insert into {schema}.snapshot_expected ( + id1, + id2, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + test_valid_from, + test_valid_to, + test_updated_at, + test_scd_id +) + +select + id1, + id2, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by snapshotting + updated_at as test_valid_from, + null::timestamp as test_valid_to, + updated_at as test_updated_at, + md5(id1::text || '|' || id2::text || '|' || updated_at::text) as test_scd_id +from {schema}.seed; +""" + +model_seed_sql = """ +select * from {{target.database}}.{{target.schema}}.seed +""" + +snapshots_multi_key_yml = """ +snapshots: + - name: snapshot_actual + relation: "ref('seed')" + config: + strategy: timestamp + updated_at: updated_at + unique_key: + - id1 + - id2 + snapshot_meta_column_names: + dbt_valid_to: test_valid_to + dbt_valid_from: test_valid_from + dbt_scd_id: test_scd_id + dbt_updated_at: test_updated_at +""" + +invalidate_multi_key_sql = """ +-- update records 11 - 21. Change email and updated_at field +update {schema}.seed set + updated_at = updated_at + interval '1 hour', + email = case when id1 = 20 then 'pfoxj@creativecommons.org' else 'new_' || email end +where id1 >= 10 and id1 <= 20; + + +-- invalidate records 11 - 21 +update {schema}.snapshot_expected set + test_valid_to = updated_at + interval '1 hour' +where id1 >= 10 and id1 <= 20; + +""" + +update_multi_key_sql = """ +-- insert v2 of the 11 - 21 records + +insert into {schema}.snapshot_expected ( + id1, + id2, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + test_valid_from, + test_valid_to, + test_updated_at, + test_scd_id +) + +select + id1, + id2, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by snapshotting + updated_at as test_valid_from, + null::timestamp as test_valid_to, + updated_at as test_updated_at, + md5(id1::text || '|' || id2::text || '|' || updated_at::text) as test_scd_id +from {schema}.seed +where id1 >= 10 and id1 <= 20; +""" diff --git a/tests/functional/adapter/simple_snapshot/test_various_configs.py b/tests/functional/adapter/simple_snapshot/test_various_configs.py new file mode 100644 index 00000000000..805a641aa4e --- /dev/null +++ b/tests/functional/adapter/simple_snapshot/test_various_configs.py @@ -0,0 +1,278 @@ +import datetime + +import pytest + +from dbt.tests.util import ( + check_relations_equal, + get_manifest, + run_dbt, + run_dbt_and_capture, + run_sql_with_adapter, + update_config_file, +) +from tests.functional.adapter.simple_snapshot.fixtures import ( + create_multi_key_seed_sql, + create_multi_key_snapshot_expected_sql, + create_seed_sql, + create_snapshot_expected_sql, + invalidate_multi_key_sql, + invalidate_sql, + model_seed_sql, + populate_multi_key_snapshot_expected_sql, + populate_snapshot_expected_sql, + populate_snapshot_expected_valid_to_current_sql, + ref_snapshot_sql, + seed_insert_sql, + seed_multi_key_insert_sql, + snapshot_actual_sql, + snapshots_multi_key_yml, + snapshots_no_column_names_yml, + snapshots_valid_to_current_yml, + snapshots_yml, + update_multi_key_sql, + update_sql, + update_with_current_sql, +) + + +class BaseSnapshotColumnNames: + @pytest.fixture(scope="class") + def snapshots(self): + return {"snapshot.sql": snapshot_actual_sql} + + @pytest.fixture(scope="class") + def models(self): + return { + "snapshots.yml": snapshots_yml, + "ref_snapshot.sql": ref_snapshot_sql, + } + + def test_snapshot_column_names(self, project): + project.run_sql(create_seed_sql) + project.run_sql(create_snapshot_expected_sql) + project.run_sql(seed_insert_sql) + project.run_sql(populate_snapshot_expected_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + + project.run_sql(invalidate_sql) + project.run_sql(update_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + + # run_dbt(["test"]) + check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"]) + + +class TestSnapshotColumnNames(BaseSnapshotColumnNames): + pass + + +class BaseSnapshotColumnNamesFromDbtProject: + @pytest.fixture(scope="class") + def snapshots(self): + return {"snapshot.sql": snapshot_actual_sql} + + @pytest.fixture(scope="class") + def models(self): + return { + "snapshots.yml": snapshots_no_column_names_yml, + "ref_snapshot.sql": ref_snapshot_sql, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "snapshots": { + "test": { + "+snapshot_meta_column_names": { + "dbt_valid_to": "test_valid_to", + "dbt_valid_from": "test_valid_from", + "dbt_scd_id": "test_scd_id", + "dbt_updated_at": "test_updated_at", + } + } + } + } + + def test_snapshot_column_names_from_project(self, project): + project.run_sql(create_seed_sql) + project.run_sql(create_snapshot_expected_sql) + project.run_sql(seed_insert_sql) + project.run_sql(populate_snapshot_expected_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + + project.run_sql(invalidate_sql) + project.run_sql(update_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + + # run_dbt(["test"]) + check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"]) + + +class TestSnapshotColumnNamesFromDbtProject(BaseSnapshotColumnNamesFromDbtProject): + pass + + +class BaseSnapshotInvalidColumnNames: + @pytest.fixture(scope="class") + def snapshots(self): + return {"snapshot.sql": snapshot_actual_sql} + + @pytest.fixture(scope="class") + def models(self): + return { + "snapshots.yml": snapshots_no_column_names_yml, + "ref_snapshot.sql": ref_snapshot_sql, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "snapshots": { + "test": { + "+snapshot_meta_column_names": { + "dbt_valid_to": "test_valid_to", + "dbt_valid_from": "test_valid_from", + "dbt_scd_id": "test_scd_id", + "dbt_updated_at": "test_updated_at", + } + } + } + } + + def test_snapshot_invalid_column_names(self, project): + project.run_sql(create_seed_sql) + project.run_sql(create_snapshot_expected_sql) + project.run_sql(seed_insert_sql) + project.run_sql(populate_snapshot_expected_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + manifest = get_manifest(project.project_root) + snapshot_node = manifest.nodes["snapshot.test.snapshot_actual"] + snapshot_node.config.snapshot_meta_column_names == { + "dbt_valid_to": "test_valid_to", + "dbt_valid_from": "test_valid_from", + "dbt_scd_id": "test_scd_id", + "dbt_updated_at": "test_updated_at", + } + + project.run_sql(invalidate_sql) + project.run_sql(update_sql) + + # Change snapshot_meta_columns and look for an error + different_columns = { + "snapshots": { + "test": { + "+snapshot_meta_column_names": { + "dbt_valid_to": "test_valid_to", + "dbt_updated_at": "test_updated_at", + } + } + } + } + update_config_file(different_columns, "dbt_project.yml") + + results, log_output = run_dbt_and_capture(["snapshot"], expect_pass=False) + assert len(results) == 1 + assert "Compilation Error in snapshot snapshot_actual" in log_output + assert "Snapshot target is missing configured columns" in log_output + + +class TestSnapshotInvalidColumnNames(BaseSnapshotInvalidColumnNames): + pass + + +class BaseSnapshotDbtValidToCurrent: + @pytest.fixture(scope="class") + def snapshots(self): + return {"snapshot.sql": snapshot_actual_sql} + + @pytest.fixture(scope="class") + def models(self): + return { + "snapshots.yml": snapshots_valid_to_current_yml, + "ref_snapshot.sql": ref_snapshot_sql, + } + + def test_valid_to_current(self, project): + project.run_sql(create_seed_sql) + project.run_sql(create_snapshot_expected_sql) + project.run_sql(seed_insert_sql) + project.run_sql(populate_snapshot_expected_valid_to_current_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + manifest = get_manifest(project.project_root) + + original_snapshot = run_sql_with_adapter( + project.adapter, + "select id, test_scd_id, test_valid_to from {schema}.snapshot_actual", + "all", + ) + assert original_snapshot[0][2] == datetime.datetime(2099, 12, 31, 0, 0) + assert original_snapshot[9][2] == datetime.datetime(2099, 12, 31, 0, 0) + + project.run_sql(invalidate_sql) + project.run_sql(update_with_current_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + + updated_snapshot = run_sql_with_adapter( + project.adapter, + "select id, test_scd_id, test_valid_to from {schema}.snapshot_actual", + "all", + ) + assert updated_snapshot[0][2] == datetime.datetime(2099, 12, 31, 0, 0) + # Original row that was updated now has a non-current (2099/12/31) date + assert updated_snapshot[9][2] == datetime.datetime(2016, 8, 20, 16, 44, 49) + # Updated row has a current date + assert updated_snapshot[20][2] == datetime.datetime(2099, 12, 31, 0, 0) + + check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"]) + + +class TestSnapshotDbtValidToCurrent(BaseSnapshotDbtValidToCurrent): + pass + + +# This uses snapshot_meta_column_names, yaml-only snapshot def, +# and multiple keys +class BaseSnapshotMultiUniqueKey: + @pytest.fixture(scope="class") + def models(self): + return { + "seed.sql": model_seed_sql, + "snapshots.yml": snapshots_multi_key_yml, + "ref_snapshot.sql": ref_snapshot_sql, + } + + def test_multi_column_unique_key(self, project): + project.run_sql(create_multi_key_seed_sql) + project.run_sql(create_multi_key_snapshot_expected_sql) + project.run_sql(seed_multi_key_insert_sql) + project.run_sql(populate_multi_key_snapshot_expected_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + + project.run_sql(invalidate_multi_key_sql) + project.run_sql(update_multi_key_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + + # run_dbt(["test"]) + check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"]) + + +class TestSnapshotMultiUniqueKey(BaseSnapshotMultiUniqueKey): + pass From f4211074c39c1c7981e3f22ed6daa0e6c76af7bd Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 22 Oct 2024 13:41:28 -0400 Subject: [PATCH 09/10] Bump dbt-common and dbt-adapters versions --- core/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/setup.py b/core/setup.py index b7a8dabd14e..876cdd04cc1 100644 --- a/core/setup.py +++ b/core/setup.py @@ -71,8 +71,8 @@ "dbt-extractor>=0.5.0,<=0.6", "dbt-semantic-interfaces>=0.7.3,<0.8", # Minor versions for these are expected to be backwards-compatible - "dbt-common>=1.9.0,<2.0", - "dbt-adapters>=1.7.0,<2.0", + "dbt-common>=1.11.0,<2.0", + "dbt-adapters>=1.7.1,<2.0", # ---- # Expect compatibility with all new versions of these packages, so lower bounds only. "packaging>20.9", From 885d9b4d2b1039cb49e12c92a2774405f7030feb Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 22 Oct 2024 14:28:37 -0400 Subject: [PATCH 10/10] Make it work --- core/setup.py | 2 +- .../functional/adapter/simple_snapshot/test_various_configs.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/core/setup.py b/core/setup.py index 876cdd04cc1..b787ce8a923 100644 --- a/core/setup.py +++ b/core/setup.py @@ -72,7 +72,7 @@ "dbt-semantic-interfaces>=0.7.3,<0.8", # Minor versions for these are expected to be backwards-compatible "dbt-common>=1.11.0,<2.0", - "dbt-adapters>=1.7.1,<2.0", + "dbt-adapters>=1.7.0,<2.0", # ---- # Expect compatibility with all new versions of these packages, so lower bounds only. "packaging>20.9", diff --git a/tests/functional/adapter/simple_snapshot/test_various_configs.py b/tests/functional/adapter/simple_snapshot/test_various_configs.py index 805a641aa4e..d288d2934df 100644 --- a/tests/functional/adapter/simple_snapshot/test_various_configs.py +++ b/tests/functional/adapter/simple_snapshot/test_various_configs.py @@ -210,7 +210,6 @@ def test_valid_to_current(self, project): results = run_dbt(["snapshot"]) assert len(results) == 1 - manifest = get_manifest(project.project_root) original_snapshot = run_sql_with_adapter( project.adapter,