Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Null-safe equality for unique_key within delete+insert and merge incremental strategies #110

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20240227105926.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: Fix incremental merge to use an `is null` check to allow for handling of nullable
fields in the unique_key
time: 2024-02-27T10:59:59.202000+00:00
custom:
Author: amardatar
Issue: "7597"
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@
select 'NY','New York','Manhattan','2021-04-01'
union all
select 'PA','Philadelphia','Philadelphia','2021-05-21'
union all
select 'CO','Denver',null,'2021-06-18'

"""

Expand All @@ -265,6 +267,8 @@
select 'NY','New York','Manhattan','2021-04-01'
union all
select 'PA','Philadelphia','Philadelphia','2021-05-21'
union all
select 'CO','Denver',null,'2021-06-18'

"""

Expand All @@ -288,6 +292,7 @@
NY,Kings,Brooklyn,2021-04-02
NY,New York,Manhattan,2021-04-01
PA,Philadelphia,Philadelphia,2021-05-21
CO,Denver,,2021-06-18
"""

seeds__add_new_rows_sql = """
Expand Down Expand Up @@ -439,7 +444,7 @@ def fail_to_build_inc_missing_unique_key_column(self, incremental_model_name):
def test__no_unique_keys(self, project):
"""with no unique keys, seed and model should match"""

expected_fields = self.get_expected_fields(relation="seed", seed_rows=8)
expected_fields = self.get_expected_fields(relation="seed", seed_rows=9)
test_case_fields = self.get_test_fields(
project, seed="seed", incremental_model="no_unique_key", update_sql_file="add_new_rows"
)
Expand All @@ -449,7 +454,7 @@ def test__no_unique_keys(self, project):
def test__empty_str_unique_key(self, project):
"""with empty string for unique key, seed and model should match"""

expected_fields = self.get_expected_fields(relation="seed", seed_rows=8)
expected_fields = self.get_expected_fields(relation="seed", seed_rows=9)
test_case_fields = self.get_test_fields(
project,
seed="seed",
Expand All @@ -462,7 +467,7 @@ def test__one_unique_key(self, project):
"""with one unique key, model will overwrite existing row"""

expected_fields = self.get_expected_fields(
relation="one_str__overwrite", seed_rows=7, opt_model_count=1
relation="one_str__overwrite", seed_rows=8, opt_model_count=1
)
test_case_fields = self.get_test_fields(
project,
Expand All @@ -487,7 +492,7 @@ def test__bad_unique_key(self, project):
def test__empty_unique_key_list(self, project):
"""with no unique keys, seed and model should match"""

expected_fields = self.get_expected_fields(relation="seed", seed_rows=8)
expected_fields = self.get_expected_fields(relation="seed", seed_rows=9)
test_case_fields = self.get_test_fields(
project,
seed="seed",
Expand All @@ -500,7 +505,7 @@ def test__unary_unique_key_list(self, project):
"""with one unique key, model will overwrite existing row"""

expected_fields = self.get_expected_fields(
relation="unique_key_list__inplace_overwrite", seed_rows=7, opt_model_count=1
relation="unique_key_list__inplace_overwrite", seed_rows=8, opt_model_count=1
)
test_case_fields = self.get_test_fields(
project,
Expand All @@ -515,7 +520,7 @@ def test__duplicated_unary_unique_key_list(self, project):
"""with two of the same unique key, model will overwrite existing row"""

expected_fields = self.get_expected_fields(
relation="unique_key_list__inplace_overwrite", seed_rows=7, opt_model_count=1
relation="unique_key_list__inplace_overwrite", seed_rows=8, opt_model_count=1
)
test_case_fields = self.get_test_fields(
project,
Expand All @@ -530,7 +535,7 @@ def test__trinary_unique_key_list(self, project):
"""with three unique keys, model will overwrite existing row"""

expected_fields = self.get_expected_fields(
relation="unique_key_list__inplace_overwrite", seed_rows=7, opt_model_count=1
relation="unique_key_list__inplace_overwrite", seed_rows=8, opt_model_count=1
)
test_case_fields = self.get_test_fields(
project,
Expand All @@ -545,7 +550,7 @@ def test__trinary_unique_key_list_no_update(self, project):
"""even with three unique keys, adding distinct rows to seed does not
cause seed and model to diverge"""

expected_fields = self.get_expected_fields(relation="seed", seed_rows=8)
expected_fields = self.get_expected_fields(relation="seed", seed_rows=9)
test_case_fields = self.get_test_fields(
project,
seed="seed",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@
{% if unique_key is sequence and unique_key is not mapping and unique_key is not string %}
{% for key in unique_key %}
{% set this_key_match %}
DBT_INTERNAL_SOURCE.{{ key }} = DBT_INTERNAL_DEST.{{ key }}
DBT_INTERNAL_SOURCE.{{ key }} = DBT_INTERNAL_DEST.{{ key }} or (DBT_INTERNAL_SOURCE.{{ key }} is null and DBT_INTERNAL_DEST.{{ key }} is null)
{% endset %}
{% do predicates.append(this_key_match) %}
{% endfor %}
{% else %}
{% set unique_key_match %}
DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }}
DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }} or (DBT_INTERNAL_SOURCE.{{ unique_key }} is null and DBT_INTERNAL_DEST.{{ unique_key }} is null)
{% endset %}
{% do predicates.append(unique_key_match) %}
{% endif %}
Expand Down Expand Up @@ -62,12 +62,12 @@

{% if unique_key %}
{% if unique_key is sequence and unique_key is not string %}
delete from {{target }}
delete from {{ target }}
using {{ source }}
where (
{% for key in unique_key %}
{{ source }}.{{ key }} = {{ target }}.{{ key }}
{{ "and " if not loop.last}}
({{ source }}.{{ key }} = {{ target }}.{{ key }} or ({{ source }}.{{ key }} is null and {{ target }}.{{ key }} is null))
{{ "and " if not loop.last }}
{% endfor %}
{% if incremental_predicates %}
{% for predicate in incremental_predicates %}
Expand Down