Skip to content

Commit 12bf286

Browse files
authored
[Issue #2405] Account for updates when last_upd_date is null (#2406)
## Summary Fixes #2405 ### Time to review: __5 mins__ ## Changes proposed Fallback to using `created_date` when `last_upd_date` is null ## Context for reviewers The legacy Oracle database has a last_upd_date timestamp that we use for finding updates to our system. This field is always null when a record is first inserted, and only on receiving at least one update will it be populated. This means that if a record is inserted, we copy over last_upd_date=null and then an update happens, we try to do effectively: where null < {some timestamp} which means we find nothing when trying to update. What we instead should do is fallback to using the created_date timestamp if the update date is null. ## Additional information Found this when I realized there were cases where we just weren't processing updates. Turns out, the ELT process doesn't handle updates pretty much at all right now (this is the common case)
1 parent 2a7f4c1 commit 12bf286

File tree

3 files changed

+27
-10
lines changed

3 files changed

+27
-10
lines changed

api/src/data_migration/load/sql.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,15 @@ def build_select_updated_rows_sql(
6767
== sqlalchemy.tuple_(*source_table.primary_key.columns),
6868
)
6969
# `WHERE ...`
70-
.where(destination_table.c.last_upd_date < source_table.c.last_upd_date)
70+
# NOTE: The legacy system doesn't populate the last_upd_date unless at least one update
71+
# has occurred, so we need to fallback to the created_date otherwise it will always be
72+
# null on the destination table side
73+
.where(
74+
sqlalchemy.func.coalesce(
75+
destination_table.c.last_upd_date, destination_table.c.created_date
76+
)
77+
< source_table.c.last_upd_date
78+
)
7179
.order_by(*source_table.primary_key.columns)
7280
)
7381

api/tests/src/data_migration/load/test_load_oracle_data_task.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ def test_load_data(self, db_session, foreign_tables, staging_tables, enable_fact
8585
source_record4 = ForeignTopportunityFactory.create(
8686
opportunity_id=4, oppnumber="A-4-update", cfdas=[], last_upd_date=time3
8787
)
88+
source_record5 = ForeignTopportunityFactory.create(
89+
opportunity_id=6, oppnumber="A-6-update", cfdas=[], last_upd_date=time3
90+
)
8891

8992
## Destination records
9093
# unchanged:
@@ -95,6 +98,9 @@ def test_load_data(self, db_session, foreign_tables, staging_tables, enable_fact
9598
StagingTopportunityFactory.create(
9699
opportunity_id=4, oppnumber="A-4", cfdas=[], last_upd_date=time1
97100
)
101+
StagingTopportunityFactory.create(
102+
opportunity_id=6, oppnumber="A-6", cfdas=[], last_upd_date=None
103+
)
98104
# delete:
99105
StagingTopportunityFactory.create(
100106
opportunity_id=5, oppnumber="A-5", cfdas=[], last_upd_date=time2
@@ -109,8 +115,8 @@ def test_load_data(self, db_session, foreign_tables, staging_tables, enable_fact
109115
# this prevents some weirdness with the value comparison we'll do
110116
db_session.expire_all()
111117

112-
assert db_session.query(source_table).count() == 4
113-
assert db_session.query(destination_table).count() == 5
118+
assert db_session.query(source_table).count() == 5
119+
assert db_session.query(destination_table).count() == 6
114120

115121
destination_records = (
116122
db_session.query(destination_table).order_by(destination_table.c.opportunity_id).all()
@@ -123,13 +129,14 @@ def test_load_data(self, db_session, foreign_tables, staging_tables, enable_fact
123129
)
124130
validate_copied_value(source_table, source_record4, destination_records[3])
125131
validate_copied_value(source_table, None, destination_records[4], is_delete=True)
132+
validate_copied_value(source_table, source_record5, destination_records[5])
126133

127134
assert task.metrics["count.delete.topportunity"] == 1
128135
assert task.metrics["count.insert.topportunity"] == 2
129-
assert task.metrics["count.update.topportunity"] == 1
136+
assert task.metrics["count.update.topportunity"] == 2
130137
assert task.metrics["count.delete.total"] == 1
131138
assert task.metrics["count.insert.total"] == 2
132-
assert task.metrics["count.update.total"] == 1
139+
assert task.metrics["count.update.total"] == 2
133140

134141
def test_raises_if_table_dicts_different(self, db_session, foreign_tables, staging_tables):
135142
with pytest.raises(

api/tests/src/data_migration/load/test_sql.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def source_table(sqlalchemy_metadata):
2222
sqlalchemy.Column("id2", sqlalchemy.Integer, primary_key=True),
2323
sqlalchemy.Column("x", sqlalchemy.Text),
2424
sqlalchemy.Column("last_upd_date", sqlalchemy.TIMESTAMP),
25+
sqlalchemy.Column("created_date", sqlalchemy.TIMESTAMP),
2526
)
2627

2728

@@ -35,6 +36,7 @@ def destination_table(sqlalchemy_metadata):
3536
sqlalchemy.Column("x", sqlalchemy.Text),
3637
sqlalchemy.Column("is_deleted", sqlalchemy.Boolean),
3738
sqlalchemy.Column("last_upd_date", sqlalchemy.TIMESTAMP),
39+
sqlalchemy.Column("created_date", sqlalchemy.TIMESTAMP),
3840
)
3941

4042

@@ -59,17 +61,17 @@ def test_build_select_updated_rows_sql(source_table, destination_table):
5961
"JOIN test_source_table ON "
6062
"(test_destination_table.id1, test_destination_table.id2) = "
6163
"(test_source_table.id1, test_source_table.id2) \n"
62-
"WHERE test_destination_table.last_upd_date < test_source_table.last_upd_date "
64+
"WHERE coalesce(test_destination_table.last_upd_date, test_destination_table.created_date) < test_source_table.last_upd_date "
6365
"ORDER BY test_source_table.id1, test_source_table.id2"
6466
)
6567

6668

6769
def test_build_insert_select_sql(source_table, destination_table):
6870
insert = sql.build_insert_select_sql(source_table, destination_table, [(1, 2), (3, 4), (5, 6)])
6971
assert str(insert) == (
70-
"INSERT INTO test_destination_table (id1, id2, x, last_upd_date, is_deleted) "
72+
"INSERT INTO test_destination_table (id1, id2, x, last_upd_date, created_date, is_deleted) "
7173
"SELECT test_source_table.id1, test_source_table.id2, test_source_table.x, "
72-
"test_source_table.last_upd_date, FALSE AS is_deleted \n"
74+
"test_source_table.last_upd_date, test_source_table.created_date, FALSE AS is_deleted \n"
7375
"FROM test_source_table \n"
7476
"WHERE (test_source_table.id1, test_source_table.id2) IN (__[POSTCOMPILE_param_1])"
7577
)
@@ -80,8 +82,8 @@ def test_build_update_sql(source_table, destination_table):
8082
assert str(update) == (
8183
"UPDATE test_destination_table "
8284
"SET id1=test_source_table.id1, id2=test_source_table.id2, x=test_source_table.x, "
83-
"last_upd_date=test_source_table.last_upd_date FROM test_source_table "
84-
"WHERE (test_destination_table.id1, test_destination_table.id2) = "
85+
"last_upd_date=test_source_table.last_upd_date, created_date=test_source_table.created_date "
86+
"FROM test_source_table WHERE (test_destination_table.id1, test_destination_table.id2) = "
8587
"(test_source_table.id1, test_source_table.id2) AND "
8688
"(test_source_table.id1, test_source_table.id2) "
8789
"IN (__[POSTCOMPILE_param_1])"

0 commit comments

Comments
 (0)