Skip to content

Commit 2dd4994

Browse files
authored
Merge pull request #23953 from benesch/dbt-materialize-unknown-types
dbt-materialize: more gracefully handle contracts on unknown types
2 parents fb26da5 + e837a48 commit 2dd4994

File tree

3 files changed

+127
-4
lines changed

3 files changed

+127
-4
lines changed

misc/dbt-materialize/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# dbt-materialize Changelog
22

3+
## Unreleased
4+
5+
* Backport [dbt-core #8887](https://github.com/dbt-labs/dbt-core/pull/8887) to
6+
unblock users using any custom type with data contracts.
7+
38
## 1.7.1 - 2023-12-14
49

510
* Remove the dependency of data contracts pre-flight checks on the existence of

misc/dbt-materialize/dbt/adapters/materialize/connections.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from typing import Optional
1919

2020
import psycopg2
21+
from psycopg2.extensions import string_types
2122
from psycopg2.extras import register_uuid
2223

2324
import dbt.adapters.postgres.connections
@@ -31,6 +32,12 @@
3132

3233
logger = AdapterLogger("Materialize")
3334

35+
# NOTE(morsapaes): registering the UUID type produces nicer error messages when
36+
# data contracts fail on a UUID type. See comment in the
37+
# `data_type_code_to_name` method for details. We may be able to remove this
38+
# when dbt-core#8900 lands.
39+
register_uuid()
40+
3441
# Override the psycopg2 connect function in order to inject Materialize-specific
3542
# session parameter defaults.
3643
#
@@ -50,10 +57,6 @@ def connect(**kwargs):
5057
]
5158
kwargs["options"] = " ".join(options)
5259

53-
# NOTE(morsapaes): work around dbt-core #8353 while #8900 doesn't land to
54-
# unblock users using UUID types.
55-
register_uuid()
56-
5760
return _connect(**kwargs)
5861

5962

@@ -135,6 +138,40 @@ def cancel(self, connection):
135138
# probably bad, re-raise it
136139
raise
137140

141+
# NOTE(benesch): this is a backport, with modifications, of dbt-core#8887.
142+
# TODO(benesch): consider removing this when v1.8 ships with this code.
143+
@classmethod
def data_type_code_to_name(cls, type_code: int) -> str:
    """Map a psycopg2 type code (a type OID) to a type name.

    Type codes registered in psycopg2's `string_types` resolve to the name
    of the registered type. Any other code resolves to a made-up name that
    embeds the OID, so distinct unknown types get distinct names.
    """
    if type_code not in string_types:
        # psycopg2 has no entry for this OID, so invent a name that is
        # unique per OID. What that means for data contracts:
        #
        #   * A valid contract on an unknown type just works. Example:
        #     `mz_timestamp` is a special Materialize type (OID 16552)
        #     that psycopg2 does not know. When both the contract and the
        #     model's column are `mz_timestamp`, validation passes; the
        #     user never sees that dbt compared these two strings:
        #
        #         expected: `custom type unknown to dbt (OID 16552)`
        #         actual:   `custom type unknown to dbt (OID 16552)`
        #
        #   * An invalid contract yields an ugly message. If the contract
        #     says `timestamp` but the column is really `mz_timestamp`,
        #     dbt reports something like "expected type DATETIME, got
        #     custom type unknown to dbt (OID 16552)".
        #
        # Ugly, but far better than stock dbt 1.7, which raises
        # "Unhandled error while executing: 16552" (see dbt-core#8353).
        return f"custom type unknown to dbt (OID {type_code})"

    return string_types[type_code].name
174+
138175
# Disable transactions. Materialize transactions do not support arbitrary
139176
# queries in transactions and therefore many of dbt's internal macros
140177
# produce invalid transactions.
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import pytest
2+
from dbt.tests.util import run_dbt, run_dbt_and_capture
3+
4+
# NOTE(benesch): these tests are backported, with modifications, from
5+
# dbt-core#8887.
6+
7+
# TODO(benesch): consider removing these tests when v1.8 ships with these tests
8+
# as part of core.
9+
10+
# Model SQL: a single column `ts` holding a literal cast to `timestamp`.
my_timestamp_model_sql = """
11+
select
12+
'2023-01-01T00:00:00'::timestamp as ts
13+
"""
14+
15+
# Model SQL: a single column `ts` holding a literal cast to `mz_timestamp`.
my_mz_timestamp_model_sql = """
16+
select
17+
'1672531200000'::mz_timestamp as ts
18+
"""
19+
20+
# Schema for `my_model`: enforced contract declaring `ts` as `timestamp`.
model_schema_timestamp_yml = """
21+
models:
22+
- name: my_model
23+
config:
24+
contract:
25+
enforced: true
26+
columns:
27+
- name: ts
28+
data_type: timestamp
29+
"""
30+
31+
# Schema for `my_model`: enforced contract declaring `ts` as `mz_timestamp`.
model_schema_mz_timestamp_yml = """
32+
models:
33+
- name: my_model
34+
config:
35+
contract:
36+
enforced: true
37+
columns:
38+
- name: ts
39+
data_type: mz_timestamp
40+
"""
41+
42+
43+
class TestModelContractUnrecognizedTypeCode1:
    """Contract and model agree on `mz_timestamp`; `dbt run` should pass."""

    @pytest.fixture(scope="class")
    def models(self):
        """Return the project files: an `mz_timestamp` model and a matching contract."""
        project_files = {}
        project_files["my_model.sql"] = my_mz_timestamp_model_sql
        project_files["schema.yml"] = model_schema_mz_timestamp_yml
        return project_files

    def test_nonstandard_data_type(self, project):
        # `project` is requested only as a fixture and is not used directly
        # (presumably it sets up the dbt project -- confirm against the harness).
        dbt_args = ["run"]
        run_dbt(dbt_args, expect_pass=True)
53+
54+
55+
class TestModelContractUnrecognizedTypeCodeActualMismatch:
    """Model yields `mz_timestamp` but the contract declares `timestamp`."""

    @pytest.fixture(scope="class")
    def models(self):
        """Return the project files: an `mz_timestamp` model and a `timestamp` contract."""
        project_files = {}
        project_files["my_model.sql"] = my_mz_timestamp_model_sql
        project_files["schema.yml"] = model_schema_timestamp_yml
        return project_files

    def test_nonstandard_data_type(self, project):
        # The run is expected to fail; only the captured log text is checked.
        _, captured = run_dbt_and_capture(["run"], expect_pass=False)
        mismatch_row = "custom type unknown to dbt (OID 16552) | DATETIME | data type mismatch"
        assert mismatch_row in captured
67+
68+
69+
class TestModelContractUnrecognizedTypeCodeExpectedMismatch:
    """Model yields `timestamp` but the contract declares `mz_timestamp`."""

    @pytest.fixture(scope="class")
    def models(self):
        """Return the project files: a `timestamp` model and an `mz_timestamp` contract."""
        return {
            "my_model.sql": my_timestamp_model_sql,
            "schema.yml": model_schema_mz_timestamp_yml,
        }

    def test_nonstandard_data_type(self, project):
        # The run is expected to fail; the log must contain the mismatch row
        # naming both DATETIME and the OID-based placeholder.
        expected_msg = "DATETIME | custom type unknown to dbt (OID 16552) | data type mismatch"
        _, logs = run_dbt_and_capture(["run"], expect_pass=False)
        # No debug print of `logs` here: it only added noise, and the sibling
        # mismatch test does not print either.
        assert expected_msg in logs

0 commit comments

Comments
 (0)