Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 0b74046

Browse files
sungchun12 (Sung Won Chung) and Sung Won Chung authored
Track data-diff usage in MotherDuck (#800)
* Update DuckDB connection parameters
* remove submods
* tracking logic
* conditional connection
* semver parsing
* motherduck test configs
* remove submods
* add motherduck dbt test
* passing motherduck tests
* more readable config
* remove submods
* user agent spec
* previous presto version

---------

Co-authored-by: Sung Won Chung <[email protected]>
1 parent 71a1b3d commit 0b74046

File tree

9 files changed

+1112
-959
lines changed

9 files changed

+1112
-959
lines changed

.github/workflows/ci.yml

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ jobs:
6868
DATADIFF_CLICKHOUSE_URI: 'clickhouse://clickhouse:Password1@localhost:9000/clickhouse'
6969
DATADIFF_VERTICA_URI: 'vertica://vertica:Password1@localhost:5433/vertica'
7070
DATADIFF_REDSHIFT_URI: '${{ secrets.DATADIFF_REDSHIFT_URI }}'
71+
MOTHERDUCK_TOKEN: '${{ secrets.MOTHERDUCK_TOKEN }}'
7172
run: |
7273
chmod +x tests/waiting_for_stack_up.sh
7374
./tests/waiting_for_stack_up.sh && TEST_ACROSS_ALL_DBS=0 poetry run unittest-parallel -j 16

.github/workflows/ci_full.yml

+1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ jobs:
6464
DATADIFF_VERTICA_URI: 'vertica://vertica:Password1@localhost:5433/vertica'
6565
# DATADIFF_BIGQUERY_URI: '${{ secrets.DATADIFF_BIGQUERY_URI }}'
6666
DATADIFF_REDSHIFT_URI: '${{ secrets.DATADIFF_REDSHIFT_URI }}'
67+
MOTHERDUCK_TOKEN: '${{ secrets.MOTHERDUCK_TOKEN }}'
6768
run: |
6869
chmod +x tests/waiting_for_stack_up.sh
6970
./tests/waiting_for_stack_up.sh && poetry run unittest-parallel -j 16

data_diff/databases/duckdb.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from typing import Any, ClassVar, Dict, Union, Type
22

33
import attrs
4+
from packaging.version import parse as parse_version
45

56
from data_diff.utils import match_regexps
67
from data_diff.abcs.database_types import (
@@ -27,6 +28,7 @@
2728
CHECKSUM_OFFSET,
2829
)
2930
from data_diff.databases.base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS
31+
from data_diff.version import __version__
3032

3133

3234
@import_helper("duckdb")
@@ -148,9 +150,21 @@ def close(self):
148150
def create_connection(self):
    """Open a DuckDB connection to ``self._args["filepath"]``.

    On DuckDB >= 0.9.2 the connection is opened with a
    ``data-diff/v<version>`` custom user agent, and the value reported by
    ``PRAGMA USER_AGENT`` is read back to confirm the tag was applied.
    Older DuckDB versions connect without a custom user agent.

    Returns:
        The connection object returned by ``duckdb.connect``.

    Raises:
        ConnectError: if DuckDB raises ``OperationalError``, or if the
            reported user agent does not contain the expected tag.
    """
    ddb = import_duckdb()
    filepath = self._args["filepath"]
    try:
        # custom_user_agent is only available in duckdb >= 0.9.2
        if parse_version(ddb.__version__) < parse_version("0.9.2"):
            return ddb.connect(database=filepath)

        custom_user_agent = f"data-diff/v{__version__}"
        connection = ddb.connect(database=filepath, config={"custom_user_agent": custom_user_agent})
        try:
            user_agent_rows = connection.sql("PRAGMA USER_AGENT;").fetchall()
            # Explicit check instead of `assert`: asserts are stripped under
            # `python -O`, which would silently disable this verification.
            if not user_agent_rows or custom_user_agent not in user_agent_rows[0][0]:
                raise ConnectError("Assertion failed: Custom user agent is invalid.")
        except Exception:
            # Do not leak the already-open connection when verification fails.
            connection.close()
            raise
        return connection
    except ddb.OperationalError as e:
        raise ConnectError(*e.args) from e
154168

155169
def select_table_schema(self, path: DbPath) -> str:
156170
database, schema, table = self._normalize_table_path(path)

poetry.lock

+1,068-952
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,10 @@ psycopg2 = "*"
5656
snowflake-connector-python = ">=3.0.2,<4.0.0"
5757
cryptography = "*"
5858
trino = "^0.314.0"
59-
presto-python-client = "*"
59+
presto-python-client = "0.8.3"
6060
clickhouse-driver = "*"
6161
vertica-python = "*"
62-
duckdb = "^0.7.0"
62+
duckdb = "^0.9.0"
6363
dbt-core = "^1.0.0"
6464
ruff = "^0.1.4"
6565
# google-cloud-bigquery = "*"
-1.5 MB
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
jaffle_shop:
2+
target: dev_motherduck
3+
outputs:
4+
dev_motherduck:
5+
type: duckdb
6+
path: 'md:jaffle_shop?motherduck_token={{ env_var("MOTHERDUCK_TOKEN") }}'
7+
schema: dev

tests/dbt_artifacts/profiles.yml

-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,3 @@ jaffle_shop:
55
type: duckdb
66
path: "./tests/dbt_artifacts/jaffle_shop.duckdb"
77
schema: dev
8-
different_dev:
9-
type: duckdb
10-
path: "./tests/dbt_artifacts/jaffle_shop.duckdb"
11-
schema: "{{ env_var('some_env_var') }}"

tests/test_dbt.py

+18
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,24 @@ def test_integration_basic_dbt(self):
4545
# 1 with a diff
4646
assert diff_string.count(" Rows Added Rows Removed") == 1
4747

48+
def test_integration_motherduck_dbt(self):
    """Run the dbt CLI integration using the ``motherduck`` profiles directory.

    When ``DATA_DIFF_DBT_PROJ`` is set it is used as both the project and
    the profiles directory; otherwise the bundled ``tests/dbt_artifacts``
    project is used and the expected diff counts are asserted.
    """
    bundled_artifacts = os.getcwd() + "/tests/dbt_artifacts"
    env_project = os.environ.get("DATA_DIFF_DBT_PROJ")
    test_project_path = env_project or bundled_artifacts
    test_profiles_path = env_project or bundled_artifacts + "/motherduck"

    cli_output = run_datadiff_cli(
        "--dbt",
        "--dbt-project-dir",
        test_project_path,
        "--dbt-profiles-dir",
        test_profiles_path,
    )

    # The counts below only hold for the bundled jaffle_shop project.
    if test_project_path != bundled_artifacts:
        return

    rendered = b"".join(cli_output).decode("utf-8")
    expected_counts = (
        ("<>", 5),  # 5 diffs were run
        ("No row differences", 4),  # 4 with no diffs
        (" Rows Added Rows Removed", 1),  # 1 with a diff
    )
    for marker, expected in expected_counts:
        assert rendered.count(marker) == expected
65+
4866
def test_integration_cloud_dbt(self):
4967
project_dir = os.environ.get("DATA_DIFF_DBT_PROJ")
5068
if project_dir is not None:

0 commit comments

Comments
 (0)