Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 975efa4

Browse files
authored
Merge pull request #589 from dlawin/DX-724
Add custom exceptions for raises in dbt_parser
2 parents c152ae2 + 8d83250 commit 975efa4

File tree

4 files changed

+146
-42
lines changed

4 files changed

+146
-42
lines changed

data_diff/dbt_parser.py

Lines changed: 55 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,21 @@
1010
from dbt_artifacts_parser.parser import parse_run_results, parse_manifest
1111
from dbt.config.renderer import ProfileRenderer
1212

13+
from data_diff.errors import (
14+
DataDiffDbtBigQueryOauthOnlyError,
15+
DataDiffDbtConnectionNotImplementedError,
16+
DataDiffDbtCoreNoRunnerError,
17+
DataDiffDbtNoSuccessfulModelsInRunError,
18+
DataDiffDbtProfileNotFoundError,
19+
DataDiffDbtProjectVarsNotFoundError,
20+
DataDiffDbtRedshiftPasswordOnlyError,
21+
DataDiffDbtRunResultsVersionError,
22+
DataDiffDbtSelectNoMatchingModelsError,
23+
DataDiffDbtSelectUnexpectedError,
24+
DataDiffDbtSelectVersionTooLowError,
25+
DataDiffDbtSnowflakeSetConnectionError,
26+
)
27+
1328
from .utils import getLogger, get_from_dict_with_raise
1429

1530

@@ -91,9 +106,11 @@ def __init__(self, profiles_dir_override: str, project_dir_override: str) -> Non
91106

92107
def get_datadiff_variables(self) -> dict:
    """Return the ``vars: data_diff:`` section of the dbt project config.

    Walks ``self.project_dict`` through the ``vars`` key and then the
    ``data_diff`` key using ``get_from_dict_with_raise``.

    Raises:
        DataDiffDbtProjectVarsNotFoundError: when either key is missing
            (or maps to ``None``); the message links to the setup docs.
    """
    doc_url = "https://docs.datafold.com/development_testing/open_source#configure-your-dbt-project"
    # One exception instance serves both lookups: whichever level is
    # missing, the remedy shown to the user is the same.
    missing_section = DataDiffDbtProjectVarsNotFoundError(
        f"vars: data_diff: section not found in dbt_project.yml.\n\nTo solve this, please configure your dbt project: \n{doc_url}\n"
    )
    section = self.project_dict
    for key in ("vars", "data_diff"):
        section = get_from_dict_with_raise(section, key, missing_section)
    return section
97114

98115
def get_datadiff_model_config(self, model_meta: dict) -> TDatadiffModelConfig:
99116
where_filter = None
@@ -118,11 +135,11 @@ def get_models(self, dbt_selection: Optional[str] = None):
118135
return self.get_dbt_selection_models(dbt_selection)
119136
# edge case if running data-diff from a separate env than dbt (likely local development)
120137
else:
121-
raise Exception(
138+
raise DataDiffDbtCoreNoRunnerError(
122139
"data-diff is using a dbt-core version < 1.5, update the environment's dbt-core version via pip install 'dbt-core>=1.5' in order to use `--select`"
123140
)
124141
else:
125-
raise Exception(
142+
raise DataDiffDbtSelectVersionTooLowError(
126143
f"The `--select` feature requires dbt >= 1.5, but your project's manifest.json is from dbt v{dbt_version}. Please follow these steps to use the `--select` feature: \n 1. Update your dbt-core version via pip install 'dbt-core>=1.5'. Details: https://docs.getdbt.com/docs/core/pip-install#change-dbt-core-versions \n 2. Execute any `dbt` command (`run`, `compile`, `build`) to create a new manifest.json."
127144
)
128145
else:
@@ -152,15 +169,17 @@ def get_dbt_selection_models(self, dbt_selection: str) -> List[str]:
152169
)
153170
if results.exception:
154171
raise results.exception
155-
elif results.success and results.result:
172+
173+
if results.success and results.result:
156174
model_list = [json.loads(model)["unique_id"] for model in results.result]
157175
models = [self.manifest_obj.nodes.get(x) for x in model_list]
158176
return models
159-
elif not results.result:
160-
raise Exception(f"No dbt models found for `--select {dbt_selection}`")
161-
else:
162-
logger.debug(str(results))
163-
raise Exception("Encountered an unexpected error while finding `--select` models")
177+
178+
if not results.result:
179+
raise DataDiffDbtSelectNoMatchingModelsError(f"No dbt models found for `--select {dbt_selection}`")
180+
181+
logger.debug(str(results))
182+
raise DataDiffDbtSelectUnexpectedError("Encountered an unexpected error while finding `--select` models")
164183

165184
def get_run_results_models(self):
166185
with open(self.project_dir / RUN_RESULTS_PATH) as run_results:
@@ -174,16 +193,18 @@ def get_run_results_models(self):
174193
self.profiles_dir = legacy_profiles_dir()
175194

176195
if dbt_version < parse_version(LOWER_DBT_V):
177-
raise Exception(f"Found dbt: v{dbt_version} Expected the dbt project's version to be >= {LOWER_DBT_V}")
178-
elif dbt_version >= parse_version(UPPER_DBT_V):
196+
raise DataDiffDbtRunResultsVersionError(
197+
f"Found dbt: v{dbt_version} Expected the dbt project's version to be >= {LOWER_DBT_V}"
198+
)
199+
if dbt_version >= parse_version(UPPER_DBT_V):
179200
logger.warning(
180201
f"{dbt_version} is a recent version of dbt and may not be fully tested with data-diff! \nPlease report any issues to https://github.com/datafold/data-diff/issues"
181202
)
182203

183204
success_models = [x.unique_id for x in run_results_obj.results if x.status.name == "success"]
184205
models = [self.manifest_obj.nodes.get(x) for x in success_models]
185206
if not models:
186-
raise ValueError("Expected > 0 successful models runs from the last dbt command.")
207+
raise DataDiffDbtNoSuccessfulModelsInRunError("Expected > 0 successful models runs from the last dbt command.")
187208

188209
return models
189210

@@ -209,25 +230,35 @@ def get_connection_creds(self) -> Tuple[Dict[str, str], str]:
209230
dbt_profile_var = self.project_dict.get("profile")
210231

211232
profile = get_from_dict_with_raise(
212-
profiles, dbt_profile_var, f"No profile '{dbt_profile_var}' found in '{profiles_path}'."
233+
profiles,
234+
dbt_profile_var,
235+
DataDiffDbtProfileNotFoundError(f"No profile '{dbt_profile_var}' found in '{profiles_path}'."),
213236
)
214237
# values can contain env_vars
215238
rendered_profile = ProfileRenderer().render_data(profile)
216239
profile_target = get_from_dict_with_raise(
217-
rendered_profile, "target", f"No target found in profile '{dbt_profile_var}' in '{profiles_path}'."
240+
rendered_profile,
241+
"target",
242+
DataDiffDbtProfileNotFoundError(f"No target found in profile '{dbt_profile_var}' in '{profiles_path}'."),
218243
)
219244
outputs = get_from_dict_with_raise(
220-
rendered_profile, "outputs", f"No outputs found in profile '{dbt_profile_var}' in '{profiles_path}'."
245+
rendered_profile,
246+
"outputs",
247+
DataDiffDbtProfileNotFoundError(f"No outputs found in profile '{dbt_profile_var}' in '{profiles_path}'."),
221248
)
222249
credentials = get_from_dict_with_raise(
223250
outputs,
224251
profile_target,
225-
f"No credentials found for target '{profile_target}' in profile '{dbt_profile_var}' in '{profiles_path}'.",
252+
DataDiffDbtProfileNotFoundError(
253+
f"No credentials found for target '{profile_target}' in profile '{dbt_profile_var}' in '{profiles_path}'."
254+
),
226255
)
227256
conn_type = get_from_dict_with_raise(
228257
credentials,
229258
"type",
230-
f"No type found for target '{profile_target}' in profile '{dbt_profile_var}' in '{profiles_path}'.",
259+
DataDiffDbtProfileNotFoundError(
260+
f"No type found for target '{profile_target}' in profile '{dbt_profile_var}' in '{profiles_path}'."
261+
),
231262
)
232263
conn_type = conn_type.lower()
233264

@@ -253,7 +284,7 @@ def set_connection(self):
253284

254285
if credentials.get("private_key_path") is not None:
255286
if credentials.get("password") is not None:
256-
raise Exception("Cannot use password and key at the same time")
287+
raise DataDiffDbtSnowflakeSetConnectionError("Cannot use password and key at the same time")
257288
conn_info["key"] = credentials.get("private_key_path")
258289
conn_info["private_key_passphrase"] = credentials.get("private_key_passphrase")
259290
elif credentials.get("authenticator") is not None:
@@ -262,13 +293,13 @@ def set_connection(self):
262293
elif credentials.get("password") is not None:
263294
conn_info["password"] = credentials.get("password")
264295
else:
265-
raise Exception("Snowflake: unsupported auth method")
296+
raise DataDiffDbtSnowflakeSetConnectionError("Snowflake: unsupported auth method")
266297
elif conn_type == "bigquery":
267298
method = credentials.get("method")
268299
# there are many connection types https://docs.getdbt.com/reference/warehouse-setups/bigquery-setup#oauth-via-gcloud
269300
# this assumes that the user is auth'd via `gcloud auth application-default login`
270301
if method is None or method != "oauth":
271-
raise Exception("Oauth is the current method supported for Big Query.")
302+
raise DataDiffDbtBigQueryOauthOnlyError("Oauth is the current method supported for Big Query.")
272303
conn_info = {
273304
"driver": conn_type,
274305
"project": credentials.get("project"),
@@ -284,7 +315,7 @@ def set_connection(self):
284315
if (credentials.get("pass") is None and credentials.get("password") is None) or credentials.get(
285316
"method"
286317
) == "iam":
287-
raise Exception("Only password authentication is currently supported for Redshift.")
318+
raise DataDiffDbtRedshiftPasswordOnlyError("Only password authentication is currently supported for Redshift.")
288319
conn_info = {
289320
"driver": conn_type,
290321
"host": credentials.get("host"),
@@ -315,7 +346,7 @@ def set_connection(self):
315346
}
316347
self.threads = credentials.get("threads")
317348
else:
318-
raise NotImplementedError(f"Provider {conn_type} is not yet supported for dbt diffs")
349+
raise DataDiffDbtConnectionNotImplementedError(f"Provider {conn_type} is not yet supported for dbt diffs")
319350

320351
self.connection = conn_info
321352

data_diff/errors.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
class DataDiffDbtError(Exception):
    """Common base class for the dbt-related errors defined in this module.

    Catching this type catches every exception declared below.
    """


# NOTE: a few classes below also inherit from a built-in exception type.
# Those are the built-ins that the corresponding code paths raised before the
# custom exceptions existed, so pre-existing ``except ValueError`` /
# ``except NotImplementedError`` handlers keep matching.


class DataDiffDbtProjectVarsNotFoundError(DataDiffDbtError, ValueError):
    """Raised when an expected dbt_project.yml section is missing."""


class DataDiffDbtProfileNotFoundError(DataDiffDbtError, ValueError):
    """Raised when an expected profiles.yml section is missing."""


class DataDiffDbtNoSuccessfulModelsInRunError(DataDiffDbtError, ValueError):
    """Raised when there are no successful model runs in the run_results.json"""


class DataDiffDbtRunResultsVersionError(DataDiffDbtError):
    """Raised when the dbt version in run_results.json is lower than the minimum version."""


class DataDiffDbtSelectNoMatchingModelsError(DataDiffDbtError):
    """Raised when the `--select` flag returns no models."""


class DataDiffDbtSelectUnexpectedError(DataDiffDbtError):
    """Catch all for unexpected dbt list --select results."""


class DataDiffDbtSnowflakeSetConnectionError(DataDiffDbtError):
    """Raised when a dbt snowflake profile has unexpected values."""


class DataDiffDbtBigQueryOauthOnlyError(DataDiffDbtError):
    """Raised when trying to use a method other than oauth with BigQuery."""


class DataDiffDbtRedshiftPasswordOnlyError(DataDiffDbtError):
    """Raised when using a non-password connection method with Redshift."""


class DataDiffDbtConnectionNotImplementedError(DataDiffDbtError, NotImplementedError):
    """Raised when trying to use an unsupported dbt connection method."""


class DataDiffDbtCoreNoRunnerError(DataDiffDbtError):
    """Raised when the manifest version >= 1.5, but the dbt-core package is < 1.5.

    This is an edge case most likely to occur in development.
    """


class DataDiffDbtSelectVersionTooLowError(DataDiffDbtError):
    """Raised when attempting to use `--select` with a dbt-core version < 1.5."""

data_diff/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,12 @@ def truncate_error(error: str):
8484
return re.sub("'(.*?)'", "'***'", first_line)
8585

8686

87-
def get_from_dict_with_raise(dictionary: Dict, key: str, exception: Exception):
    """Return ``dictionary[key]``, raising ``exception`` when it is unavailable.

    Args:
        dictionary: Mapping to read from. ``None`` is treated the same as a
            missing key.
        key: Key to look up.
        exception: Exception instance to raise when the lookup fails. For
            backward compatibility with the earlier ``error_message: str``
            form of this helper, a plain string is also tolerated and is
            raised as ``ValueError(message)``.

    Returns:
        The value stored under ``key``. Falsy values such as ``0`` or ``""``
        are returned as-is; only ``None`` counts as missing.

    Raises:
        The supplied ``exception`` (or ``ValueError`` for a string message)
        when ``dictionary`` is ``None`` or the value under ``key`` is ``None``.
    """
    result = None if dictionary is None else dictionary.get(key)
    if result is None:
        # ``raise "some text"`` would itself fail with TypeError, so wrap a
        # legacy string message in the exception type the old helper used.
        if isinstance(exception, str):
            raise ValueError(exception)
        raise exception
    return result
9294

9395

0 commit comments

Comments
 (0)