Skip to content

Commit

Permalink
feat: add clickhouse and greenplum support (#460)
Browse files Browse the repository at this point in the history
Co-authored-by: Grace Goheen <[email protected]>
Co-authored-by: Benoit Perigaud <[email protected]>
  • Loading branch information
3 people authored Sep 3, 2024
1 parent eaf8386 commit 34209f6
Show file tree
Hide file tree
Showing 35 changed files with 169 additions and 76 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,16 @@ Currently, the following adapters are supported:
- DuckDB
- Trino (tested with Iceberg connector)
- AWS Athena (tested manually)
- Greenplum (tested manually)
- ClickHouse (tested manually)

## Using This Package

### Cloning via dbt Package Hub

Check [dbt Hub](https://hub.getdbt.com/dbt-labs/dbt_project_evaluator/latest/) for the latest installation instructions, or [read the docs](https://docs.getdbt.com/docs/package-management) for more information on installing packages.

### Additional setup for Databricks/Spark/DuckDB
### Additional setup for Databricks/Spark/DuckDB/Redshift/ClickHouse

In your `dbt_project.yml`, add the following config:

Expand Down
4 changes: 2 additions & 2 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ models:
+materialized: "{{ 'table' if target.type in ['bigquery', 'redshift', 'databricks'] else 'view' }}"
int_all_dag_relationships:
# required for BigQuery, Redshift, and Databricks for performance/memory reasons
+materialized: "{{ 'table' if target.type in ['bigquery', 'redshift', 'databricks'] else 'view' }}"
+materialized: "{{ 'table' if target.type in ['bigquery', 'redshift', 'databricks', 'clickhouse'] else 'view' }}"
dag:
+materialized: table
staging:
Expand Down Expand Up @@ -86,7 +86,7 @@ vars:

# -- Execution variables --
insert_batch_size: "{{ 500 if target.type in ['athena', 'bigquery'] else 10000 }}"
max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else 4 if target.type in ['athena', 'trino'] else -1 }}"
max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else 4 if target.type in ['athena', 'trino', 'clickhouse'] else -1 }}"

# -- Code complexity variables --
comment_chars: ["--"]
Expand Down
2 changes: 2 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ Currently, the following adapters are supported:
- DuckDB
- Trino (tested with Iceberg connector)
- AWS Athena (tested manually)
- Greenplum (tested manually)
- ClickHouse (tested manually)

## Using This Package

Expand Down
10 changes: 10 additions & 0 deletions integration_tests/ci/sample.profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,13 @@ integration_tests:
threads: 5
session_properties:
query_max_stage_count: 275

clickhouse:
type: clickhouse
host: "{{ env_var('CLICKHOUSE_TEST_HOST') }}"
port: "{{ env_var('CLICKHOUSE_TEST_PORT') | as_number }}"
user: "{{ env_var('CLICKHOUSE_TEST_USER') }}"
password: "{{ env_var('CLICKHOUSE_TEST_PASS') }}"
dbname: "{{ env_var('CLICKHOUSE_TEST_DBNAME') }}"
schema: dbt_project_evaluator_integration_tests_clickhouse
threads: 5
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ models:
enforced: true
columns:
- name: id
data_type: integer
data_type: "{{ 'UInt8' if target.type in ['clickhouse'] else 'integer' }}"
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
-- {{ source('fake_source', 'fake_source') }}
select 1 as id
select 1 as id
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ models:
enforced: true
columns:
- name: id
data_type: int
data_type: "{{ 'UInt8' if target.type in ['clickhouse'] else 'int' }}"
constraints:
- type: not_null
tests:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
)
}}

-- {{ ref('int_model_5') }}
select * from {{ ref('stg_model_4') }}
-- {{ ref('int_model_5') }}
2 changes: 1 addition & 1 deletion integration_tests/models/reports/reports.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ models:
enforced: true
columns:
- name: id
data_type: integer
data_type: "{{ 'UInt8' if target.type in ['clickhouse'] else 'integer' }}"
12 changes: 11 additions & 1 deletion integration_tests_2/ci/sample.profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,14 @@ integration_tests:
host: "{{ env_var('DATABRICKS_TEST_HOST') }}"
http_path: "{{ env_var('DATABRICKS_TEST_HTTP_PATH') }}"
token: "{{ env_var('DATABRICKS_TEST_ACCESS_TOKEN') }}"
threads: 10
threads: 10

clickhouse:
type: clickhouse
host: "{{ env_var('CLICKHOUSE_TEST_HOST') }}"
port: "{{ env_var('CLICKHOUSE_TEST_PORT') | as_number }}"
user: "{{ env_var('CLICKHOUSE_TEST_USER') }}"
password: "{{ env_var('CLICKHOUSE_TEST_PASS') }}"
dbname: "{{ env_var('CLICKHOUSE_TEST_DBNAME') }}"
schema: dbt_project_evaluator_integration_tests_clickhouse
threads: 5
2 changes: 1 addition & 1 deletion integration_tests_2/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ models:
vars:
max_depth_dag: 2
chained_views_threshold: 2
primary_key_test_macros: [["my_package.test_my_test", "dbt.test_not_null"]]
primary_key_test_macros: [["my_package.test_my_test", "dbt.test_not_null"]]
54 changes: 54 additions & 0 deletions macros/cross_db_shim/clickhouse_shims.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{%- macro clickhouse__type_string() -%}
    {#- Shim for dbt.type_string(): ClickHouse columns are non-nullable by
        default, so wrap in Nullable to match other adapters' semantics. -#}
    Nullable(String)
{%- endmacro %}

{%- macro clickhouse__type_int() -%}
    {#- Shim for dbt.type_int(): nullable 32-bit integer. -#}
    Nullable(Int32)
{%- endmacro %}

{%- macro clickhouse__type_float() -%}
    {#- Shim for dbt.type_float(): nullable 32-bit float. -#}
    Nullable(Float32)
{%- endmacro %}

{%- macro clickhouse__type_boolean() -%}
    {#- Shim for dbt.type_boolean(): nullable Bool. -#}
    Nullable(Bool)
{%- endmacro %}

{# Shim for dbt.replace(): ClickHouse's replaceAll() does literal (non-regex)
   substring replacement and rejects Nullable arguments, hence the
   assumeNotNull() wrapper around the input string. #}
{% macro clickhouse__replace(string_text, pattern, replacement) -%}
replaceAll(assumeNotNull({{string_text}}), {{pattern}}, {{replacement}})
{%- endmacro %}

{# Shim for dbt.split_part(). ClickHouse arrays are 1-indexed, so part_number
   maps directly onto the array subscript. assumeNotNull() is required because
   splitByChar() rejects Nullable input.
   NOTE(review): splitByChar expects a single-character delimiter — confirm no
   caller passes a multi-character delimiter_text. #}
{% macro clickhouse__split_part(string_text, delimiter_text, part_number) -%}
splitByChar({{delimiter_text}}, assumeNotNull({{string_text}}))[{{part_number}}]
{%- endmacro %}

{# Shim for dbt.listagg(): ClickHouse has no listagg/string_agg, so emulate it
   by aggregating into an array and joining with arrayStringConcat. #}
{% macro clickhouse__listagg(measure, delimiter_text, order_by_clause, limit_num) -%}
{% if order_by_clause and ' by ' in order_by_clause -%}
{# Pull the sort field out of an "order by <field>" clause. #}
{% set order_by_field = order_by_clause.split(' by ')[1] %}
{# Zip the measure values with the sort field, sort by the field, then project
   the measure values back out.
   Fixed: the original passed the not-yet-defined `arr` to .format() where
   `measure` was intended, so the first array_agg() rendered empty/undefined.
   NOTE(review): arrayReverseSort always sorts descending, ignoring any
   asc/desc in order_by_clause — confirm this matches callers' expectations. #}
{% set arr = "arrayMap(x -> x.1, arrayReverseSort(x -> x.2, arrayZip(array_agg({}), array_agg({}))))".format(measure, order_by_field) %}
{% else -%}
{% set arr = "array_agg({})".format(measure) %}
{%- endif %}

{% if limit_num -%}
{# Keep only the first limit_num elements before concatenating. #}
arrayStringConcat(arraySlice({{ arr }}, 1, {{ limit_num }}), {{delimiter_text}})
{% else -%}
arrayStringConcat({{ arr }}, {{delimiter_text}})
{%- endif %}
{%- endmacro %}

{# Override of dbt's load_csv_rows for seeds: ClickHouse can ingest CSV
   directly in an INSERT via "format CSV", so this streams the seed's CSV
   payload in one statement instead of building row-by-row VALUES inserts. #}
{% macro clickhouse__load_csv_rows(model, agate_table) %}
{% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %}
{# Adapter-provided helper — presumably serializes the agate table to CSV
   text; empty result skips the insert entirely. TODO confirm against the
   dbt-clickhouse adapter. #}
{% set data_sql = adapter.get_csv_data(agate_table) %}

{% if data_sql %}
{% set sql -%}
insert into {{ this.render() }} ({{ cols_sql }})
{{ adapter.get_model_query_settings(model) }}
format CSV
{{ data_sql }}
{%- endset %}

{# abridge_sql_log keeps the (potentially huge) CSV body out of debug logs. #}
{% do adapter.add_query(sql, bindings=agate_table, abridge_sql_log=True) %}
{% endif %}
{% endmacro %}
10 changes: 7 additions & 3 deletions macros/recursive_dag.sql
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ all_relationships (
is_excluded as child_is_excluded,
0 as distance,
{{ dbt.array_construct(['resource_name']) }} as path,
cast(null as boolean) as is_dependent_on_chain_of_views
cast(null as {{ dbt.type_boolean() }}) as is_dependent_on_chain_of_views

from direct_relationships
-- where direct_parent_id is null {# optional lever to change filtering of anchor clause to only include root resources #}
Expand Down Expand Up @@ -175,7 +175,7 @@ with direct_relationships as (
child_is_excluded,
0 as distance,
{{ dbt.array_construct(['resource_name']) }} as path,
cast(null as boolean) as is_dependent_on_chain_of_views
cast(null as {{ dbt.type_boolean() }}) as is_dependent_on_chain_of_views
from get_distinct
)

Expand Down Expand Up @@ -243,7 +243,7 @@ with direct_relationships as (
child.directory_path as child_directory_path,
child.file_name as child_file_name,
child.is_excluded as child_is_excluded,
all_relationships_unioned.distance,
cast(all_relationships_unioned.distance as {{ dbt.type_int() }}) as distance,
all_relationships_unioned.path,
all_relationships_unioned.is_dependent_on_chain_of_views

Expand All @@ -257,6 +257,10 @@ with direct_relationships as (
{% endmacro %}


{% macro clickhouse__recursive_dag() %}
{# Delegates to the BigQuery iterative implementation — presumably because
   ClickHouse's "with recursive" support can't run the default recursive CTE;
   confirm against the dbt-clickhouse adapter's capabilities. #}
{{ return(bigquery__recursive_dag()) }}
{% endmacro %}

{% macro spark__recursive_dag() %}
-- as of June 2022 databricks SQL doesn't support "with recursive" in the same way as other DWs
{{ return(bigquery__recursive_dag()) }}
Expand Down
4 changes: 2 additions & 2 deletions macros/unpack/get_exposure_values.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
wrap_string_with_quotes(node.name),
wrap_string_with_quotes(node.resource_type),
wrap_string_with_quotes(node.original_file_path | replace("\\","\\\\")),
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as boolean)",
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as " ~ dbt.type_boolean() ~ ")",
wrap_string_with_quotes(node.type),
wrap_string_with_quotes(node.maturity),
wrap_string_with_quotes(node.package_name),
Expand All @@ -35,4 +35,4 @@

{{ return(values) }}

{%- endmacro -%}
{%- endmacro -%}
2 changes: 1 addition & 1 deletion macros/unpack/get_metric_values.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
wrap_string_with_quotes(node.name),
wrap_string_with_quotes(node.resource_type),
wrap_string_with_quotes(node.original_file_path | replace("\\","\\\\")),
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as boolean)",
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as " ~ dbt.type_boolean() ~ ")",
wrap_string_with_quotes(node.type),
wrap_string_with_quotes(dbt.escape_single_quotes(node.label)),
wrap_string_with_quotes(node.package_name),
Expand Down
10 changes: 5 additions & 5 deletions macros/unpack/get_node_values.sql
Original file line number Diff line number Diff line change
Expand Up @@ -23,30 +23,30 @@
wrap_string_with_quotes(node.name),
wrap_string_with_quotes(node.resource_type),
wrap_string_with_quotes(node.original_file_path | replace("\\","\\\\")),
"cast(" ~ node.config.enabled | trim ~ " as boolean)",
"cast(" ~ node.config.enabled | trim ~ " as " ~ dbt.type_boolean() ~ ")",
wrap_string_with_quotes(node.config.materialized),
wrap_string_with_quotes(node.config.on_schema_change),
wrap_string_with_quotes(node.group),
wrap_string_with_quotes(node.access),
wrap_string_with_quotes(node.latest_version),
wrap_string_with_quotes(node.version),
wrap_string_with_quotes(node.deprecation_date),
"cast(" ~ contract | trim ~ " as boolean)",
"cast(" ~ contract | trim ~ " as " ~ dbt.type_boolean() ~ ")",
node.columns.values() | list | length,
node.columns.values() | list | selectattr('description') | list | length,
wrap_string_with_quotes(node.database),
wrap_string_with_quotes(node.schema),
wrap_string_with_quotes(node.package_name),
wrap_string_with_quotes(node.alias),
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as boolean)",
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as " ~ dbt.type_boolean() ~ ")",
"''" if not node.column_name else wrap_string_with_quotes(dbt.escape_single_quotes(node.column_name)),
wrap_string_with_quotes(node.meta | tojson),
wrap_string_with_quotes(dbt.escape_single_quotes(hard_coded_references)),
number_lines,
sql_complexity,
wrap_string_with_quotes(node.get('depends_on',{}).get('macros',[]) | tojson),
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.test_metadata) | trim ~ " as boolean)",
"cast(" ~ exclude_node ~ " as boolean)",
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.test_metadata) | trim ~ " as " ~ dbt.type_boolean() ~ ")",
"cast(" ~ exclude_node ~ " as " ~ dbt.type_boolean() ~ ")",
]
%}

Expand Down
10 changes: 5 additions & 5 deletions macros/unpack/get_source_values.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
wrap_string_with_quotes(node.alias),
wrap_string_with_quotes(node.resource_type),
wrap_string_with_quotes(node.source_name),
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.source_description) | trim ~ " as boolean)",
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as boolean)",
"cast(" ~ node.config.enabled ~ " as boolean)",
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.source_description) | trim ~ " as " ~ dbt.type_boolean() ~ ")",
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as " ~ dbt.type_boolean() ~ ")",
"cast(" ~ node.config.enabled ~ " as " ~ dbt.type_boolean() ~ ")",
wrap_string_with_quotes(node.loaded_at_field | replace("'", "_")),
"cast(" ~ ((node.freshness != None) and (dbt_project_evaluator.is_not_empty_string(node.freshness.warn_after.count)
or dbt_project_evaluator.is_not_empty_string(node.freshness.error_after.count))) | trim ~ " as boolean)",
Expand All @@ -32,7 +32,7 @@
wrap_string_with_quotes(node.loader),
wrap_string_with_quotes(node.identifier),
wrap_string_with_quotes(node.meta | tojson),
"cast(" ~ exclude_source ~ " as boolean)",
"cast(" ~ exclude_source ~ " as " ~ dbt.type_boolean() ~ ")",
]
%}

Expand All @@ -44,4 +44,4 @@

{{ return(values) }}

{%- endmacro -%}
{%- endmacro -%}
2 changes: 1 addition & 1 deletion models/marts/core/int_all_graph_resources.sql
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ joined as (
unioned_with_calc.file_name,
case
when unioned_with_calc.resource_type in ('test', 'source', 'metric', 'exposure', 'seed') then null
else naming_convention_prefixes.model_type
else nullif(naming_convention_prefixes.model_type, '')
end as model_type_prefix,
case
when unioned_with_calc.resource_type in ('test', 'source', 'metric', 'exposure', 'seed') then null
Expand Down
13 changes: 12 additions & 1 deletion models/marts/core/int_direct_relationships.sql
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,18 @@ direct_metrics_relationships as (
-- for all resources in the graph, find their direct parent
direct_relationships as (
select
all_graph_resources.*,
all_graph_resources.resource_id,
all_graph_resources.resource_name,
all_graph_resources.resource_type,
all_graph_resources.file_path,
all_graph_resources.directory_path,
all_graph_resources.file_name,
all_graph_resources.model_type,
all_graph_resources.materialized,
all_graph_resources.is_public,
all_graph_resources.access,
all_graph_resources.source_name,
all_graph_resources.is_excluded,
case
when all_graph_resources.resource_type = 'source' then null
when all_graph_resources.resource_type = 'exposure' then exposures.direct_parent_id
Expand Down
4 changes: 2 additions & 2 deletions models/marts/dag/fct_rejoining_of_upstream_concepts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ single_use_resources as (
triad_relationships as (
select
rejoined.parent,
rejoined.child as child,
rejoined.child,
direct_child.parent as parent_and_child
from rejoined
left join all_relationships as direct_child
Expand Down Expand Up @@ -66,4 +66,4 @@ final_filtered as (

select * from final_filtered

{{ filter_exceptions() }}
{{ filter_exceptions() }}
2 changes: 1 addition & 1 deletion models/marts/dag/fct_too_many_joins.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ final as (
select
child as resource_name,
child_file_path as file_path,
count(distinct parent) as join_count
cast(count(distinct parent) as {{ dbt.type_int() }}) as join_count
from all_dag_relationships
where distance = 1
group by 1, 2
Expand Down
4 changes: 2 additions & 2 deletions models/marts/documentation/fct_documentation_coverage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ conversion as (
final as (
select
{{ dbt.current_timestamp() if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
count(*) as total_models,
sum(is_described_model) as documented_models,
cast(count(*) as {{ dbt.type_int() }}) as total_models,
cast(sum(is_described_model) as {{ dbt.type_int() }}) as documented_models,
round(sum(is_described_model) * 100.00 / count(*), 2) as documentation_coverage_pct,
{% for model_type in var('model_types') %}
round(
Expand Down
4 changes: 2 additions & 2 deletions models/marts/structure/fct_model_naming_conventions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ inappropriate_model_names as (
from models
left join appropriate_prefixes
on models.model_type = appropriate_prefixes.model_type
where models.prefix_value is null
where nullif(models.prefix_value, '') is null

)

select * from inappropriate_model_names

{{ filter_exceptions() }}
{{ filter_exceptions() }}
4 changes: 2 additions & 2 deletions models/marts/tests/fct_missing_primary_key_tests.sql
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ tests as (
final as (

select
*
resource_name, is_primary_key_tested, number_of_tests_on_model, number_of_constraints_on_model
from tests
where not(is_primary_key_tested)

)

select * from final

{{ filter_exceptions() }}
{{ filter_exceptions() }}
Loading

0 comments on commit 34209f6

Please sign in to comment.