Skip to content

Commit

Permalink
Merge pull request #211 from dbt-labs/making-metrics-smarter
Browse files Browse the repository at this point in the history
Making Metrics Smarter
  • Loading branch information
callum-mcdata authored Jan 18, 2023
2 parents 77f4919 + a6198ad commit 94effe2
Show file tree
Hide file tree
Showing 30 changed files with 590 additions and 232 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Under the Hood-20230117-092325.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Under the Hood
body: Adding grouping for query generation
time: 2023-01-17T09:23:25.796327-06:00
custom:
Author: callum-mcdata
Issue: "114"
PR: "211"
2 changes: 1 addition & 1 deletion integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ version: "1.0.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: "dbt_metrics_integration_tests_postgres"
profile: "dbt_metrics_integration_tests_bigquery"

model-paths: ["models"]
analysis-paths: ["analyses"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,14 @@ metrics:
expression: customer_id
dimensions:
- had_discount
- order_country
- order_country
window:
count: 14
period: month
filters:
- field: had_discount
operator: 'is'
value: 'true'
- field: order_country
operator: '='
value: "'CA'"
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
select *
from
{{ metrics.calculate(metric('base_average_metric'),
grain='test',
grain='day',
dimensions=['had_discount'])
}}
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
select *
from
{{ metrics.calculate(metric('base_median_metric'),
{{ metrics.calculate(
[metric('base_median_metric'),metric('base_average_metric')],
grain='month',
dimensions=['had_discount'],
date_alias='date_test')
date_alias='dat')
}}
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
select *
from
{{ metrics.calculate(
metric('derived_metric'),
dimensions=['had_discount','order_country','is_weekend'],
start_date = '2022-01-01',
end_date = '2022-01-10'
[metric('derived_metric'),metric('base_count_distinct_metric')],
grain='day',
dimensions=['had_discount','order_country']
)
}}
22 changes: 14 additions & 8 deletions macros/get_metric_sql.sql
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ cleanliness -#}
within the final dataset in order to accomplish base + secondary calc functionality. -#}
{%- set relevant_periods = metrics.get_relevent_periods(grain, secondary_calculations) -%}

{# Setting a variable to denote if the user has provided any dimensions #}
{#- Setting a variable to denote if the user has provided any dimensions -#}
{%- if non_calendar_dimensions | length > 0 -%}
{%- set dimensions_provided = true -%}
{%- else -%}
Expand All @@ -34,9 +34,11 @@ within the final dataset in order to accomplish base + secondary calc functional
a custom calendar -#}
{%- set calendar_tbl = ref(var('dbt_metrics_calendar_model', "dbt_metrics_default_calendar")) -%}

{# Here we get the total dimension count for grouping #}
{#- Here we get the total dimension count for grouping -#}
{%- set total_dimension_count = metrics.get_total_dimension_count(grain, dimensions, calendar_dimensions, relevant_periods) -%}

{#- Here we are creating the metric grouping that we use to determine if metrics can be pulled from the same base query -#}
{%- set models_grouping = metrics.get_models_grouping(metric_tree=metric_tree,metrics_dictionary=metrics_dictionary) -%}
{#- ############
LET THE COMPOSITION BEGIN!
############ -#}
Expand All @@ -52,35 +54,38 @@ metrics there are -#}
{#- This filter forms the basis of how we construct the SQL -#}
{#- If composite, we begin by looping through each of the metric names that make
up the composite metric. -#}
{%- for metric_name in metric_tree["parent_set"] -%}

{%- for group_name, group_values in models_grouping.items() -%}

{{ metrics.build_metric_sql(
metric_dictionary=metrics_dictionary[metric_name],
metrics_dictionary=metrics_dictionary,
grain=grain,
dimensions=non_calendar_dimensions,
secondary_calculations=secondary_calculations,
start_date=start_date,
end_date=end_date,
calendar_tbl=calendar_tbl,
relevant_periods=relevant_periods,
calendar_dimensions=calendar_dimensions,
dimensions_provided=dimensions_provided,
total_dimension_count=total_dimension_count
total_dimension_count=total_dimension_count,
group_name=group_name,
group_values=group_values
)
}}

{%- endfor -%}

{%- if metric_tree["full_set"] | length > 1 -%}
{%- if models_grouping| length > 1 or metric_tree['derived_set'] | length > 0 -%}

{{ metrics.gen_joined_metrics_cte(
metric_tree=metric_tree,
metrics_dictionary=metrics_dictionary,
models_grouping=models_grouping,
grain=grain,
dimensions=non_calendar_dimensions,
calendar_dimensions=calendar_dimensions,
secondary_calculations=secondary_calculations,
relevant_periods=relevant_periods,
metrics_dictionary=metrics_dictionary,
total_dimension_count=total_dimension_count )
}}

Expand All @@ -89,6 +94,7 @@ up the composite metric. -#}
{{ metrics.gen_final_cte(
metric_tree=metric_tree,
metrics_dictionary=metrics_dictionary,
models_grouping=models_grouping,
grain=grain,
dimensions=non_calendar_dimensions,
calendar_dimensions=calendar_dimensions,
Expand Down
23 changes: 12 additions & 11 deletions macros/sql_gen/build_metric_sql.sql
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
{%- macro build_metric_sql(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, dimensions_provided, total_dimension_count) %}

{%- set treat_null_values_as_zero = metric_dictionary.get("config").get("treat_null_values_as_zero", True) -%}
{%- macro build_metric_sql(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, dimensions_provided, total_dimension_count, group_name, group_values) %}

{#- This is the SQL Gen part - we've broken each component out into individual macros -#}
{#- We broke this out so it can loop for composite metrics -#}
{{ metrics.gen_aggregate_cte(
metric_dictionary=metric_dictionary,
metrics_dictionary=metrics_dictionary,
grain=grain,
dimensions=dimensions,
secondary_calculations=secondary_calculations,
start_date=start_date,
end_date=end_date,
calendar_tbl=calendar_tbl,
relevant_periods=relevant_periods,
calendar_dimensions=calendar_dimensions,
total_dimension_count=total_dimension_count
total_dimension_count=total_dimension_count,
group_name=group_name,
group_values=group_values
) }}

{#- Diverging path for secondary calcs and needing to datespine -#}
Expand All @@ -22,14 +22,14 @@
{%- if dimensions_provided == true -%}

{{ metrics.gen_dimensions_cte(
metric_name=metric_dictionary.name,
group_name=group_name,
dimensions=dimensions
) }}

{%- endif -%}

{{ metrics.gen_spine_time_cte(
metric_name=metric_dictionary.name,
group_name=group_name,
grain=grain,
dimensions=dimensions,
secondary_calculations=secondary_calculations,
Expand All @@ -41,15 +41,16 @@
{%- endif -%}

{{ metrics.gen_metric_cte(
metric_name=metric_dictionary.name,
metrics_dictionary=metrics_dictionary,
group_name=group_name,
group_values=group_values,
grain=grain,
dimensions=dimensions,
secondary_calculations=secondary_calculations,
start_date=start_date,
end_date=end_date,
relevant_periods=relevant_periods,
calendar_dimensions=calendar_dimensions,
treat_null_values_as_zero=treat_null_values_as_zero
calendar_dimensions=calendar_dimensions
)}}

{%- endmacro -%}
29 changes: 20 additions & 9 deletions macros/sql_gen/gen_aggregate_cte.sql
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{%- macro gen_aggregate_cte(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count) -%}
{{ return(adapter.dispatch('gen_aggregate_cte', 'metrics')(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count)) }}
{%- macro gen_aggregate_cte(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, group_name, group_values) -%}
{{ return(adapter.dispatch('gen_aggregate_cte', 'metrics')(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, group_name, group_values)) }}
{%- endmacro -%}

{%- macro default__gen_aggregate_cte(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count) %}
{%- macro default__gen_aggregate_cte(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, group_name, group_values) %}

, {{metric_dictionary.name}}__aggregate as (
, {{group_name}}__aggregate as (
{# This is the most important CTE. Instead of joining all relevant information
and THEN aggregating, we are instead aggregating from the beginning and then
joining downstream for performance. Additionally, we're using a subquery instead
Expand Down Expand Up @@ -42,22 +42,33 @@

{#- This line performs the relevant aggregation by calling the
gen_primary_metric_aggregate macro. Take a look at that one if you're curious -#}
{{ metrics.gen_primary_metric_aggregate(metric_dictionary.calculation_method, 'property_to_aggregate') }} as {{ metric_dictionary.name }}
{%- for metric_name in group_values.metric_names -%}
{{ metrics.gen_primary_metric_aggregate(metrics_dictionary[metric_name].calculation_method, 'property_to_aggregate__'~metric_name) }} as {{ metric_name }}
{%- if not loop.last -%},{%- endif -%}
{%- endfor%}
from ({{ metrics.gen_base_query(
metric_dictionary=metric_dictionary,
metrics_dictionary=metrics_dictionary,
grain=grain,
dimensions=dimensions,
secondary_calculations=secondary_calculations,
start_date=start_date,
end_date=end_date,
calendar_tbl=calendar_tbl,
relevant_periods=relevant_periods,
calendar_dimensions=calendar_dimensions,
total_dimension_count=total_dimension_count) }}
total_dimension_count=total_dimension_count,
group_name=group_name,
group_values=group_values
)
}}
) as base_query
where 1=1
{%- if metric_dictionary.window is not none and grain %}
{#-
Every metric in this group has already been determined to share the same
window & filters, so the group-level window stored on group_values decides
whether the window filter below is applied.
-#}
{%- if group_values.window is not none and grain %}
and date_{{grain}} = window_filter_date
{%- endif %}
{{ metrics.gen_group_by(grain, dimensions, calendar_dimensions, relevant_periods) }}
Expand Down
32 changes: 20 additions & 12 deletions macros/sql_gen/gen_base_query.sql
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
{% macro gen_base_query(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count) %}
{{ return(adapter.dispatch('gen_base_query', 'metrics')(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count)) }}
{% macro gen_base_query(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, group_name, group_values) %}
{{ return(adapter.dispatch('gen_base_query', 'metrics')(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, group_name, group_values)) }}
{% endmacro %}

{% macro default__gen_base_query(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count) %}
{% macro default__gen_base_query(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, group_name, group_values) %}
{# This is the "base" CTE which selects the fields we need to correctly
calculate the metric. -#}
select
{% if grain -%}
cast(base_model.{{metric_dictionary.timestamp}} as date) as metric_date_day,
calendar_table.date_{{ grain }} as date_{{grain}},
calendar_table.date_day as window_filter_date,
{#-
Every metric in this group has already been determined to share the same
windows & filters, so the group-level values stored on group_values (here,
the timestamp column) are used for the date fields below.
-#}
cast(base_model.{{group_values.timestamp}} as date) as metric_date_day,
calendar.date_{{ grain }} as date_{{grain}},
calendar.date_day as window_filter_date,
{%- if secondary_calculations | length > 0 %}
{%- for period in relevant_periods %}
calendar_table.date_{{ period }},
calendar.date_{{ period }},
{%- endfor -%}
{%- endif -%}
{%- endif -%}
Expand All @@ -21,18 +26,21 @@
base_model.{{ dim }},
{%- endfor %}
{%- for calendar_dim in calendar_dimensions -%}
calendar_table.{{ calendar_dim }},
calendar.{{ calendar_dim }},
{%- endfor -%}
{{ metrics.gen_property_to_aggregate(metric_dictionary, grain, dimensions, calendar_dimensions) }}
from {{ metric_dictionary.metric_model }} base_model
{%- for metric_name in group_values.metric_names -%}
{{ metrics.gen_property_to_aggregate(metrics_dictionary[metric_name], grain, dimensions, calendar_dimensions) }}
{%- if not loop.last -%},{%- endif -%}
{%- endfor%}
from {{ group_values.metric_model }} base_model
{# -#}
{%- if grain or calendar_dimensions|length > 0 -%}
{{ metrics.gen_calendar_table_join(metric_dictionary, calendar_tbl) }}
{{ metrics.gen_calendar_join(group_values) }}
{%- endif -%}
{# #}
where 1=1
{#- -#}
{{ metrics.gen_filters(metric_dictionary, start_date, end_date) }}
{{ metrics.gen_filters(group_values, start_date, end_date) }}
{# #}

{%- endmacro -%}
43 changes: 43 additions & 0 deletions macros/sql_gen/gen_calendar_join.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{% macro gen_calendar_join(group_values) %}
    {#-
        Entry point for the calendar join clause.
        Resolves the adapter-specific `gen_calendar_join` implementation in the
        `metrics` namespace and returns whatever that implementation produces
        for the supplied group_values.
    -#}
    {%- set calendar_join_impl = adapter.dispatch('gen_calendar_join', 'metrics') -%}
    {{ return(calendar_join_impl(group_values)) }}
{%- endmacro -%}

{% macro default__gen_calendar_join(group_values) %}
{#-
    Default calendar join, written with dateadd(<period>, <count>, <date>).
    Reads group_values.timestamp and group_values.window (period / count).
    - window is none: base_model rows join to the calendar row for the same day.
    - otherwise: base_model rows join to every calendar day whose trailing
      window (date_day minus count periods, exclusive, up to date_day,
      inclusive) contains the row's date.
-#}
{%- set metric_day = 'cast(base_model.' ~ group_values.timestamp ~ ' as date)' %}
left join calendar
{%- if group_values.window is none %}
on {{ metric_day }} = calendar.date_day
{% else %}
on {{ metric_day }} > dateadd({{ group_values.window.period }}, -{{ group_values.window.count }}, calendar.date_day)
and {{ metric_day }} <= calendar.date_day
{%- endif -%}
{% endmacro %}

{% macro bigquery__gen_calendar_join(group_values) %}
{#-
    BigQuery calendar join, written with
    date_sub(<date>, interval <count> <period>).
    Reads group_values.timestamp and group_values.window (period / count).
    - window is none: plain same-day equality join against calendar.date_day.
    - otherwise: range join covering the trailing window that ends on
      calendar.date_day (lower bound exclusive, upper bound inclusive).
-#}
{%- set lookback = group_values.window -%}
{%- set event_date = 'cast(base_model.' ~ group_values.timestamp ~ ' as date)' %}
left join calendar
{%- if lookback is none %}
on {{ event_date }} = calendar.date_day
{% else %}
on {{ event_date }} > date_sub(calendar.date_day, interval {{ lookback.count }} {{ lookback.period }})
and {{ event_date }} <= calendar.date_day
{%- endif -%}
{% endmacro %}

{% macro postgres__gen_calendar_join(group_values) %}
{#-
    Postgres calendar join, written with
    <date> - interval '<count> <period>'.
    Reads group_values.timestamp and group_values.window (period / count).
    - window is none: plain same-day equality join against calendar.date_day.
    - otherwise: range join covering the trailing window that ends on
      calendar.date_day (lower bound exclusive, upper bound inclusive).
-#}
{%- set trailing_window = group_values.window -%}
{%- set base_date = 'cast(base_model.' ~ group_values.timestamp ~ ' as date)' %}
left join calendar
{%- if trailing_window is none %}
on {{ base_date }} = calendar.date_day
{% else %}
on {{ base_date }} > calendar.date_day - interval '{{ trailing_window.count }} {{ trailing_window.period }}'
and {{ base_date }} <= calendar.date_day
{%- endif -%}
{% endmacro %}

{% macro redshift__gen_calendar_join(group_values) %}
{#-
    Redshift calendar join, written with dateadd(<period>, <count>, <date>);
    it emits the same SQL as the default implementation.
    NOTE(review): presumably kept as its own macro so that dispatch does not
    resolve to the postgres implementation for Redshift; confirm against
    dbt's dispatch search order.
    - window is none: plain same-day equality join against calendar.date_day.
    - otherwise: range join covering the trailing window that ends on
      calendar.date_day (lower bound exclusive, upper bound inclusive).
-#}
{%- set row_date = 'cast(base_model.' ~ group_values.timestamp ~ ' as date)' -%}
{%- set win = group_values.window %}
left join calendar
{%- if win is none %}
on {{ row_date }} = calendar.date_day
{% else %}
on {{ row_date }} > dateadd({{ win.period }}, -{{ win.count }}, calendar.date_day)
and {{ row_date }} <= calendar.date_day
{%- endif -%}
{% endmacro %}
Loading

0 comments on commit 94effe2

Please sign in to comment.