From 34a33a555df4877c593862b37ce116b78ce568fe Mon Sep 17 00:00:00 2001 From: Callum McCann Date: Thu, 12 Jan 2023 16:27:15 -0600 Subject: [PATCH 1/8] smarter metrics --- .../base_count_distinct_metric.yml | 12 +++- .../base_average_metric.sql | 2 +- .../base_median_metric.sql | 5 +- .../metric_testing_models/derived_metric.sql | 7 +- macros/calculate.sql | 3 + macros/get_metric_sql.sql | 22 +++--- macros/sql_gen/build_metric_sql.sql | 23 ++++--- macros/sql_gen/gen_aggregate_cte.sql | 29 +++++--- macros/sql_gen/gen_base_query.sql | 32 +++++---- macros/sql_gen/gen_calendar_join.sql | 43 ++++++++++++ macros/sql_gen/gen_calendar_table_join.sql | 43 ------------ macros/sql_gen/gen_dimensions_cte.sql | 10 +-- macros/sql_gen/gen_filters.sql | 18 ++--- macros/sql_gen/gen_final_cte.sql | 55 ++++++++------- macros/sql_gen/gen_joined_metrics_cte.sql | 54 +++++++-------- macros/sql_gen/gen_metric_cte.sql | 46 ++++++------- macros/sql_gen/gen_property_to_aggregate.sql | 24 +++---- macros/sql_gen/gen_spine_time_cte.sql | 12 ++-- macros/variables/get_faux_metric_tree.sql | 14 ++-- macros/variables/get_metric_definition.sql | 1 + macros/variables/get_metric_tree.sql | 20 +++--- macros/variables/get_model_group.sql | 69 +++++++++++++++++++ macros/variables/get_models_grouping.sql | 54 +++++++++++++++ macros/variables/update_faux_metric_tree.sql | 6 +- macros/variables/update_metric_tree.sql | 6 +- 25 files changed, 390 insertions(+), 220 deletions(-) create mode 100644 macros/sql_gen/gen_calendar_join.sql delete mode 100644 macros/sql_gen/gen_calendar_table_join.sql create mode 100644 macros/variables/get_model_group.sql create mode 100644 macros/variables/get_models_grouping.sql diff --git a/integration_tests/models/metric_definitions/base_count_distinct_metric.yml b/integration_tests/models/metric_definitions/base_count_distinct_metric.yml index b6980962..9d69db9d 100644 --- a/integration_tests/models/metric_definitions/base_count_distinct_metric.yml +++ 
b/integration_tests/models/metric_definitions/base_count_distinct_metric.yml @@ -10,4 +10,14 @@ metrics: expression: customer_id dimensions: - had_discount - - order_country \ No newline at end of file + - order_country + window: + count: 14 + period: month + filters: + - field: had_discount + operator: 'is' + value: 'true' + - field: order_country + operator: '=' + value: "'CA'" \ No newline at end of file diff --git a/integration_tests/models/metric_testing_models/base_average_metric.sql b/integration_tests/models/metric_testing_models/base_average_metric.sql index c1e9cd61..0f403362 100644 --- a/integration_tests/models/metric_testing_models/base_average_metric.sql +++ b/integration_tests/models/metric_testing_models/base_average_metric.sql @@ -1,6 +1,6 @@ select * from {{ metrics.calculate(metric('base_average_metric'), - grain='test', + grain='day', dimensions=['had_discount']) }} \ No newline at end of file diff --git a/integration_tests/models/metric_testing_models/base_median_metric.sql b/integration_tests/models/metric_testing_models/base_median_metric.sql index 1eb5cdff..25f5039f 100644 --- a/integration_tests/models/metric_testing_models/base_median_metric.sql +++ b/integration_tests/models/metric_testing_models/base_median_metric.sql @@ -1,7 +1,8 @@ select * from -{{ metrics.calculate(metric('base_median_metric'), +{{ metrics.calculate( + [metric('base_median_metric'),metric('base_average_metric')], grain='month', dimensions=['had_discount'], - date_alias='date_test') + date_alias='dat') }} \ No newline at end of file diff --git a/integration_tests/models/metric_testing_models/derived_metric.sql b/integration_tests/models/metric_testing_models/derived_metric.sql index a870acaa..ae1c683a 100644 --- a/integration_tests/models/metric_testing_models/derived_metric.sql +++ b/integration_tests/models/metric_testing_models/derived_metric.sql @@ -1,9 +1,8 @@ select * from {{ metrics.calculate( - metric('derived_metric'), - 
dimensions=['had_discount','order_country','is_weekend'], - start_date = '2022-01-01', - end_date = '2022-01-10' + [metric('derived_metric'),metric('base_count_distinct_metric')], + grain='day', + dimensions=['had_discount','order_country'] ) }} \ No newline at end of file diff --git a/macros/calculate.sql b/macros/calculate.sql index bcd79c64..66af10ee 100644 --- a/macros/calculate.sql +++ b/macros/calculate.sql @@ -20,6 +20,9 @@ {#- Here we are creating the metrics dictionary which contains all of the metric information needed for sql gen. -#} {%- set metrics_dictionary = metrics.get_metrics_dictionary(metric_tree=metric_tree) -%} + {#- Here we are creating the metric grouping that we use to determine if metrics can be pulled from the same base query -#} + {# {%- set metrics_grouping = metrics.get_metrics_grouping(metric_tree=metric_tree,metrics_dictionary=metrics_dictionary) -%} #} + {#- ############ VALIDATION - Make sure everything is good! ############ -#} diff --git a/macros/get_metric_sql.sql b/macros/get_metric_sql.sql index 6e308eab..d9f844e7 100644 --- a/macros/get_metric_sql.sql +++ b/macros/get_metric_sql.sql @@ -23,7 +23,7 @@ cleanliness -#} within the final dataset in order to accomplish base + secondary calc functionality. 
-#} {%- set relevant_periods = metrics.get_relevent_periods(grain, secondary_calculations) -%} -{# Setting a variable to denote if the user has provided any dimensions #} +{#- Setting a variable to denote if the user has provided any dimensions -#} {%- if non_calendar_dimensions | length > 0 -%} {%- set dimensions_provided = true -%} {%- else -%} @@ -34,9 +34,11 @@ within the final dataset in order to accomplish base + secondary calc functional a custom calendar -#} {%- set calendar_tbl = ref(var('dbt_metrics_calendar_model', "dbt_metrics_default_calendar")) -%} -{# Here we get the total dimension count for grouping #} +{#- Here we get the total dimension count for grouping -#} {%- set total_dimension_count = metrics.get_total_dimension_count(grain, dimensions, calendar_dimensions, relevant_periods) -%} +{#- Here we are creating the metric grouping that we use to determine if metrics can be pulled from the same base query -#} +{%- set models_grouping = metrics.get_models_grouping(metric_tree=metric_tree,metrics_dictionary=metrics_dictionary) -%} {#- ############ LET THE COMPOSITION BEGIN! ############ -#} @@ -52,35 +54,38 @@ metrics there are -#} {#- This filter forms the basis of how we construct the SQL -#} {#- If composite, we begin by looping through each of the metric names that make up the composite metric. 
-#} -{%- for metric_name in metric_tree["parent_set"] -%} + +{%- for model_name, model_values in models_grouping.items() -%} {{ metrics.build_metric_sql( - metric_dictionary=metrics_dictionary[metric_name], + metrics_dictionary=metrics_dictionary, grain=grain, dimensions=non_calendar_dimensions, secondary_calculations=secondary_calculations, start_date=start_date, end_date=end_date, - calendar_tbl=calendar_tbl, relevant_periods=relevant_periods, calendar_dimensions=calendar_dimensions, dimensions_provided=dimensions_provided, - total_dimension_count=total_dimension_count + total_dimension_count=total_dimension_count, + model_name=model_name, + model_values=model_values ) }} {%- endfor -%} -{%- if metric_tree["full_set"] | length > 1 -%} +{%- if models_grouping| length > 1 or metric_tree['derived_set'] | length > 0 -%} {{ metrics.gen_joined_metrics_cte( metric_tree=metric_tree, + metrics_dictionary=metrics_dictionary, + models_grouping=models_grouping, grain=grain, dimensions=non_calendar_dimensions, calendar_dimensions=calendar_dimensions, secondary_calculations=secondary_calculations, relevant_periods=relevant_periods, - metrics_dictionary=metrics_dictionary, total_dimension_count=total_dimension_count ) }} @@ -89,6 +94,7 @@ up the composite metric. 
-#} {{ metrics.gen_final_cte( metric_tree=metric_tree, metrics_dictionary=metrics_dictionary, + models_grouping=models_grouping, grain=grain, dimensions=non_calendar_dimensions, calendar_dimensions=calendar_dimensions, diff --git a/macros/sql_gen/build_metric_sql.sql b/macros/sql_gen/build_metric_sql.sql index 418f874d..93e25572 100644 --- a/macros/sql_gen/build_metric_sql.sql +++ b/macros/sql_gen/build_metric_sql.sql @@ -1,19 +1,19 @@ -{%- macro build_metric_sql(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, dimensions_provided, total_dimension_count) %} - - {%- set treat_null_values_as_zero = metric_dictionary.get("config").get("treat_null_values_as_zero", True) -%} +{%- macro build_metric_sql(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, dimensions_provided, total_dimension_count, model_name, model_values) %} + {#- This is the SQL Gen part - we've broken each component out into individual macros -#} {#- We broke this out so it can loop for composite metrics -#} {{ metrics.gen_aggregate_cte( - metric_dictionary=metric_dictionary, + metrics_dictionary=metrics_dictionary, grain=grain, dimensions=dimensions, secondary_calculations=secondary_calculations, start_date=start_date, end_date=end_date, - calendar_tbl=calendar_tbl, relevant_periods=relevant_periods, calendar_dimensions=calendar_dimensions, - total_dimension_count=total_dimension_count + total_dimension_count=total_dimension_count, + model_name=model_name, + model_values=model_values ) }} {#- Diverging path for secondary calcs and needing to datespine -#} @@ -22,14 +22,14 @@ {%- if dimensions_provided == true -%} {{ metrics.gen_dimensions_cte( - metric_name=metric_dictionary.name, + model_name=model_name, dimensions=dimensions ) }} {%- endif -%} {{ metrics.gen_spine_time_cte( - metric_name=metric_dictionary.name, + model_name=model_name, 
grain=grain, dimensions=dimensions, secondary_calculations=secondary_calculations, @@ -41,15 +41,16 @@ {%- endif -%} {{ metrics.gen_metric_cte( - metric_name=metric_dictionary.name, + metrics_dictionary=metrics_dictionary, + model_name=model_name, + model_values=model_values, grain=grain, dimensions=dimensions, secondary_calculations=secondary_calculations, start_date=start_date, end_date=end_date, relevant_periods=relevant_periods, - calendar_dimensions=calendar_dimensions, - treat_null_values_as_zero=treat_null_values_as_zero + calendar_dimensions=calendar_dimensions )}} {%- endmacro -%} diff --git a/macros/sql_gen/gen_aggregate_cte.sql b/macros/sql_gen/gen_aggregate_cte.sql index d721afd6..734c7da9 100644 --- a/macros/sql_gen/gen_aggregate_cte.sql +++ b/macros/sql_gen/gen_aggregate_cte.sql @@ -1,10 +1,10 @@ -{%- macro gen_aggregate_cte(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count) -%} - {{ return(adapter.dispatch('gen_aggregate_cte', 'metrics')(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count)) }} +{%- macro gen_aggregate_cte(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, model_name, model_values) -%} + {{ return(adapter.dispatch('gen_aggregate_cte', 'metrics')(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, model_name, model_values)) }} {%- endmacro -%} -{%- macro default__gen_aggregate_cte(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count) %} +{%- macro default__gen_aggregate_cte(metrics_dictionary, grain, dimensions, 
secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, model_name, model_values) %} -, {{metric_dictionary.name}}__aggregate as ( +, {{model_name}}__aggregate as ( {# This is the most important CTE. Instead of joining all relevant information and THEN aggregating, we are instead aggregating from the beginning and then joining downstream for performance. Additionally, we're using a subquery instead @@ -42,22 +42,33 @@ {#- This line performs the relevant aggregation by calling the gen_primary_metric_aggregate macro. Take a look at that one if you're curious -#} - {{ metrics.gen_primary_metric_aggregate(metric_dictionary.calculation_method, 'property_to_aggregate') }} as {{ metric_dictionary.name }} + {%- for metric_name in model_values.metric_names -%} + {{ metrics.gen_primary_metric_aggregate(metrics_dictionary[metric_name].calculation_method, 'property_to_aggregate__'~metric_name) }} as {{ metric_name }} + {%- if not loop.last -%},{%- endif -%} + {%- endfor%} from ({{ metrics.gen_base_query( - metric_dictionary=metric_dictionary, + metrics_dictionary=metrics_dictionary, grain=grain, dimensions=dimensions, secondary_calculations=secondary_calculations, start_date=start_date, end_date=end_date, - calendar_tbl=calendar_tbl, relevant_periods=relevant_periods, calendar_dimensions=calendar_dimensions, - total_dimension_count=total_dimension_count) }} + total_dimension_count=total_dimension_count, + model_name=model_name, + model_values=model_values + ) + }} ) as base_query where 1=1 - {%- if metric_dictionary.window is not none and grain %} + {#- + Given that we've already determined the metrics in metric_names share + the same windows & filters, we can base the conditional off of the first + value in the list because the order doesn't matter. 
+ -#} + {%- if model_values.window is not none and grain %} and date_{{grain}} = window_filter_date {%- endif %} {{ metrics.gen_group_by(grain, dimensions, calendar_dimensions, relevant_periods) }} diff --git a/macros/sql_gen/gen_base_query.sql b/macros/sql_gen/gen_base_query.sql index 30bde372..0f815482 100644 --- a/macros/sql_gen/gen_base_query.sql +++ b/macros/sql_gen/gen_base_query.sql @@ -1,18 +1,23 @@ -{% macro gen_base_query(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count) %} - {{ return(adapter.dispatch('gen_base_query', 'metrics')(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count)) }} +{% macro gen_base_query(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, model_name, model_values) %} + {{ return(adapter.dispatch('gen_base_query', 'metrics')(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, model_name, model_values)) }} {% endmacro %} -{% macro default__gen_base_query(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count) %} +{% macro default__gen_base_query(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, model_name, model_values) %} {# This is the "base" CTE which selects the fields we need to correctly calculate the metric. 
-#} select {% if grain -%} - cast(base_model.{{metric_dictionary.timestamp}} as date) as metric_date_day, - calendar_table.date_{{ grain }} as date_{{grain}}, - calendar_table.date_day as window_filter_date, + {#- + Given that we've already determined the metrics in metric_names share + the same windows & filters, we can base the conditional off of the first + value in the list because the order doesn't matter. + -#} + cast(base_model.{{model_values.timestamp}} as date) as metric_date_day, + calendar.date_{{ grain }} as date_{{grain}}, + calendar.date_day as window_filter_date, {%- if secondary_calculations | length > 0 %} {%- for period in relevant_periods %} - calendar_table.date_{{ period }}, + calendar.date_{{ period }}, {%- endfor -%} {%- endif -%} {%- endif -%} @@ -21,18 +26,21 @@ base_model.{{ dim }}, {%- endfor %} {%- for calendar_dim in calendar_dimensions -%} - calendar_table.{{ calendar_dim }}, + calendar.{{ calendar_dim }}, {%- endfor -%} - {{ metrics.gen_property_to_aggregate(metric_dictionary, grain, dimensions, calendar_dimensions) }} - from {{ metric_dictionary.metric_model }} base_model + {%- for metric_name in model_values.metric_names -%} + {{ metrics.gen_property_to_aggregate(metrics_dictionary[metric_name], grain, dimensions, calendar_dimensions) }} + {%- if not loop.last -%},{%- endif -%} + {%- endfor%} + from {{ model_values.metric_model }} base_model {# -#} {%- if grain or calendar_dimensions|length > 0 -%} - {{ metrics.gen_calendar_table_join(metric_dictionary, calendar_tbl) }} + {{ metrics.gen_calendar_join(model_values) }} {%- endif -%} {# #} where 1=1 {#- -#} - {{ metrics.gen_filters(metric_dictionary, start_date, end_date) }} + {{ metrics.gen_filters(model_values, start_date, end_date) }} {# #} {%- endmacro -%} \ No newline at end of file diff --git a/macros/sql_gen/gen_calendar_join.sql b/macros/sql_gen/gen_calendar_join.sql new file mode 100644 index 00000000..2ee12af4 --- /dev/null +++ b/macros/sql_gen/gen_calendar_join.sql @@ -0,0 
+1,43 @@ +{% macro gen_calendar_join(model_values) %} + {{ return(adapter.dispatch('gen_calendar_join', 'metrics')(model_values)) }} +{%- endmacro -%} + +{% macro default__gen_calendar_join(model_values) %} {# Joins base_model to calendar on model_values.timestamp; when model_values.window is set, a row matches every calendar.date_day that falls within window.count window.period after it (upper bound inclusive). #} + left join calendar + {%- if model_values.window is not none %} + on cast(base_model.{{model_values.timestamp}} as date) > dateadd({{model_values.window.period}}, -{{model_values.window.count}}, calendar.date_day) + and cast(base_model.{{model_values.timestamp}} as date) <= calendar.date_day + {%- else %} + on cast(base_model.{{model_values.timestamp}} as date) = calendar.date_day + {% endif -%} +{% endmacro %} + +{% macro bigquery__gen_calendar_join(model_values) %} + left join calendar + {%- if model_values.window is not none %} + on cast(base_model.{{model_values.timestamp}} as date) > date_sub(calendar.date_day, interval {{model_values.window.count}} {{model_values.window.period}}) + and cast(base_model.{{model_values.timestamp}} as date) <= calendar.date_day + {%- else %} + on cast(base_model.{{model_values.timestamp}} as date) = calendar.date_day + {% endif -%} +{% endmacro %} + +{% macro postgres__gen_calendar_join(model_values) %} + left join calendar + {%- if model_values.window is not none %} + on cast(base_model.{{model_values.timestamp}} as date) > calendar.date_day - interval '{{model_values.window.count}} {{model_values.window.period}}' + and cast(base_model.{{model_values.timestamp}} as date) <= calendar.date_day + {%- else %} + on cast(base_model.{{model_values.timestamp}} as date) = calendar.date_day + {% endif -%} +{% endmacro %} + +{% macro redshift__gen_calendar_join(model_values) %} + left join calendar + {%- if model_values.window is not none %} + on cast(base_model.{{model_values.timestamp}} as date) > dateadd({{model_values.window.period}}, -{{model_values.window.count}}, calendar.date_day) + and cast(base_model.{{model_values.timestamp}} as date) <= calendar.date_day + {%- else %} + on cast(base_model.{{model_values.timestamp}} as
date) = calendar.date_day + {% endif -%} +{% endmacro %} diff --git a/macros/sql_gen/gen_calendar_table_join.sql b/macros/sql_gen/gen_calendar_table_join.sql deleted file mode 100644 index 006be508..00000000 --- a/macros/sql_gen/gen_calendar_table_join.sql +++ /dev/null @@ -1,43 +0,0 @@ -{% macro gen_calendar_table_join(metric_dictionary, calendar_tbl) %} - {{ return(adapter.dispatch('gen_calendar_table_join', 'metrics')(metric_dictionary, calendar_tbl)) }} -{%- endmacro -%} - -{% macro default__gen_calendar_table_join(metric_dictionary, calendar_tbl) %} - left join {{calendar_tbl}} calendar_table - {%- if metric_dictionary.window is not none %} - on cast(base_model.{{metric_dictionary.timestamp}} as date) > dateadd({{metric_dictionary.window.period}}, -{{metric_dictionary.window.count}}, calendar_table.date_day) - and cast(base_model.{{metric_dictionary.timestamp}} as date) <= calendar_table.date_day - {%- else %} - on cast(base_model.{{metric_dictionary.timestamp}} as date) = calendar_table.date_day - {% endif -%} -{% endmacro %} - -{% macro bigquery__gen_calendar_table_join(metric_dictionary, calendar_tbl) %} - left join {{calendar_tbl}} calendar_table - {%- if metric_dictionary.window is not none %} - on cast(base_model.{{metric_dictionary.timestamp}} as date) > date_sub(calendar_table.date_day, interval {{metric_dictionary.window.count}} {{metric_dictionary.window.period}}) - and cast(base_model.{{metric_dictionary.timestamp}} as date) <= calendar_table.date_day - {%- else %} - on cast(base_model.{{metric_dictionary.timestamp}} as date) = calendar_table.date_day - {% endif -%} -{% endmacro %} - -{% macro postgres__gen_calendar_table_join(metric_dictionary, calendar_tbl) %} - left join {{calendar_tbl}} calendar_table - {%- if metric_dictionary.window is not none %} - on cast(base_model.{{metric_dictionary.timestamp}} as date) > calendar_table.date_day - interval '{{metric_dictionary.window.count}} {{metric_dictionary.window.period}}' - and 
cast(base_model.{{metric_dictionary.timestamp}} as date) <= calendar_table.date_day - {%- else %} - on cast(base_model.{{metric_dictionary.timestamp}} as date) = calendar_table.date_day - {% endif -%} -{% endmacro %} - -{% macro redshift__gen_calendar_table_join(metric_dictionary, calendar_tbl) %} - left join {{calendar_tbl}} calendar_table - {%- if metric_dictionary.window is not none %} - on cast(base_model.{{metric_dictionary.timestamp}} as date) > dateadd({{metric_dictionary.window.period}}, -{{metric_dictionary.window.count}}, calendar_table.date_day) - and cast(base_model.{{metric_dictionary.timestamp}} as date) <= calendar_table.date_day - {%- else %} - on cast(base_model.{{metric_dictionary.timestamp}} as date) = calendar_table.date_day - {% endif -%} -{% endmacro %} diff --git a/macros/sql_gen/gen_dimensions_cte.sql b/macros/sql_gen/gen_dimensions_cte.sql index 13a1c95e..a4913e84 100644 --- a/macros/sql_gen/gen_dimensions_cte.sql +++ b/macros/sql_gen/gen_dimensions_cte.sql @@ -1,16 +1,16 @@ -{%- macro gen_dimensions_cte(metric_name, dimensions) -%} - {{ return(adapter.dispatch('gen_dimensions_cte', 'metrics')(metric_name, dimensions)) }} +{%- macro gen_dimensions_cte(model_name, dimensions) -%} + {{ return(adapter.dispatch('gen_dimensions_cte', 'metrics')(model_name, dimensions)) }} {%- endmacro -%} -{% macro default__gen_dimensions_cte(metric_name, dimensions) %} +{% macro default__gen_dimensions_cte(model_name, dimensions) %} -, {{metric_name}}__dims as ( +, {{model_name}}__dims as ( select distinct {%- for dim in dimensions %} {{ dim }}{%- if not loop.last -%},{% endif -%} {%- endfor %} - from {{metric_name}}__aggregate + from {{model_name}}__aggregate ) {%- endmacro -%} diff --git a/macros/sql_gen/gen_filters.sql b/macros/sql_gen/gen_filters.sql index ae5a7a22..5a630610 100644 --- a/macros/sql_gen/gen_filters.sql +++ b/macros/sql_gen/gen_filters.sql @@ -1,27 +1,27 @@ -{%- macro gen_filters(metric_dictionary, start_date, end_date) -%} - {{ 
return(adapter.dispatch('gen_filters', 'metrics')(metric_dictionary, start_date, end_date)) }} +{%- macro gen_filters(model_values, start_date, end_date) -%} + {{ return(adapter.dispatch('gen_filters', 'metrics')(model_values, start_date, end_date)) }} {%- endmacro -%} -{%- macro default__gen_filters(metric_dictionary, start_date, end_date) -%} +{%- macro default__gen_filters(model_values, start_date, end_date) -%} {#- metric start/end dates also applied here to limit incoming data -#} {% if start_date or end_date %} and ( {% if start_date and end_date -%} - cast(base_model.{{metric_dictionary.timestamp}} as date) >= cast('{{ start_date }}' as date) - and cast(base_model.{{metric_dictionary.timestamp}} as date) <= cast('{{ end_date }}' as date) + cast(base_model.{{model_values.timestamp}} as date) >= cast('{{ start_date }}' as date) + and cast(base_model.{{model_values.timestamp}} as date) <= cast('{{ end_date }}' as date) {%- elif start_date and not end_date -%} - cast(base_model.{{metric_dictionary.timestamp}} as date) >= cast('{{ start_date }}' as date) + cast(base_model.{{model_values.timestamp}} as date) >= cast('{{ start_date }}' as date) {%- elif end_date and not start_date -%} - cast(base_model.{{metric_dictionary.timestamp}} as date) <= cast('{{ end_date }}' as date) + cast(base_model.{{model_values.timestamp}} as date) <= cast('{{ end_date }}' as date) {%- endif %} ) {% endif -%} {#- metric filter clauses... 
-#} - {% if metric_dictionary.filters %} + {% if model_values.filters %} and ( - {% for filter in metric_dictionary.filters -%} + {% for filter in model_values.filters -%} {%- if not loop.first -%} and {% endif %}{{ filter.field }} {{ filter.operator }} {{ filter.value }} {% endfor -%} ) diff --git a/macros/sql_gen/gen_final_cte.sql b/macros/sql_gen/gen_final_cte.sql index c12e8d7d..84f61b9c 100644 --- a/macros/sql_gen/gen_final_cte.sql +++ b/macros/sql_gen/gen_final_cte.sql @@ -1,9 +1,11 @@ -{%- macro gen_final_cte(metric_tree, metrics_dictionary, grain, dimensions, calendar_dimensions, relevant_periods, secondary_calculations, where, date_alias) -%} - {{ return(adapter.dispatch('gen_final_cte', 'metrics')(metric_tree, metrics_dictionary, grain, dimensions, calendar_dimensions, relevant_periods, secondary_calculations, where, date_alias)) }} +{%- macro gen_final_cte(metric_tree, metrics_dictionary, models_grouping, grain, dimensions, calendar_dimensions, relevant_periods, secondary_calculations, where, date_alias) -%} + {{ return(adapter.dispatch('gen_final_cte', 'metrics')(metric_tree, metrics_dictionary, models_grouping, grain, dimensions, calendar_dimensions, relevant_periods, secondary_calculations, where, date_alias)) }} {%- endmacro -%} -{%- macro default__gen_final_cte(metric_tree, metrics_dictionary, grain, dimensions, calendar_dimensions, relevant_periods, secondary_calculations, where, date_alias) %} +{%- macro default__gen_final_cte(metric_tree, metrics_dictionary, models_grouping, grain, dimensions, calendar_dimensions, relevant_periods, secondary_calculations, where, date_alias) %} + {%- if secondary_calculations | length > 0 %} +{#- This section is for queries using secondary calculations -#} select date_{{grain}} {% if date_alias%}as {{date_alias}}{%endif%} {%- if secondary_calculations | length > 0 -%} @@ -17,42 +19,45 @@ select {%- for calendar_dim in calendar_dimensions %} ,{{ calendar_dim }} {%- endfor %} - {%- for metric_name in 
metric_tree.full_set %} + {%- for metric_name in metric_tree.parent_set|list + metric_tree.derived_set|list %} ,{{metric_name}} - {%- endfor %} + {%- endfor %} {{ metrics.gen_secondary_calculations(metric_tree, metrics_dictionary, grain, dimensions, secondary_calculations, calendar_dimensions)}} -from {% if metric_tree.full_set | length > 1 -%} joined_metrics {%- else -%} {{ metric_tree.base_set[0] }}__final {%- endif %} + {%- if models_grouping| length > 1 or metric_tree['derived_set'] | length > 0 %} +from joined_metrics + {%- else %} +from {% for model_name, model_values in models_grouping.items()-%}{{model_name}}__final {%-endfor-%} + {%- endif %} {# metric where clauses #} -{%- if where %} + {%- if where %} where {{ where }} -{%- endif %} + {%- endif %} {{ metrics.gen_order_by(grain, dimensions, calendar_dimensions, relevant_periods) }} -{%- else %} - -{%- if metric_tree.full_set | length > 1 %} +{%- elif models_grouping| length > 1 or metric_tree['derived_set'] | length > 0 -%} +{#- This section is for queries from multiple models or using derived metrics -#} select {%- if grain %} date_{{grain}} {% if date_alias%}as {{date_alias}}{%endif%}, - {% endif -%} + {%- endif %} {%- for dim in dimensions %} {{ dim }}, {%- endfor %} {%- for calendar_dim in calendar_dimensions %} {{ calendar_dim }}, - {% endfor -%} - {%- for metric_name in metric_tree.full_set %} - {{metric_name}}{%if not loop.last%},{%endif%} - {% endfor -%} + {%- endfor %} + {%- for metric_name in metric_tree.parent_set|list + metric_tree.derived_set|list %} + {{metric_name}}{%- if not loop.last -%},{%- endif -%} + {%- endfor %} from joined_metrics {#- metric where clauses -#} -{%- if where %} + {%- if where %} where {{ where }} -{%- endif -%} + {%- endif -%} {{ metrics.gen_order_by(grain, dimensions, calendar_dimensions, relevant_periods) }} -{%- else %} - +{%- else -%} +{#- This section is for non-derived, non-secondary calc queries -#} select {%- if grain %} date_{{grain}} {% if date_alias%}as 
{{date_alias}}{%endif%}, @@ -63,15 +68,17 @@ select {%- for calendar_dim in calendar_dimensions %} {{ calendar_dim }}, {% endfor -%} - {%- for metric_name in metric_tree.full_set %} + {%- for metric_name in metric_tree.parent_set|list + metric_tree.derived_set|list %} + {{metric_name}}{%- if not loop.last -%},{%- endif -%} + {%- endfor %} + {# {%- for metric_name in metric_tree.full_set %} {{metric_name}}{%if not loop.last%},{%endif%} - {%- endfor %} -from {{metric_tree.base_set[0]}}__final + {%- endfor %} #} +from {% for model_name, model_values in models_grouping.items()-%}{{model_name}}__final {%-endfor-%} {%- if where %} where {{ where }} {%- endif -%} {{ metrics.gen_order_by(grain, dimensions, calendar_dimensions, relevant_periods) }} - {%- endif %} {%- endif %} {%- endmacro %} \ No newline at end of file diff --git a/macros/sql_gen/gen_joined_metrics_cte.sql b/macros/sql_gen/gen_joined_metrics_cte.sql index 32d529cc..2819c5b1 100644 --- a/macros/sql_gen/gen_joined_metrics_cte.sql +++ b/macros/sql_gen/gen_joined_metrics_cte.sql @@ -1,16 +1,16 @@ -{%- macro gen_joined_metrics_cte(metric_tree, grain, dimensions, calendar_dimensions, secondary_calculations, relevant_periods, metrics_dictionary, total_dimension_count) -%} - {{ return(adapter.dispatch('gen_joined_metrics_cte', 'metrics')(metric_tree, grain, dimensions, calendar_dimensions, secondary_calculations, relevant_periods, metrics_dictionary, total_dimension_count)) }} +{%- macro gen_joined_metrics_cte(metric_tree, metrics_dictionary, models_grouping, grain, dimensions, calendar_dimensions, secondary_calculations, relevant_periods, total_dimension_count) -%} + {{ return(adapter.dispatch('gen_joined_metrics_cte', 'metrics')(metric_tree, metrics_dictionary, models_grouping, grain, dimensions, calendar_dimensions, secondary_calculations, relevant_periods, total_dimension_count)) }} {%- endmacro -%} -{% macro default__gen_joined_metrics_cte(metric_tree, grain, dimensions, calendar_dimensions, 
secondary_calculations, relevant_periods, metrics_dictionary, total_dimension_count) %} +{% macro default__gen_joined_metrics_cte(metric_tree, metrics_dictionary, models_grouping, grain, dimensions, calendar_dimensions, secondary_calculations, relevant_periods, total_dimension_count) %} {#- This section is a hacky workaround to account for postgres changes -#} {%- set cte_numbers = [] -%} {%- set unique_cte_numbers = [] -%} {#- the cte numbers are more representative of node depth -#} -{%- if metric_tree.expression_set | length > 0 -%} - {%- for metric_name in metric_tree.ordered_expression_set -%} - {%- do cte_numbers.append(metric_tree.ordered_expression_set[metric_name]) -%} +{%- if metric_tree.derived_set | length > 0 -%} + {%- for metric_name in metric_tree.ordered_derived_set -%} + {%- do cte_numbers.append(metric_tree.ordered_derived_set[metric_name]) -%} {%- endfor -%} {%- for cte_num in cte_numbers|unique -%} {%- do unique_cte_numbers.append(cte_num) -%} @@ -26,9 +26,9 @@ {%- endif -%} {%- for calendar_dim in calendar_dimensions %} coalesce( - {%- for metric_name in metric_tree.parent_set %} - {{metric_name}}__final.{{ calendar_dim }}{%- if not loop.last -%},{% endif %} - {%- if metric_tree.parent_set | length == 1 -%} + {%- for model_name, model_values in models_grouping.items() %} + {{model_name}}__final.{{ calendar_dim }}{%- if not loop.last -%},{% endif %} + {%- if models_grouping | length == 1 -%} , NULL {%- endif -%} {% endfor %} @@ -36,9 +36,9 @@ {% endfor %} {%- for period in relevant_periods %} coalesce( - {%- for metric_name in metric_tree.parent_set %} - {{metric_name}}__final.date_{{ period }} {%- if not loop.last -%},{% endif %} - {%- if metric_tree.parent_set | length == 1 %} + {%- for model_name, model_values in models_grouping.items() %} + {{model_name}}__final.date_{{ period }} {%- if not loop.last -%},{% endif %} + {%- if models_grouping | length == 1 %} , NULL {%- endif -%} {% endfor %} @@ -46,9 +46,9 @@ {%- endfor %} {%- for dim in 
dimensions %} coalesce( - {%- for metric_name in metric_tree.parent_set %} - {{metric_name}}__final.{{ dim }} {%- if not loop.last -%},{% endif %} - {%- if metric_tree.parent_set | length == 1 %} + {%- for model_name, model_values in models_grouping.items() %} + {{model_name}}__final.{{ dim }} {%- if not loop.last -%},{% endif %} + {%- if models_grouping | length == 1 %} , NULL {%- endif -%} {% endfor %} @@ -63,12 +63,12 @@ {%- endif %} {%- endfor %} {#- Loop through leaf metric list -#} - {% for metric_name in metric_tree.parent_set %} + {% for model_name, model_values in models_grouping.items() %} {%- if loop.first %} - from {{ metric_name }}__final + from {{ model_name }}__final {%- else %} {%- if grain %} - full outer join {{metric_name}}__final + full outer join {{model_name}}__final using ( date_{{grain}} {%- for calendar_dim in calendar_dimensions %} @@ -80,7 +80,7 @@ ) {%- else -%} {% if dimension_count != 0 %} - full outer join {{metric_name}}__final + full outer join {{model_name}}__final using ( {%- for calendar_dim in calendar_dimensions -%} {%- if not loop.first -%},{%- endif -%} {{ calendar_dim }} @@ -97,7 +97,7 @@ {%- endfor -%} ) {%- elif dimension_count == 0 %} - cross join {{metric_name}}__final + cross join {{model_name}}__final {%- endif %} {%- endif %} {%- endif -%} @@ -115,11 +115,11 @@ {%- else %} join_metrics__{{previous_cte_number}}.* {%- endif %} - {%- for metric in metric_tree.expression_set %} - {%- if metric_tree.ordered_expression_set[metric] == cte_number %} + {%- for metric_name in metric_tree.derived_set %} + {%- if metric_tree.ordered_derived_set[metric_name] == cte_number %} {#- this logic will parse an expression for divisions signs (/) and wrap all divisors in nullif functions to prevent divide by zero -#} {#- "1 / 2 / 3 / ... / N" results in "1 / nullif(2, 0) / nullif(3, 0) / ... 
/ nullif(N, 0)" -#} - {%- set metric_expression = metrics_dictionary[metric].expression %} + {%- set metric_expression = metrics_dictionary[metric_name].expression %} {%- if "/" in metric_expression -%} {%- set split_division_metric = metric_expression.split('/') -%} {%- set dividend = split_division_metric[0] -%} @@ -128,7 +128,7 @@ {%- else -%} {%- set expression = metric_expression -%} {%- endif %} - , ({{ expression | replace(".metric_value","") }}) as {{ metrics_dictionary[metric].name }} + , ({{ expression | replace(".metric_value","") }}) as {{ metrics_dictionary[metric_name].name }} {%- endif -%} {%- endfor -%} {% if loop.first %} @@ -161,11 +161,11 @@ {{ dim }}, {%- endfor %} - {%- for metric in metric_tree.parent_set|list + metric_tree.expression_set|list %} - {{metric}}{%- if not loop.last -%}, {%- endif -%} + {%- for metric_name in metric_tree.parent_set|list + metric_tree.derived_set|list %} + {{metric_name}}{%- if not loop.last -%}, {%- endif -%} {%- endfor %} - {%- if metric_tree.expression_set | length == 0 %} + {%- if metric_tree.derived_set | length == 0 %} from first_join_metrics {%- else %} from join_metrics__999 diff --git a/macros/sql_gen/gen_metric_cte.sql b/macros/sql_gen/gen_metric_cte.sql index 1ef6e06b..8b197035 100644 --- a/macros/sql_gen/gen_metric_cte.sql +++ b/macros/sql_gen/gen_metric_cte.sql @@ -1,21 +1,12 @@ -{%- macro gen_metric_cte(metric_name, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, treat_null_values_as_zero) -%} - {{ return(adapter.dispatch('gen_metric_cte', 'metrics')(metric_name, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, treat_null_values_as_zero)) }} +{%- macro gen_metric_cte(metrics_dictionary, model_name, model_values, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions) -%} + {{ return(adapter.dispatch('gen_metric_cte', 
'metrics')(metrics_dictionary, model_name, model_values, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions)) }} {%- endmacro -%} -{%- macro default__gen_metric_cte(metric_name, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, treat_null_values_as_zero) %} -{%- set combined_dimensions = calendar_dimensions | list + dimensions | list -%} +{%- macro default__gen_metric_cte(metrics_dictionary, model_name, model_values, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions) %} -, {{metric_name}}__final as ( +{%- set combined_dimensions = calendar_dimensions | list + dimensions | list -%} +, {{model_name}}__final as ( {# #} - {%- if not treat_null_values_as_zero -%} - {%- set metric_val = metric_name -%} - {%- else -%} - {%- if target.name == 'databricks' -%} - {%- set metric_val = "cast(coalesce(" ~ metric_name ~ ", 0) as numeric) as " ~ metric_name -%} - {%- else -%} - {%- set metric_val = "coalesce(" ~ metric_name ~ ", 0) as " ~ metric_name -%} - {%- endif %} - {%- endif %} select {%- if grain %} parent_metric_cte.date_{{grain}}, @@ -33,11 +24,20 @@ {%- for dim in dimensions %} parent_metric_cte.{{ dim }}, {%- endfor %} - {{ metric_val }} - + + {%- for metric_name in model_values.metric_names -%} + {# TODO: coalesce based on the value. 
Need to bring this config #} + {%- if not metrics_dictionary[metric_name].get("config").get("treat_null_values_as_zero", True) %} + {{ metric_name }} + {%- else %} + coalesce({{ metric_name }}, 0) as {{ metric_name }} + {%- endif %} + {%- if not loop.last-%},{%endif%} + {%- endfor %} + {%- if secondary_calculations | length > 0 %} - from {{metric_name}}__spine_time as parent_metric_cte - left outer join {{metric_name}}__aggregate + from {{model_name}}__spine_time as parent_metric_cte + left outer join {{model_name}}__aggregate using (date_{{grain}} {%- if combined_dimensions | length > 0 -%}, {{ combined_dimensions | join(", ") }} {%-endif-%} ) {% if not start_date or not end_date -%} @@ -46,31 +46,31 @@ parent_metric_cte.date_{{grain}} >= ( select min(case when has_data then date_{{grain}} end) - from {{metric_name}}__aggregate + from {{model_name}}__aggregate ) and parent_metric_cte.date_{{grain}} <= ( select max(case when has_data then date_{{grain}} end) - from {{metric_name}}__aggregate + from {{model_name}}__aggregate ) {% elif not start_date and end_date -%} parent_metric_cte.date_{{grain}} >= ( select min(case when has_data then date_{{grain}} end) - from {{metric_name}}__aggregate + from {{model_name}}__aggregate ) {% elif start_date and not end_date -%} parent_metric_cte.date_{{grain}} <= ( select max(case when has_data then date_{{grain}} end) - from {{metric_name}}__aggregate + from {{model_name}}__aggregate ) {%- endif %} ) {%- endif %} {%- else %} - from {{metric_name}}__aggregate as parent_metric_cte + from {{model_name}}__aggregate as parent_metric_cte {%- endif %} ) diff --git a/macros/sql_gen/gen_property_to_aggregate.sql b/macros/sql_gen/gen_property_to_aggregate.sql index 65008865..1f6f3f61 100644 --- a/macros/sql_gen/gen_property_to_aggregate.sql +++ b/macros/sql_gen/gen_property_to_aggregate.sql @@ -4,13 +4,13 @@ {% macro default__gen_property_to_aggregate(metric_dictionary, grain, dimensions, calendar_dimensions) %} {% if 
metric_dictionary.calculation_method == 'median' -%} - {{ return(adapter.dispatch('property_to_aggregate_median', 'metrics')(metric_dictionary.expression, grain, dimensions, calendar_dimensions)) }} + {{ return(adapter.dispatch('property_to_aggregate_median', 'metrics')(metric_dictionary, grain, dimensions, calendar_dimensions)) }} {% elif metric_dictionary.calculation_method == 'count' -%} - {{ return(adapter.dispatch('property_to_aggregate_count', 'metrics')()) }} + {{ return(adapter.dispatch('property_to_aggregate_count', 'metrics')(metric_dictionary)) }} {% elif metric_dictionary.expression and metric_dictionary.expression | replace('*', '') | trim != '' %} - {{ return(adapter.dispatch('property_to_aggregate_default', 'metrics')(metric_dictionary.expression)) }} + {{ return(adapter.dispatch('property_to_aggregate_default', 'metrics')(metric_dictionary)) }} {% else %} {%- do exceptions.raise_compiler_error("Expression to aggregate is required for non-count aggregation in metric `" ~ metric_dictionary.name ~ "`") -%} @@ -18,13 +18,13 @@ {%- endmacro -%} -{% macro default__property_to_aggregate_median(expression, grain, dimensions, calendar_dimensions) %} - ({{expression }}) as property_to_aggregate +{% macro default__property_to_aggregate_median(metric_dictionary, grain, dimensions, calendar_dimensions) %} + ({{metric_dictionary.expression }}) as property_to_aggregate__{{metric_dictionary.name}} {%- endmacro -%} -{% macro bigquery__property_to_aggregate_median(expression, grain, dimensions, calendar_dimensions) %} +{% macro bigquery__property_to_aggregate_median(metric_dictionary, grain, dimensions, calendar_dimensions) %} - percentile_cont({{expression }}, 0.5) over ( + percentile_cont({{metric_dictionary.expression }}, 0.5) over ( {% if grain or dimensions | length > 0 or calendar_dimensions | length > 0 -%} partition by {% if grain -%} @@ -45,14 +45,14 @@ {%- endif -%} {%- endfor %} {%- endif %} - ) as property_to_aggregate + ) as 
property_to_aggregate__{{metric_dictionary.name}} {%- endmacro -%} -{% macro default__property_to_aggregate_count() %} - 1 as property_to_aggregate +{% macro default__property_to_aggregate_count(metric_dictionary) %} + 1 as property_to_aggregate__{{metric_dictionary.name}} {%- endmacro -%} -{% macro default__property_to_aggregate_default(expression) %} - ({{expression }}) as property_to_aggregate +{% macro default__property_to_aggregate_default(metric_dictionary) %} + ({{metric_dictionary.expression }}) as property_to_aggregate__{{metric_dictionary.name}} {%- endmacro -%} \ No newline at end of file diff --git a/macros/sql_gen/gen_spine_time_cte.sql b/macros/sql_gen/gen_spine_time_cte.sql index 529d6126..48df06cb 100644 --- a/macros/sql_gen/gen_spine_time_cte.sql +++ b/macros/sql_gen/gen_spine_time_cte.sql @@ -1,10 +1,10 @@ -{%- macro gen_spine_time_cte(metric_name, grain, dimensions, secondary_calculations, relevant_periods, calendar_dimensions, dimensions_provided) -%} - {{ return(adapter.dispatch('gen_spine_time_cte', 'metrics')(metric_name, grain, dimensions, secondary_calculations, relevant_periods, calendar_dimensions, dimensions_provided)) }} +{%- macro gen_spine_time_cte(model_name, grain, dimensions, secondary_calculations, relevant_periods, calendar_dimensions, dimensions_provided) -%} + {{ return(adapter.dispatch('gen_spine_time_cte', 'metrics')(model_name, grain, dimensions, secondary_calculations, relevant_periods, calendar_dimensions, dimensions_provided)) }} {%- endmacro -%} -{% macro default__gen_spine_time_cte(metric_name, grain, dimensions, secondary_calculations, relevant_periods, calendar_dimensions, dimensions_provided) %} +{% macro default__gen_spine_time_cte(model_name, grain, dimensions, secondary_calculations, relevant_periods, calendar_dimensions, dimensions_provided) %} -, {{metric_name}}__spine_time as ( +, {{model_name}}__spine_time as ( select calendar.date_{{grain}} @@ -19,11 +19,11 @@ , calendar.{{ calendar_dim }} {%- endfor %} {%- 
for dim in dimensions %} - , {{metric_name}}__dims.{{ dim }} + , {{model_name}}__dims.{{ dim }} {%- endfor %} from calendar {%- if dimensions_provided %} - cross join {{metric_name}}__dims + cross join {{model_name}}__dims {%- endif %} {{ metrics.gen_group_by(grain,dimensions,calendar_dimensions,relevant_periods) }} diff --git a/macros/variables/get_faux_metric_tree.sql b/macros/variables/get_faux_metric_tree.sql index c9722e93..bc08fe6a 100644 --- a/macros/variables/get_faux_metric_tree.sql +++ b/macros/variables/get_faux_metric_tree.sql @@ -2,9 +2,9 @@ {%- set metric_tree = {'full_set':[]} %} {%- do metric_tree.update({'parent_set':[]}) -%} - {%- do metric_tree.update({'expression_set':[]}) -%} + {%- do metric_tree.update({'derived_set':[]}) -%} {%- do metric_tree.update({'base_set':metric_list}) -%} - {%- do metric_tree.update({'ordered_expression_set':{}}) -%} + {%- do metric_tree.update({'ordered_derived_set':{}}) -%} {% for metric_name in metric_list %} {% set metric_definition = develop_yml[metric_name]%} @@ -13,21 +13,21 @@ {%- do metric_tree.update({'full_set':set(metric_tree['full_set'])}) -%} {%- do metric_tree.update({'parent_set':set(metric_tree['parent_set'])}) -%} - {%- do metric_tree.update({'expression_set':set(metric_tree['expression_set'])}) -%} + {%- do metric_tree.update({'derived_set':set(metric_tree['derived_set'])}) -%} {% for metric_name in metric_tree['parent_set']|unique%} - {%- do metric_tree['ordered_expression_set'].pop(metric_name) -%} + {%- do metric_tree['ordered_derived_set'].pop(metric_name) -%} {% endfor %} {# This section overrides the derived set by ordering the metrics on their depth so they can be correctly referenced in the downstream sql query #} {% set ordered_expression_list = []%} - {% for item in metric_tree['ordered_expression_set']|dictsort(false, 'value') %} - {% if item[0] in metric_tree["expression_set"]%} + {% for item in metric_tree['ordered_derived_set']|dictsort(false, 'value') %} + {% if item[0] in 
metric_tree["derived_set"]%} {% do ordered_expression_list.append(item[0])%} {% endif %} {% endfor %} - {%- do metric_tree.update({'expression_set':ordered_expression_list}) -%} + {%- do metric_tree.update({'derived_set':ordered_expression_list}) -%} {%- do return(metric_tree) -%} diff --git a/macros/variables/get_metric_definition.sql b/macros/variables/get_metric_definition.sql index 965ae106..6f18acf4 100644 --- a/macros/variables/get_metric_definition.sql +++ b/macros/variables/get_metric_definition.sql @@ -11,6 +11,7 @@ {% do metrics_dictionary_dict.update({'config': metric_definition.config})%} {% if metric_definition.calculation_method != 'derived' %} {% set metric_model_name = metrics.get_metric_model_name(metric_model=metric_definition.model) %} + {% do metrics_dictionary_dict.update({'metric_model_name': metric_model_name }) %} {% do metrics_dictionary_dict.update({'metric_model': metrics.get_model_relation(metric_model_name, metric_name)}) %} {% endif %} diff --git a/macros/variables/get_metric_tree.sql b/macros/variables/get_metric_tree.sql index 9d88bbdd..c61820af 100644 --- a/macros/variables/get_metric_tree.sql +++ b/macros/variables/get_metric_tree.sql @@ -13,14 +13,14 @@ are both upstream of Metric C AND not derived metrics themselves. #} {%- do metric_tree.update({'parent_set':[]}) -%} {# The derived set is a list of derived metrics. This includes all derived metrics referenced in the macro itself OR upstream of the metrics referenced in the macro #} -{%- do metric_tree.update({'expression_set':[]}) -%} +{%- do metric_tree.update({'derived_set':[]}) -%} {# The base set is the list of metrics that are provided into the macro #} {%- do metric_tree.update({'base_set':[]}) -%} {# The ordered derived set is the list of derived metrics that are ordered based on their node depth. 
So if Metric C were downstream of Metric A and B, which were also derived metrics, Metric C would have the value of 999 (max depth) and A and B would have 998, representing that they are one depth upstream #} -{%- do metric_tree.update({'ordered_expression_set':{}}) -%} +{%- do metric_tree.update({'ordered_derived_set':{}}) -%} {% set base_set_list = []%} {% for metric in metric_list %} @@ -32,7 +32,7 @@ are one depth upstream #} {# Now we will iterate over the metric tree and make it a unique list to account for duplicates #} {% set full_set = [] %} {% set parent_set = [] %} -{% set expression_set = [] %} +{% set derived_set = [] %} {% set base_set = [] %} {% for metric_name in metric_tree['full_set']|unique%} @@ -45,24 +45,24 @@ are one depth upstream #} {% endfor %} {%- do metric_tree.update({'parent_set':parent_set}) -%} -{% for metric_name in metric_tree['expression_set']|unique%} - {% do expression_set.append(metric_name)%} +{% for metric_name in metric_tree['derived_set']|unique%} + {% do derived_set.append(metric_name)%} {% endfor %} -{%- do metric_tree.update({'expression_set':expression_set}) -%} +{%- do metric_tree.update({'derived_set':derived_set}) -%} {% for metric in metric_tree['parent_set']|unique%} - {%- do metric_tree['ordered_expression_set'].pop(metric) -%} + {%- do metric_tree['ordered_derived_set'].pop(metric) -%} {% endfor %} {# This section overrides the derived set by ordering the metrics on their depth so they can be correctly referenced in the downstream sql query #} {% set ordered_expression_list = []%} -{% for item in metric_tree['ordered_expression_set']|dictsort(false, 'value') %} - {% if item[0] in metric_tree["expression_set"]%} +{% for item in metric_tree['ordered_derived_set']|dictsort(false, 'value') %} + {% if item[0] in metric_tree["derived_set"]%} {% do ordered_expression_list.append(item[0])%} {% endif %} {% endfor %} -{%- do metric_tree.update({'expression_set':ordered_expression_list}) -%} +{%- do 
metric_tree.update({'derived_set':ordered_expression_list}) -%} {%- do return(metric_tree) -%} diff --git a/macros/variables/get_model_group.sql b/macros/variables/get_model_group.sql new file mode 100644 index 00000000..0cee8bd1 --- /dev/null +++ b/macros/variables/get_model_group.sql @@ -0,0 +1,69 @@ +{%- macro get_model_group(models_grouping, metric_model, metric_model_name, metric_name, metric_timestamp=none, metric_filters=none, metric_window=none) -%} + +{#- +This macro is called from get_models_grouping in order to calculate +the group for each model based on the inputs. This allows us to reduce +the complexity of the aforementioned macro because there is a factorial +combination of possibilities based on the inputs, minus some combinations +that are invalid. The models_grouping dictionary is updated in place and returned. + +By factorial, we mean that the three potential inputs can be combined in +a multitude of different ways in order to calculate the group. The potential +combinations are: + - timestamp + - filters + - timestamp + window + - timestamp + filters + - timestamp + filters + window + -#} + + {% set metric_model_list = [metric_model_name] %} + + {% if metric_timestamp %} + {% set timestamp_list = [ + metric_timestamp | lower + ]%} + {% else %} + {% set timestamp_list = [] %} + {% endif %} + + {% if metric_window %} + {% set window_list = [ + metric_window.count | lower + ,metric_window.period | lower + ]%} + {% else %} + {% set window_list = [] %} + {% endif %} + + {% if metric_filters %} + {% set filter_list = [] %} + {% for filter in metric_filters %} + {% do filter_list.append(filter.field | lower)%} + {% do filter_list.append(filter.operator | lower)%} + {% do filter_list.append(filter.value | string)%} {# filter values are case-sensitive in SQL, so they are not lowercased #} + {% endfor %} + {% else %} + {% set filter_list = [] %} + {% endif %} + + {% set group_list = (metric_model_list + timestamp_list + window_list + filter_list) | sort %} + {% set group_name = 'model_' ~ local_md5(group_list | join('_')) %} + + {% if not models_grouping[group_name] %} + {% do
models_grouping.update({group_name:{}})%} + {% do models_grouping[group_name].update({'metric_names':{}})%} + {% do models_grouping[group_name].update({'metric_model':metric_model})%} + {% do models_grouping[group_name].update({'timestamp':metric_timestamp})%} + {% do models_grouping[group_name].update({'filters':metric_filters})%} + {% do models_grouping[group_name].update({'window':metric_window})%} + {% do models_grouping[group_name].update({'metric_names':[metric_name]})%} + {% else %} + {% set metric_names = models_grouping[group_name]['metric_names'] %} + {% do metric_names.append(metric_name)%} + {% do models_grouping[group_name].update({'metric_names':metric_names})%} + {% endif %} + + {% do return(models_grouping) %} + +{%- endmacro -%} \ No newline at end of file diff --git a/macros/variables/get_models_grouping.sql b/macros/variables/get_models_grouping.sql new file mode 100644 index 00000000..f8a7bf54 --- /dev/null +++ b/macros/variables/get_models_grouping.sql @@ -0,0 +1,54 @@ +{%- macro get_models_grouping(metric_tree, metrics_dictionary) -%} +{#- +The purpose of this macro is to create a dictionary that can be used by +gen_base_query and gen_aggregate_cte in order to intelligently group +metrics together based on whether they can be queried in the same query. These +will be grouped together with a unique model name as the key and the value +containing the list of the metrics. This is complicated because we allow +different properties that affect the base query, so we can't do a single +grouping based on model. As such, if a metric contains one of these properties +we have to create a group for that specific combination.
+ +The properties that cause us to group the metric separately are: + - windows + - filters + - timestamp fields + +In order to ensure consistency, we will also include those values in the +dictionary so we can reference them from the metrics grouping (ie a single +location) instead of from a randomly selected metric in the list of metrics. + +An example output looks like: +{ + 'model_4f977327f02b5c04af4337f54ed81a17': { + 'metric_names':['metric_a','metric_b'], + 'timestamp': order_date, + 'filters':[ + MetricFilter(field='had_discount', operator='is', value='true'), + MetricFilter(field='order_country', operator='=', value='CA') + ], + 'window': MetricTime(count=14, period='month') + } +} + -#} + + {% set models_grouping = {} %} + + {% for metric_name in metric_tree.parent_set %} + {% set metric_dictionary = metrics_dictionary[metric_name] %} + + {% set models_grouping = metrics.get_model_group( + models_grouping=models_grouping, + metric_model=metric_dictionary.metric_model, + metric_model_name=metric_dictionary.metric_model_name, + metric_name=metric_dictionary.name, + metric_timestamp=metric_dictionary.timestamp, + metric_filters=metric_dictionary.filters, + metric_window=metric_dictionary.window + ) %} + + {% endfor %} + + {% do return(models_grouping) %} + +{%- endmacro -%} \ No newline at end of file diff --git a/macros/variables/update_faux_metric_tree.sql b/macros/variables/update_faux_metric_tree.sql index 132f3510..9df837f4 100644 --- a/macros/variables/update_faux_metric_tree.sql +++ b/macros/variables/update_faux_metric_tree.sql @@ -13,7 +13,7 @@ {# Here we're starting with the highest level and assigning the metric tree that first level value.
This is used before de-duping in get_faux_metric_tree #} - {%- do metric_tree["ordered_expression_set"].update({metric_definition.name:metric_count}) -%} + {%- do metric_tree["ordered_derived_set"].update({metric_definition.name:metric_count}) -%} {%- set metric_count = metric_count - 1 -%} {# Here we create two sets, sets being the same as lists but they account for uniqueness. @@ -70,8 +70,8 @@ {%- endif -%} - {%- set expression_set_plus = ( metric_tree["full_set"] | reject('in',metric_tree["parent_set"]) | list) -%} - {%- do metric_tree.update({'expression_set':expression_set_plus}) -%} + {%- set derived_set_plus = ( metric_tree["full_set"] | reject('in',metric_tree["parent_set"]) | list) -%} + {%- do metric_tree.update({'derived_set':derived_set_plus}) -%} {%- do return(metric_tree) -%} diff --git a/macros/variables/update_metric_tree.sql b/macros/variables/update_metric_tree.sql index 33440589..d483c877 100644 --- a/macros/variables/update_metric_tree.sql +++ b/macros/variables/update_metric_tree.sql @@ -10,7 +10,7 @@ {%- endif -%} - {%- do metric_tree["ordered_expression_set"].update({metric.name:metric_count}) -%} + {%- do metric_tree["ordered_derived_set"].update({metric.name:metric_count}) -%} {%- set metric_count = metric_count - 1 -%} {# Here we create two sets, sets being the same as lists but they account for uniqueness. 
@@ -61,8 +61,8 @@ {%- endif -%} - {%- set expression_set_plus = ( metric_tree["full_set"] | reject('in',metric_tree["parent_set"]) | list) -%} - {%- do metric_tree.update({'expression_set':expression_set_plus}) -%} + {%- set derived_set_plus = ( metric_tree["full_set"] | reject('in',metric_tree["parent_set"]) | list) -%} + {%- do metric_tree.update({'derived_set':derived_set_plus}) -%} {%- do return(metric_tree) -%} From 0398e24ff06fb6a50d579dc223ebff9157980d3d Mon Sep 17 00:00:00 2001 From: Callum McCann Date: Thu, 12 Jan 2023 20:54:47 -0600 Subject: [PATCH 2/8] removing calculate code --- dev-requirements.txt | 26 +++++++++++++------------- macros/calculate.sql | 3 --- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index fdf823c1..971f076f 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,21 +2,21 @@ pytest pytest-dotenv # Bleeding edge -git+https://github.com/dbt-labs/dbt-core.git@main#egg=dbt-tests-adapter&subdirectory=tests/adapter -git+https://github.com/dbt-labs/dbt-core.git@main#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-core.git@main#egg=dbt-postgres&subdirectory=plugins/postgres -git+https://github.com/dbt-labs/dbt-redshift.git -git+https://github.com/dbt-labs/dbt-snowflake.git -git+https://github.com/dbt-labs/dbt-bigquery.git -git+https://github.com/databricks/dbt-databricks.git +# git+https://github.com/dbt-labs/dbt-core.git@main#egg=dbt-tests-adapter&subdirectory=tests/adapter +# git+https://github.com/dbt-labs/dbt-core.git@main#egg=dbt-core&subdirectory=core +# git+https://github.com/dbt-labs/dbt-core.git@main#egg=dbt-postgres&subdirectory=plugins/postgres +# git+https://github.com/dbt-labs/dbt-redshift.git +# git+https://github.com/dbt-labs/dbt-snowflake.git +# git+https://github.com/dbt-labs/dbt-bigquery.git +# git+https://github.com/databricks/dbt-databricks.git # Most recent release candidates -# dbt-tests-adapter==1.3.0 -# dbt-core==1.3.0 -# 
dbt-redshift==1.3.0 -# dbt-snowflake==1.3.0 -# dbt-bigquery==1.3.0 -# dbt-databricks==1.3.0 +dbt-tests-adapter==1.4.0rc1 +dbt-core==1.4.0rc1 +dbt-redshift==1.4.0rc1 +dbt-snowflake==1.4.0rc1 +dbt-bigquery==1.4.0rc1 +# dbt-databricks==1.4.0 # Most recent stable release # dbt-tests-adapter==1.3.0 diff --git a/macros/calculate.sql b/macros/calculate.sql index 66af10ee..bcd79c64 100644 --- a/macros/calculate.sql +++ b/macros/calculate.sql @@ -20,9 +20,6 @@ {#- Here we are creating the metrics dictionary which contains all of the metric information needed for sql gen. -#} {%- set metrics_dictionary = metrics.get_metrics_dictionary(metric_tree=metric_tree) -%} - {#- Here we are creating the metric grouping that we use to determine if metrics can be pulled from the same base query -#} - {# {%- set metrics_grouping = metrics.get_metrics_grouping(metric_tree=metric_tree,metrics_dictionary=metrics_dictionary) -%} #} - {#- ############ VALIDATION - Make sure everything is good! ############ -#} From a9ac0850d3dc0d5a9bb563f99c7436128176ee8b Mon Sep 17 00:00:00 2001 From: Callum McCann Date: Tue, 17 Jan 2023 09:24:45 -0600 Subject: [PATCH 3/8] adding median allowlist --- .changes/unreleased/Under the Hood-20230117-092325.yaml | 7 +++++++ macros/variables/get_metric_allowlist.sql | 1 + 2 files changed, 8 insertions(+) create mode 100644 .changes/unreleased/Under the Hood-20230117-092325.yaml diff --git a/.changes/unreleased/Under the Hood-20230117-092325.yaml b/.changes/unreleased/Under the Hood-20230117-092325.yaml new file mode 100644 index 00000000..2de58cd9 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20230117-092325.yaml @@ -0,0 +1,7 @@ +kind: Under the Hood +body: Adding grouping for query generation +time: 2023-01-17T09:23:25.796327-06:00 +custom: + Author: callum-mcdata + Issue: "114" + PR: "211" diff --git a/macros/variables/get_metric_allowlist.sql b/macros/variables/get_metric_allowlist.sql index 7c6c8900..9896391e 100644 --- 
a/macros/variables/get_metric_allowlist.sql +++ b/macros/variables/get_metric_allowlist.sql @@ -6,6 +6,7 @@ {# Keys are the primary aggregation, values are the permitted aggregations to run in secondary calculations. #} {% do return ({ "average": ['min', 'max'], + "median": ['min', 'max'], "count": ['min', 'max', 'sum', 'average'], "count_distinct": ['min', 'max', 'sum', 'average'], "sum": ['min', 'max', 'sum', 'average'], From ff8eabf922cad14ac473877fb4bdc7cb705b242f Mon Sep 17 00:00:00 2001 From: Callum McCann Date: Tue, 17 Jan 2023 09:51:41 -0600 Subject: [PATCH 4/8] adding secondary calc test --- .../test_invalid_secondary_calculations.py | 184 ++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 tests/functional/metric_options/secondary_calculations/test_invalid_secondary_calculations.py diff --git a/tests/functional/metric_options/secondary_calculations/test_invalid_secondary_calculations.py b/tests/functional/metric_options/secondary_calculations/test_invalid_secondary_calculations.py new file mode 100644 index 00000000..9fbab69c --- /dev/null +++ b/tests/functional/metric_options/secondary_calculations/test_invalid_secondary_calculations.py @@ -0,0 +1,184 @@ +from struct import pack +import os +import pytest +from dbt.tests.util import run_dbt + +# our file contents +from tests.functional.fixtures import ( + fact_orders_source_csv, + fact_orders_sql, + fact_orders_yml, + custom_calendar_sql + +) + +# models/avg_metric.sql +avg_metric_sql = """ +select * +from +{{ metrics.calculate(metric('avg_metric'), + grain='week', + secondary_calculations=[metrics.rolling(aggregate="sum",interval=2)] + ) +}} +""" + +# models/avg_metric.yml +avg_metric_yml = """ +version: 2 +models: + - name: avg_metric + +metrics: + - name: avg_metric + model: ref('fact_orders') + label: Count Distinct + timestamp: order_date + time_grains: [day, week, month] + calculation_method: average + expression: customer_id + dimensions: + - had_discount + - order_country +""" 
+ +class TestInvalidAverageSecondaryCalc: + + # configuration in dbt_project.yml + # setting bigquery as table to get around query complexity + # resource constraints with compounding views + if os.getenv('dbt_target') == 'bigquery': + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "name": "example", + "models": {"+materialized": "table"}, + } + else: + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "name": "example", + "models": {"+materialized": "view"} + } + + # install current repo as package + @pytest.fixture(scope="class") + def packages(self): + return { + "packages": [ + {"local": os.getcwd()} + ] + } + + # everything that goes in the "seeds" directory + @pytest.fixture(scope="class") + def seeds(self): + return { + "fact_orders_source.csv": fact_orders_source_csv, + } + + # everything that goes in the "models" directory + @pytest.fixture(scope="class") + def models(self): + return { + "fact_orders.sql": fact_orders_sql, + "fact_orders.yml": fact_orders_yml, + "avg_metric.sql": avg_metric_sql, + "avg_metric.yml": avg_metric_yml + } + + def test_invalid_average_metric(self,project,): + # running deps to install package + run_dbt(["deps"]) + # seed seeds + run_dbt(["seed"]) + # the run is expected to fail (expect_pass = False) + run_dbt(["run"], expect_pass = False) + + +# models/median_metric.sql +median_metric_sql = """ +select * +from +{{ metrics.calculate(metric('median_metric'), + grain='week', + secondary_calculations=[ + metrics.rolling(aggregate="sum",interval=2) + ] + ) +}} +""" + +# models/median_metric.yml +median_metric_yml = """ +version: 2 +models: + - name: median_metric + +metrics: + - name: median_metric + model: ref('fact_orders') + label: Count Distinct + timestamp: order_date + time_grains: [day, week, month] + calculation_method: median + expression: customer_id + dimensions: + - had_discount + - order_country +""" + +class TestInvalidMedianSecondaryCalc: + + # configuration in dbt_project.yml + # setting bigquery
as table to get around query complexity + # resource constraints with compounding views + if os.getenv('dbt_target') == 'bigquery': + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "name": "example", + "models": {"+materialized": "table"}, + } + else: + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "name": "example", + "models": {"+materialized": "view"} + } + + # install current repo as package + @pytest.fixture(scope="class") + def packages(self): + return { + "packages": [ + {"local": os.getcwd()} + ] + } + + # everything that goes in the "seeds" directory + @pytest.fixture(scope="class") + def seeds(self): + return { + "fact_orders_source.csv": fact_orders_source_csv + } + + # everything that goes in the "models" directory + @pytest.fixture(scope="class") + def models(self): + return { + "fact_orders.sql": fact_orders_sql, + "fact_orders.yml": fact_orders_yml, + "median_metric.sql": median_metric_sql, + "median_metric.yml": median_metric_yml + } + + def test_invalid_median_metric(self,project,): + # running deps to install package + run_dbt(["deps"]) + # seed seeds + run_dbt(["seed"]) + # the run is expected to fail (expect_pass = False) + run_dbt(["run"], expect_pass = False) \ No newline at end of file From 386d1fa246378d1e652ecc054150fcac8cb315be Mon Sep 17 00:00:00 2001 From: Callum McCann Date: Tue, 17 Jan 2023 12:53:16 -0600 Subject: [PATCH 5/8] fixing variable names --- macros/get_metric_sql.sql | 6 ++-- macros/sql_gen/build_metric_sql.sql | 14 ++++---- macros/sql_gen/gen_aggregate_cte.sql | 16 ++++----- macros/sql_gen/gen_base_query.sql | 16 ++++----- macros/sql_gen/gen_calendar_join.sql | 44 +++++++++++------------ macros/sql_gen/gen_dimensions_cte.sql | 10 +++--- macros/sql_gen/gen_final_cte.sql | 4 +-- macros/sql_gen/gen_joined_metrics_cte.sql | 22 ++++++------ macros/sql_gen/gen_metric_cte.sql | 24 ++++++------- macros/sql_gen/gen_spine_time_cte.sql | 12 +++---- 10 files changed, 84 insertions(+), 84 deletions(-) diff --git
a/macros/get_metric_sql.sql b/macros/get_metric_sql.sql index d9f844e7..a98b3651 100644 --- a/macros/get_metric_sql.sql +++ b/macros/get_metric_sql.sql @@ -55,7 +55,7 @@ metrics there are -#} {#- If composite, we begin by looping through each of the metric names that make up the composite metric. -#} -{%- for model_name, model_values in models_grouping.items() -%} +{%- for group_name, group_values in models_grouping.items() -%} {{ metrics.build_metric_sql( metrics_dictionary=metrics_dictionary, @@ -68,8 +68,8 @@ up the composite metric. -#} calendar_dimensions=calendar_dimensions, dimensions_provided=dimensions_provided, total_dimension_count=total_dimension_count, - model_name=model_name, - model_values=model_values + group_name=group_name, + group_values=group_values ) }} diff --git a/macros/sql_gen/build_metric_sql.sql b/macros/sql_gen/build_metric_sql.sql index 93e25572..87f97917 100644 --- a/macros/sql_gen/build_metric_sql.sql +++ b/macros/sql_gen/build_metric_sql.sql @@ -1,4 +1,4 @@ -{%- macro build_metric_sql(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, dimensions_provided, total_dimension_count, model_name, model_values) %} +{%- macro build_metric_sql(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, dimensions_provided, total_dimension_count, group_name, group_values) %} {#- This is the SQL Gen part - we've broken each component out into individual macros -#} {#- We broke this out so it can loop for composite metrics -#} @@ -12,8 +12,8 @@ relevant_periods=relevant_periods, calendar_dimensions=calendar_dimensions, total_dimension_count=total_dimension_count, - model_name=model_name, - model_values=model_values + group_name=group_name, + group_values=group_values ) }} {#- Diverging path for secondary calcs and needing to datespine -#} @@ -22,14 +22,14 @@ {%- if dimensions_provided == true -%} {{ 
metrics.gen_dimensions_cte( - model_name=model_name, + group_name=group_name, dimensions=dimensions ) }} {%- endif -%} {{ metrics.gen_spine_time_cte( - model_name=model_name, + group_name=group_name, grain=grain, dimensions=dimensions, secondary_calculations=secondary_calculations, @@ -42,8 +42,8 @@ {{ metrics.gen_metric_cte( metrics_dictionary=metrics_dictionary, - model_name=model_name, - model_values=model_values, + group_name=group_name, + group_values=group_values, grain=grain, dimensions=dimensions, secondary_calculations=secondary_calculations, diff --git a/macros/sql_gen/gen_aggregate_cte.sql b/macros/sql_gen/gen_aggregate_cte.sql index 734c7da9..f5474530 100644 --- a/macros/sql_gen/gen_aggregate_cte.sql +++ b/macros/sql_gen/gen_aggregate_cte.sql @@ -1,10 +1,10 @@ -{%- macro gen_aggregate_cte(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, model_name, model_values) -%} - {{ return(adapter.dispatch('gen_aggregate_cte', 'metrics')(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, model_name, model_values)) }} +{%- macro gen_aggregate_cte(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, group_name, group_values) -%} + {{ return(adapter.dispatch('gen_aggregate_cte', 'metrics')(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, group_name, group_values)) }} {%- endmacro -%} -{%- macro default__gen_aggregate_cte(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, model_name, model_values) %} +{%- macro default__gen_aggregate_cte(metrics_dictionary, grain, dimensions, 
secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, group_name, group_values) %} -, {{model_name}}__aggregate as ( +, {{group_name}}__aggregate as ( {# This is the most important CTE. Instead of joining all relevant information and THEN aggregating, we are instead aggregating from the beginning and then joining downstream for performance. Additionally, we're using a subquery instead @@ -42,7 +42,7 @@ {#- This line performs the relevant aggregation by calling the gen_primary_metric_aggregate macro. Take a look at that one if you're curious -#} - {%- for metric_name in model_values.metric_names -%} + {%- for metric_name in group_values.metric_names -%} {{ metrics.gen_primary_metric_aggregate(metrics_dictionary[metric_name].calculation_method, 'property_to_aggregate__'~metric_name) }} as {{ metric_name }} {%- if not loop.last -%},{%- endif -%} {%- endfor%} @@ -56,8 +56,8 @@ relevant_periods=relevant_periods, calendar_dimensions=calendar_dimensions, total_dimension_count=total_dimension_count, - model_name=model_name, - model_values=model_values + group_name=group_name, + group_values=group_values ) }} ) as base_query @@ -68,7 +68,7 @@ the same windows & filters, we can base the conditional off of the first value in the list because the order doesn't matter. 
-#} - {%- if model_values.window is not none and grain %} + {%- if group_values.window is not none and grain %} and date_{{grain}} = window_filter_date {%- endif %} {{ metrics.gen_group_by(grain, dimensions, calendar_dimensions, relevant_periods) }} diff --git a/macros/sql_gen/gen_base_query.sql b/macros/sql_gen/gen_base_query.sql index 0f815482..572682db 100644 --- a/macros/sql_gen/gen_base_query.sql +++ b/macros/sql_gen/gen_base_query.sql @@ -1,8 +1,8 @@ -{% macro gen_base_query(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, model_name, model_values) %} - {{ return(adapter.dispatch('gen_base_query', 'metrics')(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, model_name, model_values)) }} +{% macro gen_base_query(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, group_name, group_values) %} + {{ return(adapter.dispatch('gen_base_query', 'metrics')(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, group_name, group_values)) }} {% endmacro %} -{% macro default__gen_base_query(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, model_name, model_values) %} +{% macro default__gen_base_query(metrics_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions, total_dimension_count, group_name, group_values) %} {# This is the "base" CTE which selects the fields we need to correctly calculate the metric. 
-#} select @@ -12,7 +12,7 @@ the same windows & filters, we can base the conditional off of the first value in the list because the order doesn't matter. -#} - cast(base_model.{{model_values.timestamp}} as date) as metric_date_day, + cast(base_model.{{group_values.timestamp}} as date) as metric_date_day, calendar.date_{{ grain }} as date_{{grain}}, calendar.date_day as window_filter_date, {%- if secondary_calculations | length > 0 %} @@ -28,19 +28,19 @@ {%- for calendar_dim in calendar_dimensions -%} calendar.{{ calendar_dim }}, {%- endfor -%} - {%- for metric_name in model_values.metric_names -%} + {%- for metric_name in group_values.metric_names -%} {{ metrics.gen_property_to_aggregate(metrics_dictionary[metric_name], grain, dimensions, calendar_dimensions) }} {%- if not loop.last -%},{%- endif -%} {%- endfor%} - from {{ model_values.metric_model }} base_model + from {{ group_values.metric_model }} base_model {# -#} {%- if grain or calendar_dimensions|length > 0 -%} - {{ metrics.gen_calendar_join(model_values) }} + {{ metrics.gen_calendar_join(group_values) }} {%- endif -%} {# #} where 1=1 {#- -#} - {{ metrics.gen_filters(model_values, start_date, end_date) }} + {{ metrics.gen_filters(group_values, start_date, end_date) }} {# #} {%- endmacro -%} \ No newline at end of file diff --git a/macros/sql_gen/gen_calendar_join.sql b/macros/sql_gen/gen_calendar_join.sql index 2ee12af4..7869d5d6 100644 --- a/macros/sql_gen/gen_calendar_join.sql +++ b/macros/sql_gen/gen_calendar_join.sql @@ -1,43 +1,43 @@ -{% macro gen_calendar_join(model_values) %} - {{ return(adapter.dispatch('gen_calendar_join', 'metrics')(model_values)) }} +{% macro gen_calendar_join(group_values) %} + {{ return(adapter.dispatch('gen_calendar_join', 'metrics')(group_values)) }} {%- endmacro -%} -{% macro default__gen_calendar_join(model_values) %} +{% macro default__gen_calendar_join(group_values) %} left join calendar - {%- if model_values.window is not none %} - on 
cast(base_model.{{model_values.timestamp}} as date) > dateadd({{model_values.window.period}}, -{{model_values.window.count}}, calendar.date_day) - and cast(base_model.{{metric_dictionary.timestamp}} as date) <= calendar.date_day + {%- if group_values.window is not none %} + on cast(base_model.{{group_values.timestamp}} as date) > dateadd({{group_values.window.period}}, -{{group_values.window.count}}, calendar.date_day) + and cast(base_model.{{group_values.timestamp}} as date) <= calendar.date_day {%- else %} - on cast(base_model.{{model_values.timestamp}} as date) = calendar.date_day + on cast(base_model.{{group_values.timestamp}} as date) = calendar.date_day {% endif -%} {% endmacro %} -{% macro bigquery__gen_calendar_join(model_values) %} +{% macro bigquery__gen_calendar_join(group_values) %} left join calendar - {%- if model_values.window is not none %} - on cast(base_model.{{model_values.timestamp}} as date) > date_sub(calendar.date_day, interval {{model_values.window.count}} {{model_values.window.period}}) - and cast(base_model.{{model_values.timestamp}} as date) <= calendar.date_day + {%- if group_values.window is not none %} + on cast(base_model.{{group_values.timestamp}} as date) > date_sub(calendar.date_day, interval {{group_values.window.count}} {{group_values.window.period}}) + and cast(base_model.{{group_values.timestamp}} as date) <= calendar.date_day {%- else %} - on cast(base_model.{{model_values.timestamp}} as date) = calendar.date_day + on cast(base_model.{{group_values.timestamp}} as date) = calendar.date_day {% endif -%} {% endmacro %} -{% macro postgres__gen_calendar_join(model_values) %} +{% macro postgres__gen_calendar_join(group_values) %} left join calendar - {%- if model_values.window is not none %} - on cast(base_model.{{model_values.timestamp}} as date) > calendar.date_day - interval '{{model_values.window.count}} {{model_values.window.period}}' - and cast(base_model.{{model_values.timestamp}} as date) <= calendar.date_day + {%- if 
group_values.window is not none %} + on cast(base_model.{{group_values.timestamp}} as date) > calendar.date_day - interval '{{group_values.window.count}} {{group_values.window.period}}' + and cast(base_model.{{group_values.timestamp}} as date) <= calendar.date_day {%- else %} - on cast(base_model.{{model_values.timestamp}} as date) = calendar.date_day + on cast(base_model.{{group_values.timestamp}} as date) = calendar.date_day {% endif -%} {% endmacro %} -{% macro redshift__gen_calendar_join(model_values) %} +{% macro redshift__gen_calendar_join(group_values) %} left join calendar - {%- if model_values.window is not none %} - on cast(base_model.{{model_values.timestamp}} as date) > dateadd({{model_values.window.period}}, -{{model_values.window.count}}, calendar.date_day) - and cast(base_model.{{model_values.timestamp}} as date) <= calendar.date_day + {%- if group_values.window is not none %} + on cast(base_model.{{group_values.timestamp}} as date) > dateadd({{group_values.window.period}}, -{{group_values.window.count}}, calendar.date_day) + and cast(base_model.{{group_values.timestamp}} as date) <= calendar.date_day {%- else %} - on cast(base_model.{{model_values.timestamp}} as date) = calendar.date_day + on cast(base_model.{{group_values.timestamp}} as date) = calendar.date_day {% endif -%} {% endmacro %} diff --git a/macros/sql_gen/gen_dimensions_cte.sql b/macros/sql_gen/gen_dimensions_cte.sql index a4913e84..a13ef291 100644 --- a/macros/sql_gen/gen_dimensions_cte.sql +++ b/macros/sql_gen/gen_dimensions_cte.sql @@ -1,16 +1,16 @@ -{%- macro gen_dimensions_cte(model_name, dimensions) -%} - {{ return(adapter.dispatch('gen_dimensions_cte', 'metrics')(model_name, dimensions)) }} +{%- macro gen_dimensions_cte(group_name, dimensions) -%} + {{ return(adapter.dispatch('gen_dimensions_cte', 'metrics')(group_name, dimensions)) }} {%- endmacro -%} -{% macro default__gen_dimensions_cte(model_name, dimensions) %} +{% macro default__gen_dimensions_cte(group_name, dimensions) %} 
-, {{model_name}}__dims as ( +, {{group_name}}__dims as ( select distinct {%- for dim in dimensions %} {{ dim }}{%- if not loop.last -%},{% endif -%} {%- endfor %} - from {{model_name}}__aggregate + from {{group_name}}__aggregate ) {%- endmacro -%} diff --git a/macros/sql_gen/gen_final_cte.sql b/macros/sql_gen/gen_final_cte.sql index 84f61b9c..e08210d6 100644 --- a/macros/sql_gen/gen_final_cte.sql +++ b/macros/sql_gen/gen_final_cte.sql @@ -26,7 +26,7 @@ select {%- if models_grouping| length > 1 or metric_tree['derived_set'] | length > 0 %} from joined_metrics {%- else %} -from {% for model_name, model_values in models_grouping.items()-%}{{model_name}}__final {%-endfor-%} +from {% for group_name, group_values in models_grouping.items()-%}{{group_name}}__final {%-endfor-%} {%- endif %} {# metric where clauses #} {%- if where %} @@ -74,7 +74,7 @@ select {# {%- for metric_name in metric_tree.full_set %} {{metric_name}}{%if not loop.last%},{%endif%} {%- endfor %} #} -from {% for model_name, model_values in models_grouping.items()-%}{{model_name}}__final {%-endfor-%} +from {% for group_name, group_values in models_grouping.items()-%}{{group_name}}__final {%-endfor-%} {%- if where %} where {{ where }} {%- endif -%} diff --git a/macros/sql_gen/gen_joined_metrics_cte.sql b/macros/sql_gen/gen_joined_metrics_cte.sql index 2819c5b1..742bf941 100644 --- a/macros/sql_gen/gen_joined_metrics_cte.sql +++ b/macros/sql_gen/gen_joined_metrics_cte.sql @@ -26,8 +26,8 @@ {%- endif -%} {%- for calendar_dim in calendar_dimensions %} coalesce( - {%- for model_name, model_values in models_grouping.items() %} - {{model_name}}__final.{{ calendar_dim }}{%- if not loop.last -%},{% endif %} + {%- for group_name, group_values in models_grouping.items() %} + {{group_name}}__final.{{ calendar_dim }}{%- if not loop.last -%},{% endif %} {%- if models_grouping | length == 1 -%} , NULL {%- endif -%} @@ -36,8 +36,8 @@ {% endfor %} {%- for period in relevant_periods %} coalesce( - {%- for model_name, 
model_values in models_grouping.items() %} - {{model_name}}__final.date_{{ period }} {%- if not loop.last -%},{% endif %} + {%- for group_name, group_values in models_grouping.items() %} + {{group_name}}__final.date_{{ period }} {%- if not loop.last -%},{% endif %} {%- if models_grouping | length == 1 %} , NULL {%- endif -%} @@ -46,8 +46,8 @@ {%- endfor %} {%- for dim in dimensions %} coalesce( - {%- for model_name, model_values in models_grouping.items() %} - {{model_name}}__final.{{ dim }} {%- if not loop.last -%},{% endif %} + {%- for group_name, group_values in models_grouping.items() %} + {{group_name}}__final.{{ dim }} {%- if not loop.last -%},{% endif %} {%- if models_grouping | length == 1 %} , NULL {%- endif -%} @@ -63,12 +63,12 @@ {%- endif %} {%- endfor %} {#- Loop through leaf metric list -#} - {% for model_name, model_values in models_grouping.items() %} + {% for group_name, group_values in models_grouping.items() %} {%- if loop.first %} - from {{ model_name }}__final + from {{ group_name }}__final {%- else %} {%- if grain %} - full outer join {{model_name}}__final + full outer join {{group_name}}__final using ( date_{{grain}} {%- for calendar_dim in calendar_dimensions %} @@ -80,7 +80,7 @@ ) {%- else -%} {% if dimension_count != 0 %} - full outer join {{model_name}}__final + full outer join {{group_name}}__final using ( {%- for calendar_dim in calendar_dimensions -%} {%- if not loop.first -%},{%- endif -%} {{ calendar_dim }} @@ -97,7 +97,7 @@ {%- endfor -%} ) {%- elif dimension_count == 0 %} - cross join {{model_name}}__final + cross join {{group_name}}__final {%- endif %} {%- endif %} {%- endif -%} diff --git a/macros/sql_gen/gen_metric_cte.sql b/macros/sql_gen/gen_metric_cte.sql index 8b197035..2e4e5184 100644 --- a/macros/sql_gen/gen_metric_cte.sql +++ b/macros/sql_gen/gen_metric_cte.sql @@ -1,11 +1,11 @@ -{%- macro gen_metric_cte(metrics_dictionary, model_name, model_values, grain, dimensions, secondary_calculations, start_date, end_date, 
relevant_periods, calendar_dimensions) -%} - {{ return(adapter.dispatch('gen_metric_cte', 'metrics')(metrics_dictionary, model_name, model_values, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions)) }} +{%- macro gen_metric_cte(metrics_dictionary, group_name, group_values, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions) -%} + {{ return(adapter.dispatch('gen_metric_cte', 'metrics')(metrics_dictionary, group_name, group_values, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions)) }} {%- endmacro -%} -{%- macro default__gen_metric_cte(metrics_dictionary, model_name, model_values, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions) %} +{%- macro default__gen_metric_cte(metrics_dictionary, group_name, group_values, grain, dimensions, secondary_calculations, start_date, end_date, relevant_periods, calendar_dimensions) %} {%- set combined_dimensions = calendar_dimensions | list + dimensions | list -%} -, {{model_name}}__final as ( +, {{group_name}}__final as ( {# #} select {%- if grain %} @@ -25,7 +25,7 @@ parent_metric_cte.{{ dim }}, {%- endfor %} - {%- for metric_name in model_values.metric_names -%} + {%- for metric_name in group_values.metric_names -%} {# TODO: coalesce based on the value. 
Need to bring this config #} {%- if not metrics_dictionary[metric_name].get("config").get("treat_null_values_as_zero", True) %} {{ metric_name }} @@ -36,8 +36,8 @@ {%- endfor %} {%- if secondary_calculations | length > 0 %} - from {{model_name}}__spine_time as parent_metric_cte - left outer join {{model_name}}__aggregate + from {{group_name}}__spine_time as parent_metric_cte + left outer join {{group_name}}__aggregate using (date_{{grain}} {%- if combined_dimensions | length > 0 -%}, {{ combined_dimensions | join(", ") }} {%-endif-%} ) {% if not start_date or not end_date -%} @@ -46,31 +46,31 @@ parent_metric_cte.date_{{grain}} >= ( select min(case when has_data then date_{{grain}} end) - from {{model_name}}__aggregate + from {{group_name}}__aggregate ) and parent_metric_cte.date_{{grain}} <= ( select max(case when has_data then date_{{grain}} end) - from {{model_name}}__aggregate + from {{group_name}}__aggregate ) {% elif not start_date and end_date -%} parent_metric_cte.date_{{grain}} >= ( select min(case when has_data then date_{{grain}} end) - from {{model_name}}__aggregate + from {{group_name}}__aggregate ) {% elif start_date and not end_date -%} parent_metric_cte.date_{{grain}} <= ( select max(case when has_data then date_{{grain}} end) - from {{model_name}}__aggregate + from {{group_name}}__aggregate ) {%- endif %} ) {%- endif %} {%- else %} - from {{model_name}}__aggregate as parent_metric_cte + from {{group_name}}__aggregate as parent_metric_cte {%- endif %} ) diff --git a/macros/sql_gen/gen_spine_time_cte.sql b/macros/sql_gen/gen_spine_time_cte.sql index 48df06cb..0346a4c0 100644 --- a/macros/sql_gen/gen_spine_time_cte.sql +++ b/macros/sql_gen/gen_spine_time_cte.sql @@ -1,10 +1,10 @@ -{%- macro gen_spine_time_cte(model_name, grain, dimensions, secondary_calculations, relevant_periods, calendar_dimensions, dimensions_provided) -%} - {{ return(adapter.dispatch('gen_spine_time_cte', 'metrics')(model_name, grain, dimensions, secondary_calculations, 
relevant_periods, calendar_dimensions, dimensions_provided)) }} +{%- macro gen_spine_time_cte(group_name, grain, dimensions, secondary_calculations, relevant_periods, calendar_dimensions, dimensions_provided) -%} + {{ return(adapter.dispatch('gen_spine_time_cte', 'metrics')(group_name, grain, dimensions, secondary_calculations, relevant_periods, calendar_dimensions, dimensions_provided)) }} {%- endmacro -%} -{% macro default__gen_spine_time_cte(model_name, grain, dimensions, secondary_calculations, relevant_periods, calendar_dimensions, dimensions_provided) %} +{% macro default__gen_spine_time_cte(group_name, grain, dimensions, secondary_calculations, relevant_periods, calendar_dimensions, dimensions_provided) %} -, {{model_name}}__spine_time as ( +, {{group_name}}__spine_time as ( select calendar.date_{{grain}} @@ -19,11 +19,11 @@ , calendar.{{ calendar_dim }} {%- endfor %} {%- for dim in dimensions %} - , {{model_name}}__dims.{{ dim }} + , {{group_name}}__dims.{{ dim }} {%- endfor %} from calendar {%- if dimensions_provided %} - cross join {{model_name}}__dims + cross join {{group_name}}__dims {%- endif %} {{ metrics.gen_group_by(grain,dimensions,calendar_dimensions,relevant_periods) }} From c8cc461c0dc3a170c4c5791b726419f15c2d715d Mon Sep 17 00:00:00 2001 From: Callum McCann Date: Tue, 17 Jan 2023 15:56:06 -0600 Subject: [PATCH 6/8] updating tests --- integration_tests/dbt_project.yml | 2 +- macros/sql_gen/gen_property_to_aggregate.sql | 6 +++--- .../functional/calculation_methods/test_average.py | 1 - .../secondary_calculations/test_period_to_date.py | 13 +++++++------ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 0b56f9c8..b9ea119e 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -6,7 +6,7 @@ version: "1.0.0" config-version: 2 # This setting configures which "profile" dbt uses for this project. 
-profile: "dbt_metrics_integration_tests_postgres" +profile: "dbt_metrics_integration_tests_bigquery" model-paths: ["models"] analysis-paths: ["analyses"] diff --git a/macros/sql_gen/gen_property_to_aggregate.sql b/macros/sql_gen/gen_property_to_aggregate.sql index 1f6f3f61..f492b11e 100644 --- a/macros/sql_gen/gen_property_to_aggregate.sql +++ b/macros/sql_gen/gen_property_to_aggregate.sql @@ -28,7 +28,7 @@ {% if grain or dimensions | length > 0 or calendar_dimensions | length > 0 -%} partition by {% if grain -%} - calendar_table.date_{{ grain }} + calendar.date_{{ grain }} {%- endif %} {% for dim in dimensions -%} {%- if loop.first and not grain-%} @@ -39,9 +39,9 @@ {%- endfor -%} {% for calendar_dim in calendar_dimensions -%} {%- if loop.first and dimensions | length == 0 and not grain %} - calendar_table.{{ calendar_dim }} + calendar.{{ calendar_dim }} {%else -%} - ,calendar_table.{{ calendar_dim }} + ,calendar.{{ calendar_dim }} {%- endif -%} {%- endfor %} {%- endif %} diff --git a/tests/functional/calculation_methods/test_average.py b/tests/functional/calculation_methods/test_average.py index d231887b..36cccf12 100644 --- a/tests/functional/calculation_methods/test_average.py +++ b/tests/functional/calculation_methods/test_average.py @@ -175,7 +175,6 @@ def test_build_completion(self,project,): - name: base_average_metric_no_time_grain__expected config: column_types: - date_month: date base_average_metric_no_time_grain: FLOAT64 """.lstrip() else: diff --git a/tests/functional/metric_options/secondary_calculations/test_period_to_date.py b/tests/functional/metric_options/secondary_calculations/test_period_to_date.py index 4dd618f1..e1c41758 100644 --- a/tests/functional/metric_options/secondary_calculations/test_period_to_date.py +++ b/tests/functional/metric_options/secondary_calculations/test_period_to_date.py @@ -999,7 +999,7 @@ def test_build_completion(self,project,): secondary_calculations=[ metrics.period_to_date(aggregate="sum", period="year", 
alias="this_year_sum"), metrics.period_to_date(aggregate="max", period="year"), - metrics.period_to_date(aggregate="sum", period="year"), + metrics.period_to_date(aggregate="min", period="year"), metrics.period_to_date(aggregate="average", period="year"), ] ) @@ -1030,15 +1030,15 @@ def test_build_completion(self,project,): # seeds/period_to_date_sum__expected.csv if os.getenv('dbt_target') == 'snowflake': period_to_date_sum__expected_csv = """ -date_month,date_year,period_to_date_sum,period_to_date_sum_this_year_sum,period_to_date_sum_max_for_year,period_to_date_sum_sum_for_year,period_to_date_sum_average_for_year +date_month,date_year,period_to_date_sum,period_to_date_sum_this_year_sum,period_to_date_sum_max_for_year,period_to_date_sum_min_for_year,period_to_date_sum_average_for_year 2022-01-01,2022-01-01,18,18,18,18,18.000000 2022-02-01,2022-01-01,6,24,18,24,12.000000 """.lstrip() else: period_to_date_sum__expected_csv = """ -date_month,date_year,period_to_date_sum,period_to_date_sum_this_year_sum,period_to_date_sum_max_for_year,period_to_date_sum_sum_for_year,period_to_date_sum_average_for_year +date_month,date_year,period_to_date_sum,period_to_date_sum_this_year_sum,period_to_date_sum_max_for_year,period_to_date_sum_min_for_year,period_to_date_sum_average_for_year 2022-01-01,2022-01-01,18,18,18,18,18.0000000000000000 -2022-02-01,2022-01-01,6,24,18,24,12.0000000000000000 +2022-02-01,2022-01-01,6,24,18,6,12.0000000000000000 """.lstrip() # seeds/period_to_date_sum__expected.yml @@ -1052,9 +1052,9 @@ def test_build_completion(self,project,): date_month: date date_year: date period_to_date_sum: INT64 - period_to_date_sum_this_year_min: INT64 + period_to_date_sum_this_year_sum: INT64 period_to_date_sum_max_for_year: INT64 - period_to_date_sum_sum_for_year: INT64 + period_to_date_sum_min_for_year: INT64 period_to_date_sum_average_for_year: FLOAT64 """.lstrip() else: @@ -1119,6 +1119,7 @@ def test_build_completion(self,project,): # initial run results = 
run_dbt(["run"]) + breakpoint() assert len(results) == 3 # test tests From 4df6b0c2278b8635019a34cdc125c285050881e4 Mon Sep 17 00:00:00 2001 From: Callum McCann Date: Tue, 17 Jan 2023 20:46:04 -0600 Subject: [PATCH 7/8] removing breakpoint --- .../metric_options/secondary_calculations/test_period_to_date.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/functional/metric_options/secondary_calculations/test_period_to_date.py b/tests/functional/metric_options/secondary_calculations/test_period_to_date.py index e1c41758..533f7780 100644 --- a/tests/functional/metric_options/secondary_calculations/test_period_to_date.py +++ b/tests/functional/metric_options/secondary_calculations/test_period_to_date.py @@ -1119,7 +1119,6 @@ def test_build_completion(self,project,): # initial run results = run_dbt(["run"]) - breakpoint() assert len(results) == 3 # test tests From a6198addce7825f18f3c69527746417893e751ed Mon Sep 17 00:00:00 2001 From: Callum McCann Date: Wed, 18 Jan 2023 09:22:20 -0600 Subject: [PATCH 8/8] fixing test --- .../secondary_calculations/test_period_to_date.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/metric_options/secondary_calculations/test_period_to_date.py b/tests/functional/metric_options/secondary_calculations/test_period_to_date.py index 533f7780..9cf83f2a 100644 --- a/tests/functional/metric_options/secondary_calculations/test_period_to_date.py +++ b/tests/functional/metric_options/secondary_calculations/test_period_to_date.py @@ -1032,7 +1032,7 @@ def test_build_completion(self,project,): period_to_date_sum__expected_csv = """ date_month,date_year,period_to_date_sum,period_to_date_sum_this_year_sum,period_to_date_sum_max_for_year,period_to_date_sum_min_for_year,period_to_date_sum_average_for_year 2022-01-01,2022-01-01,18,18,18,18,18.000000 -2022-02-01,2022-01-01,6,24,18,24,12.000000 +2022-02-01,2022-01-01,6,24,18,6,12.000000 """.lstrip() else: period_to_date_sum__expected_csv = """