
Commit fccbe2d

QMalcolm, MichelleArk, and mikealfare authored
Add microbatch strategy (#924)
* Add microbatch strategy

  This work is basically in its entirety a duplicate of the work done by MichelleArk in dbt-labs/dbt-snowflake#1179. I don't really expect this to work first try, but it might. I expect to need to do some edits, but who knows, maybe I'll get lucky.

* Add changie doc
* Add comment to microbatch macro to explain why we are re-implementing delete+insert
* Add `begin` to microbatch config in test_incremental_microbatch.py
* Clean up predicates in microbatch materialization
* Fix predicate/incremental predicate logic in microbatch macro
* Remove unnecessary `if` in microbatch macro

  The `if` was unnecessary because predicates are guaranteed to exist; it was only guarding against the case where there are no predicates.

* Get batch start and end time in the same way
* Remove unnecessary `target` specifications for columns of predicates in microbatch materialization

  The `target.` portion of `target.<column_name>` is unnecessary for the predicates in the microbatch materialization macro because the clause `delete from {{ target }}` already ensures the "targeting" of `target`. Said another way, there is no `using` clause in the delete statement, so it is unambiguous what is being deleted from.

---------

Co-authored-by: Michelle Ark <[email protected]>
Co-authored-by: Mike Alfare <[email protected]>
1 parent 1943ac5 commit fccbe2d
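
For orientation, here is a rough sketch of the SQL this strategy ends up issuing for a single batch. It is not taken from the commit; the relation, column names, temp table, and one-day window are assumptions, but it mirrors the delete+insert shape of the new macro, which filters purely on the batch's event-time window rather than on a unique_key:

    -- hypothetical rendering for one daily batch (names and timestamps are illustrative)
    delete from "dev"."analytics"."sessions"
    where (
        event_time >= TIMESTAMP '2020-01-01 00:00:00'
        and event_time < TIMESTAMP '2020-01-02 00:00:00'
    );

    insert into "dev"."analytics"."sessions" ("id", "event_time")
    (
        select "id", "event_time"
        from "sessions__dbt_tmp"
    );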

File tree

4 files changed (+78 lines, -1 line)

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+kind: Features
+body: Add microbatch strategy
+time: 2024-10-02T17:11:12.88725-05:00
+custom:
+  Author: QMalcolm
+  Issue: "923"

dbt/adapters/redshift/impl.py

Lines changed: 1 addition & 1 deletion
@@ -130,7 +130,7 @@ def valid_incremental_strategies(self):
         """The set of standard builtin strategies which this adapter supports out-of-the-box.
         Not used to validate custom strategies defined by end users.
         """
-        return ["append", "delete+insert", "merge"]
+        return ["append", "delete+insert", "merge", "microbatch"]
 
     def timestamp_add_sql(self, add_to: str, number: int = 1, interval: str = "hour") -> str:
         return f"{add_to} + interval '{number} {interval}'"
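
With "microbatch" added to this list, a model can select the strategy through its config. A minimal sketch, mirroring the model SQL used in the new functional test below (so the event_time column, batch size, and begin value are the test's, not general recommendations):

    {{
        config(
            materialized='incremental',
            incremental_strategy='microbatch',
            event_time='event_time',
            batch_size='day',
            begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0)
        )
    }}

    select * from {{ ref('input_model') }}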

dbt/include/redshift/macros/materializations/incremental_merge.sql

Lines changed: 47 additions & 0 deletions
@@ -65,3 +65,50 @@
     )
 
 {% endmacro %}
+
+{% macro redshift__get_incremental_microbatch_sql(arg_dict) %}
+    {#-
+        Technically this function could just call out to the default implementation of delete_insert.
+        However, the default implementation requires a unique_id, which we actually do not want or
+        need. Thus we re-implement delete insert here without the unique_id requirement
+    -#}
+
+    {%- set target = arg_dict["target_relation"] -%}
+    {%- set source = arg_dict["temp_relation"] -%}
+    {%- set dest_columns = arg_dict["dest_columns"] -%}
+    {%- set predicates = [] -%}
+
+    {%- set incremental_predicates = [] if arg_dict.get('incremental_predicates') is none else arg_dict.get('incremental_predicates') -%}
+    {%- for pred in incremental_predicates -%}
+        {% if "DBT_INTERNAL_DEST." in pred %}
+            {%- set pred = pred | replace("DBT_INTERNAL_DEST.", target ~ "." ) -%}
+        {% endif %}
+        {% if "dbt_internal_dest." in pred %}
+            {%- set pred = pred | replace("dbt_internal_dest.", target ~ "." ) -%}
+        {% endif %}
+        {% do predicates.append(pred) %}
+    {% endfor %}
+
+    {% if not model.config.get("__dbt_internal_microbatch_event_time_start") or not model.config.get("__dbt_internal_microbatch_event_time_end") -%}
+        {% do exceptions.raise_compiler_error('dbt could not compute the start and end timestamps for the running batch') %}
+    {% endif %}
+
+    {#-- Add additional incremental_predicates to filter for batch --#}
+    {% do predicates.append(model.config.event_time ~ " >= TIMESTAMP '" ~ model.config.__dbt_internal_microbatch_event_time_start ~ "'") %}
+    {% do predicates.append(model.config.event_time ~ " < TIMESTAMP '" ~ model.config.__dbt_internal_microbatch_event_time_end ~ "'") %}
+    {% do arg_dict.update({'incremental_predicates': predicates}) %}
+
+    delete from {{ target }}
+    where (
+    {% for predicate in predicates %}
+        {%- if not loop.first %}and {% endif -%} {{ predicate }}
+    {% endfor %}
+    );
+
+    {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%}
+    insert into {{ target }} ({{ dest_cols_csv }})
+    (
+        select {{ dest_cols_csv }}
+        from {{ source }}
+    )
+{% endmacro %}
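
As a worked example of the predicate handling in this macro: a user-supplied incremental predicate written against the conventional DBT_INTERNAL_DEST alias is rewritten to reference the target relation, and the batch window predicates are appended after it. A sketch under assumed names (relation, column, predicate, and batch bounds are all illustrative):

    -- assumed model config:
    --   incremental_predicates = ["DBT_INTERNAL_DEST.session_start > '2020-01-01'"]
    -- the delete issued for one batch might then look like:
    delete from "dev"."analytics"."sessions"
    where (
        "dev"."analytics"."sessions".session_start > '2020-01-01'
        and event_time >= TIMESTAMP '2020-01-03 00:00:00'
        and event_time < TIMESTAMP '2020-01-04 00:00:00'
    );
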
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+import pytest
+from dbt.tests.adapter.incremental.test_incremental_microbatch import (
+    BaseMicrobatch,
+)
+
+
+# No requirement for a unique_id for redshift microbatch!
+_microbatch_model_no_unique_id_sql = """
+{{ config(materialized='incremental', incremental_strategy='microbatch', event_time='event_time', batch_size='day', begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0)) }}
+select * from {{ ref('input_model') }}
+"""
+
+
+class TestSnowflakeMicrobatch(BaseMicrobatch):
+    @pytest.fixture(scope="class")
+    def microbatch_model_sql(self) -> str:
+        return _microbatch_model_no_unique_id_sql
+
+    @pytest.fixture(scope="class")
+    def insert_two_rows_sql(self, project) -> str:
+        test_schema_relation = project.adapter.Relation.create(
+            database=project.database, schema=project.test_schema
+        )
+        return f"insert into {test_schema_relation}.input_model (id, event_time) values (4, '2020-01-04 00:00:00-0'), (5, '2020-01-05 00:00:00-0')"