From 2ecf22f808246e379bf55e0aeb4c992105ddd48b Mon Sep 17 00:00:00 2001
From: Guillaume Wagner
Date: Wed, 21 Aug 2024 11:33:29 -0400
Subject: [PATCH] Added support for structs and modes (repeated and required)

---
 .../bigquery/create_external_table.sql        | 33 +++++++++++++++-
 sample_sources/bigquery.yml                   | 38 +++++++++++++++++++
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/macros/plugins/bigquery/create_external_table.sql b/macros/plugins/bigquery/create_external_table.sql
index fe7a5ac8..5e4716e6 100644
--- a/macros/plugins/bigquery/create_external_table.sql
+++ b/macros/plugins/bigquery/create_external_table.sql
@@ -1,3 +1,33 @@
+{#-
+    Render a BigQuery STRUCT<...> type from nested field definitions.
+    `fields` is a list of column-like entries (name, data_type, optional mode/fields);
+    each one is rendered by get_column_definition, so nested STRUCTs recurse.
+-#}
+{%- macro create_struct_type_definition(fields) -%}
+    STRUCT<
+    {%for field in fields%}{{get_column_definition(field)}}{{- ',' if not loop.last -}}{%- endfor %}
+    >
+{%- endmacro -%}
+
+{#-
+    Render one column definition: `<name> <type>[ NOT NULL]`.
+    STRUCT types are expanded from `column.fields`, mode REPEATED wraps the type in
+    ARRAY<...>, and mode REQUIRED appends NOT NULL. data_type and mode are matched
+    case-insensitively so lower-case YAML values (struct, repeated, required) work.
+-#}
+{%- macro get_column_definition(column) -%}
+    {%- set column_quoted = adapter.quote(column.name) if column.quote else column.name %}
+    {%- set column_data_type = (column.data_type or '') | upper %}
+    {%- set column_mode = (column.mode or '') | upper %}
+    {#- An empty field list would render `STRUCT<>`; fail at compile time with the column name instead. -#}
+    {%- if column_data_type == 'STRUCT' and not column.fields %}
+        {{ exceptions.raise_compiler_error("STRUCT column '" ~ column.name ~ "' must declare a non-empty `fields` list") }}
+    {%- endif %}
+    {%- set column_type = create_struct_type_definition(column.fields) if column_data_type == 'STRUCT' else column.data_type %}
+    {%- set column_type = 'ARRAY<' ~ column_type ~ '>' if column_mode == 'REPEATED' else column_type %}
+    {{column_quoted}} {{column_type}} {{- ' NOT NULL' if column_mode == 'REQUIRED'}}
+{%- endmacro -%}
+
 {% macro bigquery__create_external_table(source_node) %}
     {%- set columns = source_node.columns.values() -%}
     {%- set external = source_node.external -%}
@@ -19,8 +49,7 @@
     create or replace external table {{source(source_node.source_name, source_node.name)}}
         {%- if columns -%}(
             {% for column in columns %}
-                {%- set column_quoted = adapter.quote(column.name) if column.quote else column.name %}
-                {{column_quoted}} {{column.data_type}} {{- ',' if not loop.last -}}
+                {{ get_column_definition(column) }} {{- ',' if not loop.last -}}
             {%- endfor -%}
         )
         {% endif %}
diff --git a/sample_sources/bigquery.yml b/sample_sources/bigquery.yml
index cfe18cde..9d86a60e 100644
--- a/sample_sources/bigquery.yml
+++ b/sample_sources/bigquery.yml
@@ -26,6 +26,7 @@ sources:
           - name: app_id
             data_type: varchar(255)
             description: "Application ID"
+            mode: REQUIRED
           - name: domain_sessionidx
             data_type: int
             description: "A visit / session index"
@@ -58,3 +59,40 @@ sources:
               - 'gs://bucket_a/path/*'
               - 'gs://bucket_b/path/*'
               - 'gs://bucket_c/more/specific/path/file.csv'
+
+
+  - name: users
+    schema: users
+    tables:
+      - name: users
+        external:
+          location: gs://my-prod-bucket/users.jsonl
+          options:
+            format: JSON
+        columns:
+          - name: id
+            data_type: INTEGER
+            mode: REQUIRED
+            description: >
+              User ID from the prod database
+          - name: name
+            data_type: STRING
+            description: "User full name"
+          - name: addresses
+            data_type: STRUCT
+            mode: REPEATED
+            description: A list of addresses where the user has lived
+            fields:
+              - name: city
+                data_type: STRING
+                description: "City name"
+              - name: address
+                data_type: STRING
+                description: "Street address"
+              - name: postal_code
+                data_type: STRING
+                description: "Postal code"
+              - name: country
+                data_type: STRING
+                description: "Country name"
+      
\ No newline at end of file