From 22fe5e1cb4ec614beaa36feb8738902a19784c62 Mon Sep 17 00:00:00 2001
From: Agnes Kiss
Date: Thu, 1 Sep 2022 10:41:35 +0100
Subject: [PATCH] Redshift Web: load_tstamp missing from table definition (Close #135)

---
Review notes (text in this section is not part of the commit message):
* events_staged keeps DISTSTYLE KEY / DISTKEY (event_id) / SORTKEY
  (collector_tstamp) from the previous DDL, now as CTAS table attributes.
* The CTAS filter is WHERE 1 = 0, so the staged table is always created empty.
* 98-drop-base-staged.sql uses DROP TABLE IF EXISTS so the step can be re-run.
* Leading whitespace in this patch was reconstructed and the blob ids on the
  index lines were not recomputed; regenerate the patch before applying it.

 .../expectations/web/v1/base_redshift.json    |   3 +-
 .../99-page-views-complete.yml.tmpl           |   6 +-
 .../01-base/01-main/00-setup-base.sql         | 159 ++----------------
 ...ase-staged.sql => 98-drop-base-staged.sql} |   2 +-
 4 files changed, 28 insertions(+), 142 deletions(-)
 rename web/v1/redshift/sql-runner/sql/standard/02-page-views/99-complete/{98-truncate-base-staged.sql => 98-drop-base-staged.sql} (91%)

diff --git a/.test/great_expectations/expectations/web/v1/base_redshift.json b/.test/great_expectations/expectations/web/v1/base_redshift.json
index a0502c40..8921a433 100644
--- a/.test/great_expectations/expectations/web/v1/base_redshift.json
+++ b/.test/great_expectations/expectations/web/v1/base_redshift.json
@@ -5,7 +5,7 @@
     {
       "expectation_type": "expect_table_column_count_to_equal",
       "kwargs": {
-        "value": 129
+        "value": 130
       }
     },
     {
@@ -140,6 +140,7 @@
           "event_version",
           "event_fingerprint",
           "true_tstamp",
+          "load_tstamp",
           "page_view_id"
         ]
       }
diff --git a/web/v1/redshift/sql-runner/playbooks/standard/02-page-views/99-page-views-complete.yml.tmpl b/web/v1/redshift/sql-runner/playbooks/standard/02-page-views/99-page-views-complete.yml.tmpl
index a2b38a21..c6683ccb 100644
--- a/web/v1/redshift/sql-runner/playbooks/standard/02-page-views/99-page-views-complete.yml.tmpl
+++ b/web/v1/redshift/sql-runner/playbooks/standard/02-page-views/99-page-views-complete.yml.tmpl
@@ -15,10 +15,10 @@
   :cleanup_mode: all
   :ends_run: false
 :steps:
-- :name: 98-truncate-base-staged
+- :name: 98-drop-base-staged
   :queries:
-  - :name: 98-truncate-base-staged
-    :file: standard/02-page-views/99-complete/98-truncate-base-staged.sql
+  - :name: 98-drop-base-staged
+    :file: standard/02-page-views/99-complete/98-drop-base-staged.sql
     :template: true
 - :name: 99-page-views-cleanup
   :queries:
diff --git a/web/v1/redshift/sql-runner/sql/standard/01-base/01-main/00-setup-base.sql b/web/v1/redshift/sql-runner/sql/standard/01-base/01-main/00-setup-base.sql
index d7b79721..bc4ebb63 100644
--- a/web/v1/redshift/sql-runner/sql/standard/01-base/01-main/00-setup-base.sql
+++ b/web/v1/redshift/sql-runner/sql/standard/01-base/01-main/00-setup-base.sql
@@ -125,141 +125,26 @@ INSERT INTO {{.output_schema}}.base_session_id_manifest{{.entropy}} (
   -- ensures that the seed is not re-inserted if the table is populated.
 );
 
--- Create staged table:
-CREATE TABLE IF NOT EXISTS {{.scratch_schema}}.events_staged{{.entropy}} (
-
-  app_id VARCHAR(255) ENCODE ZSTD,
-  platform VARCHAR(255) ENCODE ZSTD,
-  etl_tstamp TIMESTAMP ENCODE ZSTD,
-  collector_tstamp TIMESTAMP NOT NULL ENCODE RAW,
-  dvce_created_tstamp TIMESTAMP ENCODE ZSTD,
-  event VARCHAR(128) ENCODE ZSTD,
-  event_id CHAR(36) NOT NULL UNIQUE ENCODE ZSTD,
-  txn_id INT ENCODE ZSTD,
-  name_tracker VARCHAR(128) ENCODE ZSTD,
-  v_tracker VARCHAR(100) ENCODE ZSTD,
-  v_collector VARCHAR(100) ENCODE ZSTD NOT NULL,
-  v_etl VARCHAR(100) ENCODE ZSTD NOT NULL,
-  user_id VARCHAR(255) ENCODE ZSTD,
-  user_ipaddress VARCHAR(128) ENCODE ZSTD,
-  user_fingerprint VARCHAR(128) ENCODE ZSTD,
-  domain_userid VARCHAR(128) ENCODE ZSTD,
-  domain_sessionidx INT ENCODE ZSTD,
-  network_userid VARCHAR(128) ENCODE ZSTD,
-  geo_country CHAR(2) ENCODE ZSTD,
-  geo_region CHAR(3) ENCODE ZSTD,
-  geo_city VARCHAR(75) ENCODE ZSTD,
-  geo_zipcode VARCHAR(15) ENCODE ZSTD,
-  geo_latitude DOUBLE PRECISION ENCODE ZSTD,
-  geo_longitude DOUBLE PRECISION ENCODE ZSTD,
-  geo_region_name VARCHAR(100) ENCODE ZSTD,
-  ip_isp VARCHAR(100) ENCODE ZSTD,
-  ip_organization VARCHAR(128) ENCODE ZSTD,
-  ip_domain VARCHAR(128) ENCODE ZSTD,
-  ip_netspeed VARCHAR(100) ENCODE ZSTD,
-  page_url VARCHAR(4096) ENCODE ZSTD,
-  page_title VARCHAR(2000) ENCODE ZSTD,
-  page_referrer VARCHAR(4096) ENCODE ZSTD,
-  page_urlscheme VARCHAR(16) ENCODE ZSTD,
-  page_urlhost VARCHAR(255) ENCODE ZSTD,
-  page_urlport INT ENCODE ZSTD,
-  page_urlpath VARCHAR(3000) ENCODE ZSTD,
-  page_urlquery VARCHAR(6000) ENCODE ZSTD,
-  page_urlfragment VARCHAR(3000) ENCODE ZSTD,
-  refr_urlscheme VARCHAR(16) ENCODE ZSTD,
-  refr_urlhost VARCHAR(255) ENCODE ZSTD,
-  refr_urlport INT ENCODE ZSTD,
-  refr_urlpath VARCHAR(6000) ENCODE ZSTD,
-  refr_urlquery VARCHAR(6000) ENCODE ZSTD,
-  refr_urlfragment VARCHAR(3000) ENCODE ZSTD,
-  refr_medium VARCHAR(25) ENCODE ZSTD,
-  refr_source VARCHAR(50) ENCODE ZSTD,
-  refr_term VARCHAR(255) ENCODE ZSTD,
-  mkt_medium VARCHAR(255) ENCODE ZSTD,
-  mkt_source VARCHAR(255) ENCODE ZSTD,
-  mkt_term VARCHAR(255) ENCODE ZSTD,
-  mkt_content VARCHAR(500) ENCODE ZSTD,
-  mkt_campaign VARCHAR(255) ENCODE ZSTD,
-  se_category VARCHAR(1000) ENCODE ZSTD,
-  se_action VARCHAR(1000) ENCODE ZSTD,
-  se_label VARCHAR(4096) ENCODE ZSTD,
-  se_property VARCHAR(1000) ENCODE ZSTD,
-  se_value DOUBLE PRECISION ENCODE ZSTD,
-  tr_orderid VARCHAR(255) ENCODE ZSTD,
-  tr_affiliation VARCHAR(255) ENCODE ZSTD,
-  tr_total dec(18,2) ENCODE ZSTD,
-  tr_tax dec(18,2) ENCODE ZSTD,
-  tr_shipping dec(18,2) ENCODE ZSTD,
-  tr_city VARCHAR(255) ENCODE ZSTD,
-  tr_state VARCHAR(255) ENCODE ZSTD,
-  tr_country VARCHAR(255) ENCODE ZSTD,
-  ti_orderid VARCHAR(255) ENCODE ZSTD,
-  ti_sku VARCHAR(255) ENCODE ZSTD,
-  ti_name VARCHAR(255) ENCODE ZSTD,
-  ti_category VARCHAR(255) ENCODE ZSTD,
-  ti_price dec(18,2) ENCODE ZSTD,
-  ti_quantity INT ENCODE ZSTD,
-  pp_xoffset_min INT ENCODE ZSTD,
-  pp_xoffset_max INT ENCODE ZSTD,
-  pp_yoffset_min INT ENCODE ZSTD,
-  pp_yoffset_max INT ENCODE ZSTD,
-  useragent VARCHAR(1000) ENCODE ZSTD,
-  br_name VARCHAR(50) ENCODE ZSTD,
-  br_family VARCHAR(50) ENCODE ZSTD,
-  br_version VARCHAR(50) ENCODE ZSTD,
-  br_type VARCHAR(50) ENCODE ZSTD,
-  br_renderengine VARCHAR(50) ENCODE ZSTD,
-  br_lang VARCHAR(255) ENCODE ZSTD,
-  br_features_pdf BOOLEAN ENCODE ZSTD,
-  br_features_flash BOOLEAN ENCODE ZSTD,
-  br_features_java BOOLEAN ENCODE ZSTD,
-  br_features_director BOOLEAN ENCODE ZSTD,
-  br_features_quicktime BOOLEAN ENCODE ZSTD,
-  br_features_realplayer BOOLEAN ENCODE ZSTD,
-  br_features_windowsmedia BOOLEAN ENCODE ZSTD,
-  br_features_gears BOOLEAN ENCODE ZSTD,
-  br_features_silverlight BOOLEAN ENCODE ZSTD,
-  br_cookies BOOLEAN ENCODE ZSTD,
-  br_colordepth VARCHAR(12) ENCODE ZSTD,
-  br_viewwidth INT ENCODE ZSTD,
-  br_viewheight INT ENCODE ZSTD,
-  os_name VARCHAR(50) ENCODE ZSTD,
-  os_family VARCHAR(50) ENCODE ZSTD,
-  os_manufacturer VARCHAR(50) ENCODE ZSTD,
-  os_timezone VARCHAR(255) ENCODE ZSTD,
-  dvce_type VARCHAR(50) ENCODE ZSTD,
-  dvce_ismobile BOOLEAN ENCODE ZSTD,
-  dvce_screenwidth INT ENCODE ZSTD,
-  dvce_screenheight INT ENCODE ZSTD,
-  doc_charset VARCHAR(128) ENCODE ZSTD,
-  doc_width INT ENCODE ZSTD,
-  doc_height INT ENCODE ZSTD,
-  tr_currency CHAR(3) ENCODE ZSTD,
-  tr_total_base dec(18, 2) ENCODE ZSTD,
-  tr_tax_base dec(18, 2) ENCODE ZSTD,
-  tr_shipping_base dec(18, 2) ENCODE ZSTD,
-  ti_currency CHAR(3) ENCODE ZSTD,
-  ti_price_base dec(18, 2) ENCODE ZSTD,
-  base_currency CHAR(3) ENCODE ZSTD,
-  geo_timezone VARCHAR(64) ENCODE ZSTD,
-  mkt_clickid VARCHAR(128) ENCODE ZSTD,
-  mkt_network VARCHAR(64) ENCODE ZSTD,
-  etl_tags VARCHAR(500) ENCODE ZSTD,
-  dvce_sent_tstamp TIMESTAMP ENCODE ZSTD,
-  refr_domain_userid VARCHAR(128) ENCODE ZSTD,
-  refr_dvce_tstamp TIMESTAMP ENCODE ZSTD,
-  domain_sessionid CHAR(128) ENCODE ZSTD,
-  derived_tstamp TIMESTAMP ENCODE ZSTD,
-  event_vendor VARCHAR(1000) ENCODE ZSTD,
-  event_name VARCHAR(1000) ENCODE ZSTD,
-  event_format VARCHAR(128) ENCODE ZSTD,
-  event_version VARCHAR(128) ENCODE ZSTD,
-  event_fingerprint VARCHAR(128) ENCODE ZSTD,
-  true_tstamp TIMESTAMP ENCODE ZSTD,
-
-  page_view_id CHAR(36) ENCODE ZSTD
+-- Drop staged table:
+DROP TABLE IF EXISTS {{.scratch_schema}}.events_staged{{.entropy}};
 
-)
-DISTSTYLE KEY
-DISTKEY (event_id)
-SORTKEY (collector_tstamp);
+-- Create staged table. CTAS derives the column list from the events table,
+-- so every column present upstream (such as load_tstamp) is included.
+-- The distribution and sort keys match the previous explicit DDL.
+CREATE TABLE {{.scratch_schema}}.events_staged{{.entropy}}
+DISTSTYLE KEY
+DISTKEY (event_id)
+SORTKEY (collector_tstamp)
+AS (
+  SELECT
+    a.*,
+    b.id AS page_view_id
+  FROM
+    {{.input_schema}}.events a
+  INNER JOIN
+    {{.input_schema}}.com_snowplowanalytics_snowplow_web_page_1 b
+    ON a.event_id = b.root_id
+    AND a.collector_tstamp = b.root_tstamp
+  -- Always-false predicate: only the table structure is created here.
+  WHERE 1 = 0
+);
diff --git a/web/v1/redshift/sql-runner/sql/standard/02-page-views/99-complete/98-truncate-base-staged.sql b/web/v1/redshift/sql-runner/sql/standard/02-page-views/99-complete/98-drop-base-staged.sql
similarity index 91%
rename from web/v1/redshift/sql-runner/sql/standard/02-page-views/99-complete/98-truncate-base-staged.sql
rename to web/v1/redshift/sql-runner/sql/standard/02-page-views/99-complete/98-drop-base-staged.sql
index 55f6f01b..3581e58d 100644
--- a/web/v1/redshift/sql-runner/sql/standard/02-page-views/99-complete/98-truncate-base-staged.sql
+++ b/web/v1/redshift/sql-runner/sql/standard/02-page-views/99-complete/98-drop-base-staged.sql
@@ -14,4 +14,4 @@
    limitations under the License.
 */
 
-TRUNCATE {{.scratch_schema}}.events_staged{{.entropy}};
+DROP TABLE IF EXISTS {{.scratch_schema}}.events_staged{{.entropy}};