Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 54d6f2c

Browse files
committed
Fix date/time formatting for snowflake, postgres and bigquery
1 parent edad8e2 commit 54d6f2c

File tree

5 files changed

+57
-1
lines changed

5 files changed

+57
-1
lines changed

data_diff/abcs/database_types.py

+5
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,11 @@ class Date(TemporalType):
156156
pass
157157

158158

159+
@attrs.define(frozen=True)
160+
class Time(TemporalType):
161+
pass
162+
163+
159164
@attrs.define(frozen=True)
160165
class NumericType(ColType):
161166
# 'precision' signifies how many fractional digits (after the dot) we want to compare

data_diff/databases/bigquery.py

+19
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
TemporalType,
2020
Boolean,
2121
UnknownColType,
22+
Time,
23+
Date,
2224
)
2325
from data_diff.databases.base import (
2426
BaseDialect,
@@ -63,6 +65,8 @@ class Dialect(BaseDialect):
6365
# Dates
6466
"TIMESTAMP": Timestamp,
6567
"DATETIME": Datetime,
68+
"DATE": Date,
69+
"TIME": Time,
6670
# Numbers
6771
"INT64": Integer,
6872
"INT32": Integer,
@@ -160,6 +164,21 @@ def md5_as_hex(self, s: str) -> str:
160164
return f"md5({s})"
161165

162166
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
167+
try:
168+
is_date = coltype.is_date
169+
is_time = coltype.is_time
170+
except:
171+
is_date = False
172+
is_time = False
173+
if isinstance(coltype, Date) or is_date:
174+
return f"FORMAT_DATE('%F', {value})"
175+
if isinstance(coltype, Time) or is_time:
176+
microseconds = f"TIME_DIFF( {value}, cast('00:00:00' as time), microsecond)"
177+
rounded = f"ROUND({microseconds}, -6 + {coltype.precision})"
178+
time_value = f"TIME_ADD(cast('00:00:00' as time), interval cast({rounded} as int64) microsecond)"
179+
converted = f"FORMAT_TIME('%H:%M:%E6S', {time_value})"
180+
return converted
181+
163182
if coltype.rounds:
164183
timestamp = f"timestamp_micros(cast(round(unix_micros(cast({value} as timestamp))/1000000, {coltype.precision})*1000000 as int))"
165184
return f"FORMAT_TIMESTAMP('%F %H:%M:%E6S', {timestamp})"

data_diff/databases/mssql.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
)
1414
from data_diff.abcs.database_types import (
1515
JSON,
16-
Date,
1716
NumericType,
1817
Timestamp,
1918
TimestampTZ,
@@ -25,6 +24,8 @@
2524
Native_UUID,
2625
Text,
2726
Boolean,
27+
Date,
28+
Time
2829
)
2930

3031

@@ -48,6 +49,7 @@ class Dialect(BaseDialect):
4849
"datetime2": Timestamp,
4950
"smalldatetime": Timestamp,
5051
"date": Date,
52+
"time": Time,
5153
# Numbers
5254
"float": Float,
5355
"real": Float,

data_diff/databases/postgresql.py

+20
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
FractionalType,
1818
Boolean,
1919
Date,
20+
Time
2021
)
2122
from data_diff.databases.base import BaseDialect, ThreadedDatabase, import_helper, ConnectError
2223
from data_diff.databases.base import (
@@ -57,6 +58,8 @@ class PostgresqlDialect(BaseDialect):
5758
"timestamp without time zone": Timestamp,
5859
"timestamp": Timestamp,
5960
"date": Date,
61+
"time with time zone": Time,
62+
"time without time zone": Time,
6063
# Numbers
6164
"double precision": Float,
6265
"real": Float,
@@ -111,6 +114,23 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
111114
def _add_padding(coltype: TemporalType, timestamp6: str):
112115
return f"RPAD(LEFT({timestamp6}, {TIMESTAMP_PRECISION_POS+coltype.precision}), {TIMESTAMP_PRECISION_POS+6}, '0')"
113116

117+
try:
118+
is_date = coltype.is_date
119+
is_time = coltype.is_time
120+
except:
121+
is_date = False
122+
is_time = False
123+
124+
if isinstance(coltype, Date) or is_date:
125+
return f"cast({value} as varchar)"
126+
127+
if isinstance(coltype, Time) or is_time:
128+
seconds = f"EXTRACT( epoch from {value})"
129+
rounded = f"ROUND({seconds}, {coltype.precision})"
130+
time_value = f"CAST('00:00:00' as time) + make_interval(0, 0, 0, 0, 0, 0, {rounded})" # 7th arg = seconds (years, months, weeks, days, hours, mins, secs)
131+
converted = f"to_char({time_value}, 'hh24:mi:ss.ff6')"
132+
return converted
133+
114134
if coltype.rounds:
115135
# NULL value expected to return NULL after normalization
116136
null_case_begin = f"CASE WHEN {value} IS NULL THEN NULL ELSE "

data_diff/databases/snowflake.py

+10
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
DbPath,
1616
Boolean,
1717
Date,
18+
Time,
1819
)
1920
from data_diff.databases.base import (
2021
BaseDialect,
@@ -45,6 +46,7 @@ class Dialect(BaseDialect):
4546
"TIMESTAMP_LTZ": Timestamp,
4647
"TIMESTAMP_TZ": TimestampTZ,
4748
"DATE": Date,
49+
"TIME": Time,
4850
# Numbers
4951
"NUMBER": Decimal,
5052
"FLOAT": Float,
@@ -83,10 +85,18 @@ def md5_as_hex(self, s: str) -> str:
8385
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
8486
try:
8587
is_date = coltype.is_date
88+
is_time = coltype.is_time
8689
except:
8790
is_date = False
91+
is_time = False
8892
if isinstance(coltype, Date) or is_date:
8993
return f"({value}::varchar)"
94+
elif isinstance(coltype, Time) or is_time:
95+
microseconds = f"TIMEDIFF(microsecond, cast('00:00:00' as time), {value})"
96+
rounded = f"round({microseconds}, -6 + {coltype.precision})"
97+
time_value = f"TIMEADD(microsecond, {rounded}, cast('00:00:00' as time))"
98+
converted = f"TO_VARCHAR({time_value}, 'HH24:MI:SS.FF6')"
99+
return converted
90100

91101
if coltype.rounds:
92102
timestamp = f"to_timestamp(round(date_part(epoch_nanosecond, convert_timezone('UTC', {value})::timestamp(9))/1000000000, {coltype.precision}))"

0 commit comments

Comments
 (0)