Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit d198647

Browse files
authored
Merge pull request #545 from datafold/simplify-json-columns
Simplify JSON column types
2 parents 3c18507 + 46c8867 commit d198647

File tree

8 files changed

+20
-34
lines changed

8 files changed

+20
-34
lines changed

data_diff/hashdiff_tables.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from runtype import dataclass
99

10-
from data_diff.sqeleton.abcs import ColType_UUID, NumericType, PrecisionType, StringType, Boolean, JSONType
10+
from data_diff.sqeleton.abcs import ColType_UUID, NumericType, PrecisionType, StringType, Boolean, JSON
1111

1212
from .info_tree import InfoTree
1313
from .utils import safezip, diffs_are_equiv_jsons
@@ -205,7 +205,7 @@ def _bisect_and_diff_segments(
205205
if max_rows < self.bisection_threshold or max_space_size < self.bisection_factor * 2:
206206
rows1, rows2 = self._threaded_call("get_values", [table1, table2])
207207
json_cols = {i: colname for i, colname in enumerate(table1.extra_columns)
208-
if isinstance(table1._schema[colname], JSONType)}
208+
if isinstance(table1._schema[colname], JSON)}
209209
diff = list(diff_sets(rows1, rows2, json_cols))
210210

211211
info_tree.info.set_diff(diff)

data_diff/sqeleton/abcs/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@
1010
PrecisionType,
1111
StringType,
1212
Boolean,
13-
JSONType,
13+
JSON,
1414
)
1515
from .compiler import AbstractCompiler, Compilable

data_diff/sqeleton/abcs/database_types.py

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -134,19 +134,9 @@ class Text(StringType):
134134
supported = False
135135

136136

137-
class JSONType(ColType):
138-
pass
139-
140-
141-
class RedShiftSuper(JSONType):
142-
pass
143-
144-
145-
class PostgresqlJSON(JSONType):
146-
pass
147-
148-
149-
class PostgresqlJSONB(JSONType):
137+
# In the majority of DBMSes, this type is called JSON/JSONB. Exceptions: in Snowflake it is OBJECT, and in Redshift it is SUPER.
138+
@dataclass
139+
class JSON(ColType):
150140
pass
151141

152142

data_diff/sqeleton/abcs/mixins.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from abc import ABC, abstractmethod
2-
from .database_types import TemporalType, FractionalType, ColType_UUID, Boolean, ColType, String_UUID, JSONType
2+
from .database_types import TemporalType, FractionalType, ColType_UUID, Boolean, ColType, String_UUID, JSON
33
from .compiler import Compilable
44

55

@@ -49,7 +49,7 @@ def normalize_uuid(self, value: str, coltype: ColType_UUID) -> str:
4949
return f"TRIM({value})"
5050
return self.to_string(value)
5151

52-
def normalize_json(self, value: str, _coltype: JSONType) -> str:
52+
def normalize_json(self, value: str, _coltype: JSON) -> str:
5353
"""Creates an SQL expression, that converts 'value' to its minified json string representation."""
5454
raise NotImplementedError()
5555

@@ -77,7 +77,7 @@ def normalize_value_by_type(self, value: str, coltype: ColType) -> str:
7777
return self.normalize_uuid(value, coltype)
7878
elif isinstance(coltype, Boolean):
7979
return self.normalize_boolean(value, coltype)
80-
elif isinstance(coltype, JSONType):
80+
elif isinstance(coltype, JSON):
8181
return self.normalize_json(value, coltype)
8282
return self.to_string(value)
8383

data_diff/sqeleton/databases/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
DbTime,
3636
DbPath,
3737
Boolean,
38-
JSONType
38+
JSON
3939
)
4040
from ..abcs.mixins import Compilable
4141
from ..abcs.mixins import (
@@ -260,7 +260,7 @@ def parse_type(
260260
elif issubclass(cls, (Text, Native_UUID)):
261261
return cls()
262262

263-
elif issubclass(cls, JSONType):
263+
elif issubclass(cls, JSON):
264264
return cls()
265265

266266
raise TypeError(f"Parsing {type_repr} returned an unknown type '{cls}'.")

data_diff/sqeleton/databases/postgresql.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from ..abcs.database_types import (
22
DbPath,
3+
JSON,
34
Timestamp,
45
TimestampTZ,
56
Float,
@@ -11,8 +12,6 @@
1112
FractionalType,
1213
Boolean,
1314
Date,
14-
PostgresqlJSON,
15-
PostgresqlJSONB
1615
)
1716
from ..abcs.mixins import AbstractMixin_MD5, AbstractMixin_NormalizeValue
1817
from .base import BaseDialect, ThreadedDatabase, import_helper, ConnectError, Mixin_Schema
@@ -51,7 +50,7 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str:
5150
def normalize_boolean(self, value: str, _coltype: Boolean) -> str:
5251
return self.to_string(f"{value}::int")
5352

54-
def normalize_json(self, value: str, _coltype: PostgresqlJSON) -> str:
53+
def normalize_json(self, value: str, _coltype: JSON) -> str:
5554
return f"{value}::text"
5655

5756

@@ -81,12 +80,10 @@ class PostgresqlDialect(BaseDialect, Mixin_Schema):
8180
"character varying": Text,
8281
"varchar": Text,
8382
"text": Text,
84-
# JSON
85-
"json": PostgresqlJSON,
86-
"jsonb": PostgresqlJSONB,
87-
# UUID
83+
84+
"json": JSON,
85+
"jsonb": JSON,
8886
"uuid": Native_UUID,
89-
# Boolean
9087
"boolean": Boolean,
9188
}
9289

data_diff/sqeleton/databases/redshift.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from typing import List, Dict
22
from ..abcs.database_types import (
33
Float,
4+
JSON,
45
TemporalType,
56
FractionalType,
67
DbPath,
78
TimestampTZ,
8-
RedShiftSuper
99
)
1010
from ..abcs.mixins import AbstractMixin_MD5
1111
from .postgresql import (
@@ -47,7 +47,7 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
4747
def normalize_number(self, value: str, coltype: FractionalType) -> str:
4848
return self.to_string(f"{value}::decimal(38,{coltype.precision})")
4949

50-
def normalize_json(self, value: str, _coltype: RedShiftSuper) -> str:
50+
def normalize_json(self, value: str, _coltype: JSON) -> str:
5151
return f'nvl2({value}, json_serialize({value}), NULL)'
5252

5353

@@ -57,8 +57,7 @@ class Dialect(PostgresqlDialect):
5757
**PostgresqlDialect.TYPE_CLASSES,
5858
"double": Float,
5959
"real": Float,
60-
# JSON
61-
"super": RedShiftSuper
60+
"super": JSON,
6261
}
6362
SUPPORTS_INDEXES = False
6463

data_diff/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def diffs_are_equiv_jsons(diff: list, json_cols: dict):
160160
return False, overriden_diff_cols
161161
match = True
162162
for i, (col_a, col_b) in enumerate(safezip(diff[0][1][1:], diff[1][1][1:])): # index 0 is extra_columns first elem
163-
# we only attempt to parse columns of JSONType, but we still need to check if non-json columns don't match
163+
# we only attempt to parse columns of JSON type, but we still need to detect mismatches in non-JSON columns
164164
match = col_a == col_b
165165
if not match and (i in json_cols):
166166
if _jsons_equiv(col_a, col_b):

0 commit comments

Comments
 (0)