Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 88e8163

Browse files
committed
black -l 120 .
1 parent 40a785d commit 88e8163

File tree

9 files changed

+63
-44
lines changed

9 files changed

+63
-44
lines changed

data_diff/hashdiff_tables.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,10 @@ def diff_sets(a: list, b: list, json_cols: dict = None) -> Iterator:
4545
if parsed_match:
4646
to_warn = overriden_diff_cols - warned_diff_cols
4747
for w in to_warn:
48-
logger.warning(f"Equivalent JSON objects with different string representations detected "
49-
f"in column '{w}'. These cases are NOT reported as differences.")
48+
logger.warning(
49+
f"Equivalent JSON objects with different string representations detected "
50+
f"in column '{w}'. These cases are NOT reported as differences."
51+
)
5052
warned_diff_cols.add(w)
5153
continue
5254
yield from v
@@ -204,8 +206,11 @@ def _bisect_and_diff_segments(
204206
# This saves time, as bisection speed is limited by ping and query performance.
205207
if max_rows < self.bisection_threshold or max_space_size < self.bisection_factor * 2:
206208
rows1, rows2 = self._threaded_call("get_values", [table1, table2])
207-
json_cols = {i: colname for i, colname in enumerate(table1.extra_columns)
208-
if isinstance(table1._schema[colname], JSON)}
209+
json_cols = {
210+
i: colname
211+
for i, colname in enumerate(table1.extra_columns)
212+
if isinstance(table1._schema[colname], JSON)
213+
}
209214
diff = list(diff_sets(rows1, rows2, json_cols))
210215

211216
info_tree.info.set_diff(diff)

data_diff/sqeleton/abcs/mixins.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
from abc import ABC, abstractmethod
2-
from .database_types import Array, TemporalType, FractionalType, ColType_UUID, Boolean, ColType, String_UUID, JSON, Struct
2+
from .database_types import (
3+
Array,
4+
TemporalType,
5+
FractionalType,
6+
ColType_UUID,
7+
Boolean,
8+
ColType,
9+
String_UUID,
10+
JSON,
11+
Struct,
12+
)
313
from .compiler import Compilable
414

515

@@ -8,7 +18,6 @@ class AbstractMixin(ABC):
818

919

1020
class AbstractMixin_NormalizeValue(AbstractMixin):
11-
1221
@abstractmethod
1322
def to_comparable(self, value: str, coltype: ColType) -> str:
1423
"""Ensure that the expression is comparable in ``IS DISTINCT FROM``."""

data_diff/sqeleton/databases/postgresql.py

-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ class PostgresqlDialect(BaseDialect, Mixin_Schema):
8080
"character varying": Text,
8181
"varchar": Text,
8282
"text": Text,
83-
8483
"json": JSON,
8584
"jsonb": JSON,
8685
"uuid": Native_UUID,

data_diff/sqeleton/databases/redshift.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str:
4848
return self.to_string(f"{value}::decimal(38,{coltype.precision})")
4949

5050
def normalize_json(self, value: str, _coltype: JSON) -> str:
51-
return f'nvl2({value}, json_serialize({value}), NULL)'
51+
return f"nvl2({value}, json_serialize({value}), NULL)"
5252

5353

5454
class Dialect(PostgresqlDialect):
@@ -123,10 +123,10 @@ def query_external_table_schema(self, path: DbPath) -> Dict[str, tuple]:
123123
def select_view_columns(self, path: DbPath) -> str:
124124
_, schema, table = self._normalize_table_path(path)
125125

126-
return (
127-
"""select * from pg_get_cols('{}.{}')
126+
return """select * from pg_get_cols('{}.{}')
128127
cols(view_schema name, view_name name, col_name name, col_type varchar, col_num int)
129-
""".format(schema, table)
128+
""".format(
129+
schema, table
130130
)
131131

132132
def query_pg_get_cols(self, path: DbPath) -> Dict[str, tuple]:
@@ -138,14 +138,14 @@ def query_pg_get_cols(self, path: DbPath) -> Dict[str, tuple]:
138138
output = {}
139139
for r in rows:
140140
col_name = r[2]
141-
type_info = r[3].split('(')
141+
type_info = r[3].split("(")
142142
base_type = type_info[0]
143143
precision = None
144144
scale = None
145145

146146
if len(type_info) > 1:
147-
if base_type == 'numeric':
148-
precision, scale = type_info[1][:-1].split(',')
147+
if base_type == "numeric":
148+
precision, scale = type_info[1][:-1].split(",")
149149
precision = int(precision)
150150
scale = int(scale)
151151

data_diff/sqeleton/utils.py

-1
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,6 @@ def match_like(pattern: str, strs: Sequence[str]) -> Iterable[str]:
324324
yield s
325325

326326

327-
328327
class UnknownMeta(type):
329328
def __instancecheck__(self, instance):
330329
return instance is Unknown

docs/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@
9292

9393
autodoc_default_options = {
9494
# 'special-members': '__init__',
95-
'exclude-members': 'json,aslist,astuple,replace',
95+
"exclude-members": "json,aslist,astuple,replace",
9696
}
9797

9898
# -- Options for HTML output ----------------------------------------------

tests/sqeleton/test_database.py

+15-14
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,16 @@ def test_table_list(self):
7676
def test_type_mapping(self):
7777
name = "tbl_" + random_table_suffix()
7878
db = get_conn(self.db_cls)
79-
tbl = table(db.parse_table_name(name), schema={
80-
"int": int,
81-
"float": float,
82-
"datetime": datetime,
83-
"str": str,
84-
"bool": bool,
85-
})
79+
tbl = table(
80+
db.parse_table_name(name),
81+
schema={
82+
"int": int,
83+
"float": float,
84+
"datetime": datetime,
85+
"str": str,
86+
"bool": bool,
87+
},
88+
)
8689
q = db.dialect.list_tables(db.default_schema, name)
8790
assert not db.query(q)
8891

@@ -92,6 +95,7 @@ def test_type_mapping(self):
9295
db.query(tbl.drop())
9396
assert not db.query(q)
9497

98+
9599
@test_each_database
96100
class TestQueries(unittest.TestCase):
97101
def test_current_timestamp(self):
@@ -102,18 +106,16 @@ def test_current_timestamp(self):
102106
def test_correct_timezone(self):
103107
name = "tbl_" + random_table_suffix()
104108
db = get_conn(self.db_cls)
105-
tbl = table(name, schema={
106-
"id": int, "created_at": TimestampTZ(9), "updated_at": TimestampTZ(9)
107-
})
109+
tbl = table(name, schema={"id": int, "created_at": TimestampTZ(9), "updated_at": TimestampTZ(9)})
108110

109111
db.query(tbl.create())
110112

111-
tz = pytz.timezone('Europe/Berlin')
113+
tz = pytz.timezone("Europe/Berlin")
112114

113115
now = datetime.now(tz)
114116
if isinstance(db, dbs.Presto):
115117
ms = now.microsecond // 1000 * 1000 # Presto max precision is 3
116-
now = now.replace(microsecond = ms)
118+
now = now.replace(microsecond=ms)
117119

118120
db.query(table(name).insert_row(1, now, now))
119121
db.query(db.dialect.set_timezone_to_utc())
@@ -131,12 +133,12 @@ def test_correct_timezone(self):
131133
utc = now.astimezone(pytz.UTC)
132134
expected = utc.__format__("%Y-%m-%d %H:%M:%S.%f")
133135

134-
135136
self.assertEqual(created_at, expected)
136137
self.assertEqual(updated_at, expected)
137138

138139
db.query(tbl.drop())
139140

141+
140142
@test_each_database
141143
class TestThreePartIds(unittest.TestCase):
142144
def test_three_part_support(self):
@@ -159,4 +161,3 @@ def test_three_part_support(self):
159161
d = db.query_table_schema(part.path)
160162
assert len(d) == 1
161163
db.query(part.drop())
162-

tests/sqeleton/test_mixins.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33
from data_diff.sqeleton import connect
44

55
from data_diff.sqeleton.abcs import AbstractDialect, AbstractDatabase
6-
from data_diff.sqeleton.abcs.mixins import AbstractMixin_NormalizeValue, AbstractMixin_RandomSample, AbstractMixin_TimeTravel
6+
from data_diff.sqeleton.abcs.mixins import (
7+
AbstractMixin_NormalizeValue,
8+
AbstractMixin_RandomSample,
9+
AbstractMixin_TimeTravel,
10+
)
711

812

913
class TestMixins(unittest.TestCase):

tests/test_database_types.py

+15-13
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,7 @@ def init_conns():
7474
"boolean": [
7575
"boolean",
7676
],
77-
"json": [
78-
"json",
79-
"jsonb"
80-
]
77+
"json": ["json", "jsonb"],
8178
},
8279
db.MySQL: {
8380
# https://dev.mysql.com/doc/refman/8.0/en/integer-types.html
@@ -205,7 +202,7 @@ def init_conns():
205202
],
206203
"json": [
207204
"super",
208-
]
205+
],
209206
},
210207
db.Oracle: {
211208
"int": [
@@ -497,7 +494,7 @@ def __len__(self):
497494
"float": FloatFaker(N_SAMPLES),
498495
"uuid": UUID_Faker(N_SAMPLES),
499496
"boolean": BooleanFaker(N_SAMPLES),
500-
"json": JsonFaker(N_SAMPLES)
497+
"json": JsonFaker(N_SAMPLES),
501498
}
502499

503500

@@ -607,12 +604,17 @@ def _insert_to_table(conn, table_path, values, coltype):
607604
elif isinstance(conn, db.Redshift) and coltype in ("json", "jsonb"):
608605
values = [(i, Code(f"JSON_PARSE({sample})")) for i, sample in values]
609606
elif isinstance(conn, db.PostgreSQL) and coltype in ("json", "jsonb"):
610-
values = [(i, Code(
611-
"'{}'".format(
612-
(json.dumps(sample) if isinstance(sample, (dict, list)) else sample)
613-
.replace('\'', '\'\'')
607+
values = [
608+
(
609+
i,
610+
Code(
611+
"'{}'".format(
612+
(json.dumps(sample) if isinstance(sample, (dict, list)) else sample).replace("'", "''")
613+
)
614+
),
614615
)
615-
)) for i, sample in values]
616+
for i, sample in values
617+
]
616618

617619
insert_rows_in_batches(conn, tbl, values, columns=["id", "col"])
618620
conn.query(commit)
@@ -636,7 +638,7 @@ def _create_table_with_indexes(conn, table_path, type_):
636638
conn.query(tbl.create())
637639

638640
(index_id,) = table_path
639-
if conn.dialect.SUPPORTS_INDEXES and type_ not in ('json', 'jsonb', 'array', 'struct'):
641+
if conn.dialect.SUPPORTS_INDEXES and type_ not in ("json", "jsonb", "array", "struct"):
640642
conn.query(f"CREATE INDEX xa_{index_id} ON {table_name} ({quote('id')}, {quote('col')})")
641643
if conn.dialect.SUPPORTS_INDEXES:
642644
conn.query(f"CREATE INDEX xb_{index_id} ON {table_name} ({quote('id')})")
@@ -736,7 +738,7 @@ def test_types(self, source_db, target_db, source_type, target_type, type_catego
736738

737739
# For fuzzily diffed types, some rows can be downloaded for local comparison. This happens
738740
# when hashes are different but the essential payload is not; e.g. due to json serialization.
739-
if not {source_type, target_type} & {'json', 'jsonb', 'array', 'struct'}:
741+
if not {source_type, target_type} & {"json", "jsonb", "array", "struct"}:
740742
self.assertEqual(0, differ.stats.get("rows_downloaded", 0))
741743

742744
# This section downloads all rows to ensure that Python agrees with the

0 commit comments

Comments
 (0)