black -l 120 .

dlawin · dlawin · commit 88e816398071 · 2023-06-23T17:11:05.000-06:00
diff --git a/data_diff/hashdiff_tables.py b/data_diff/hashdiff_tables.py
@@ -45,8 +45,10 @@ def diff_sets(a: list, b: list, json_cols: dict = None) -> Iterator:
             if parsed_match:
                 to_warn = overriden_diff_cols - warned_diff_cols
                 for w in to_warn:
-                    logger.warning(f"Equivalent JSON objects with different string representations detected "
-                                   f"in column '{w}'. These cases are NOT reported as differences.")
+                    logger.warning(
+                        f"Equivalent JSON objects with different string representations detected "
+                        f"in column '{w}'. These cases are NOT reported as differences."
+                    )
                     warned_diff_cols.add(w)
                 continue
         yield from v
@@ -204,8 +206,11 @@ def _bisect_and_diff_segments(
         # This saves time, as bisection speed is limited by ping and query performance.
         if max_rows < self.bisection_threshold or max_space_size < self.bisection_factor * 2:
             rows1, rows2 = self._threaded_call("get_values", [table1, table2])
-            json_cols = {i: colname for i, colname in enumerate(table1.extra_columns)
-                         if isinstance(table1._schema[colname], JSON)}
+            json_cols = {
+                i: colname
+                for i, colname in enumerate(table1.extra_columns)
+                if isinstance(table1._schema[colname], JSON)
+            }
             diff = list(diff_sets(rows1, rows2, json_cols))
 
             info_tree.info.set_diff(diff)
diff --git a/data_diff/sqeleton/abcs/mixins.py b/data_diff/sqeleton/abcs/mixins.py
@@ -1,5 +1,15 @@
 from abc import ABC, abstractmethod
-from .database_types import Array, TemporalType, FractionalType, ColType_UUID, Boolean, ColType, String_UUID, JSON, Struct
+from .database_types import (
+    Array,
+    TemporalType,
+    FractionalType,
+    ColType_UUID,
+    Boolean,
+    ColType,
+    String_UUID,
+    JSON,
+    Struct,
+)
 from .compiler import Compilable
 
 
@@ -8,7 +18,6 @@ class AbstractMixin(ABC):
 
 
 class AbstractMixin_NormalizeValue(AbstractMixin):
-
     @abstractmethod
     def to_comparable(self, value: str, coltype: ColType) -> str:
         """Ensure that the expression is comparable in ``IS DISTINCT FROM``."""
diff --git a/data_diff/sqeleton/databases/postgresql.py b/data_diff/sqeleton/databases/postgresql.py
@@ -80,7 +80,6 @@ class PostgresqlDialect(BaseDialect, Mixin_Schema):
         "character varying": Text,
         "varchar": Text,
         "text": Text,
-
         "json": JSON,
         "jsonb": JSON,
         "uuid": Native_UUID,
diff --git a/data_diff/sqeleton/databases/redshift.py b/data_diff/sqeleton/databases/redshift.py
@@ -48,7 +48,7 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str:
         return self.to_string(f"{value}::decimal(38,{coltype.precision})")
 
     def normalize_json(self, value: str, _coltype: JSON) -> str:
-        return f'nvl2({value}, json_serialize({value}), NULL)'
+        return f"nvl2({value}, json_serialize({value}), NULL)"
 
 
 class Dialect(PostgresqlDialect):
@@ -123,10 +123,10 @@ def query_external_table_schema(self, path: DbPath) -> Dict[str, tuple]:
     def select_view_columns(self, path: DbPath) -> str:
         _, schema, table = self._normalize_table_path(path)
 
-        return (
-            """select * from pg_get_cols('{}.{}')
+        return """select * from pg_get_cols('{}.{}')
                 cols(view_schema name, view_name name, col_name name, col_type varchar, col_num int)
-            """.format(schema, table)
+            """.format(
+            schema, table
         )
 
     def query_pg_get_cols(self, path: DbPath) -> Dict[str, tuple]:
@@ -138,14 +138,14 @@ def query_pg_get_cols(self, path: DbPath) -> Dict[str, tuple]:
         output = {}
         for r in rows:
             col_name = r[2]
-            type_info = r[3].split('(')
+            type_info = r[3].split("(")
             base_type = type_info[0]
             precision = None
             scale = None
 
             if len(type_info) > 1:
-                if base_type == 'numeric':
-                    precision, scale = type_info[1][:-1].split(',')
+                if base_type == "numeric":
+                    precision, scale = type_info[1][:-1].split(",")
                     precision = int(precision)
                     scale = int(scale)
 
diff --git a/data_diff/sqeleton/utils.py b/data_diff/sqeleton/utils.py
@@ -324,7 +324,6 @@ def match_like(pattern: str, strs: Sequence[str]) -> Iterable[str]:
             yield s
 
 
-
 class UnknownMeta(type):
     def __instancecheck__(self, instance):
         return instance is Unknown
diff --git a/docs/conf.py b/docs/conf.py
@@ -92,7 +92,7 @@
 
 autodoc_default_options = {
     # 'special-members': '__init__',
-    'exclude-members': 'json,aslist,astuple,replace',
+    "exclude-members": "json,aslist,astuple,replace",
 }
 
 # -- Options for HTML output ----------------------------------------------
diff --git a/tests/sqeleton/test_database.py b/tests/sqeleton/test_database.py
@@ -76,13 +76,16 @@ def test_table_list(self):
     def test_type_mapping(self):
         name = "tbl_" + random_table_suffix()
         db = get_conn(self.db_cls)
-        tbl = table(db.parse_table_name(name), schema={
-            "int": int,
-            "float": float,
-            "datetime": datetime,
-            "str": str,
-            "bool": bool,
-        })
+        tbl = table(
+            db.parse_table_name(name),
+            schema={
+                "int": int,
+                "float": float,
+                "datetime": datetime,
+                "str": str,
+                "bool": bool,
+            },
+        )
         q = db.dialect.list_tables(db.default_schema, name)
         assert not db.query(q)
 
@@ -92,6 +95,7 @@ def test_type_mapping(self):
         db.query(tbl.drop())
         assert not db.query(q)
 
+
 @test_each_database
 class TestQueries(unittest.TestCase):
     def test_current_timestamp(self):
@@ -102,18 +106,16 @@ def test_current_timestamp(self):
     def test_correct_timezone(self):
         name = "tbl_" + random_table_suffix()
         db = get_conn(self.db_cls)
-        tbl = table(name, schema={
-            "id": int, "created_at": TimestampTZ(9), "updated_at": TimestampTZ(9)
-        })
+        tbl = table(name, schema={"id": int, "created_at": TimestampTZ(9), "updated_at": TimestampTZ(9)})
 
         db.query(tbl.create())
 
-        tz = pytz.timezone('Europe/Berlin')
+        tz = pytz.timezone("Europe/Berlin")
 
         now = datetime.now(tz)
         if isinstance(db, dbs.Presto):
             ms = now.microsecond // 1000 * 1000  # Presto max precision is 3
-            now = now.replace(microsecond = ms)
+            now = now.replace(microsecond=ms)
 
         db.query(table(name).insert_row(1, now, now))
         db.query(db.dialect.set_timezone_to_utc())
@@ -131,12 +133,12 @@ def test_correct_timezone(self):
         utc = now.astimezone(pytz.UTC)
         expected = utc.__format__("%Y-%m-%d %H:%M:%S.%f")
 
-
         self.assertEqual(created_at, expected)
         self.assertEqual(updated_at, expected)
 
         db.query(tbl.drop())
 
+
 @test_each_database
 class TestThreePartIds(unittest.TestCase):
     def test_three_part_support(self):
@@ -159,4 +161,3 @@ def test_three_part_support(self):
             d = db.query_table_schema(part.path)
             assert len(d) == 1
             db.query(part.drop())
-
diff --git a/tests/sqeleton/test_mixins.py b/tests/sqeleton/test_mixins.py
@@ -3,7 +3,11 @@
 from data_diff.sqeleton import connect
 
 from data_diff.sqeleton.abcs import AbstractDialect, AbstractDatabase
-from data_diff.sqeleton.abcs.mixins import AbstractMixin_NormalizeValue, AbstractMixin_RandomSample, AbstractMixin_TimeTravel
+from data_diff.sqeleton.abcs.mixins import (
+    AbstractMixin_NormalizeValue,
+    AbstractMixin_RandomSample,
+    AbstractMixin_TimeTravel,
+)
 
 
 class TestMixins(unittest.TestCase):
diff --git a/tests/test_database_types.py b/tests/test_database_types.py
@@ -74,10 +74,7 @@ def init_conns():
         "boolean": [
             "boolean",
         ],
-        "json": [
-            "json",
-            "jsonb"
-        ]
+        "json": ["json", "jsonb"],
     },
     db.MySQL: {
         # https://dev.mysql.com/doc/refman/8.0/en/integer-types.html
@@ -205,7 +202,7 @@ def init_conns():
         ],
         "json": [
             "super",
-        ]
+        ],
     },
     db.Oracle: {
         "int": [
@@ -497,7 +494,7 @@ def __len__(self):
     "float": FloatFaker(N_SAMPLES),
     "uuid": UUID_Faker(N_SAMPLES),
     "boolean": BooleanFaker(N_SAMPLES),
-    "json": JsonFaker(N_SAMPLES)
+    "json": JsonFaker(N_SAMPLES),
 }
 
 
@@ -607,12 +604,17 @@ def _insert_to_table(conn, table_path, values, coltype):
     elif isinstance(conn, db.Redshift) and coltype in ("json", "jsonb"):
         values = [(i, Code(f"JSON_PARSE({sample})")) for i, sample in values]
     elif isinstance(conn, db.PostgreSQL) and coltype in ("json", "jsonb"):
-        values = [(i, Code(
-            "'{}'".format(
-                (json.dumps(sample) if isinstance(sample, (dict, list)) else sample)
-                .replace('\'', '\'\'')
+        values = [
+            (
+                i,
+                Code(
+                    "'{}'".format(
+                        (json.dumps(sample) if isinstance(sample, (dict, list)) else sample).replace("'", "''")
+                    )
+                ),
             )
-        )) for i, sample in values]
+            for i, sample in values
+        ]
 
     insert_rows_in_batches(conn, tbl, values, columns=["id", "col"])
     conn.query(commit)
@@ -636,7 +638,7 @@ def _create_table_with_indexes(conn, table_path, type_):
         conn.query(tbl.create())
 
     (index_id,) = table_path
-    if conn.dialect.SUPPORTS_INDEXES and type_ not in ('json', 'jsonb', 'array', 'struct'):
+    if conn.dialect.SUPPORTS_INDEXES and type_ not in ("json", "jsonb", "array", "struct"):
         conn.query(f"CREATE INDEX xa_{index_id} ON {table_name} ({quote('id')}, {quote('col')})")
     if conn.dialect.SUPPORTS_INDEXES:
         conn.query(f"CREATE INDEX xb_{index_id} ON {table_name} ({quote('id')})")
@@ -736,7 +738,7 @@ def test_types(self, source_db, target_db, source_type, target_type, type_catego
 
         # For fuzzily diffed types, some rows can be downloaded for local comparison. This happens
         # when hashes are different but the essential payload is not; e.g. due to json serialization.
-        if not {source_type, target_type} & {'json', 'jsonb', 'array', 'struct'}:
+        if not {source_type, target_type} & {"json", "jsonb", "array", "struct"}:
             self.assertEqual(0, differ.stats.get("rows_downloaded", 0))
 
         # This section downloads all rows to ensure that Python agrees with the

Original file line number	Diff line number	Diff line change
`@@ -92,7 +92,7 @@`
`92`	`92`
`93`	`93`	`autodoc_default_options = {`
`94`	`94`	`# 'special-members': '__init__',`
`95`		`- 'exclude-members': 'json,aslist,astuple,replace',`
	`95`	`+ "exclude-members": "json,aslist,astuple,replace",`
`96`	`96`	`}`
`97`	`97`
`98`	`98`	`# -- Options for HTML output ----------------------------------------------`