Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 6a0ba8b

Browse files
committed
Tests: Added test for config
1 parent 55aa54c commit 6a0ba8b

File tree

6 files changed: 96 additions and 36 deletions.

6 files changed: 96 additions and 36 deletions.

data_diff/__main__.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
)
1313
from .databases.connect import connect
1414
from .parse_time import parse_time_before_now, UNITS_STR, ParseError
15-
from .config import apply_config
15+
from .config import apply_config_from_file
1616

1717
import rich
1818
import click
@@ -63,13 +63,22 @@
6363
"A higher number will increase performance, but take more capacity from your database. "
6464
"'serial' guarantees a single-threaded execution of the algorithm (useful for debugging).",
6565
)
66-
@click.option("--conf", default=None, help="Path to a configuration.toml file, to provide a default configuration, and a list of possible runs.")
67-
@click.option("--run", default=None, help="Name of run-configuration to run. If used, CLI arguments for database and table must be omitted.")
66+
@click.option(
67+
"--conf",
68+
default=None,
69+
help="Path to a configuration.toml file, to provide a default configuration, and a list of possible runs.",
70+
)
71+
@click.option(
72+
"--run",
73+
default=None,
74+
help="Name of run-configuration to run. If used, CLI arguments for database and table must be omitted.",
75+
)
6876
def main(conf, run, **kw):
6977
if conf:
70-
kw = apply_config(conf, run, kw)
78+
kw = apply_config_from_file(conf, run, kw)
7179
return _main(**kw)
7280

81+
7382
def _main(
7483
database1,
7584
table1,
@@ -109,7 +118,7 @@ def _main(
109118
logging.error("Cannot specify a limit when using the -s/--stats switch")
110119
return
111120

112-
key_column = key_column or 'id'
121+
key_column = key_column or "id"
113122
if bisection_factor is None:
114123
bisection_factor = DEFAULT_BISECTION_FACTOR
115124
if bisection_threshold is None:

data_diff/config.py

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,73 @@
1+
from typing import Any, Dict
12
import toml
2-
import logging
3+
34

45
class ConfigParseError(Exception):
56
pass
67

78

89
def is_uri(s: str) -> bool:
9-
return '://' in s
10-
10+
return "://" in s
1111

1212

13-
def _load_config(path):
14-
with open(path) as f:
15-
return toml.load(f)
16-
17-
def apply_config(path, run_name, kw):
13+
def _apply_config(config: Dict[str, Any], run_name: str, kw: Dict[str, Any]):
1814
# Load config
19-
config = _load_config(path)
20-
databases = config.pop('database', {})
21-
runs = config.pop('run', {})
15+
databases = config.pop("database", {})
16+
runs = config.pop("run", {})
2217
if config:
2318
raise ConfigParseError(f"Unknown option(s): {config}")
2419

2520
# Init run_args
26-
run_args = runs.get('default') or {}
21+
run_args = runs.get("default") or {}
2722
if run_name:
2823
if run_name not in runs:
29-
raise ConfigParseError(f"Cannot find run '{run_name}' in configuration '{path}'.")
24+
raise ConfigParseError(f"Cannot find run '{run_name}' in configuration.")
3025
run_args.update(runs[run_name])
3126
else:
32-
run_name = 'default'
27+
run_name = "default"
3328

3429
# Process databases + tables
35-
for index in '12':
30+
for index in "12":
3631
args = run_args.pop(index, {})
37-
for attr in ('database', 'table'):
32+
for attr in ("database", "table"):
3833
if attr not in args:
3934
raise ConfigParseError(f"Running 'run.{run_name}': Connection #{index} in missing attribute '{attr}'.")
4035

41-
database = args.pop('database')
42-
table = args.pop('table')
43-
threads = args.pop('threads', None)
36+
database = args.pop("database")
37+
table = args.pop("table")
38+
threads = args.pop("threads", None)
4439
if args:
4540
raise ConfigParseError(f"Unexpected attributes for connection #{index}: {args}")
4641

4742
if not is_uri(database):
4843
if database not in databases:
49-
raise ConfigParseError(f"Database '{database}' not found in list of databases. Available: {list(databases)}.")
44+
raise ConfigParseError(
45+
f"Database '{database}' not found in list of databases. Available: {list(databases)}."
46+
)
5047
database = dict(databases[database])
5148
assert isinstance(database, dict)
52-
if 'driver' not in database:
49+
if "driver" not in database:
5350
raise ConfigParseError(f"Database '{database}' did not specify a driver.")
5451

55-
run_args[f'database{index}'] = database
56-
run_args[f'table{index}'] = table
52+
run_args[f"database{index}"] = database
53+
run_args[f"table{index}"] = table
5754
if threads is not None:
58-
run_args[f'threads{index}'] = int(threads)
55+
run_args[f"threads{index}"] = int(threads)
5956

6057
# Update keywords
61-
new_kw = dict(kw) # Set defaults
62-
new_kw.update(run_args) # Apply config
63-
new_kw.update({k:v for k, v in kw.items() if v}) # Apply non-empty defaults
58+
new_kw = dict(kw) # Set defaults
59+
new_kw.update(run_args) # Apply config
60+
new_kw.update({k: v for k, v in kw.items() if v}) # Apply non-empty defaults
6461

65-
new_kw['__conf__'] = run_args
62+
new_kw["__conf__"] = run_args
6663

6764
return new_kw
65+
66+
67+
def apply_config_from_file(path: str, run_name: str, kw: Dict[str, Any]):
68+
with open(path) as f:
69+
return _apply_config(toml.load(f), run_name, kw)
70+
71+
72+
def apply_config_from_string(toml_config: str, run_name: str, kw: Dict[str, Any]):
73+
return _apply_config(toml.loads(toml_config), run_name, kw)

data_diff/databases/connect.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,10 @@ def connect_to_uri(db_uri: str, thread_count: Optional[int] = 1) -> Database:
137137

138138
return cls(**kw)
139139

140+
140141
def connect_with_dict(d, thread_count):
141142
d = dict(d)
142-
driver = d.pop('driver')
143+
driver = d.pop("driver")
143144
try:
144145
matcher = MATCH_URI_PATH[driver]
145146
except KeyError:
@@ -151,6 +152,7 @@ def connect_with_dict(d, thread_count):
151152

152153
return cls(**d)
153154

155+
154156
def connect(x, thread_count):
155157
if isinstance(x, str):
156158
return connect_to_uri(x, thread_count)

data_diff/databases/oracle.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class Oracle(ThreadedDatabase):
2727
def __init__(self, *, host, database, thread_count, **kw):
2828
self.kwargs = dict(dsn="%s/%s" % (host, database), **kw)
2929

30-
self.default_schema = kw.get('user')
30+
self.default_schema = kw.get("user")
3131

3232
super().__init__(thread_count=thread_count)
3333

data_diff/diff_tables.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ class TableDiffer:
271271
"""
272272

273273
bisection_factor: int = DEFAULT_BISECTION_FACTOR
274-
bisection_threshold: Number = DEFAULT_BISECTION_THRESHOLD # Accepts inf for tests
274+
bisection_threshold: Number = DEFAULT_BISECTION_THRESHOLD # Accepts inf for tests
275275
threaded: bool = True
276276
max_threadpool_size: Optional[int] = 1
277277

tests/test_config.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import unittest
2+
3+
from data_diff.config import apply_config_from_string, ConfigParseError
4+
5+
6+
class TestConfig(unittest.TestCase):
7+
def test_basic(self):
8+
config = r"""
9+
[database.test_postgresql]
10+
driver = "postgresql"
11+
user = "postgres"
12+
password = "Password1"
13+
14+
[run.default]
15+
update_column = "timestamp"
16+
verbose = true
17+
threads = 2
18+
19+
[run.pg_pg]
20+
threads = 4
21+
1.database = "test_postgresql"
22+
1.table = "rating"
23+
1.threads = 11
24+
2.database = "postgresql://postgres:Password1@/"
25+
2.table = "rating_del1"
26+
2.threads = 22
27+
"""
28+
self.assertRaises(ConfigParseError, apply_config_from_string, config, "bla", {}) # No such run
29+
30+
res = apply_config_from_string(config, "pg_pg", {})
31+
assert res["update_column"] == "timestamp" # default
32+
assert res["verbose"] is True
33+
assert res["threads"] == 4 # overwritten by pg_pg
34+
assert res["database1"] == {"driver": "postgresql", "user": "postgres", "password": "Password1"}
35+
assert res["database2"] == "postgresql://postgres:Password1@/"
36+
assert res["table1"] == "rating"
37+
assert res["table2"] == "rating_del1"
38+
assert res["threads1"] == 11
39+
assert res["threads2"] == 22
40+
41+
res = apply_config_from_string(config, "pg_pg", {"update_column": "foo", "table2": "bar"})
42+
assert res["update_column"] == "foo"
43+
assert res["table2"] == "bar"

0 commit comments

Comments
 (0)