Skip to content

Commit e3124f2

Browse files
committed
New option: csv-diff one.csv two.csv --json
1 parent 81b43ca commit e3124f2

File tree

5 files changed

+143
-21
lines changed

5 files changed

+143
-21
lines changed

csv_diff/__init__.py

+37-16
Original file line numberDiff line numberDiff line change
@@ -23,35 +23,56 @@ def compare(previous, current):
2323
removed_or_added = set(removed) | set(added)
2424
potential_changes = [id for id in current if id not in removed_or_added]
2525
changed = [id for id in potential_changes if current[id] != previous[id]]
26-
# Now generate the readable summary
27-
summary = []
28-
title = []
26+
result = {"added": [], "removed": [], "changed": []}
2927
if added:
30-
fragment = "{} row{} added".format(len(added), "" if len(added) == 1 else "s")
28+
result["added"] = [current[id] for id in added]
29+
if removed:
30+
result["removed"] = [previous[id] for id in removed]
31+
if changed:
32+
for id in changed:
33+
d = list(diff(previous[id], current[id]))
34+
result["changed"].append(
35+
{
36+
"key": id,
37+
"changes": {
38+
field: [prev_value, current_value]
39+
for _, field, (prev_value, current_value) in d
40+
},
41+
}
42+
)
43+
return result
44+
45+
46+
def human_text(result):
47+
title = []
48+
summary = []
49+
if result["added"]:
50+
fragment = "{} row{} added".format(
51+
len(result["added"]), "" if len(result["added"]) == 1 else "s"
52+
)
3153
title.append(fragment)
3254
summary.append(fragment + "\n")
33-
for id in added:
34-
summary.append(" {}".format(json.dumps(current[id])))
55+
for row in result["added"]:
56+
summary.append(" {}".format(json.dumps(row)))
3557
summary.append("")
36-
if removed:
58+
if result["removed"]:
3759
fragment = "{} row{} removed".format(
38-
len(removed), "" if len(removed) == 1 else "s"
60+
len(result["removed"]), "" if len(result["removed"]) == 1 else "s"
3961
)
4062
title.append(fragment)
4163
summary.append(fragment + "\n")
42-
for id in removed:
43-
summary.append(" {}".format(json.dumps(previous[id])))
64+
for row in result["removed"]:
65+
summary.append(" {}".format(json.dumps(row)))
4466
summary.append("")
45-
if changed:
67+
if result["changed"]:
4668
fragment = "{} row{} changed".format(
47-
len(changed), "" if len(changed) == 1 else "s"
69+
len(result["changed"]), "" if len(result["changed"]) == 1 else "s"
4870
)
4971
title.append(fragment)
5072
summary.append(fragment + "\n")
51-
for id in changed:
52-
d = list(diff(previous[id], current[id]))
53-
summary.append(" Row {}".format(id))
54-
for _, field, (prev_value, current_value) in d:
73+
for details in result["changed"]:
74+
summary.append(" Row {}".format(details["key"]))
75+
for field, (prev_value, current_value) in details["changes"].items():
5576
summary.append(
5677
' {}: "{}" => "{}"'.format(field, prev_value, current_value)
5778
)

csv_diff/cli.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import click
2-
from . import load_csv, compare
2+
import json as std_json
3+
from . import load_csv, compare, human_text
34

45

56
@click.command()
@@ -15,6 +16,13 @@
1516
@click.option(
1617
"--key", type=str, default=None, help="Column to use as a unique ID for each row"
1718
)
18-
def cli(previous, current, key):
19+
@click.option(
20+
"--json", type=bool, default=False, help="Output changes as JSON", is_flag=True
21+
)
22+
def cli(previous, current, key, json):
    "Diff two CSV files"
    # NOTE: click uses the docstring above as the command's --help text, so it
    # is deliberately left unchanged; maintainer notes live in comments.
    #
    # previous / current: values handed to open() for the older and newer CSV.
    # key: forwarded to load_csv() as its ``key`` argument (may be None).
    # json: flag; when true the raw diff structure is printed as JSON,
    #       otherwise the human_text() summary is printed. The parameter
    #       shadows the json module, hence the ``std_json`` alias.
    #
    # Open both files in a single ``with`` so the handles are closed on every
    # path, including when loading or comparing raises. compare() is called
    # inside the block so the files are still open even if load_csv() were to
    # read lazily.
    with open(previous) as previous_file, open(current) as current_file:
        diff = compare(
            load_csv(previous_file, key=key), load_csv(current_file, key=key)
        )
    if json:
        print(std_json.dumps(diff, indent=4))
    else:
        print(human_text(diff))

tests/test_cli.py

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from click.testing import CliRunner
2+
from csv_diff import cli
3+
from .test_csv_diff import ONE, TWO, THREE
4+
import json
5+
6+
7+
def test_human_cli(tmpdir):
    """Without --json the CLI prints the human-readable summary."""
    # Write both fixtures to disk and collect their paths as CLI arguments.
    csv_paths = []
    for filename, content in (("one.csv", ONE), ("two.csv", TWO)):
        target = tmpdir / filename
        target.write(content)
        csv_paths.append(str(target))
    outcome = CliRunner().invoke(cli.cli, csv_paths + ["--key", "id"])
    assert 0 == outcome.exit_code
    expected_text = '1 row changed\n\n1 row changed\n\n Row 1\n age: "4" => "5"'
    assert expected_text == outcome.output.strip()
18+
19+
20+
def test_human_cli_json(tmpdir):
    """With --json the CLI prints the diff structure as parseable JSON."""
    # Write both fixtures to disk and collect their paths as CLI arguments.
    csv_paths = []
    for filename, content in (("one.csv", ONE), ("two.csv", TWO)):
        target = tmpdir / filename
        target.write(content)
        csv_paths.append(str(target))
    outcome = CliRunner().invoke(cli.cli, csv_paths + ["--key", "id", "--json"])
    assert 0 == outcome.exit_code
    expected = {
        "added": [],
        "removed": [],
        "changed": [{"key": "1", "changes": {"age": ["4", "5"]}}],
    }
    assert expected == json.loads(outcome.output.strip())

tests/test_csv_diff.py

+31-2
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,38 @@
99
1,Cleo,5
1010
2,Pancakes,2"""
1111

12+
THREE = """id,name,age
13+
1,Cleo,5"""
1214

13-
def test_diff():
15+
16+
def test_row_changed():
    """compare() reports a modified row as per-field [old, new] pairs."""
    before = load_csv(io.StringIO(ONE), key="id")
    after = load_csv(io.StringIO(TWO), key="id")
    expected = {
        "added": [],
        "removed": [],
        "changed": [{"key": "1", "changes": {"age": ["4", "5"]}}],
    }
    assert expected == compare(before, after)
25+
26+
27+
def test_row_added():
    """compare() lists a row present only in the newer CSV under "added"."""
    before = load_csv(io.StringIO(THREE), key="id")
    after = load_csv(io.StringIO(TWO), key="id")
    expected = {
        "changed": [],
        "removed": [],
        "added": [{"age": "2", "id": "2", "name": "Pancakes"}],
    }
    assert expected == compare(before, after)
36+
37+
38+
def test_row_removed():
    """compare() lists a row present only in the older CSV under "removed"."""
    before = load_csv(io.StringIO(TWO), key="id")
    after = load_csv(io.StringIO(THREE), key="id")
    expected = {
        "changed": [],
        "removed": [{"age": "2", "id": "2", "name": "Pancakes"}],
        "added": [],
    }
    assert expected == compare(before, after)

tests/test_human_text.py

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from csv_diff import load_csv, compare, human_text
2+
from .test_csv_diff import ONE, TWO, THREE
3+
import io
4+
5+
6+
def test_row_changed():
    """human_text() renders a changed row as a field-by-field summary."""
    before = load_csv(io.StringIO(ONE), key="id")
    after = load_csv(io.StringIO(TWO), key="id")
    rendered = human_text(compare(before, after))
    expected_text = '1 row changed\n\n1 row changed\n\n Row 1\n age: "4" => "5"'
    assert expected_text == rendered
14+
15+
16+
def test_row_added():
    """human_text() renders an added row as its JSON-encoded contents."""
    before = load_csv(io.StringIO(THREE), key="id")
    after = load_csv(io.StringIO(TWO), key="id")
    rendered = human_text(compare(before, after))
    expected_text = (
        '1 row added\n\n1 row added\n\n {"id": "2", "name": "Pancakes", "age": "2"}'
    )
    assert expected_text == rendered
24+
25+
26+
def test_row_removed():
    """human_text() renders a removed row as its JSON-encoded contents."""
    before = load_csv(io.StringIO(TWO), key="id")
    after = load_csv(io.StringIO(THREE), key="id")
    rendered = human_text(compare(before, after))
    expected_text = (
        '1 row removed\n\n1 row removed\n\n {"id": "2", "name": "Pancakes", "age": "2"}'
    )
    assert expected_text == rendered

0 commit comments

Comments
 (0)