Skip to content

Commit 4fb6a36

Browse files
committed
Handle columns being added and removed
My sf-tree-history project unexpectedly broke because the columns in the CSV file changed. https://circleci.com/gh/simonw/sf-tree-history/110 csv-diff can now detect and handle this without crashing.
1 parent e8934a0 commit 4fb6a36

File tree

5 files changed

+146
-13
lines changed

5 files changed

+146
-13
lines changed

README.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@ You can also run it using the `--json` option to get a machine-readable differen
7171
]
7272
}
7373
}
74-
]
74+
],
75+
"columns_added": [],
76+
"columns_removed": []
7577
}
7678

7779
You can also import the Python library into your own code like so:
@@ -83,3 +85,5 @@ You can also import the Python library into your own code like so:
8385
)
8486

8587
`diff` will now contain the same data structure as the output in the `--json` example above.
88+
89+
If the columns in the CSV have changed, those added or removed columns will be ignored when calculating changes made to specific rows.

csv_diff/__init__.py

+52-11
Original file line numberDiff line numberDiff line change
@@ -16,30 +16,49 @@ def load_csv(fp, key=None):
1616

1717

1818
def compare(previous, current):
19+
result = {
20+
"added": [],
21+
"removed": [],
22+
"changed": [],
23+
"columns_added": [],
24+
"columns_removed": [],
25+
}
26+
# Have the columns changed?
27+
previous_columns = set(next(iter(previous.values())).keys())
28+
current_columns = set(next(iter(current.values())).keys())
29+
ignore_columns = None
30+
if previous_columns != current_columns:
31+
result["columns_added"] = [
32+
c for c in current_columns if c not in previous_columns
33+
]
34+
result["columns_removed"] = [
35+
c for c in previous_columns if c not in current_columns
36+
]
37+
ignore_columns = current_columns.symmetric_difference(previous_columns)
1938
# Have any rows been removed or added?
2039
removed = [id for id in previous if id not in current]
2140
added = [id for id in current if id not in previous]
2241
# How about changed?
2342
removed_or_added = set(removed) | set(added)
2443
potential_changes = [id for id in current if id not in removed_or_added]
2544
changed = [id for id in potential_changes if current[id] != previous[id]]
26-
result = {"added": [], "removed": [], "changed": []}
2745
if added:
2846
result["added"] = [current[id] for id in added]
2947
if removed:
3048
result["removed"] = [previous[id] for id in removed]
3149
if changed:
3250
for id in changed:
33-
d = list(diff(previous[id], current[id]))
34-
result["changed"].append(
35-
{
36-
"key": id,
37-
"changes": {
38-
field: [prev_value, current_value]
39-
for _, field, (prev_value, current_value) in d
40-
},
41-
}
42-
)
51+
d = list(diff(previous[id], current[id], ignore=ignore_columns))
52+
if d:
53+
result["changed"].append(
54+
{
55+
"key": id,
56+
"changes": {
57+
field: [prev_value, current_value]
58+
for _, field, (prev_value, current_value) in d
59+
},
60+
}
61+
)
4362
return result
4463

4564

@@ -49,6 +68,28 @@ def human_text(result, key=None, singular=None, plural=None):
4968
title = []
5069
summary = []
5170
show_headers = sum(1 for key in result if result[key]) > 1
71+
if result["columns_added"]:
72+
fragment = "{} {} added".format(
73+
len(result["columns_added"]),
74+
"column" if len(result["columns_added"]) == 1 else "columns",
75+
)
76+
title.append(fragment)
77+
summary.extend(
78+
[fragment]
79+
+ [" {}".format(c) for c in sorted(result["columns_added"])]
80+
+ [""]
81+
)
82+
if result["columns_removed"]:
83+
fragment = "{} {} removed".format(
84+
len(result["columns_removed"]),
85+
"column" if len(result["columns_removed"]) == 1 else "columns",
86+
)
87+
title.append(fragment)
88+
summary.extend(
89+
[fragment]
90+
+ [" {}".format(c) for c in sorted(result["columns_removed"])]
91+
+ [""]
92+
)
5293
if result["changed"]:
5394
fragment = "{} {} changed".format(
5495
len(result["changed"]), singular if len(result["changed"]) == 1 else plural

tests/test_cli.py

+2
Original file line numberDiff line numberDiff line change
@@ -71,4 +71,6 @@ def test_human_cli_json(tmpdir):
7171
"added": [],
7272
"removed": [],
7373
"changed": [{"key": "1", "changes": {"age": ["4", "5"]}}],
74+
"columns_added": [],
75+
"columns_removed": [],
7476
} == json.loads(result.output.strip())

tests/test_csv_diff.py

+27
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@
2727
1,Cleo,5
2828
3,Bailey,1"""
2929

30+
SEVEN = """id,name,weight
31+
1,Cleo,48
32+
3,Bailey,20"""
33+
34+
EIGHT = """id,name,age,length
35+
3,Bailee,1,100
36+
4,Bob,7,422"""
37+
3038

3139
def test_row_changed():
3240
diff = compare(
@@ -36,6 +44,8 @@ def test_row_changed():
3644
"added": [],
3745
"removed": [],
3846
"changed": [{"key": "1", "changes": {"age": ["4", "5"]}}],
47+
"columns_added": [],
48+
"columns_removed": [],
3949
} == diff
4050

4151

@@ -47,6 +57,8 @@ def test_row_added():
4757
"changed": [],
4858
"removed": [],
4959
"added": [{"age": "2", "id": "2", "name": "Pancakes"}],
60+
"columns_added": [],
61+
"columns_removed": [],
5062
} == diff
5163

5264

@@ -58,4 +70,19 @@ def test_row_removed():
5870
"changed": [],
5971
"removed": [{"age": "2", "id": "2", "name": "Pancakes"}],
6072
"added": [],
73+
"columns_added": [],
74+
"columns_removed": [],
75+
} == diff
76+
77+
78+
def test_columns_changed():
79+
diff = compare(
80+
load_csv(io.StringIO(SIX), key="id"), load_csv(io.StringIO(SEVEN), key="id")
81+
)
82+
assert {
83+
"changed": [],
84+
"removed": [],
85+
"added": [],
86+
"columns_added": ["weight"],
87+
"columns_removed": ["age"],
6188
} == diff

tests/test_human_text.py

+60-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from csv_diff import load_csv, compare, human_text
2-
from .test_csv_diff import ONE, TWO, THREE, FOUR, FIVE, SIX
2+
from .test_csv_diff import ONE, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT
33
from textwrap import dedent
44
import io
55

@@ -113,3 +113,62 @@ def test_row_changed_and_row_added_and_row_deleted():
113113
).strip()
114114
== human_text(diff, "id")
115115
)
116+
117+
118+
def test_columns_changed():
119+
diff = compare(
120+
load_csv(io.StringIO(SIX), key="id"), load_csv(io.StringIO(SEVEN), key="id")
121+
)
122+
assert (
123+
dedent(
124+
"""
125+
1 column added, 1 column removed
126+
127+
1 column added
128+
weight
129+
130+
1 column removed
131+
age
132+
"""
133+
).strip()
134+
== human_text(diff, "id")
135+
)
136+
137+
138+
def test_columns_and_rows_changed():
139+
diff = compare(
140+
load_csv(io.StringIO(SEVEN), key="id"), load_csv(io.StringIO(EIGHT), key="id")
141+
)
142+
assert (
143+
dedent(
144+
"""
145+
2 columns added, 1 column removed, 1 row changed, 1 row added, 1 row removed
146+
147+
2 columns added
148+
age
149+
length
150+
151+
1 column removed
152+
weight
153+
154+
1 row changed
155+
156+
id: 3
157+
name: "Bailey" => "Bailee"
158+
159+
1 row added
160+
161+
id: 4
162+
name: Bob
163+
age: 7
164+
length: 422
165+
166+
1 row removed
167+
168+
id: 1
169+
name: Cleo
170+
weight: 48
171+
"""
172+
).strip()
173+
== human_text(diff, "id")
174+
)

0 commit comments

Comments
 (0)