Skip to content

Commit c3d32f7

Browse files
committed
allow specifying encoding in cli
1 parent 33e0a59 commit c3d32f7

File tree

2 files changed

+48
-3
lines changed

2 files changed

+48
-3
lines changed

csv_diff/cli.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,12 @@
4242
is_flag=True,
4343
help="Show unchanged fields for rows with at least one change",
4444
)
45-
def cli(previous, current, key, format, json, singular, plural, show_unchanged):
45+
@click.option(
46+
"--encoding",
47+
default=None,
48+
help="Specify text encoding of the csv files",
49+
)
50+
def cli(previous, current, key, format, json, singular, plural, show_unchanged, encoding):
4651
"Diff two CSV or JSON files"
4752
dialect = {
4853
"csv": "excel",
@@ -51,10 +56,10 @@ def cli(previous, current, key, format, json, singular, plural, show_unchanged):
5156

5257
def load(filename):
5358
if format == "json":
54-
return load_json(open(filename), key=key)
59+
return load_json(open(filename, encoding=encoding), key=key)
5560
else:
5661
return load_csv(
57-
open(filename, newline=""), key=key, dialect=dialect.get(format)
62+
open(filename, newline="", encoding=encoding), key=key, dialect=dialect.get(format)
5863
)
5964

6065
diff = compare(load(previous), load(current), show_unchanged)

tests/test_cli.py

+40
Original file line numberDiff line numberDiff line change
@@ -234,3 +234,43 @@ def test_semicolon_delimited(tmpdir):
234234
"columns_added": [],
235235
"columns_removed": [],
236236
} == json.loads(result.output.strip())
237+
238+
239+
def test_human_cli_non_utf8_encoding(tmpdir):
240+
# This test confirms the ability to parse csv files that are not encoded using utf-8.
241+
# The names in the files contain characters that would cause UnicodeDecodeErrors if they
242+
# are encoded using cp1252 and then parsed using utf-8.
243+
encoding = "cp1252"
244+
one = tmpdir / "one.csv"
245+
two = tmpdir / "two.csv"
246+
one.write_binary(
247+
dedent(
248+
"""
249+
id;name
250+
1;José
251+
"""
252+
).strip().encode(encoding)
253+
)
254+
two.write_binary(
255+
dedent(
256+
"""
257+
id;name
258+
1;Ángela
259+
"""
260+
).strip().encode(encoding)
261+
)
262+
result = CliRunner().invoke(
263+
cli.cli, [str(one), str(two), "--key", "id", "--encoding", encoding], catch_exceptions=False
264+
)
265+
assert 0 == result.exit_code
266+
assert (
267+
dedent(
268+
"""
269+
1 row changed
270+
271+
id: 1
272+
name: "José" => "Ángela"
273+
"""
274+
).strip()
275+
== result.output.strip()
276+
)

0 commit comments

Comments
 (0)