Skip to content

Commit a63e180

Browse files
authored
Merge pull request #88 from brain-geek/master
Change algorithm for detecting separator
2 parents 942070b + a414786 commit a63e180

File tree

2 files changed

+18
-1
lines changed

2 files changed

+18
-1
lines changed

lib/csv_importer/csv_reader.rb

+12-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,18 @@ def sanitize_content(csv_content)
5353
# Candidate column separators that detect_separator chooses between.
# Frozen so the shared list cannot be mutated by accident at runtime.
SEPARATORS = [",", ";", "\t"].freeze
5454

5555
# Guesses which entry of SEPARATORS delimits the columns of +csv_content+.
#
# Each candidate is scored by how far its per-line occurrence count drifts
# from its count on the first line; the candidate with the smallest total
# drift wins. A candidate that does not occur on the first line receives the
# worst possible score, so a separator present in the header is preferred
# even when another character (e.g. commas inside a field) is more frequent
# in the content overall.
#
# @param csv_content [String] raw CSV text
# @return [String] the selected separator. When no candidate occurs on the
#   first line (including empty content) all candidates tie and +min_by+
#   picks one of them.
def detect_separator(csv_content)
  # Split once up front; the lines do not depend on the candidate.
  all_lines = csv_content.lines
  # Empty content has no first line; fall back to "" so every candidate
  # scores Float::MAX instead of calling #count on nil.
  first_line = all_lines.first.to_s

  SEPARATORS.min_by do |separator|
    base_number = first_line.count(separator)

    if base_number.zero?
      Float::MAX
    else
      all_lines.inject(0) do |total, line|
        total + (line.count(separator) - base_number).abs
      end
    end
  end
end
5869

5970
# Remove trailing white spaces and ensure we always return a string

spec/csv_importer/csv_reader_spec.rb

+6
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@ module CSVImporter
2424
expect(reader.header).to eq ["email", "first_name", "last_name"]
2525
end
2626

27+
it "supports semicolon separated csv when content has lot of commas" do
28+
reader = CSVReader.new(content: "email;first_name;last_name;letter_ids\n
29+
peter@example.com;Peter;Stone;1,2,3,4,5,6,7,8,9,10,11,12,13,14")
30+
expect(reader.header).to eq ["email", "first_name", "last_name", "letter_ids"]
31+
end
32+
2733
it "supports tab separated csv" do
2834
reader = CSVReader.new(content: "email\tfirst_name\tlast_name")
2935
expect(reader.header).to eq ["email", "first_name", "last_name"]

0 commit comments

Comments
 (0)