Skip to content

Commit 8badf9c

Browse files
committed
Merge #565 from branch '496-csvEscaping' of github.com:metafacture/metafacture-core
2 parents e7bfeef + 6f39fcc commit 8badf9c

File tree

3 files changed

+40
-6
lines changed

3 files changed

+40
-6
lines changed

metafacture-csv/build.gradle

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ description = 'Modules for processing comma-separated values'
1919

2020
// opencsv is bumped from 3.10 to 5.9 in this change: CsvDecoder now imports
// CSVReaderBuilder, RFC4180Parser, RFC4180ParserBuilder and CsvException from it.
dependencies {
2121
api project(':metafacture-framework')
22-
implementation 'com.opencsv:opencsv:3.10'
22+
implementation 'com.opencsv:opencsv:5.9'
2323
testImplementation "junit:junit:${versions.junit}"
2424
testImplementation "org.mockito:mockito-core:${versions.mockito}"
2525
}

metafacture-csv/src/main/java/org/metafacture/csv/CsvDecoder.java

+21-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2013, 2014 Deutsche Nationalbibliothek
2+
* Copyright 2013-2024 Deutsche Nationalbibliothek and hbz
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -24,6 +24,10 @@
2424
import org.metafacture.framework.helpers.DefaultObjectPipe;
2525

2626
import com.opencsv.CSVReader;
27+
import com.opencsv.CSVReaderBuilder;
28+
import com.opencsv.RFC4180Parser;
29+
import com.opencsv.RFC4180ParserBuilder;
30+
import com.opencsv.exceptions.CsvException;
2731

2832
import java.io.IOException;
2933
import java.io.StringReader;
@@ -48,6 +52,7 @@ public final class CsvDecoder extends DefaultObjectPipe<String, StreamReceiver>
4852
private String[] header = new String[0];
4953
private int count;
5054
private boolean hasHeader;
55+
private RFC4180Parser parser;
5156

5257
/**
5358
* Creates an instance of {@link CsvDecoder} with a given separator.
@@ -56,6 +61,7 @@ public final class CsvDecoder extends DefaultObjectPipe<String, StreamReceiver>
5661
*/
5762
public CsvDecoder(final String separator) {
5863
// Only the first character of the given string is used as the separator;
// charAt(0) throws StringIndexOutOfBoundsException for an empty string.
this.separator = separator.charAt(0);
64+
// Build the parser after the separator is assigned so it picks up that value.
initializeCsvParser();
5965
}
6066

6167
/**
@@ -65,13 +71,21 @@ public CsvDecoder(final String separator) {
6571
*/
6672
public CsvDecoder(final char separator) {
6773
this.separator = separator;
74+
// Build the parser after the separator is assigned so it picks up that value.
initializeCsvParser();
6875
}
6976

7077
/**
7178
* Creates an instance of {@link CsvDecoder}. The default separator is
7279
* {@value #DEFAULT_SEP}.
7380
*/
7481
public CsvDecoder() {
82+
// No separator argument: the parser is built from the value the separator
// field already holds. NOTE(review): the field initializer is outside this
// hunk; presumably it is DEFAULT_SEP, as the Javadoc states - confirm.
initializeCsvParser();
83+
}
84+
85+
/**
* Builds an {@link RFC4180Parser} configured with the current value of the
* {@code separator} field and stores it in the {@code parser} field.
*
* Invoked by the constructors shown in this change and by
* {@code setSeparator(String)}, each time after the separator has been
* assigned, so that the parser matches the separator in effect.
*/
private void initializeCsvParser() {
86+
this.parser = new RFC4180ParserBuilder()
87+
.withSeparator(separator)
88+
.build();
7589
}
7690

7791
@Override
@@ -105,18 +119,19 @@ else if (parts.length == header.length) {
105119
}
106120
}
107121

108-
private String[] parseCsv(final String string) {
122+
private String[] parseCsv(final String csv) {
109123
String[] parts = new String[0];
110124
try {
111-
final CSVReader reader = new CSVReader(new StringReader(string),
112-
separator);
125+
final CSVReader reader = new CSVReaderBuilder(new StringReader(csv))
126+
.withCSVParser(parser)
127+
.build();
113128
final List<String[]> lines = reader.readAll();
114129
if (lines.size() > 0) {
115130
parts = lines.get(0);
116131
}
117132
reader.close();
118133
}
119-
catch (final IOException e) {
134+
catch (final IOException | CsvException e) {
120135
e.printStackTrace();
121136
}
122137
return parts;
@@ -139,5 +154,6 @@ public void setHasHeader(final boolean hasHeader) {
139154
*/
140155
public void setSeparator(final String separator) {
141156
// Only the first character of the given string is used as the separator;
// charAt(0) throws StringIndexOutOfBoundsException for an empty string.
this.separator = separator.charAt(0);
157+
// The parser was built with the previous separator; rebuild it so the
// new separator takes effect for subsequent parsing.
initializeCsvParser();
142158
}
143159
}

metafacture-csv/src/test/java/org/metafacture/csv/CsvDecoderTest.java

+18
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,22 @@ public void testTabSeparated() {
8989
ordered.verify(receiver).endRecord();
9090
}
9191

92+
/**
93+
* Checks that backslashes inside quoted fields are passed through unchanged.
* Both lines below show the raw characters, with [TAB] standing for a tab.
* In: "a","b[TAB]","c\t","\","\cd\"
94+
* Out: a, b[TAB], c\t, \, \cd\
95+
*/
96+
@Test
97+
public void issue496_escaping() {
98+
decoder.setHasHeader(false);
99+
decoder.process("\"a\",\"b\t\",\"c\\t\",\"\\\",\"\\cd\\\"");
100+
final InOrder ordered = inOrder(receiver);
101+
ordered.verify(receiver).startRecord("1");
102+
ordered.verify(receiver).literal("0", "a");
103+
ordered.verify(receiver).literal("1", "b\t");
104+
ordered.verify(receiver).literal("2", "c\\t");
105+
ordered.verify(receiver).literal("3", "\\");
106+
ordered.verify(receiver).literal("4", "\\cd\\");
107+
ordered.verify(receiver).endRecord();
108+
}
109+
92110
}

0 commit comments

Comments
 (0)