From f914fdd7bc33e095d94098268ac7c941f0362b2c Mon Sep 17 00:00:00 2001 From: Yuzhan Jiang Date: Thu, 26 Dec 2024 14:06:45 -0800 Subject: [PATCH] CSV-196-master: More changes --- .../org/apache/commons/csv/CSVParser.java | 5 +++-- .../org/apache/commons/csv/CSVRecord.java | 20 +++++-------------- .../commons/csv/ExtendedBufferedReader.java | 4 ++-- .../org/apache/commons/csv/CSVParserTest.java | 16 +++++++-------- .../org/apache/commons/csv/CSVRecordTest.java | 2 +- .../apache/commons/csv/JiraCsv196Test.java | 4 ++-- 6 files changed, 21 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 9ff28a96ae..50230388f8 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -539,13 +539,14 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact * @param recordNumber * The next record number to assign. * @param charset - * The character encoding to be used for the reader. + * The character encoding to be used for the reader when enableByteTracking is true. + * @param enableByteTracking + * {@code true} to enable byte tracking for the parser; {@code false} to disable it. * @throws IllegalArgumentException * If the parameters of the format are inconsistent or if either the reader or format is null. * @throws IOException * If there is a problem reading the header or skipping the first record. * @throws CSVException Thrown on invalid input. - * @since 1.13.0. */ private CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber, final Charset charset, final boolean enableByteTracking) diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java index 54c88812f0..386a25c852 100644 --- a/src/main/java/org/apache/commons/csv/CSVRecord.java +++ b/src/main/java/org/apache/commons/csv/CSVRecord.java @@ -51,7 +51,7 @@ public final class CSVRecord implements Serializable, Iterable { /** * The start byte of this record as a character byte in the source stream. */ - private final long characterByte; + private final long bytePosition; /** The accumulated comments (if any) */ private final String comment; @@ -65,24 +65,14 @@ public final class CSVRecord implements Serializable, Iterable { /** The parser that originates this record. This is not serialized. */ private final transient CSVParser parser; - CSVRecord(final CSVParser parser, final String[] values, final String comment, final long recordNumber, - final long characterPosition) { - this.recordNumber = recordNumber; - this.values = values != null ? values : Constants.EMPTY_STRING_ARRAY; - this.parser = parser; - this.comment = comment; - this.characterPosition = characterPosition; - this.characterByte = 0L; - } - CSVRecord(final CSVParser parser, final String[] values, final String comment, final long recordNumber, - final long characterPosition, final long characterByte) { + final long characterPosition, final long bytePosition) { this.recordNumber = recordNumber; this.values = values != null ? values : Constants.EMPTY_STRING_ARRAY; this.parser = parser; this.comment = comment; this.characterPosition = characterPosition; - this.characterByte = characterByte; + this.bytePosition = bytePosition; } /** * Returns a value by {@link Enum}. @@ -164,8 +154,8 @@ public long getCharacterPosition() { * * @return the start byte of this record as a character byte in the source stream. */ - public long getCharacterByte() { - return characterByte; + public long getBytePosition() { + return bytePosition; } /** diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java index 61f6ae2f3e..24044966d1 100644 --- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java +++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java @@ -147,7 +147,7 @@ public int read() throws IOException { lineNumber++; } if (encoder != null) { - this.bytesRead += getCharBytes(current); + this.bytesRead += getEncodedCharLength(current); } lastChar = current; position++; @@ -180,7 +180,7 @@ public int read() throws IOException { * @return the byte length of the character. * @throws CharacterCodingException if the character cannot be encoded. */ - private long getCharBytes(int current) throws CharacterCodingException { + private int getEncodedCharLength(int current) throws CharacterCodingException { final char cChar = (char) current; final char lChar = (char) lastChar; if (!Character.isSurrogate(cChar)) { diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index 7e3cafa65c..ac3708a52a 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -718,22 +718,22 @@ public void testGetRecordThreeBytesRead() throws Exception { assertNotNull(record = parser.nextRecord()); assertEquals(1, record.getRecordNumber()); assertEquals(code.indexOf('i'), record.getCharacterPosition()); - assertEquals(record.getCharacterByte(), record.getCharacterPosition()); + assertEquals(record.getBytePosition(), record.getCharacterPosition()); assertNotNull(record = parser.nextRecord()); assertEquals(2, record.getRecordNumber()); assertEquals(code.indexOf('1'), record.getCharacterPosition()); - assertEquals(record.getCharacterByte(), record.getCharacterPosition()); + assertEquals(record.getBytePosition(), record.getCharacterPosition()); assertNotNull(record = parser.nextRecord()); assertEquals(3, record.getRecordNumber()); assertEquals(code.indexOf('2'), record.getCharacterPosition()); - assertEquals(record.getCharacterByte(), 95); + assertEquals(record.getBytePosition(), 95); assertNotNull(record = parser.nextRecord()); assertEquals(4, record.getRecordNumber()); assertEquals(code.indexOf('3'), record.getCharacterPosition()); - assertEquals(record.getCharacterByte(), 154); + assertEquals(record.getBytePosition(), 154); }; } @@ -755,20 +755,20 @@ public void testGetRecordFourBytesRead() throws Exception { assertNotNull(record = parser.nextRecord()); assertEquals(1, record.getRecordNumber()); assertEquals(code.indexOf('i'), record.getCharacterPosition()); - assertEquals(record.getCharacterByte(), record.getCharacterPosition()); + assertEquals(record.getBytePosition(), record.getCharacterPosition()); assertNotNull(record = parser.nextRecord()); assertEquals(2, record.getRecordNumber()); assertEquals(code.indexOf('1'), record.getCharacterPosition()); - assertEquals(record.getCharacterByte(), record.getCharacterPosition()); + assertEquals(record.getBytePosition(), record.getCharacterPosition()); assertNotNull(record = parser.nextRecord()); assertEquals(3, record.getRecordNumber()); assertEquals(code.indexOf('2'), record.getCharacterPosition()); - assertEquals(record.getCharacterByte(), 26); + assertEquals(record.getBytePosition(), 26); assertNotNull(record = parser.nextRecord()); assertEquals(4, record.getRecordNumber()); assertEquals(code.indexOf('3'), record.getCharacterPosition()); - assertEquals(record.getCharacterByte(), 43); + assertEquals(record.getBytePosition(), 43); } } diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java b/src/test/java/org/apache/commons/csv/CSVRecordTest.java index 5b0c5d812c..40c057e9b8 100644 --- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java +++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java @@ -85,7 +85,7 @@ record = parser.iterator().next(); @Test public void testCSVRecordNULLValues() throws IOException { try (CSVParser parser = CSVParser.parse("A,B\r\nONE,TWO", CSVFormat.DEFAULT.withHeader())) { - final CSVRecord csvRecord = new CSVRecord(parser, null, null, 0L, 0L); + final CSVRecord csvRecord = new CSVRecord(parser, null, null, 0L, 0L, 0L); assertEquals(0, csvRecord.size()); assertThrows(IllegalArgumentException.class, () -> csvRecord.get("B")); } diff --git a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java index a49d934cfc..150a5f7f13 100644 --- a/src/test/java/org/apache/commons/csv/JiraCsv196Test.java +++ b/src/test/java/org/apache/commons/csv/JiraCsv196Test.java @@ -42,7 +42,7 @@ public void parseThreeBytes() throws IOException { long[] charByteKey = {0, 89, 242, 395}; int idx = 0; for (CSVRecord record : parser) { - assertEquals(charByteKey[idx++], record.getCharacterByte()); + assertEquals(charByteKey[idx++], record.getBytePosition()); } parser.close(); } @@ -63,7 +63,7 @@ public void parseFourBytes() throws IOException { long[] charByteKey = {0, 84, 701, 1318, 1935}; int idx = 0; for (CSVRecord record : parser) { - assertEquals(charByteKey[idx++], record.getCharacterByte()); + assertEquals(charByteKey[idx++], record.getBytePosition()); } parser.close(); }