Skip to content

Commit 87ea98f

Browse files
authored
HPCC4J-569 Correct EOS character handling (#676)
- Modified BinaryRecordWriter to only truncate strings at EOS in var strings - Added unit test that verifies read / write all of Unicode chars and mid EOS Signed-off-by: James McMullan [email protected] Signed-off-by: James McMullan [email protected]
1 parent f78ecb4 commit 87ea98f

File tree

2 files changed

+128
-16
lines changed

2 files changed

+128
-16
lines changed

dfsclient/src/main/java/org/hpccsystems/dfs/client/BinaryRecordWriter.java

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
/**
3535
* Serializes records into the provided OutputStream utilizing the provided IRecordAccessor to access record data.
36-
*
36+
*
3737
* The IRecordAccessor must match the type of records that are provided to {@link #writeRecord(Object) writeRecord}.
3838
* The data written to the OutputStream will be in the HPCC Systems binary record format.
3939
*/
@@ -52,6 +52,8 @@ public class BinaryRecordWriter implements IRecordWriter
5252
private static final int QSTR_COMPRESSED_CHUNK_LEN = 3;
5353
private static final int QSTR_EXPANDED_CHUNK_LEN = 4;
5454

55+
private static final byte NULL_TERMINATOR = '\0';
56+
5557
private byte[] scratchBuffer = new byte[SCRATCH_BUFFER_SIZE];
5658

5759
private OutputStream outputStream = null;
@@ -104,7 +106,7 @@ public BinaryRecordWriter(OutputStream output, ByteOrder byteOrder) throws Excep
104106

105107
/*
106108
* (non-Javadoc)
107-
*
109+
*
108110
* @see org.hpccsystems.dfs.client.IRecordWriter#initialize(org.hpccsystems.dfs.client.IRecordAccessor)
109111
*/
110112
public void initialize(IRecordAccessor recordAccessor)
@@ -307,7 +309,7 @@ private void writeField(FieldDef fd, Object fieldValue) throws Exception
307309
fillLength = SCRATCH_BUFFER_SIZE;
308310
}
309311

310-
Arrays.fill(scratchBuffer, 0, fillLength, (byte) '\0');
312+
Arrays.fill(scratchBuffer, 0, fillLength, NULL_TERMINATOR);
311313
writeByteArray(scratchBuffer, 0, fillLength);
312314
numFillBytes -= fillLength;
313315
}
@@ -335,7 +337,7 @@ private void writeField(FieldDef fd, Object fieldValue) throws Exception
335337
case FILEPOS:
336338
{
337339
Long value = null;
338-
if (fieldValue==null)
340+
if (fieldValue==null)
339341
{
340342
value=Long.valueOf(0);
341343
}
@@ -391,7 +393,7 @@ else if (fd.getDataLen() < 8 && fd.getDataLen() > 0)
391393
{
392394
this.buffer.put((byte) ((value >> (i*8)) & 0xFF));
393395
}
394-
396+
395397
long signBit = value < 0 ? 0x80L : 0;
396398
this.buffer.put((byte) (((value >> (lastByteIdx*8)) & 0xFF) | signBit));
397399
}
@@ -436,8 +438,8 @@ else if (fd.getDataLen() == 8)
436438
}
437439
case CHAR:
438440
{
439-
byte c='\0';
440-
if (fieldValue!=null)
441+
byte c = NULL_TERMINATOR;
442+
if (fieldValue!=null)
441443
{
442444
String value = (String) fieldValue;
443445
c = (byte) value.charAt(0);
@@ -449,6 +451,15 @@ else if (fd.getDataLen() == 8)
449451
case STRING:
450452
{
451453
String value = fieldValue != null ? (String) fieldValue : "";
454+
if (fd.getFieldType() == FieldType.VAR_STRING)
455+
{
456+
int eosIdx = value.indexOf(NULL_TERMINATOR);
457+
if (eosIdx > -1)
458+
{
459+
value = value.substring(0,eosIdx);
460+
}
461+
}
462+
452463
byte[] data = new byte[0];
453464
if (fd.getSourceType() == HpccSrcType.UTF16LE)
454465
{
@@ -475,7 +486,7 @@ else if (fd.getSourceType() == HpccSrcType.QSTRING)
475486
int compressedDataLen = tempData.length * QSTR_COMPRESSED_CHUNK_LEN + (QSTR_EXPANDED_CHUNK_LEN-1);
476487
compressedDataLen /= QSTR_EXPANDED_CHUNK_LEN;
477488
data = new byte[compressedDataLen];
478-
489+
479490
int bitOffset = 0;
480491
for (int i = 0; i < tempData.length; i++)
481492
{
@@ -491,7 +502,7 @@ else if (fd.getSourceType() == HpccSrcType.QSTRING)
491502
case 2:
492503
// The top 4 bits of Char 2 are in the bot 4 bits of byte1
493504
data[byteIdx] |= (byte) ((qstrByteValue & 0x3C) >> 2);
494-
505+
495506
// The bot 2 bits of Char 2 are in the top 2 bits of byte2
496507
data[byteIdx+1] = (byte) ((qstrByteValue & 0x3) << 6);
497508
break;
@@ -541,10 +552,10 @@ else if (fd.getSourceType() == HpccSrcType.QSTRING)
541552
{
542553
if (fd.getFieldType() == FieldType.VAR_STRING && bytesToWrite > 0)
543554
{
544-
data[bytesToWrite - 1] = '\0';
555+
data[bytesToWrite - 1] = NULL_TERMINATOR;
545556
if (fd.getSourceType().isUTF16() && bytesToWrite > 1)
546557
{
547-
data[bytesToWrite - 2] = '\0';
558+
data[bytesToWrite - 2] = NULL_TERMINATOR;
548559
}
549560
}
550561

@@ -562,7 +573,7 @@ else if (fd.getSourceType() == HpccSrcType.QSTRING)
562573
fillLength = SCRATCH_BUFFER_SIZE;
563574
}
564575

565-
Arrays.fill(scratchBuffer, 0, fillLength, (byte) '\0');
576+
Arrays.fill(scratchBuffer, 0, fillLength, NULL_TERMINATOR);
566577
writeByteArray(scratchBuffer, 0, fillLength);
567578
numFillBytes -= fillLength;
568579
}
@@ -576,11 +587,25 @@ else if (fd.getSourceType() == HpccSrcType.QSTRING)
576587

577588
if (fd.getFieldType() == FieldType.VAR_STRING)
578589
{
579-
byte nullByte = '\0';
580-
this.buffer.put(nullByte);
581590
if (fd.getSourceType().isUTF16())
582591
{
583-
this.buffer.put(nullByte);
592+
boolean needsNullAdded = data.length < 2
593+
|| data[data.length - 1] != NULL_TERMINATOR
594+
|| data[data.length - 2] != NULL_TERMINATOR;
595+
if (needsNullAdded)
596+
{
597+
this.buffer.put(NULL_TERMINATOR);
598+
this.buffer.put(NULL_TERMINATOR);
599+
}
600+
}
601+
else
602+
{
603+
boolean needsNullAdded = data.length < 1
604+
|| data[data.length - 1] != NULL_TERMINATOR;
605+
if (needsNullAdded)
606+
{
607+
this.buffer.put(NULL_TERMINATOR);
608+
}
584609
}
585610
}
586611
}
@@ -885,7 +910,7 @@ private void writeDecimal(FieldDef fd, BigDecimal decimalValue)
885910

886911
// 1e18
887912
BigInteger divisor = BigInteger.valueOf(1000000000000000000L);
888-
for (int currentDigit = 0; currentDigit < desiredPrecision;)
913+
for (int currentDigit = 0; currentDigit < desiredPrecision;)
889914
{
890915
// Consume 18 digits at a time
891916
BigInteger[] quotientRemainder = unscaledInt.divideAndRemainder(divisor);

dfsclient/src/test/java/org/hpccsystems/dfs/client/DFSReadWriteTest.java

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,93 @@ public void readWithForcedTimeoutTest() throws Exception
8585
assertEquals("Not all records loaded",expectedCounts[0], records.size());
8686
}
8787

88+
@Test
89+
public void nullCharTests() throws Exception
90+
{
91+
// Unicode
92+
{
93+
FieldDef recordDef = null;
94+
{
95+
FieldDef[] fieldDefs = new FieldDef[2];
96+
fieldDefs[0] = new FieldDef("uni", FieldType.STRING, "STRING", 100, false, false, HpccSrcType.UTF16LE, new FieldDef[0]);
97+
fieldDefs[1] = new FieldDef("fixedUni", FieldType.STRING, "STRING", 100, true, false, HpccSrcType.UTF16LE, new FieldDef[0]);
98+
99+
recordDef = new FieldDef("RootRecord", FieldType.RECORD, "rec", 4, false, false, HpccSrcType.LITTLE_ENDIAN, fieldDefs);
100+
}
101+
102+
List<HPCCRecord> records = new ArrayList<HPCCRecord>();
103+
int maxUTF16BMPChar = Character.MAX_CODE_POINT;
104+
for (int i = 0; i < maxUTF16BMPChar; i++) {
105+
String strMidEOS = "";
106+
for (int j = 0; j < 98; j++, i++) {
107+
if (j == 50) {
108+
strMidEOS += "\0";
109+
}
110+
strMidEOS += Character.toString((char) i);
111+
}
112+
113+
Object[] fields = {strMidEOS, strMidEOS};
114+
records.add(new HPCCRecord(fields, recordDef));
115+
}
116+
117+
String fileName = "unicode::all_chars::test";
118+
writeFile(records, fileName, recordDef, connTO);
119+
120+
HPCCFile file = new HPCCFile(fileName, connString , hpccUser, hpccPass);
121+
List<HPCCRecord> readRecords = readFile(file, 10000, false, false, BinaryRecordReader.TRIM_STRINGS);
122+
123+
for (int i = 0; i < records.size(); i++) {
124+
HPCCRecord record = records.get(i);
125+
HPCCRecord readRecord = readRecords.get(i);
126+
if (readRecord.equals(record) == false)
127+
{
128+
System.out.println("Record: " + i + " did not match\n" + record + "\n" + readRecord);
129+
}
130+
}
131+
}
132+
133+
// SBC / ASCII
134+
{
135+
FieldDef recordDef = null;
136+
{
137+
FieldDef[] fieldDefs = new FieldDef[2];
138+
fieldDefs[0] = new FieldDef("str", FieldType.STRING, "STRING", 10, false, false, HpccSrcType.SINGLE_BYTE_CHAR, new FieldDef[0]);
139+
fieldDefs[1] = new FieldDef("fixedStr", FieldType.STRING, "STRING", 10, true, false, HpccSrcType.SINGLE_BYTE_CHAR, new FieldDef[0]);
140+
141+
recordDef = new FieldDef("RootRecord", FieldType.RECORD, "rec", 4, false, false, HpccSrcType.LITTLE_ENDIAN, fieldDefs);
142+
}
143+
144+
List<HPCCRecord> records = new ArrayList<HPCCRecord>();
145+
for (int i = 0; i < 255; i++) {
146+
String strMidEOS = "";
147+
for (int j = 0; j < 9; j++, i++) {
148+
if (j == 5) {
149+
strMidEOS += "\0";
150+
}
151+
strMidEOS += Character.toString((char) i);
152+
}
153+
154+
Object[] fields = {strMidEOS, strMidEOS};
155+
records.add(new HPCCRecord(fields, recordDef));
156+
}
157+
158+
String fileName = "ascii::all_chars::test";
159+
writeFile(records, fileName, recordDef, connTO);
160+
161+
HPCCFile file = new HPCCFile(fileName, connString , hpccUser, hpccPass);
162+
List<HPCCRecord> readRecords = readFile(file, 10000, false, false, BinaryRecordReader.TRIM_STRINGS);
163+
164+
for (int i = 0; i < records.size(); i++) {
165+
HPCCRecord record = records.get(i);
166+
HPCCRecord readRecord = readRecords.get(i);
167+
if (readRecord.equals(record) == false)
168+
{
169+
System.out.println("Record: " + i + " did not match\n" + record + "\n" + readRecord);
170+
}
171+
}
172+
}
173+
}
174+
88175
@Test
89176
public void integrationReadWriteBackTest() throws Exception
90177
{

0 commit comments

Comments
 (0)