Skip to content

Commit 5e936f0

Browse files
committed
HPCC4J-693 DFSClient: Issue reading long null terminated strings
- Fixed potential issue where reading long null-terminated strings can cause misalignment within the read stream
- Fixed an issue with QStrings where buffer stalls during reading can cause misalignments
- Cleaned up null-terminated string reading
- Added check for max string length

Signed-off-by: James McMullan [email protected]
1 parent 0e9372f commit 5e936f0

File tree

2 files changed

+85
-85
lines changed

2 files changed

+85
-85
lines changed

dfsclient/src/main/java/org/hpccsystems/dfs/client/BinaryRecordReader.java

Lines changed: 57 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ public class BinaryRecordReader implements IRecordReader
159159
private static final int BUFFER_GROW_SIZE = 8192;
160160
private static final int OPTIMIZED_STRING_READ_AHEAD = 32;
161161

162+
// Max Java UTF-16 string length
163+
private static final int MAX_STRING_LENGTH = 1073741823;
164+
162165
// DO NOT CHANGE THESE VALUES. HERE FOR CODE READABILITY ONLY
163166
private static final int QSTR_COMPRESSED_CHUNK_LEN = 3;
164167
private static final int QSTR_EXPANDED_CHUNK_LEN = 4;
@@ -483,6 +486,11 @@ private Object parseFlatField(FieldDef fd, boolean isLittleEndian) throws Unpars
483486
codePoints = ((int) getInt(4, isLittleEndian));
484487
}
485488

489+
if (codePoints > MAX_STRING_LENGTH)
490+
{
491+
throw new UnparsableContentException("String length exceeds maximum supported length: " + MAX_STRING_LENGTH);
492+
}
493+
486494
fieldValue = getString(fd.getSourceType(), codePoints, shouldTrim);
487495
break;
488496
}
@@ -1033,33 +1041,43 @@ private String getNullTerminatedString(HpccSrcType stype, boolean shouldTrim) th
10331041
throw new IOException("Unsupported source type for null terminated string: " + stype);
10341042
}
10351043

1036-
// Note: separate for loops because consuming 2 bytes at a
1037-
// time makes null check easier. Do not have to check for alignment etc
1044+
// Read OPTIMIZED_STRING_READ_AHEAD bytes at a time until we find the end of the string
10381045
int eosLocation = -1;
10391046
int strByteLen = 0;
1040-
if (stype.isUTF16())
1047+
while (eosLocation < 0)
10411048
{
1042-
while (eosLocation < 0)
1049+
int readSize = 0;
1050+
try
10431051
{
1044-
int readSize = 0;
1045-
try
1046-
{
1047-
readSize = this.inputStream.available();
1048-
}
1049-
catch(Exception e)
1050-
{
1051-
throw new IOException("Error, unexpected EOS while constructing UTF16 string.");
1052-
}
1052+
readSize = this.inputStream.available();
1053+
}
1054+
catch(Exception e)
1055+
{
1056+
throw new IOException("Error, unexpected EOS while constructing UTF16 string.");
1057+
}
10531058

1059+
// Always read an even number of bytes for UTF16
1060+
if (stype.isUTF16()) {
10541061
readSize = ((readSize + 1) / 2) * 2;
1055-
if (readSize > OPTIMIZED_STRING_READ_AHEAD)
1056-
{
1057-
readSize = OPTIMIZED_STRING_READ_AHEAD;
1058-
}
1062+
}
10591063

1060-
this.inputStream.mark(readSize);
1061-
readIntoScratchBuffer(strByteLen, readSize);
1064+
if (readSize > OPTIMIZED_STRING_READ_AHEAD)
1065+
{
1066+
readSize = OPTIMIZED_STRING_READ_AHEAD;
1067+
}
10621068

1069+
if ((strByteLen + readSize) > MAX_STRING_LENGTH)
1070+
{
1071+
throw new IOException("Error, string length exceeds maximum supported length: " + MAX_STRING_LENGTH);
1072+
}
1073+
1074+
this.inputStream.mark(OPTIMIZED_STRING_READ_AHEAD);
1075+
readIntoScratchBuffer(strByteLen, readSize);
1076+
1077+
// Note: separate for loops because consuming 2 bytes at a
1078+
// time makes null check easier. Do not have to check for alignment etc
1079+
if (stype.isUTF16())
1080+
{
10631081
for (int j = 0; j < readSize-1; j += 2)
10641082
{
10651083
if (scratchBuffer[strByteLen + j] == '\0' && scratchBuffer[strByteLen + j + 1] == '\0')
@@ -1068,46 +1086,9 @@ private String getNullTerminatedString(HpccSrcType stype, boolean shouldTrim) th
10681086
break;
10691087
}
10701088
}
1071-
1072-
if (eosLocation != -1)
1073-
{
1074-
strByteLen += eosLocation;
1075-
1076-
// Reset back to our mark and the skip forward so we don't consume bytes
1077-
// passed the end of the string
1078-
this.inputStream.reset();
1079-
this.inputStream.skip(eosLocation + 2);
1080-
1081-
break;
1082-
}
1083-
else
1084-
{
1085-
strByteLen += readSize;
1086-
}
10871089
}
1088-
}
1089-
else
1090-
{
1091-
while (eosLocation < 0)
1090+
else
10921091
{
1093-
int readSize = 0;
1094-
try
1095-
{
1096-
readSize = this.inputStream.available();
1097-
}
1098-
catch(IOException e)
1099-
{
1100-
throw new IOException("Error, encountered EOS while constructing var string.");
1101-
}
1102-
1103-
if (readSize > OPTIMIZED_STRING_READ_AHEAD)
1104-
{
1105-
readSize = OPTIMIZED_STRING_READ_AHEAD;
1106-
}
1107-
1108-
this.inputStream.mark(readSize);
1109-
readIntoScratchBuffer(strByteLen, readSize);
1110-
11111092
for (int j = 0; j < readSize; j++)
11121093
{
11131094
if (scratchBuffer[strByteLen + j] == '\0')
@@ -1116,22 +1097,30 @@ private String getNullTerminatedString(HpccSrcType stype, boolean shouldTrim) th
11161097
break;
11171098
}
11181099
}
1100+
}
11191101

1120-
if (eosLocation != -1)
1121-
{
1122-
strByteLen += eosLocation;
1102+
if (eosLocation != -1)
1103+
{
1104+
strByteLen += eosLocation;
11231105

1124-
// Reset back to our mark and the skip forward so we don't consume bytes
1125-
// passed the end of the string
1126-
this.inputStream.reset();
1127-
this.inputStream.skip(eosLocation + 1);
1106+
// Reset back to our mark and then skip forward so we don't consume bytes
1107+
// past the end of the string
1108+
this.inputStream.reset();
11281109

1129-
break;
1110+
if (stype.isUTF16())
1111+
{
1112+
this.inputStream.skip(eosLocation + 2);
11301113
}
11311114
else
11321115
{
1133-
strByteLen += readSize;
1116+
this.inputStream.skip(eosLocation + 1);
11341117
}
1118+
1119+
break;
1120+
}
1121+
else
1122+
{
1123+
strByteLen += readSize;
11351124
}
11361125
}
11371126

@@ -1264,26 +1253,10 @@ else if ((this.scratchBuffer[strByteLen + bytesScanned] & 0xF8) == 0xF0)
12641253
// Use the second half of the remaining buffer space as a temp place to read in compressed bytes.
12651254
// Beginning of the buffer will be used to construct the string
12661255

1267-
int bytesToRead = compressedLen;
1268-
int availableBytes = 0;
1269-
try
1270-
{
1271-
availableBytes = this.inputStream.available();
1272-
}
1273-
catch(Exception e)
1274-
{
1275-
throw new IOException("Error, unexpected EOS while constructing QString.");
1276-
}
1277-
1278-
if (bytesToRead > availableBytes)
1279-
{
1280-
bytesToRead = availableBytes;
1281-
}
1282-
12831256
// Scratch buffer is divided into two parts. First expandedLen bytes are for the final expanded string
12841257
// Remaining bytes are for reading in the compressed string.
12851258
int readPos = expandedLen + compressedBytesConsumed;
1286-
readIntoScratchBuffer(readPos, bytesToRead);
1259+
readIntoScratchBuffer(readPos, compressedLen);
12871260

12881261
// We want to consume only a whole chunk so round off residual chars
12891262
// Below we will handle any residual bytes. (strLen % 4)
@@ -1304,7 +1277,7 @@ else if ((this.scratchBuffer[strByteLen + bytesScanned] & 0xF8) == 0xF0)
13041277
compressedBytesConsumed += QSTR_COMPRESSED_CHUNK_LEN;
13051278
}
13061279

1307-
compressedBytesRead += bytesToRead;
1280+
compressedBytesRead += compressedLen;
13081281
strByteLen += writePos;
13091282
}
13101283

dfsclient/src/test/java/org/hpccsystems/dfs/client/DFSReadWriteTest.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
import java.nio.file.Paths;
2525
import java.nio.file.Path;
2626
import java.nio.file.Files;
27-
27+
import java.io.ByteArrayInputStream;
28+
import java.io.ByteArrayOutputStream;
2829
import java.math.BigDecimal;
2930
import java.math.BigInteger;
3031
import java.security.SecureRandom;
@@ -176,6 +177,32 @@ public void nullCharTests() throws Exception
176177
}
177178
}
178179

180+
@Test
181+
public void longNullTerminatedStringTest() throws Exception
182+
{
183+
Object[] fields = new Object[1];
184+
fields[0] = generateRandomString(4096);
185+
FieldDef recordDef = new FieldDef("RootRecord", FieldType.RECORD, "rec", 4, false, false, HpccSrcType.LITTLE_ENDIAN, new FieldDef[] {
186+
new FieldDef("varstr", FieldType.VAR_STRING, "VARSTRING", 0, false, false, HpccSrcType.SINGLE_BYTE_CHAR, new FieldDef[0])
187+
});
188+
189+
HPCCRecord record = new HPCCRecord(fields, recordDef);
190+
191+
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
192+
BinaryRecordWriter writer = new BinaryRecordWriter(outStream);
193+
writer.initialize(new HPCCRecordAccessor(recordDef));
194+
195+
writer.writeRecord(record);
196+
writer.finalize();
197+
198+
ByteArrayInputStream inStream = new ByteArrayInputStream(outStream.toByteArray());
199+
BinaryRecordReader reader = new BinaryRecordReader(inStream);
200+
reader.initialize(new HPCCRecordBuilder(recordDef));
201+
202+
HPCCRecord readRecord = (HPCCRecord) reader.getNext();
203+
assertEquals(record, readRecord);
204+
}
205+
179206
@Test
180207
public void integrationReadWriteBackTest() throws Exception
181208
{

0 commit comments

Comments
 (0)