Skip to content

Commit

Permalink
HPCC4J-693 DFSClient: Issue reading long null terminated strings
Browse files Browse the repository at this point in the history
- Fixed potential issue where reading long null terminated strings can cause misalignment within the read stream
- Fixed an issue with QStrings where buffer stalls during reading can cause misalignments
- Cleaned up null terminated string reading
- Added check for max string length

Signed-off-by: James McMullan [email protected]
  • Loading branch information
jpmcmu committed Feb 19, 2025
1 parent 0e9372f commit 5e936f0
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 85 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ public class BinaryRecordReader implements IRecordReader
private static final int BUFFER_GROW_SIZE = 8192;
private static final int OPTIMIZED_STRING_READ_AHEAD = 32;

// Max java UTF16 string length
private static final int MAX_STRING_LENGTH = 1073741823;

// DO NOT CHANGE THESE VALUES. HERE FOR CODE READABILITY ONLY
private static final int QSTR_COMPRESSED_CHUNK_LEN = 3;
private static final int QSTR_EXPANDED_CHUNK_LEN = 4;
Expand Down Expand Up @@ -483,6 +486,11 @@ private Object parseFlatField(FieldDef fd, boolean isLittleEndian) throws Unpars
codePoints = ((int) getInt(4, isLittleEndian));
}

if (codePoints > MAX_STRING_LENGTH)
{
throw new UnparsableContentException("String length exceeds maximum supported length: " + MAX_STRING_LENGTH);
}

fieldValue = getString(fd.getSourceType(), codePoints, shouldTrim);
break;
}
Expand Down Expand Up @@ -1033,33 +1041,43 @@ private String getNullTerminatedString(HpccSrcType stype, boolean shouldTrim) th
throw new IOException("Unsupported source type for null terminated string: " + stype);
}

// Note: separate for loops because consuming 2 bytes at a
// time makes null check easier. Do not have to check for alignment etc
// Read OPTIMIZED_STRING_READ_AHEAD bytes at a time until we find the end of the string
int eosLocation = -1;
int strByteLen = 0;
if (stype.isUTF16())
while (eosLocation < 0)
{
while (eosLocation < 0)
int readSize = 0;
try
{
int readSize = 0;
try
{
readSize = this.inputStream.available();
}
catch(Exception e)
{
throw new IOException("Error, unexpected EOS while constructing UTF16 string.");
}
readSize = this.inputStream.available();
}
catch(Exception e)
{
throw new IOException("Error, unexpected EOS while constructing UTF16 string.");
}

// Always read an even number of bytes for UTF16
if (stype.isUTF16()) {
readSize = ((readSize + 1) / 2) * 2;
if (readSize > OPTIMIZED_STRING_READ_AHEAD)
{
readSize = OPTIMIZED_STRING_READ_AHEAD;
}
}

this.inputStream.mark(readSize);
readIntoScratchBuffer(strByteLen, readSize);
if (readSize > OPTIMIZED_STRING_READ_AHEAD)
{
readSize = OPTIMIZED_STRING_READ_AHEAD;
}

if ((strByteLen + readSize) > MAX_STRING_LENGTH)
{
throw new IOException("Error, string length exceeds maximum supported length: " + MAX_STRING_LENGTH);
}

this.inputStream.mark(OPTIMIZED_STRING_READ_AHEAD);
readIntoScratchBuffer(strByteLen, readSize);

// Note: separate for loops because consuming 2 bytes at a
// time makes null check easier. Do not have to check for alignment etc
if (stype.isUTF16())
{
for (int j = 0; j < readSize-1; j += 2)
{
if (scratchBuffer[strByteLen + j] == '\0' && scratchBuffer[strByteLen + j + 1] == '\0')
Expand All @@ -1068,46 +1086,9 @@ private String getNullTerminatedString(HpccSrcType stype, boolean shouldTrim) th
break;
}
}

if (eosLocation != -1)
{
strByteLen += eosLocation;

// Reset back to our mark and the skip forward so we don't consume bytes
// passed the end of the string
this.inputStream.reset();
this.inputStream.skip(eosLocation + 2);

break;
}
else
{
strByteLen += readSize;
}
}
}
else
{
while (eosLocation < 0)
else
{
int readSize = 0;
try
{
readSize = this.inputStream.available();
}
catch(IOException e)
{
throw new IOException("Error, encountered EOS while constructing var string.");
}

if (readSize > OPTIMIZED_STRING_READ_AHEAD)
{
readSize = OPTIMIZED_STRING_READ_AHEAD;
}

this.inputStream.mark(readSize);
readIntoScratchBuffer(strByteLen, readSize);

for (int j = 0; j < readSize; j++)
{
if (scratchBuffer[strByteLen + j] == '\0')
Expand All @@ -1116,22 +1097,30 @@ private String getNullTerminatedString(HpccSrcType stype, boolean shouldTrim) th
break;
}
}
}

if (eosLocation != -1)
{
strByteLen += eosLocation;
if (eosLocation != -1)
{
strByteLen += eosLocation;

// Reset back to our mark and the skip forward so we don't consume bytes
// passed the end of the string
this.inputStream.reset();
this.inputStream.skip(eosLocation + 1);
// Reset back to our mark and the skip forward so we don't consume bytes
// passed the end of the string
this.inputStream.reset();

break;
if (stype.isUTF16())
{
this.inputStream.skip(eosLocation + 2);
}
else
{
strByteLen += readSize;
this.inputStream.skip(eosLocation + 1);
}

break;
}
else
{
strByteLen += readSize;
}
}

Expand Down Expand Up @@ -1264,26 +1253,10 @@ else if ((this.scratchBuffer[strByteLen + bytesScanned] & 0xF8) == 0xF0)
// Use the second half of the remaining buffer space as a temp place to read in compressed bytes.
// Beginning of the buffer will be used to construct the string

int bytesToRead = compressedLen;
int availableBytes = 0;
try
{
availableBytes = this.inputStream.available();
}
catch(Exception e)
{
throw new IOException("Error, unexpected EOS while constructing QString.");
}

if (bytesToRead > availableBytes)
{
bytesToRead = availableBytes;
}

// Scratch buffer is divided into two parts. First expandedLen bytes are for the final expanded string
// Remaining bytes are for reading in the compressed string.
int readPos = expandedLen + compressedBytesConsumed;
readIntoScratchBuffer(readPos, bytesToRead);
readIntoScratchBuffer(readPos, compressedLen);

// We want to consume only a whole chunk so round off residual chars
// Below we will handle any residual bytes. (strLen % 4)
Expand All @@ -1304,7 +1277,7 @@ else if ((this.scratchBuffer[strByteLen + bytesScanned] & 0xF8) == 0xF0)
compressedBytesConsumed += QSTR_COMPRESSED_CHUNK_LEN;
}

compressedBytesRead += bytesToRead;
compressedBytesRead += compressedLen;
strByteLen += writePos;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
import java.nio.file.Paths;
import java.nio.file.Path;
import java.nio.file.Files;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.security.SecureRandom;
Expand Down Expand Up @@ -176,6 +177,32 @@ public void nullCharTests() throws Exception
}
}

@Test
public void longNullTerminatedStringTest() throws Exception
{
Object[] fields = new Object[1];
fields[0] = generateRandomString(4096);
FieldDef recordDef = new FieldDef("RootRecord", FieldType.RECORD, "rec", 4, false, false, HpccSrcType.LITTLE_ENDIAN, new FieldDef[] {
new FieldDef("varstr", FieldType.VAR_STRING, "VARSTRING", 0, false, false, HpccSrcType.SINGLE_BYTE_CHAR, new FieldDef[0])
});

HPCCRecord record = new HPCCRecord(fields, recordDef);

ByteArrayOutputStream outStream = new ByteArrayOutputStream();
BinaryRecordWriter writer = new BinaryRecordWriter(outStream);
writer.initialize(new HPCCRecordAccessor(recordDef));

writer.writeRecord(record);
writer.finalize();

ByteArrayInputStream inStream = new ByteArrayInputStream(outStream.toByteArray());
BinaryRecordReader reader = new BinaryRecordReader(inStream);
reader.initialize(new HPCCRecordBuilder(recordDef));

HPCCRecord readRecord = (HPCCRecord) reader.getNext();
assertEquals(record, readRecord);
}

@Test
public void integrationReadWriteBackTest() throws Exception
{
Expand Down

0 comments on commit 5e936f0

Please sign in to comment.