@@ -159,6 +159,9 @@ public class BinaryRecordReader implements IRecordReader
159
159
private static final int BUFFER_GROW_SIZE = 8192 ;
160
160
private static final int OPTIMIZED_STRING_READ_AHEAD = 32 ;
161
161
162
+ // Max Java UTF-16 string length (2^30 - 1)
163
+ private static final int MAX_STRING_LENGTH = 1073741823 ;
164
+
162
165
// DO NOT CHANGE THESE VALUES. HERE FOR CODE READABILITY ONLY
163
166
private static final int QSTR_COMPRESSED_CHUNK_LEN = 3 ;
164
167
private static final int QSTR_EXPANDED_CHUNK_LEN = 4 ;
@@ -483,6 +486,11 @@ private Object parseFlatField(FieldDef fd, boolean isLittleEndian) throws Unpars
483
486
codePoints = ((int ) getInt (4 , isLittleEndian ));
484
487
}
485
488
489
+ if (codePoints > MAX_STRING_LENGTH )
490
+ {
491
+ throw new UnparsableContentException ("String length exceeds maximum supported length: " + MAX_STRING_LENGTH );
492
+ }
493
+
486
494
fieldValue = getString (fd .getSourceType (), codePoints , shouldTrim );
487
495
break ;
488
496
}
@@ -1033,33 +1041,43 @@ private String getNullTerminatedString(HpccSrcType stype, boolean shouldTrim) th
1033
1041
throw new IOException ("Unsupported source type for null terminated string: " + stype );
1034
1042
}
1035
1043
1036
- // Note: separate for loops because consuming 2 bytes at a
1037
- // time makes null check easier. Do not have to check for alignment etc
1044
+ // Read up to OPTIMIZED_STRING_READ_AHEAD bytes at a time until we find the end of the string
1038
1045
int eosLocation = -1 ;
1039
1046
int strByteLen = 0 ;
1040
- if ( stype . isUTF16 () )
1047
+ while ( eosLocation < 0 )
1041
1048
{
1042
- while (eosLocation < 0 )
1049
+ int readSize = 0 ;
1050
+ try
1043
1051
{
1044
- int readSize = 0 ;
1045
- try
1046
- {
1047
- readSize = this .inputStream .available ();
1048
- }
1049
- catch (Exception e )
1050
- {
1051
- throw new IOException ("Error, unexpected EOS while constructing UTF16 string." );
1052
- }
1052
+ readSize = this .inputStream .available ();
1053
+ }
1054
+ catch (Exception e )
1055
+ {
1056
+ throw new IOException ("Error, unexpected EOS while constructing UTF16 string." );
1057
+ }
1053
1058
1059
+ // Always read an even number of bytes for UTF16
1060
+ if (stype .isUTF16 ()) {
1054
1061
readSize = ((readSize + 1 ) / 2 ) * 2 ;
1055
- if (readSize > OPTIMIZED_STRING_READ_AHEAD )
1056
- {
1057
- readSize = OPTIMIZED_STRING_READ_AHEAD ;
1058
- }
1062
+ }
1059
1063
1060
- this .inputStream .mark (readSize );
1061
- readIntoScratchBuffer (strByteLen , readSize );
1064
+ if (readSize > OPTIMIZED_STRING_READ_AHEAD )
1065
+ {
1066
+ readSize = OPTIMIZED_STRING_READ_AHEAD ;
1067
+ }
1062
1068
1069
+ if ((strByteLen + readSize ) > MAX_STRING_LENGTH )
1070
+ {
1071
+ throw new IOException ("Error, string length exceeds maximum supported length: " + MAX_STRING_LENGTH );
1072
+ }
1073
+
1074
+ this .inputStream .mark (OPTIMIZED_STRING_READ_AHEAD );
1075
+ readIntoScratchBuffer (strByteLen , readSize );
1076
+
1077
+ // Note: separate for loops because consuming 2 bytes at a
1078
+ // time makes null check easier. Do not have to check for alignment etc
1079
+ if (stype .isUTF16 ())
1080
+ {
1063
1081
for (int j = 0 ; j < readSize -1 ; j += 2 )
1064
1082
{
1065
1083
if (scratchBuffer [strByteLen + j ] == '\0' && scratchBuffer [strByteLen + j + 1 ] == '\0' )
@@ -1068,46 +1086,9 @@ private String getNullTerminatedString(HpccSrcType stype, boolean shouldTrim) th
1068
1086
break ;
1069
1087
}
1070
1088
}
1071
-
1072
- if (eosLocation != -1 )
1073
- {
1074
- strByteLen += eosLocation ;
1075
-
1076
- // Reset back to our mark and the skip forward so we don't consume bytes
1077
- // passed the end of the string
1078
- this .inputStream .reset ();
1079
- this .inputStream .skip (eosLocation + 2 );
1080
-
1081
- break ;
1082
- }
1083
- else
1084
- {
1085
- strByteLen += readSize ;
1086
- }
1087
1089
}
1088
- }
1089
- else
1090
- {
1091
- while (eosLocation < 0 )
1090
+ else
1092
1091
{
1093
- int readSize = 0 ;
1094
- try
1095
- {
1096
- readSize = this .inputStream .available ();
1097
- }
1098
- catch (IOException e )
1099
- {
1100
- throw new IOException ("Error, encountered EOS while constructing var string." );
1101
- }
1102
-
1103
- if (readSize > OPTIMIZED_STRING_READ_AHEAD )
1104
- {
1105
- readSize = OPTIMIZED_STRING_READ_AHEAD ;
1106
- }
1107
-
1108
- this .inputStream .mark (readSize );
1109
- readIntoScratchBuffer (strByteLen , readSize );
1110
-
1111
1092
for (int j = 0 ; j < readSize ; j ++)
1112
1093
{
1113
1094
if (scratchBuffer [strByteLen + j ] == '\0' )
@@ -1116,22 +1097,30 @@ private String getNullTerminatedString(HpccSrcType stype, boolean shouldTrim) th
1116
1097
break ;
1117
1098
}
1118
1099
}
1100
+ }
1119
1101
1120
- if (eosLocation != -1 )
1121
- {
1122
- strByteLen += eosLocation ;
1102
+ if (eosLocation != -1 )
1103
+ {
1104
+ strByteLen += eosLocation ;
1123
1105
1124
- // Reset back to our mark and the skip forward so we don't consume bytes
1125
- // passed the end of the string
1126
- this .inputStream .reset ();
1127
- this .inputStream .skip (eosLocation + 1 );
1106
+ // Reset back to our mark and then skip forward so we don't consume bytes
1107
+ // past the end of the string
1108
+ this .inputStream .reset ();
1128
1109
1129
- break ;
1110
+ if (stype .isUTF16 ())
1111
+ {
1112
+ this .inputStream .skip (eosLocation + 2 );
1130
1113
}
1131
1114
else
1132
1115
{
1133
- strByteLen += readSize ;
1116
+ this . inputStream . skip ( eosLocation + 1 ) ;
1134
1117
}
1118
+
1119
+ break ;
1120
+ }
1121
+ else
1122
+ {
1123
+ strByteLen += readSize ;
1135
1124
}
1136
1125
}
1137
1126
@@ -1264,26 +1253,10 @@ else if ((this.scratchBuffer[strByteLen + bytesScanned] & 0xF8) == 0xF0)
1264
1253
// Use the second half of the remaining buffer space as a temp place to read in compressed bytes.
1265
1254
// Beginning of the buffer will be used to construct the string
1266
1255
1267
- int bytesToRead = compressedLen ;
1268
- int availableBytes = 0 ;
1269
- try
1270
- {
1271
- availableBytes = this .inputStream .available ();
1272
- }
1273
- catch (Exception e )
1274
- {
1275
- throw new IOException ("Error, unexpected EOS while constructing QString." );
1276
- }
1277
-
1278
- if (bytesToRead > availableBytes )
1279
- {
1280
- bytesToRead = availableBytes ;
1281
- }
1282
-
1283
1256
// Scratch buffer is divided into two parts. First expandedLen bytes are for the final expanded string
1284
1257
// Remaining bytes are for reading in the compressed string.
1285
1258
int readPos = expandedLen + compressedBytesConsumed ;
1286
- readIntoScratchBuffer (readPos , bytesToRead );
1259
+ readIntoScratchBuffer (readPos , compressedLen );
1287
1260
1288
1261
// We want to consume only a whole chunk so round off residual chars
1289
1262
// Below we will handle any residual bytes. (strLen % 4)
@@ -1304,7 +1277,7 @@ else if ((this.scratchBuffer[strByteLen + bytesScanned] & 0xF8) == 0xF0)
1304
1277
compressedBytesConsumed += QSTR_COMPRESSED_CHUNK_LEN ;
1305
1278
}
1306
1279
1307
- compressedBytesRead += bytesToRead ;
1280
+ compressedBytesRead += compressedLen ;
1308
1281
strByteLen += writePos ;
1309
1282
}
1310
1283
0 commit comments