@@ -193,9 +193,13 @@ impl TInputProtocol for TCompactSliceInputProtocol<'_> {
193
193
Some ( b) => Ok ( b) ,
194
194
None => {
195
195
let b = self . read_byte ( ) ?;
196
+ // Previous versions of the thrift specification said to use 0 and 1 inside collections,
197
+ // but that differed from existing implementations.
198
+ // The specification was updated in https://github.com/apache/thrift/commit/2c29c5665bc442e703480bb0ee60fe925ffe02e8.
199
+ // At least the go implementation seems to have followed the previously documented values.
196
200
match b {
197
201
0x01 => Ok ( true ) ,
198
- 0x02 => Ok ( false ) ,
202
+ 0x00 | 0x02 => Ok ( false ) ,
199
203
unkn => Err ( thrift:: Error :: Protocol ( thrift:: ProtocolError {
200
204
kind : thrift:: ProtocolErrorKind :: InvalidData ,
201
205
message : format ! ( "cannot convert {} into bool" , unkn) ,
@@ -274,7 +278,12 @@ impl TInputProtocol for TCompactSliceInputProtocol<'_> {
274
278
275
279
fn collection_u8_to_type ( b : u8 ) -> thrift:: Result < TType > {
276
280
match b {
277
- 0x01 => Ok ( TType :: Bool ) ,
281
+ // For historical and compatibility reasons, a reader should be capable to deal with both cases.
282
+ // The only valid value in the original spec was 2, but due to an widespread implementation bug
283
+ // the defacto standard across large parts of the library became 1 instead.
284
+ // As a result, both values are now allowed.
285
+ // https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set
286
+ 0x01 | 0x02 => Ok ( TType :: Bool ) ,
278
287
o => u8_to_type ( o) ,
279
288
}
280
289
}
@@ -305,3 +314,52 @@ fn eof_error() -> thrift::Error {
305
314
message : "Unexpected EOF" . to_string ( ) ,
306
315
} )
307
316
}
317
+
318
+ #[ cfg( test) ]
319
+ mod tests {
320
+ use crate :: format:: { BoundaryOrder , ColumnIndex } ;
321
+ use crate :: thrift:: { TCompactSliceInputProtocol , TSerializable } ;
322
+
323
+ #[ test]
324
+ pub fn read_boolean_list_field_type ( ) {
325
+ // Boolean collection type encoded as 0x01, as used by this crate when writing.
326
+ // Values encoded as 1 (true) or 2 (false) as in the current version of the thrift
327
+ // documentation.
328
+ let bytes = vec ! [ 0x19 , 0x21 , 2 , 1 , 0x19 , 8 , 0x19 , 8 , 0x15 , 0 , 0 ] ;
329
+
330
+ let mut protocol = TCompactSliceInputProtocol :: new ( bytes. as_slice ( ) ) ;
331
+ let index = ColumnIndex :: read_from_in_protocol ( & mut protocol) . unwrap ( ) ;
332
+ let expected = ColumnIndex {
333
+ null_pages : vec ! [ false , true ] ,
334
+ min_values : vec ! [ ] ,
335
+ max_values : vec ! [ ] ,
336
+ boundary_order : BoundaryOrder :: UNORDERED ,
337
+ null_counts : None ,
338
+ repetition_level_histograms : None ,
339
+ definition_level_histograms : None ,
340
+ } ;
341
+
342
+ assert_eq ! ( & index, & expected) ;
343
+ }
344
+
345
+ #[ test]
346
+ pub fn read_boolean_list_alternative_encoding ( ) {
347
+ // Boolean collection type encoded as 0x02, as allowed by the spec.
348
+ // Values encoded as 1 (true) or 0 (false) as before the thrift documentation change on 2024-12-13.
349
+ let bytes = vec ! [ 0x19 , 0x22 , 0 , 1 , 0x19 , 8 , 0x19 , 8 , 0x15 , 0 , 0 ] ;
350
+
351
+ let mut protocol = TCompactSliceInputProtocol :: new ( bytes. as_slice ( ) ) ;
352
+ let index = ColumnIndex :: read_from_in_protocol ( & mut protocol) . unwrap ( ) ;
353
+ let expected = ColumnIndex {
354
+ null_pages : vec ! [ false , true ] ,
355
+ min_values : vec ! [ ] ,
356
+ max_values : vec ! [ ] ,
357
+ boundary_order : BoundaryOrder :: UNORDERED ,
358
+ null_counts : None ,
359
+ repetition_level_histograms : None ,
360
+ definition_level_histograms : None ,
361
+ } ;
362
+
363
+ assert_eq ! ( & index, & expected) ;
364
+ }
365
+ }
0 commit comments