@@ -65,7 +65,7 @@ fn read_buffer(
65
65
( false , Some ( decompressor) ) => decompressor. decompress_to_buffer ( & buf_data) ,
66
66
}
67
67
}
68
- impl ArrayReader < ' _ > {
68
+ impl RecordBatchDecoder < ' _ > {
69
69
/// Coordinates reading arrays based on data types.
70
70
///
71
71
/// `variadic_counts` encodes the number of buffers to read for variadic types (e.g., Utf8View, BinaryView)
@@ -83,18 +83,17 @@ impl ArrayReader<'_> {
83
83
field : & Field ,
84
84
variadic_counts : & mut VecDeque < i64 > ,
85
85
) -> Result < ArrayRef , ArrowError > {
86
- let reader = self ;
87
86
let data_type = field. data_type ( ) ;
88
87
match data_type {
89
88
Utf8 | Binary | LargeBinary | LargeUtf8 => create_primitive_array (
90
- reader . next_node ( field) ?,
89
+ self . next_node ( field) ?,
91
90
data_type,
92
91
& [
93
- reader . next_buffer ( ) ?,
94
- reader . next_buffer ( ) ?,
95
- reader . next_buffer ( ) ?,
92
+ self . next_buffer ( ) ?,
93
+ self . next_buffer ( ) ?,
94
+ self . next_buffer ( ) ?,
96
95
] ,
97
- reader . require_alignment ,
96
+ self . require_alignment ,
98
97
) ,
99
98
BinaryView | Utf8View => {
100
99
let count = variadic_counts
@@ -104,55 +103,55 @@ impl ArrayReader<'_> {
104
103
) ) ) ?;
105
104
let count = count + 2 ; // view and null buffer.
106
105
let buffers = ( 0 ..count)
107
- . map ( |_| reader . next_buffer ( ) )
106
+ . map ( |_| self . next_buffer ( ) )
108
107
. collect :: < Result < Vec < _ > , _ > > ( ) ?;
109
108
create_primitive_array (
110
- reader . next_node ( field) ?,
109
+ self . next_node ( field) ?,
111
110
data_type,
112
111
& buffers,
113
- reader . require_alignment ,
112
+ self . require_alignment ,
114
113
)
115
114
}
116
115
FixedSizeBinary ( _) => create_primitive_array (
117
- reader . next_node ( field) ?,
116
+ self . next_node ( field) ?,
118
117
data_type,
119
- & [ reader . next_buffer ( ) ?, reader . next_buffer ( ) ?] ,
120
- reader . require_alignment ,
118
+ & [ self . next_buffer ( ) ?, self . next_buffer ( ) ?] ,
119
+ self . require_alignment ,
121
120
) ,
122
121
List ( ref list_field) | LargeList ( ref list_field) | Map ( ref list_field, _) => {
123
- let list_node = reader . next_node ( field) ?;
124
- let list_buffers = [ reader . next_buffer ( ) ?, reader . next_buffer ( ) ?] ;
125
- let values = reader . create_array ( list_field, variadic_counts) ?;
122
+ let list_node = self . next_node ( field) ?;
123
+ let list_buffers = [ self . next_buffer ( ) ?, self . next_buffer ( ) ?] ;
124
+ let values = self . create_array ( list_field, variadic_counts) ?;
126
125
create_list_array (
127
126
list_node,
128
127
data_type,
129
128
& list_buffers,
130
129
values,
131
- reader . require_alignment ,
130
+ self . require_alignment ,
132
131
)
133
132
}
134
133
FixedSizeList ( ref list_field, _) => {
135
- let list_node = reader . next_node ( field) ?;
136
- let list_buffers = [ reader . next_buffer ( ) ?] ;
137
- let values = reader . create_array ( list_field, variadic_counts) ?;
134
+ let list_node = self . next_node ( field) ?;
135
+ let list_buffers = [ self . next_buffer ( ) ?] ;
136
+ let values = self . create_array ( list_field, variadic_counts) ?;
138
137
create_list_array (
139
138
list_node,
140
139
data_type,
141
140
& list_buffers,
142
141
values,
143
- reader . require_alignment ,
142
+ self . require_alignment ,
144
143
)
145
144
}
146
145
Struct ( struct_fields) => {
147
- let struct_node = reader . next_node ( field) ?;
148
- let null_buffer = reader . next_buffer ( ) ?;
146
+ let struct_node = self . next_node ( field) ?;
147
+ let null_buffer = self . next_buffer ( ) ?;
149
148
150
149
// read the arrays for each field
151
150
let mut struct_arrays = vec ! [ ] ;
152
151
// TODO investigate whether just knowing the number of buffers could
153
152
// still work
154
153
for struct_field in struct_fields {
155
- let child = reader . create_array ( struct_field, variadic_counts) ?;
154
+ let child = self . create_array ( struct_field, variadic_counts) ?;
156
155
struct_arrays. push ( child) ;
157
156
}
158
157
let null_count = struct_node. null_count ( ) as usize ;
@@ -175,32 +174,32 @@ impl ArrayReader<'_> {
175
174
Ok ( Arc :: new ( struct_array) )
176
175
}
177
176
RunEndEncoded ( run_ends_field, values_field) => {
178
- let run_node = reader . next_node ( field) ?;
179
- let run_ends = reader . create_array ( run_ends_field, variadic_counts) ?;
180
- let values = reader . create_array ( values_field, variadic_counts) ?;
177
+ let run_node = self . next_node ( field) ?;
178
+ let run_ends = self . create_array ( run_ends_field, variadic_counts) ?;
179
+ let values = self . create_array ( values_field, variadic_counts) ?;
181
180
182
181
let run_array_length = run_node. length ( ) as usize ;
183
182
let array_data = ArrayData :: builder ( data_type. clone ( ) )
184
183
. len ( run_array_length)
185
184
. offset ( 0 )
186
185
. add_child_data ( run_ends. into_data ( ) )
187
186
. add_child_data ( values. into_data ( ) )
188
- . align_buffers ( !reader . require_alignment )
187
+ . align_buffers ( !self . require_alignment )
189
188
. build ( ) ?;
190
189
191
190
Ok ( make_array ( array_data) )
192
191
}
193
192
// Create dictionary array from RecordBatch
194
193
Dictionary ( _, _) => {
195
- let index_node = reader . next_node ( field) ?;
196
- let index_buffers = [ reader . next_buffer ( ) ?, reader . next_buffer ( ) ?] ;
194
+ let index_node = self . next_node ( field) ?;
195
+ let index_buffers = [ self . next_buffer ( ) ?, self . next_buffer ( ) ?] ;
197
196
198
197
#[ allow( deprecated) ]
199
198
let dict_id = field. dict_id ( ) . ok_or_else ( || {
200
199
ArrowError :: ParseError ( format ! ( "Field {field} does not have dict id" ) )
201
200
} ) ?;
202
201
203
- let value_array = reader . dictionaries_by_id . get ( & dict_id) . ok_or_else ( || {
202
+ let value_array = self . dictionaries_by_id . get ( & dict_id) . ok_or_else ( || {
204
203
ArrowError :: ParseError ( format ! (
205
204
"Cannot find a dictionary batch with dict id: {dict_id}"
206
205
) )
@@ -211,26 +210,26 @@ impl ArrayReader<'_> {
211
210
data_type,
212
211
& index_buffers,
213
212
value_array. clone ( ) ,
214
- reader . require_alignment ,
213
+ self . require_alignment ,
215
214
)
216
215
}
217
216
Union ( fields, mode) => {
218
- let union_node = reader . next_node ( field) ?;
217
+ let union_node = self . next_node ( field) ?;
219
218
let len = union_node. length ( ) as usize ;
220
219
221
220
// In V4, union types has validity bitmap
222
221
// In V5 and later, union types have no validity bitmap
223
- if reader . version < MetadataVersion :: V5 {
224
- reader . next_buffer ( ) ?;
222
+ if self . version < MetadataVersion :: V5 {
223
+ self . next_buffer ( ) ?;
225
224
}
226
225
227
226
let type_ids: ScalarBuffer < i8 > =
228
- reader . next_buffer ( ) ?. slice_with_length ( 0 , len) . into ( ) ;
227
+ self . next_buffer ( ) ?. slice_with_length ( 0 , len) . into ( ) ;
229
228
230
229
let value_offsets = match mode {
231
230
UnionMode :: Dense => {
232
231
let offsets: ScalarBuffer < i32 > =
233
- reader . next_buffer ( ) ?. slice_with_length ( 0 , len * 4 ) . into ( ) ;
232
+ self . next_buffer ( ) ?. slice_with_length ( 0 , len * 4 ) . into ( ) ;
234
233
Some ( offsets)
235
234
}
236
235
UnionMode :: Sparse => None ,
@@ -239,15 +238,15 @@ impl ArrayReader<'_> {
239
238
let mut children = Vec :: with_capacity ( fields. len ( ) ) ;
240
239
241
240
for ( _id, field) in fields. iter ( ) {
242
- let child = reader . create_array ( field, variadic_counts) ?;
241
+ let child = self . create_array ( field, variadic_counts) ?;
243
242
children. push ( child) ;
244
243
}
245
244
246
245
let array = UnionArray :: try_new ( fields. clone ( ) , type_ids, value_offsets, children) ?;
247
246
Ok ( Arc :: new ( array) )
248
247
}
249
248
Null => {
250
- let node = reader . next_node ( field) ?;
249
+ let node = self . next_node ( field) ?;
251
250
let length = node. length ( ) ;
252
251
let null_count = node. null_count ( ) ;
253
252
@@ -260,17 +259,17 @@ impl ArrayReader<'_> {
260
259
let array_data = ArrayData :: builder ( data_type. clone ( ) )
261
260
. len ( length as usize )
262
261
. offset ( 0 )
263
- . align_buffers ( !reader . require_alignment )
262
+ . align_buffers ( !self . require_alignment )
264
263
. build ( ) ?;
265
264
266
265
// no buffer increases
267
266
Ok ( Arc :: new ( NullArray :: from ( array_data) ) )
268
267
}
269
268
_ => create_primitive_array (
270
- reader . next_node ( field) ?,
269
+ self . next_node ( field) ?,
271
270
data_type,
272
- & [ reader . next_buffer ( ) ?, reader . next_buffer ( ) ?] ,
273
- reader . require_alignment ,
271
+ & [ self . next_buffer ( ) ?, self . next_buffer ( ) ?] ,
272
+ self . require_alignment ,
274
273
) ,
275
274
}
276
275
}
@@ -370,8 +369,11 @@ fn create_dictionary_array(
370
369
}
371
370
}
372
371
373
- /// State for decoding arrays from an encoded [`RecordBatch`]
374
- struct ArrayReader < ' a > {
372
+ /// State for decoding Arrow arrays from an [IPC RecordBatch] structure to
373
+ /// [`RecordBatch`]
374
+ ///
375
+ /// [IPC RecordBatch]: crate::RecordBatch
376
+ struct RecordBatchDecoder < ' a > {
375
377
/// The flatbuffers encoded record batch
376
378
batch : crate :: RecordBatch < ' a > ,
377
379
/// The output schema
@@ -389,14 +391,14 @@ struct ArrayReader<'a> {
389
391
/// The buffers comprising this array
390
392
buffers : VectorIter < ' a , crate :: Buffer > ,
391
393
/// Projection (subset of columns) to read, if any
392
- /// See [`ArrayReader ::with_projection`] for details
394
+ /// See [`RecordBatchDecoder ::with_projection`] for details
393
395
projection : Option < & ' a [ usize ] > ,
394
396
/// Are buffers required to already be aligned? See
395
- /// [`ArrayReader ::with_require_alignment`] for details
397
+ /// [`RecordBatchDecoder ::with_require_alignment`] for details
396
398
require_alignment : bool ,
397
399
}
398
400
399
- impl < ' a > ArrayReader < ' a > {
401
+ impl < ' a > RecordBatchDecoder < ' a > {
400
402
/// Create a reader for decoding arrays from an encoded [`RecordBatch`]
401
403
fn try_new (
402
404
buf : & ' a Buffer ,
@@ -604,7 +606,7 @@ pub fn read_record_batch(
604
606
projection : Option < & [ usize ] > ,
605
607
metadata : & MetadataVersion ,
606
608
) -> Result < RecordBatch , ArrowError > {
607
- ArrayReader :: try_new ( buf, batch, schema, dictionaries_by_id, metadata) ?
609
+ RecordBatchDecoder :: try_new ( buf, batch, schema, dictionaries_by_id, metadata) ?
608
610
. with_projection ( projection)
609
611
. with_require_alignment ( false )
610
612
. read_record_batch ( )
@@ -652,7 +654,7 @@ fn read_dictionary_impl(
652
654
let value = value_type. as_ref ( ) . clone ( ) ;
653
655
let schema = Schema :: new ( vec ! [ Field :: new( "" , value, true ) ] ) ;
654
656
// Read a single column
655
- let record_batch = ArrayReader :: try_new (
657
+ let record_batch = RecordBatchDecoder :: try_new (
656
658
buf,
657
659
batch. data ( ) . unwrap ( ) ,
658
660
Arc :: new ( schema) ,
@@ -876,7 +878,7 @@ impl FileDecoder {
876
878
ArrowError :: IpcError ( "Unable to read IPC message as record batch" . to_string ( ) )
877
879
} ) ?;
878
880
// read the block that makes up the record batch into a buffer
879
- ArrayReader :: try_new (
881
+ RecordBatchDecoder :: try_new (
880
882
& buf. slice ( block. metaDataLength ( ) as _ ) ,
881
883
batch,
882
884
self . schema . clone ( ) ,
@@ -1426,7 +1428,7 @@ impl<R: Read> StreamReader<R> {
1426
1428
let mut buf = MutableBuffer :: from_len_zeroed ( message. bodyLength ( ) as usize ) ;
1427
1429
self . reader . read_exact ( & mut buf) ?;
1428
1430
1429
- ArrayReader :: try_new (
1431
+ RecordBatchDecoder :: try_new (
1430
1432
& buf. into ( ) ,
1431
1433
batch,
1432
1434
self . schema ( ) ,
@@ -2277,7 +2279,7 @@ mod tests {
2277
2279
assert_ne ! ( b. as_ptr( ) . align_offset( 8 ) , 0 ) ;
2278
2280
2279
2281
let ipc_batch = message. header_as_record_batch ( ) . unwrap ( ) ;
2280
- let roundtrip = ArrayReader :: try_new (
2282
+ let roundtrip = RecordBatchDecoder :: try_new (
2281
2283
& b,
2282
2284
ipc_batch,
2283
2285
batch. schema ( ) ,
@@ -2316,7 +2318,7 @@ mod tests {
2316
2318
assert_ne ! ( b. as_ptr( ) . align_offset( 8 ) , 0 ) ;
2317
2319
2318
2320
let ipc_batch = message. header_as_record_batch ( ) . unwrap ( ) ;
2319
- let result = ArrayReader :: try_new (
2321
+ let result = RecordBatchDecoder :: try_new (
2320
2322
& b,
2321
2323
ipc_batch,
2322
2324
batch. schema ( ) ,
0 commit comments