@@ -39,6 +39,7 @@ use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask, PARQUET_FI
 use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
 use parquet::schema::types::{SchemaDescriptor, Type as ParquetType};
 
+use crate::arrow::delete_file_manager::DeleteFileManager;
 use crate::arrow::record_batch_transformer::RecordBatchTransformer;
 use crate::arrow::{arrow_schema_to_schema, get_arrow_datum};
 use crate::error::Result;
@@ -145,6 +146,7 @@ impl ArrowReader {
                     file_io,
                     row_group_filtering_enabled,
                     row_selection_enabled,
+                    concurrency_limit_data_files,
                 )
             })
             .map_err(|err| {
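Aside: `concurrency_limit_data_files` caps how many files are fetched concurrently. The diff does not show how `DeleteFileManager::load_deletes` consumes the limit, but the usual `futures` pattern for this kind of bound is `buffer_unordered`; a minimal sketch under that assumption, with a hypothetical `load_one`/`load_all` standing in for the per-file work:

```rust
use futures::stream::{self, StreamExt, TryStreamExt};

// Hypothetical per-file loader; stands in for whatever work is done
// for each delete file. Not part of this PR.
async fn load_one(path: String) -> std::io::Result<Vec<u8>> {
    Ok(path.into_bytes()) // placeholder body
}

// Load all files with at most `concurrency_limit` loads in flight;
// the first error aborts the whole operation, as with `try_join!`.
async fn load_all(paths: Vec<String>, concurrency_limit: usize) -> std::io::Result<Vec<Vec<u8>>> {
    stream::iter(paths)
        .map(load_one)
        .buffer_unordered(concurrency_limit)
        .try_collect()
        .await
}
```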
@@ -162,30 +164,24 @@ impl ArrowReader {
         file_io: FileIO,
         row_group_filtering_enabled: bool,
         row_selection_enabled: bool,
+        concurrency_limit_data_files: usize,
     ) -> Result<ArrowRecordBatchStream> {
-        // TODO: add support for delete files
-        if !task.deletes.is_empty() {
-            return Err(Error::new(
-                ErrorKind::FeatureUnsupported,
-                "Delete files are not yet supported",
-            ));
-        }
-
-        // Get the metadata for the Parquet file we need to read and build
-        // a reader for the data within
-        let parquet_file = file_io.new_input(&task.data_file_path)?;
-        let (parquet_metadata, parquet_reader) =
-            try_join!(parquet_file.metadata(), parquet_file.reader())?;
-        let parquet_file_reader = ArrowFileReader::new(parquet_metadata, parquet_reader);
-
-        let should_load_page_index = row_selection_enabled && task.predicate.is_some();
-
-        // Start creating the record batch stream, which wraps the parquet file reader
-        let mut record_batch_stream_builder = ParquetRecordBatchStreamBuilder::new_with_options(
-            parquet_file_reader,
-            ArrowReaderOptions::new().with_page_index(should_load_page_index),
-        )
-        .await?;
+        let should_load_page_index =
+            (row_selection_enabled && task.predicate.is_some()) || !task.deletes.is_empty();
+
+        // Concurrently retrieve the delete files and create the RecordBatchStreamBuilder
+        let (delete_file_manager, mut record_batch_stream_builder) = try_join!(
+            DeleteFileManager::load_deletes(
+                task.deletes.clone(),
+                file_io.clone(),
+                concurrency_limit_data_files
+            ),
+            Self::create_parquet_record_batch_stream_builder(
+                &task.data_file_path,
+                file_io.clone(),
+                should_load_page_index,
+            )
+        )?;
 
         // Create a projection mask for the batch stream to select which columns in the
         // Parquet file that we want in the response
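For context on the projection mask this code builds: in the parquet crate, a `ProjectionMask` is defined over the leaf columns of the Parquet schema, and only the selected leaves are decoded into the resulting `RecordBatch`es. A minimal sketch; the leaf indices and the `make_projection` helper are illustrative, not taken from this PR:

```rust
use parquet::arrow::ProjectionMask;
use parquet::schema::types::SchemaDescriptor;

// Keep only the leaf columns at indices 0 and 2; every other column is
// skipped during decode, so it never appears in the output batches.
fn make_projection(parquet_schema: &SchemaDescriptor) -> ProjectionMask {
    ProjectionMask::leaves(parquet_schema, [0, 2])
}
```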
@@ -197,7 +193,7 @@ impl ArrowReader {
         )?;
         record_batch_stream_builder = record_batch_stream_builder.with_projection(projection_mask);
 
-        // RecordBatchTransformer performs any required transformations on the RecordBatches
+        // RecordBatchTransformer performs any transformations required on the RecordBatches
         // that come back from the file, such as type promotion, default column insertion
         // and column re-ordering
         let mut record_batch_transformer =
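The type promotion mentioned in that comment (for example, Iceberg's `int` to `long`) boils down to casting a column and swapping in the promoted schema. A standalone sketch using arrow's `cast` kernel; this is not the transformer's actual code, and `promote_example` is a hypothetical name:

```rust
use std::sync::Arc;

use arrow_array::{ArrayRef, Int32Array, RecordBatch};
use arrow_cast::cast;
use arrow_schema::{ArrowError, DataType, Field, Schema};

// Promote a file column read as Int32 to the table schema's Int64,
// mirroring Iceberg's int -> long type promotion.
fn promote_example() -> Result<RecordBatch, ArrowError> {
    let file_col: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let promoted = cast(&file_col, &DataType::Int64)?;
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)]));
    RecordBatch::try_new(schema, vec![promoted])
}
```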
@@ -207,49 +203,102 @@ impl ArrowReader {
             record_batch_stream_builder = record_batch_stream_builder.with_batch_size(batch_size);
         }
 
-        if let Some(predicate) = &task.predicate {
+        let delete_predicate = delete_file_manager.build_delete_predicate(task.schema.clone())?;
+
+        // In addition to the optional predicate supplied in the `FileScanTask`,
+        // we also have an optional predicate resulting from equality delete files.
+        // If both are present, we logically AND them together to form a single filter
+        // predicate that we can pass to the `RecordBatchStreamBuilder`.
+        let final_predicate = match (&task.predicate, delete_predicate) {
+            (None, None) => None,
+            (Some(predicate), None) => Some(predicate.clone()),
+            (None, Some(ref predicate)) => Some(predicate.clone()),
+            (Some(filter_predicate), Some(delete_predicate)) => {
+                Some(filter_predicate.clone().and(delete_predicate))
+            }
+        };
+
+        // Both the list of selected RowGroup indices and the `RowSelection` have
+        // two possible sources.
+        // A list of selected RowGroup indices can result:
+        //   * When there are applicable equality delete files;
+        //   * When there is a scan predicate and row_group_filtering_enabled = true.
+        // A `RowSelection` can be created in either or both of the following cases:
+        //   * When there are applicable positional delete files;
+        //   * When there is a scan predicate and row_selection_enabled = true.
+        // Note that, in the former case, we only perform row group filtering when
+        // there is a scan predicate AND row_group_filtering_enabled = true,
+        // but we perform row selection filtering if there are applicable
+        // positional delete files OR (there is a scan predicate AND row_selection_enabled),
+        // since the only implemented method of applying positional deletes is
+        // by using a `RowSelection`.
+        let mut selected_row_group_indices = None;
+        let mut row_selection = None;
+
+        if let Some(predicate) = final_predicate {
             let (iceberg_field_ids, field_id_map) = Self::build_field_id_set_and_map(
                 record_batch_stream_builder.parquet_schema(),
-                predicate,
+                &predicate,
             )?;
 
             let row_filter = Self::get_row_filter(
-                predicate,
+                &predicate,
                 record_batch_stream_builder.parquet_schema(),
                 &iceberg_field_ids,
                 &field_id_map,
             )?;
             record_batch_stream_builder = record_batch_stream_builder.with_row_filter(row_filter);
 
-            let mut selected_row_groups = None;
             if row_group_filtering_enabled {
                 let result = Self::get_selected_row_group_indices(
-                    predicate,
+                    &predicate,
                     record_batch_stream_builder.metadata(),
                     &field_id_map,
                     &task.schema,
                 )?;
 
-                selected_row_groups = Some(result);
+                selected_row_group_indices = Some(result);
             }
 
             if row_selection_enabled {
-                let row_selection = Self::get_row_selection(
-                    predicate,
+                row_selection = Some(Self::get_row_selection_for_filter_predicate(
+                    &predicate,
                     record_batch_stream_builder.metadata(),
-                    &selected_row_groups,
+                    &selected_row_group_indices,
                     &field_id_map,
                     &task.schema,
-                )?;
-
-                record_batch_stream_builder =
-                    record_batch_stream_builder.with_row_selection(row_selection);
+                )?);
             }
+        }
 
-            if let Some(selected_row_groups) = selected_row_groups {
-                record_batch_stream_builder =
-                    record_batch_stream_builder.with_row_groups(selected_row_groups);
-            }
+        let positional_delete_indexes =
+            delete_file_manager.get_positional_delete_indexes_for_data_file(&task.data_file_path);
+
+        if let Some(positional_delete_indexes) = positional_delete_indexes {
+            let delete_row_selection = Self::build_deletes_row_selection(
+                record_batch_stream_builder.metadata(),
+                &selected_row_group_indices,
+                &positional_delete_indexes,
+            )?;
+
+            // Merge the row selection from the delete files with the row selection
+            // from the filter predicate, if there is one from the filter predicate
+            row_selection = match row_selection {
+                None => Some(delete_row_selection),
+                Some(filter_row_selection) => {
+                    Some(filter_row_selection.intersection(&delete_row_selection))
+                }
+            };
+        }
+
+        if let Some(row_selection) = row_selection {
+            record_batch_stream_builder =
+                record_batch_stream_builder.with_row_selection(row_selection);
+        }
+
+        if let Some(selected_row_group_indices) = selected_row_group_indices {
+            record_batch_stream_builder =
+                record_batch_stream_builder.with_row_groups(selected_row_group_indices);
         }
 
         // Build the batch stream and send all the RecordBatches that it generates
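A note on the `RowSelection` merge above: a `RowSelection` is a run-length list of select/skip spans over a file's rows, and `intersection` keeps a row only when both inputs keep it. A small self-contained illustration using the parquet crate's types:

```rust
use parquet::arrow::arrow_reader::{RowSelection, RowSelector};

fn main() {
    // A: keep rows 0..4, skip rows 4..8.
    let a = RowSelection::from(vec![RowSelector::select(4), RowSelector::skip(4)]);
    // B: skip rows 0..2, keep rows 2..8.
    let b = RowSelection::from(vec![RowSelector::skip(2), RowSelector::select(6)]);

    // Only rows 2 and 3 are kept by both selections.
    let merged = a.intersection(&b);
    assert_eq!(merged.row_count(), 2);
}
```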
@@ -265,6 +314,43 @@ impl ArrowReader {
         Ok(Box::pin(record_batch_stream) as ArrowRecordBatchStream)
     }
 
+    async fn create_parquet_record_batch_stream_builder(
+        data_file_path: &str,
+        file_io: FileIO,
+        should_load_page_index: bool,
+    ) -> Result<ParquetRecordBatchStreamBuilder<ArrowFileReader<impl FileRead + Sized>>> {
+        // Get the metadata for the Parquet file we need to read and build
+        // a reader for the data within
+        let parquet_file = file_io.new_input(data_file_path)?;
+        let (parquet_metadata, parquet_reader) =
+            try_join!(parquet_file.metadata(), parquet_file.reader())?;
+        let parquet_file_reader = ArrowFileReader::new(parquet_metadata, parquet_reader);
+
+        // Create the record batch stream builder, which wraps the parquet file reader
+        let record_batch_stream_builder = ParquetRecordBatchStreamBuilder::new_with_options(
+            parquet_file_reader,
+            ArrowReaderOptions::new().with_page_index(should_load_page_index),
+        )
+        .await?;
+        Ok(record_batch_stream_builder)
+    }
+
+    /// Computes a `RowSelection` from positional delete indices.
+    ///
+    /// Using the Parquet page index, we build a `RowSelection` that rejects rows that are indicated
+    /// as having been deleted by a positional delete, taking into account any row groups that have
+    /// been skipped entirely by the filter predicate.
+    #[allow(unused)]
+    fn build_deletes_row_selection(
+        parquet_metadata: &Arc<ParquetMetaData>,
+        selected_row_groups: &Option<Vec<usize>>,
+        positional_deletes: &[usize],
+    ) -> Result<RowSelection> {
+        // TODO
+
+        Ok(RowSelection::default())
+    }
+
     fn build_field_id_set_and_map(
         parquet_schema: &SchemaDescriptor,
         predicate: &BoundPredicate,
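`build_deletes_row_selection` is left as a TODO in this commit. Setting aside the per-row-group offset bookkeeping that the real implementation will need to derive from the Parquet metadata, the core transformation is turning sorted delete positions into alternating select/skip runs. A hedged sketch of just that part; `selection_from_deletes` is a hypothetical helper, not code from this PR:

```rust
use parquet::arrow::arrow_reader::{RowSelection, RowSelector};

// Build a selection over `total_rows` rows that drops every position in
// `positional_deletes` (assumed sorted ascending, deduplicated, in bounds).
// The real method must also account for row groups skipped by the filter
// predicate; this sketch treats the file as one contiguous run of rows.
fn selection_from_deletes(positional_deletes: &[usize], total_rows: usize) -> RowSelection {
    let mut selectors = Vec::new();
    let mut cursor = 0;
    for &deleted in positional_deletes {
        if deleted > cursor {
            // Keep the rows between the previous delete and this one.
            selectors.push(RowSelector::select(deleted - cursor));
        }
        // Drop the deleted row itself.
        selectors.push(RowSelector::skip(1));
        cursor = deleted + 1;
    }
    if cursor < total_rows {
        // Keep everything after the last delete.
        selectors.push(RowSelector::select(total_rows - cursor));
    }
    RowSelection::from(selectors)
}
```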
@@ -475,7 +561,7 @@ impl ArrowReader {
         Ok(results)
     }
 
-    fn get_row_selection(
+    fn get_row_selection_for_filter_predicate(
         predicate: &BoundPredicate,
         parquet_metadata: &Arc<ParquetMetaData>,
         selected_row_groups: &Option<Vec<usize>>,