18
18
//! Table scan api.
19
19
20
20
use std:: collections:: HashMap ;
21
- use std:: future:: Future ;
22
- use std:: pin:: Pin ;
23
21
use std:: sync:: { Arc , RwLock } ;
24
- use std:: task:: { Context , Poll } ;
25
22
26
23
use arrow_array:: RecordBatch ;
27
- use futures:: channel:: mpsc:: { channel, Receiver , Sender } ;
24
+ use futures:: channel:: mpsc:: { channel, Sender } ;
28
25
use futures:: stream:: BoxStream ;
29
26
use futures:: { SinkExt , StreamExt , TryFutureExt , TryStreamExt } ;
30
27
use serde:: { Deserialize , Serialize } ;
31
28
32
29
use crate :: arrow:: ArrowReaderBuilder ;
30
+ use crate :: delete_file_index:: DeleteFileIndex ;
33
31
use crate :: expr:: visitors:: expression_evaluator:: ExpressionEvaluator ;
34
32
use crate :: expr:: visitors:: inclusive_metrics_evaluator:: InclusiveMetricsEvaluator ;
35
33
use crate :: expr:: visitors:: inclusive_projection:: InclusiveProjection ;
@@ -39,7 +37,7 @@ use crate::io::object_cache::ObjectCache;
39
37
use crate :: io:: FileIO ;
40
38
use crate :: runtime:: spawn;
41
39
use crate :: spec:: {
42
- DataContentType , DataFile , DataFileFormat , ManifestContentType , ManifestEntryRef , ManifestFile ,
40
+ DataContentType , DataFileFormat , ManifestContentType , ManifestEntryRef , ManifestFile ,
43
41
ManifestList , Schema , SchemaRef , SnapshotRef , TableMetadataRef ,
44
42
} ;
45
43
use crate :: table:: Table ;
@@ -368,9 +366,9 @@ impl TableScan {
368
366
// used to stream the results back to the caller
369
367
let ( file_scan_task_tx, file_scan_task_rx) = channel ( concurrency_limit_manifest_entries) ;
370
368
371
- // used to stream delete files into the DeleteFileManager
369
+ // used to stream delete files into the DeleteFileIndex
372
370
let ( delete_file_tx, delete_file_rx) = channel ( concurrency_limit_manifest_entries) ;
373
- let delete_file_manager = DeleteFileManager :: from_receiver ( delete_file_rx) ;
371
+ let delete_file_index = DeleteFileIndex :: from_receiver ( delete_file_rx) ;
374
372
375
373
let manifest_list = self . plan_context . get_manifest_list ( ) . await ?;
376
374
@@ -381,7 +379,7 @@ impl TableScan {
381
379
manifest_list,
382
380
manifest_entry_data_ctx_tx,
383
381
manifest_entry_delete_ctx_tx,
384
- delete_file_manager . clone ( ) ,
382
+ delete_file_index . clone ( ) ,
385
383
) ?;
386
384
387
385
let mut channel_for_manifest_error = file_scan_task_tx. clone ( ) ;
@@ -593,7 +591,7 @@ struct ManifestFileContext {
593
591
object_cache : Arc < ObjectCache > ,
594
592
snapshot_schema : SchemaRef ,
595
593
expression_evaluator_cache : Arc < ExpressionEvaluatorCache > ,
596
- delete_file_manager : DeleteFileManager ,
594
+ delete_file_index : DeleteFileIndex ,
597
595
}
598
596
599
597
/// Wraps a [`ManifestEntryRef`] alongside the objects that are needed
@@ -606,7 +604,7 @@ struct ManifestEntryContext {
606
604
bound_predicates : Option < Arc < BoundPredicates > > ,
607
605
partition_spec_id : i32 ,
608
606
snapshot_schema : SchemaRef ,
609
- delete_file_manager : DeleteFileManager ,
607
+ delete_file_index : DeleteFileIndex ,
610
608
}
611
609
612
610
impl ManifestFileContext {
@@ -621,7 +619,7 @@ impl ManifestFileContext {
621
619
field_ids,
622
620
mut sender,
623
621
expression_evaluator_cache,
624
- delete_file_manager ,
622
+ delete_file_index ,
625
623
..
626
624
} = self ;
627
625
@@ -636,7 +634,7 @@ impl ManifestFileContext {
636
634
partition_spec_id : manifest_file. partition_spec_id ,
637
635
bound_predicates : bound_predicates. clone ( ) ,
638
636
snapshot_schema : snapshot_schema. clone ( ) ,
639
- delete_file_manager : delete_file_manager . clone ( ) ,
637
+ delete_file_index : delete_file_index . clone ( ) ,
640
638
} ;
641
639
642
640
sender
@@ -654,7 +652,7 @@ impl ManifestEntryContext {
654
652
/// created from it
655
653
async fn into_file_scan_task ( self ) -> Result < FileScanTask > {
656
654
let deletes = self
657
- . delete_file_manager
655
+ . delete_file_index
658
656
. get_deletes_for_data_file ( self . manifest_entry . data_file ( ) )
659
657
. await ?;
660
658
@@ -712,7 +710,7 @@ impl PlanContext {
712
710
manifest_list : Arc < ManifestList > ,
713
711
sender_data : Sender < ManifestEntryContext > ,
714
712
sender_delete : Sender < ManifestEntryContext > ,
715
- delete_file_manager : DeleteFileManager ,
713
+ delete_file_index : DeleteFileIndex ,
716
714
) -> Result < Box < impl Iterator < Item = Result < ManifestFileContext > > > > {
717
715
let manifest_files = manifest_list. entries ( ) . iter ( ) ;
718
716
@@ -741,7 +739,7 @@ impl PlanContext {
741
739
} else {
742
740
sender_delete. clone ( )
743
741
} ,
744
- delete_file_manager . clone ( ) ,
742
+ delete_file_index . clone ( ) ,
745
743
) ;
746
744
747
745
filtered_mfcs. push ( Ok ( mfc) ) ;
@@ -757,7 +755,7 @@ impl PlanContext {
757
755
} else {
758
756
sender_delete. clone ( )
759
757
} ,
760
- delete_file_manager . clone ( ) ,
758
+ delete_file_index . clone ( ) ,
761
759
) ;
762
760
763
761
filtered_mfcs. push ( Ok ( mfc) ) ;
@@ -772,7 +770,7 @@ impl PlanContext {
772
770
manifest_file : & ManifestFile ,
773
771
partition_filter : Option < Arc < BoundPredicate > > ,
774
772
sender : Sender < ManifestEntryContext > ,
775
- delete_file_manager : DeleteFileManager ,
773
+ delete_file_index : DeleteFileIndex ,
776
774
) -> ManifestFileContext {
777
775
let bound_predicates =
778
776
if let ( Some ( ref partition_bound_predicate) , Some ( snapshot_bound_predicate) ) =
@@ -794,7 +792,7 @@ impl PlanContext {
794
792
snapshot_schema : self . snapshot_schema . clone ( ) ,
795
793
field_ids : self . field_ids . clone ( ) ,
796
794
expression_evaluator_cache : self . expression_evaluator_cache . clone ( ) ,
797
- delete_file_manager ,
795
+ delete_file_index ,
798
796
}
799
797
}
800
798
}
@@ -1077,82 +1075,6 @@ impl FileScanTask {
1077
1075
}
1078
1076
}
1079
1077
1080
- type DeleteFileManagerResult = Result < Option < Arc < Vec < FileScanTaskDeleteFile > > > > ;
1081
-
1082
- /// Manages async retrieval of all the delete files from FileIO that are
1083
- /// applicable to the scan. Provides references to them for inclusion within FileScanTasks
1084
- #[ derive( Debug , Clone ) ]
1085
- struct DeleteFileManager {
1086
- files : Arc < RwLock < Option < DeleteFileManagerResult > > > ,
1087
- }
1088
-
1089
- #[ derive( Debug , Clone ) ]
1090
- struct DeleteFileManagerFuture {
1091
- files : Arc < RwLock < Option < DeleteFileManagerResult > > > ,
1092
- }
1093
-
1094
- impl Future for DeleteFileManagerFuture {
1095
- type Output = DeleteFileManagerResult ;
1096
-
1097
- fn poll ( self : Pin < & mut Self > , _cx : & mut Context < ' _ > ) -> Poll < Self :: Output > {
1098
- let Ok ( guard) = self . files . try_read ( ) else {
1099
- return Poll :: Pending ;
1100
- } ;
1101
-
1102
- if let Some ( value) = guard. as_ref ( ) {
1103
- Poll :: Ready ( match value. as_ref ( ) {
1104
- Ok ( deletes) => Ok ( deletes. clone ( ) ) ,
1105
- Err ( err) => Err ( Error :: new ( err. kind ( ) , err. message ( ) ) ) ,
1106
- } )
1107
- } else {
1108
- Poll :: Pending
1109
- }
1110
- }
1111
- }
1112
-
1113
- impl DeleteFileManager {
1114
- pub ( crate ) fn from_receiver ( receiver : Receiver < Result < FileScanTaskDeleteFile > > ) -> Self {
1115
- let delete_file_stream = receiver. boxed ( ) ;
1116
- let files = Arc :: new ( RwLock :: new ( None ) ) ;
1117
-
1118
- spawn ( {
1119
- let files = files. clone ( ) ;
1120
- async move {
1121
- let _ = spawn ( async move {
1122
- let result = delete_file_stream. try_collect :: < Vec < _ > > ( ) . await ;
1123
- let result = result. map ( |files| {
1124
- if files. is_empty ( ) {
1125
- None
1126
- } else {
1127
- Some ( Arc :: new ( files) )
1128
- }
1129
- } ) ;
1130
-
1131
- // Unwrap is ok here since this is the only place where a write lock
1132
- // can be acquired, so the lock can't already have been poisoned
1133
- let mut guard = files. write ( ) . unwrap ( ) ;
1134
- * guard = Some ( result) ;
1135
- } )
1136
- . await ;
1137
- }
1138
- } ) ;
1139
-
1140
- DeleteFileManager { files }
1141
- }
1142
-
1143
- pub ( crate ) fn get_deletes_for_data_file (
1144
- & self ,
1145
- _data_file : & DataFile ,
1146
- ) -> DeleteFileManagerFuture {
1147
- // TODO: in the future we may want to filter out delete files
1148
- // that are not applicable to the DataFile?
1149
-
1150
- DeleteFileManagerFuture {
1151
- files : self . files . clone ( ) ,
1152
- }
1153
- }
1154
- }
1155
-
1156
1078
#[ cfg( test) ]
1157
1079
mod tests {
1158
1080
use std:: collections:: HashMap ;
0 commit comments