@@ -40,7 +40,6 @@ use crate::projection::{
40
40
use crate :: spill:: get_record_batch_memory_size;
41
41
use crate :: ExecutionPlanProperties ;
42
42
use crate :: {
43
- coalesce_partitions:: CoalescePartitionsExec ,
44
43
common:: can_project,
45
44
handle_state,
46
45
hash_utils:: create_hashes,
@@ -792,34 +791,44 @@ impl ExecutionPlan for HashJoinExec {
792
791
) ;
793
792
}
794
793
794
+ if self . mode == PartitionMode :: CollectLeft && left_partitions != 1 {
795
+ return internal_err ! (
796
+ "Invalid HashJoinExec,the output partition count of the left child must be 1 in CollectLeft mode,\
797
+ consider using CoalescePartitionsExec"
798
+ ) ;
799
+ }
800
+
795
801
let join_metrics = BuildProbeJoinMetrics :: new ( partition, & self . metrics ) ;
796
802
let left_fut = match self . mode {
797
- PartitionMode :: CollectLeft => self . left_fut . once ( || {
798
- let reservation =
799
- MemoryConsumer :: new ( "HashJoinInput" ) . register ( context. memory_pool ( ) ) ;
800
- collect_left_input (
801
- None ,
802
- self . random_state . clone ( ) ,
803
- Arc :: clone ( & self . left ) ,
804
- on_left. clone ( ) ,
805
- Arc :: clone ( & context) ,
806
- join_metrics. clone ( ) ,
807
- reservation,
808
- need_produce_result_in_final ( self . join_type ) ,
809
- self . right ( ) . output_partitioning ( ) . partition_count ( ) ,
810
- )
811
- } ) ,
803
+ PartitionMode :: CollectLeft => {
804
+ let left_stream = self . left . execute ( 0 , Arc :: clone ( & context) ) ?;
805
+
806
+ self . left_fut . once ( || {
807
+ let reservation = MemoryConsumer :: new ( "HashJoinInput" )
808
+ . register ( context. memory_pool ( ) ) ;
809
+
810
+ collect_left_input (
811
+ self . random_state . clone ( ) ,
812
+ left_stream,
813
+ on_left. clone ( ) ,
814
+ join_metrics. clone ( ) ,
815
+ reservation,
816
+ need_produce_result_in_final ( self . join_type ) ,
817
+ self . right ( ) . output_partitioning ( ) . partition_count ( ) ,
818
+ )
819
+ } )
820
+ }
812
821
PartitionMode :: Partitioned => {
822
+ let left_stream = self . left . execute ( partition, Arc :: clone ( & context) ) ?;
823
+
813
824
let reservation =
814
825
MemoryConsumer :: new ( format ! ( "HashJoinInput[{partition}]" ) )
815
826
. register ( context. memory_pool ( ) ) ;
816
827
817
828
OnceFut :: new ( collect_left_input (
818
- Some ( partition) ,
819
829
self . random_state . clone ( ) ,
820
- Arc :: clone ( & self . left ) ,
830
+ left_stream ,
821
831
on_left. clone ( ) ,
822
- Arc :: clone ( & context) ,
823
832
join_metrics. clone ( ) ,
824
833
reservation,
825
834
need_produce_result_in_final ( self . join_type ) ,
@@ -930,36 +939,22 @@ impl ExecutionPlan for HashJoinExec {
930
939
931
940
/// Reads the left (build) side of the input, buffering it in memory, to build a
932
941
/// hash table (`LeftJoinData`)
933
- #[ allow( clippy:: too_many_arguments) ]
934
942
async fn collect_left_input (
935
- partition : Option < usize > ,
936
943
random_state : RandomState ,
937
- left : Arc < dyn ExecutionPlan > ,
944
+ left_stream : SendableRecordBatchStream ,
938
945
on_left : Vec < PhysicalExprRef > ,
939
- context : Arc < TaskContext > ,
940
946
metrics : BuildProbeJoinMetrics ,
941
947
reservation : MemoryReservation ,
942
948
with_visited_indices_bitmap : bool ,
943
949
probe_threads_count : usize ,
944
950
) -> Result < JoinLeftData > {
945
- let schema = left. schema ( ) ;
946
-
947
- let ( left_input, left_input_partition) = if let Some ( partition) = partition {
948
- ( left, partition)
949
- } else if left. output_partitioning ( ) . partition_count ( ) != 1 {
950
- ( Arc :: new ( CoalescePartitionsExec :: new ( left) ) as _ , 0 )
951
- } else {
952
- ( left, 0 )
953
- } ;
954
-
955
- // Depending on partition argument load single partition or whole left side in memory
956
- let stream = left_input. execute ( left_input_partition, Arc :: clone ( & context) ) ?;
951
+ let schema = left_stream. schema ( ) ;
957
952
958
953
// This operation performs 2 steps at once:
959
954
// 1. creates a [JoinHashMap] of all batches from the stream
960
955
// 2. stores the batches in a vector.
961
956
let initial = ( Vec :: new ( ) , 0 , metrics, reservation) ;
962
- let ( batches, num_rows, metrics, mut reservation) = stream
957
+ let ( batches, num_rows, metrics, mut reservation) = left_stream
963
958
. try_fold ( initial, |mut acc, batch| async {
964
959
let batch_size = get_record_batch_memory_size ( & batch) ;
965
960
// Reserve memory for incoming batch
@@ -1655,6 +1650,7 @@ impl EmbeddedProjection for HashJoinExec {
1655
1650
#[ cfg( test) ]
1656
1651
mod tests {
1657
1652
use super :: * ;
1653
+ use crate :: coalesce_partitions:: CoalescePartitionsExec ;
1658
1654
use crate :: test:: TestMemoryExec ;
1659
1655
use crate :: {
1660
1656
common, expressions:: Column , repartition:: RepartitionExec , test:: build_table_i32,
@@ -2105,6 +2101,7 @@ mod tests {
2105
2101
let left =
2106
2102
TestMemoryExec :: try_new_exec ( & [ vec ! [ batch1] , vec ! [ batch2] ] , schema, None )
2107
2103
. unwrap ( ) ;
2104
+ let left = Arc :: new ( CoalescePartitionsExec :: new ( left) ) ;
2108
2105
2109
2106
let right = build_table (
2110
2107
( "a1" , & vec ! [ 1 , 2 , 3 ] ) ,
@@ -2177,6 +2174,7 @@ mod tests {
2177
2174
let left =
2178
2175
TestMemoryExec :: try_new_exec ( & [ vec ! [ batch1] , vec ! [ batch2] ] , schema, None )
2179
2176
. unwrap ( ) ;
2177
+ let left = Arc :: new ( CoalescePartitionsExec :: new ( left) ) ;
2180
2178
let right = build_table (
2181
2179
( "a2" , & vec ! [ 20 , 30 , 10 ] ) ,
2182
2180
( "b2" , & vec ! [ 5 , 6 , 4 ] ) ,
0 commit comments