@@ -29,7 +29,6 @@ use crate::joins::utils::{
29
29
need_produce_result_in_final, JoinHashMap , JoinHashMapType ,
30
30
} ;
31
31
use crate :: {
32
- coalesce_batches:: concat_batches,
33
32
coalesce_partitions:: CoalescePartitionsExec ,
34
33
expressions:: Column ,
35
34
expressions:: PhysicalSortExpr ,
@@ -52,10 +51,10 @@ use super::{
52
51
53
52
use arrow:: array:: {
54
53
Array , ArrayRef , BooleanArray , BooleanBufferBuilder , PrimitiveArray , UInt32Array ,
55
- UInt32BufferBuilder , UInt64Array , UInt64BufferBuilder ,
54
+ UInt64Array ,
56
55
} ;
57
56
use arrow:: compute:: kernels:: cmp:: { eq, not_distinct} ;
58
- use arrow:: compute:: { and, take, FilterBuilder } ;
57
+ use arrow:: compute:: { and, concat_batches , take, FilterBuilder } ;
59
58
use arrow:: datatypes:: { Schema , SchemaRef } ;
60
59
use arrow:: record_batch:: RecordBatch ;
61
60
use arrow:: util:: bit_util;
@@ -715,7 +714,10 @@ async fn collect_left_input(
715
714
let mut hashmap = JoinHashMap :: with_capacity ( num_rows) ;
716
715
let mut hashes_buffer = Vec :: new ( ) ;
717
716
let mut offset = 0 ;
718
- for batch in batches. iter ( ) {
717
+
718
+ // Iterating over the build-side input batches in reverse order makes it possible to build a FIFO hashmap
719
+ let batches_iter = batches. iter ( ) . rev ( ) ;
720
+ for batch in batches_iter. clone ( ) {
719
721
hashes_buffer. clear ( ) ;
720
722
hashes_buffer. resize ( batch. num_rows ( ) , 0 ) ;
721
723
update_hash (
@@ -726,19 +728,25 @@ async fn collect_left_input(
726
728
& random_state,
727
729
& mut hashes_buffer,
728
730
0 ,
731
+ true ,
729
732
) ?;
730
733
offset += batch. num_rows ( ) ;
731
734
}
732
735
// Merge all batches into a single batch, so we
733
736
// can directly index into the arrays
734
- let single_batch = concat_batches ( & schema, & batches , num_rows ) ?;
737
+ let single_batch = concat_batches ( & schema, batches_iter ) ?;
735
738
let data = JoinLeftData :: new ( hashmap, single_batch, reservation) ;
736
739
737
740
Ok ( data)
738
741
}
739
742
740
- /// Updates `hash` with new entries from [RecordBatch] evaluated against the expressions `on`,
741
- /// assuming that the [RecordBatch] corresponds to the `index`th
743
+ /// Updates `hash_map` with new entries from `batch` evaluated against the expressions `on`
744
+ /// using `offset` as a start value for `batch` row indices.
745
+ ///
746
+ /// `fifo_hashmap` sets the order of iteration over `batch` rows while updating the hashmap,
747
+ /// which makes it possible to keep either the first (if set to true) or the last (if set to false)
748
+ /// row index as the chain head for matching hashes.
749
+ #[ allow( clippy:: too_many_arguments) ]
742
750
pub fn update_hash < T > (
743
751
on : & [ Column ] ,
744
752
batch : & RecordBatch ,
@@ -747,6 +755,7 @@ pub fn update_hash<T>(
747
755
random_state : & RandomState ,
748
756
hashes_buffer : & mut Vec < u64 > ,
749
757
deleted_offset : usize ,
758
+ fifo_hashmap : bool ,
750
759
) -> Result < ( ) >
751
760
where
752
761
T : JoinHashMapType ,
@@ -763,28 +772,18 @@ where
763
772
// For usual JoinHashmap, the implementation is void.
764
773
hash_map. extend_zero ( batch. num_rows ( ) ) ;
765
774
766
- // insert hashes to key of the hashmap
767
- let ( mut_map, mut_list) = hash_map. get_mut ( ) ;
768
- for ( row, hash_value) in hash_values. iter ( ) . enumerate ( ) {
769
- let item = mut_map. get_mut ( * hash_value, |( hash, _) | * hash_value == * hash) ;
770
- if let Some ( ( _, index) ) = item {
771
- // Already exists: add index to next array
772
- let prev_index = * index;
773
- // Store new value inside hashmap
774
- * index = ( row + offset + 1 ) as u64 ;
775
- // Update chained Vec at row + offset with previous value
776
- mut_list[ row + offset - deleted_offset] = prev_index;
777
- } else {
778
- mut_map. insert (
779
- * hash_value,
780
- // store the value + 1 as 0 value reserved for end of list
781
- ( * hash_value, ( row + offset + 1 ) as u64 ) ,
782
- |( hash, _) | * hash,
783
- ) ;
784
- // chained list at (row + offset) is already initialized with 0
785
- // meaning end of list
786
- }
775
+ // Updating JoinHashMap from hash values iterator
776
+ let hash_values_iter = hash_values
777
+ . iter ( )
778
+ . enumerate ( )
779
+ . map ( |( i, val) | ( i + offset, val) ) ;
780
+
781
+ if fifo_hashmap {
782
+ hash_map. update_from_iter ( hash_values_iter. rev ( ) , deleted_offset) ;
783
+ } else {
784
+ hash_map. update_from_iter ( hash_values_iter, deleted_offset) ;
787
785
}
786
+
788
787
Ok ( ( ) )
789
788
}
790
789
@@ -987,6 +986,7 @@ pub fn build_equal_condition_join_indices<T: JoinHashMapType>(
987
986
filter : Option < & JoinFilter > ,
988
987
build_side : JoinSide ,
989
988
deleted_offset : Option < usize > ,
989
+ fifo_hashmap : bool ,
990
990
) -> Result < ( UInt64Array , UInt32Array ) > {
991
991
let keys_values = probe_on
992
992
. iter ( )
@@ -1002,10 +1002,9 @@ pub fn build_equal_condition_join_indices<T: JoinHashMapType>(
1002
1002
hashes_buffer. clear ( ) ;
1003
1003
hashes_buffer. resize ( probe_batch. num_rows ( ) , 0 ) ;
1004
1004
let hash_values = create_hashes ( & keys_values, random_state, hashes_buffer) ?;
1005
- // Using a buffer builder to avoid slower normal builder
1006
- let mut build_indices = UInt64BufferBuilder :: new ( 0 ) ;
1007
- let mut probe_indices = UInt32BufferBuilder :: new ( 0 ) ;
1008
- // The chained list algorithm generates build indices for each probe row in a reversed sequence as such:
1005
+
1006
+ // If the build-side input was not reversed when the JoinHashMap was created, the chained list algorithm
1007
+ // returns build indices for each probe row in reverse order:
1009
1008
// Build Indices: [5, 4, 3]
1010
1009
// Probe Indices: [1, 1, 1]
1011
1010
//
@@ -1034,44 +1033,17 @@ pub fn build_equal_condition_join_indices<T: JoinHashMapType>(
1034
1033
// (5,1)
1035
1034
//
1036
1035
// With this approach, the lexicographic order on both the probe side and the build side is preserved.
1037
- let hash_map = build_hashmap. get_map ( ) ;
1038
- let next_chain = build_hashmap. get_list ( ) ;
1039
- for ( row, hash_value) in hash_values. iter ( ) . enumerate ( ) . rev ( ) {
1040
- // Get the hash and find it in the build index
1041
-
1042
- // For every item on the build and probe we check if it matches
1043
- // This possibly contains rows with hash collisions,
1044
- // So we have to check here whether rows are equal or not
1045
- if let Some ( ( _, index) ) =
1046
- hash_map. get ( * hash_value, |( hash, _) | * hash_value == * hash)
1047
- {
1048
- let mut i = * index - 1 ;
1049
- loop {
1050
- let build_row_value = if let Some ( offset) = deleted_offset {
1051
- // This arguments means that we prune the next index way before here.
1052
- if i < offset as u64 {
1053
- // End of the list due to pruning
1054
- break ;
1055
- }
1056
- i - offset as u64
1057
- } else {
1058
- i
1059
- } ;
1060
- build_indices. append ( build_row_value) ;
1061
- probe_indices. append ( row as u32 ) ;
1062
- // Follow the chain to get the next index value
1063
- let next = next_chain[ build_row_value as usize ] ;
1064
- if next == 0 {
1065
- // end of list
1066
- break ;
1067
- }
1068
- i = next - 1 ;
1069
- }
1070
- }
1071
- }
1072
- // Reversing both sets of indices
1073
- build_indices. as_slice_mut ( ) . reverse ( ) ;
1074
- probe_indices. as_slice_mut ( ) . reverse ( ) ;
1036
+ let ( mut build_indices, mut probe_indices) = if fifo_hashmap {
1037
+ build_hashmap. get_matched_indices ( hash_values. iter ( ) . enumerate ( ) , deleted_offset)
1038
+ } else {
1039
+ let ( mut matched_build, mut matched_probe) = build_hashmap
1040
+ . get_matched_indices ( hash_values. iter ( ) . enumerate ( ) . rev ( ) , deleted_offset) ;
1041
+
1042
+ matched_build. as_slice_mut ( ) . reverse ( ) ;
1043
+ matched_probe. as_slice_mut ( ) . reverse ( ) ;
1044
+
1045
+ ( matched_build, matched_probe)
1046
+ } ;
1075
1047
1076
1048
let left: UInt64Array = PrimitiveArray :: new ( build_indices. finish ( ) . into ( ) , None ) ;
1077
1049
let right: UInt32Array = PrimitiveArray :: new ( probe_indices. finish ( ) . into ( ) , None ) ;
@@ -1279,6 +1251,7 @@ impl HashJoinStream {
1279
1251
self . filter . as_ref ( ) ,
1280
1252
JoinSide :: Left ,
1281
1253
None ,
1254
+ true ,
1282
1255
) ;
1283
1256
1284
1257
let result = match left_right_indices {
@@ -1393,7 +1366,9 @@ mod tests {
1393
1366
1394
1367
use arrow:: array:: { ArrayRef , Date32Array , Int32Array , UInt32Builder , UInt64Builder } ;
1395
1368
use arrow:: datatypes:: { DataType , Field , Schema } ;
1396
- use datafusion_common:: { assert_batches_sorted_eq, assert_contains, ScalarValue } ;
1369
+ use datafusion_common:: {
1370
+ assert_batches_eq, assert_batches_sorted_eq, assert_contains, ScalarValue ,
1371
+ } ;
1397
1372
use datafusion_execution:: config:: SessionConfig ;
1398
1373
use datafusion_execution:: runtime_env:: { RuntimeConfig , RuntimeEnv } ;
1399
1374
use datafusion_expr:: Operator ;
@@ -1558,7 +1533,9 @@ mod tests {
1558
1533
"| 3 | 5 | 9 | 20 | 5 | 80 |" ,
1559
1534
"+----+----+----+----+----+----+" ,
1560
1535
] ;
1561
- assert_batches_sorted_eq ! ( expected, & batches) ;
1536
+
1537
+ // Inner join output is expected to preserve the order of both inputs
1538
+ assert_batches_eq ! ( expected, & batches) ;
1562
1539
1563
1540
Ok ( ( ) )
1564
1541
}
@@ -1640,7 +1617,8 @@ mod tests {
1640
1617
"+----+----+----+----+----+----+" ,
1641
1618
] ;
1642
1619
1643
- assert_batches_sorted_eq ! ( expected, & batches) ;
1620
+ // Inner join output is expected to preserve the order of both inputs
1621
+ assert_batches_eq ! ( expected, & batches) ;
1644
1622
1645
1623
Ok ( ( ) )
1646
1624
}
@@ -1686,7 +1664,8 @@ mod tests {
1686
1664
"+----+----+----+----+----+----+" ,
1687
1665
] ;
1688
1666
1689
- assert_batches_sorted_eq ! ( expected, & batches) ;
1667
+ // Inner join output is expected to preserve the order of both inputs
1668
+ assert_batches_eq ! ( expected, & batches) ;
1690
1669
1691
1670
Ok ( ( ) )
1692
1671
}
@@ -1740,7 +1719,8 @@ mod tests {
1740
1719
"+----+----+----+----+----+----+" ,
1741
1720
] ;
1742
1721
1743
- assert_batches_sorted_eq ! ( expected, & batches) ;
1722
+ // Inner join output is expected to preserve the order of both inputs
1723
+ assert_batches_eq ! ( expected, & batches) ;
1744
1724
1745
1725
Ok ( ( ) )
1746
1726
}
@@ -1789,7 +1769,9 @@ mod tests {
1789
1769
"| 1 | 4 | 7 | 10 | 4 | 70 |" ,
1790
1770
"+----+----+----+----+----+----+" ,
1791
1771
] ;
1792
- assert_batches_sorted_eq ! ( expected, & batches) ;
1772
+
1773
+ // Inner join output is expected to preserve the order of both inputs
1774
+ assert_batches_eq ! ( expected, & batches) ;
1793
1775
1794
1776
// second part
1795
1777
let stream = join. execute ( 1 , task_ctx. clone ( ) ) ?;
@@ -1804,7 +1786,8 @@ mod tests {
1804
1786
"+----+----+----+----+----+----+" ,
1805
1787
] ;
1806
1788
1807
- assert_batches_sorted_eq ! ( expected, & batches) ;
1789
+ // Inner join output is expected to preserve the order of both inputs
1790
+ assert_batches_eq ! ( expected, & batches) ;
1808
1791
1809
1792
Ok ( ( ) )
1810
1793
}
@@ -2734,6 +2717,7 @@ mod tests {
2734
2717
None ,
2735
2718
JoinSide :: Left ,
2736
2719
None ,
2720
+ false ,
2737
2721
) ?;
2738
2722
2739
2723
let mut left_ids = UInt64Builder :: with_capacity ( 0 ) ;
0 commit comments