18
18
//! Logical plan types
19
19
20
20
use std:: cmp:: Ordering ;
21
- use std:: collections:: { BTreeMap , HashMap , HashSet } ;
21
+ use std:: collections:: { HashMap , HashSet } ;
22
22
use std:: fmt:: { self , Debug , Display , Formatter } ;
23
23
use std:: hash:: { Hash , Hasher } ;
24
24
use std:: str:: FromStr ;
@@ -2681,24 +2681,16 @@ impl Union {
2681
2681
Ok ( Union { inputs, schema } )
2682
2682
}
2683
2683
2684
- /// When constructing a `UNION BY NAME`, we may need to wrap inputs
2684
+ /// When constructing a `UNION BY NAME`, we need to wrap inputs
2685
2685
/// in an additional `Projection` to account for absence of columns
2686
- /// in input schemas.
2686
+ /// in input schemas or differing projection orders.
2687
2687
fn rewrite_inputs_from_schema (
2688
- schema : & DFSchema ,
2688
+ schema : & Arc < DFSchema > ,
2689
2689
inputs : Vec < Arc < LogicalPlan > > ,
2690
2690
) -> Result < Vec < Arc < LogicalPlan > > > {
2691
2691
let schema_width = schema. iter ( ) . count ( ) ;
2692
2692
let mut wrapped_inputs = Vec :: with_capacity ( inputs. len ( ) ) ;
2693
2693
for input in inputs {
2694
- // If the input plan's schema contains the same number of fields
2695
- // as the derived schema, then it does not to be wrapped in an
2696
- // additional `Projection`.
2697
- if input. schema ( ) . iter ( ) . count ( ) == schema_width {
2698
- wrapped_inputs. push ( input) ;
2699
- continue ;
2700
- }
2701
-
2702
2694
// Any columns that exist within the derived schema but do not exist
2703
2695
// within an input's schema should be replaced with `NULL` aliased
2704
2696
// to the appropriate column in the derived schema.
@@ -2713,9 +2705,9 @@ impl Union {
2713
2705
expr. push ( Expr :: Literal ( ScalarValue :: Null ) . alias ( column. name ( ) ) ) ;
2714
2706
}
2715
2707
}
2716
- wrapped_inputs. push ( Arc :: new ( LogicalPlan :: Projection ( Projection :: try_new (
2717
- expr, input,
2718
- ) ? ) ) ) ;
2708
+ wrapped_inputs. push ( Arc :: new ( LogicalPlan :: Projection (
2709
+ Projection :: try_new_with_schema ( expr, input, Arc :: clone ( schema ) ) ? ,
2710
+ ) ) ) ;
2719
2711
}
2720
2712
2721
2713
Ok ( wrapped_inputs)
@@ -2749,45 +2741,60 @@ impl Union {
2749
2741
inputs : & [ Arc < LogicalPlan > ] ,
2750
2742
loose_types : bool ,
2751
2743
) -> Result < DFSchemaRef > {
2752
- type FieldData < ' a > = ( & ' a DataType , bool , Vec < & ' a HashMap < String , String > > ) ;
2753
- // Prefer `BTreeMap` as it produces items in order by key when iterated over
2754
- let mut cols: BTreeMap < & str , FieldData > = BTreeMap :: new ( ) ;
2744
+ type FieldData < ' a > =
2745
+ ( & ' a DataType , bool , Vec < & ' a HashMap < String , String > > , usize ) ;
2746
+ let mut cols: Vec < ( & str , FieldData ) > = Vec :: new ( ) ;
2755
2747
for input in inputs. iter ( ) {
2756
2748
for field in input. schema ( ) . fields ( ) {
2757
- match cols. entry ( field. name ( ) ) {
2758
- std:: collections:: btree_map:: Entry :: Occupied ( mut occupied) => {
2759
- let ( data_type, is_nullable, metadata) = occupied. get_mut ( ) ;
2760
- if !loose_types && * data_type != field. data_type ( ) {
2761
- return plan_err ! (
2762
- "Found different types for field {}" ,
2763
- field. name( )
2764
- ) ;
2765
- }
2766
-
2767
- metadata. push ( field. metadata ( ) ) ;
2768
- // If the field is nullable in any one of the inputs,
2769
- // then the field in the final schema is also nullable.
2770
- * is_nullable |= field. is_nullable ( ) ;
2749
+ if let Some ( ( _, ( data_type, is_nullable, metadata, occurrences) ) ) =
2750
+ cols. iter_mut ( ) . find ( |( name, _) | name == field. name ( ) )
2751
+ {
2752
+ if !loose_types && * data_type != field. data_type ( ) {
2753
+ return plan_err ! (
2754
+ "Found different types for field {}" ,
2755
+ field. name( )
2756
+ ) ;
2771
2757
}
2772
- std:: collections:: btree_map:: Entry :: Vacant ( vacant) => {
2773
- vacant. insert ( (
2758
+
2759
+ metadata. push ( field. metadata ( ) ) ;
2760
+ // If the field is nullable in any one of the inputs,
2761
+ // then the field in the final schema is also nullable.
2762
+ * is_nullable |= field. is_nullable ( ) ;
2763
+ * occurrences += 1 ;
2764
+ } else {
2765
+ cols. push ( (
2766
+ field. name ( ) ,
2767
+ (
2774
2768
field. data_type ( ) ,
2775
2769
field. is_nullable ( ) ,
2776
2770
vec ! [ field. metadata( ) ] ,
2777
- ) ) ;
2778
- }
2771
+ 1 ,
2772
+ ) ,
2773
+ ) ) ;
2779
2774
}
2780
2775
}
2781
2776
}
2782
2777
2783
2778
let union_fields = cols
2784
2779
. into_iter ( )
2785
- . map ( |( name, ( data_type, is_nullable, unmerged_metadata) ) | {
2786
- let mut field = Field :: new ( name, data_type. clone ( ) , is_nullable) ;
2787
- field. set_metadata ( intersect_maps ( unmerged_metadata) ) ;
2780
+ . map (
2781
+ |( name, ( data_type, is_nullable, unmerged_metadata, occurrences) ) | {
2782
+ // If the final number of occurrences of the field is less
2783
+ // than the number of inputs (i.e. the field is missing from
2784
+ // one or more inputs), then it must be treated as nullable.
2785
+ let final_is_nullable = if occurrences == inputs. len ( ) {
2786
+ is_nullable
2787
+ } else {
2788
+ true
2789
+ } ;
2788
2790
2789
- ( None , Arc :: new ( field) )
2790
- } )
2791
+ let mut field =
2792
+ Field :: new ( name, data_type. clone ( ) , final_is_nullable) ;
2793
+ field. set_metadata ( intersect_maps ( unmerged_metadata) ) ;
2794
+
2795
+ ( None , Arc :: new ( field) )
2796
+ } ,
2797
+ )
2791
2798
. collect :: < Vec < ( Option < TableReference > , _ ) > > ( ) ;
2792
2799
2793
2800
let union_schema_metadata =
0 commit comments