12
12
// See the License for the specific language governing permissions and
13
13
// limitations under the License.
14
14
15
+ use std:: collections:: BTreeSet ;
15
16
use std:: collections:: HashSet ;
16
17
use std:: sync:: Arc ;
17
18
18
19
use databend_common_ast:: Span ;
20
+ use databend_common_catalog:: table_context:: TableContext ;
21
+ use databend_common_exception:: ErrorCode ;
19
22
use databend_common_exception:: Result ;
23
+ use databend_common_expression:: type_check:: common_super_type;
20
24
use databend_common_expression:: types:: DataType ;
25
+ use databend_common_expression:: types:: NumberScalar ;
26
+ use databend_common_expression:: ColumnBuilder ;
27
+ use databend_common_expression:: Scalar ;
28
+ use databend_common_expression:: ScalarRef ;
29
+ use databend_common_functions:: BUILTIN_FUNCTIONS ;
21
30
22
31
use crate :: binder:: ColumnBindingBuilder ;
23
32
use crate :: binder:: JoinPredicate ;
@@ -30,12 +39,16 @@ use crate::optimizer::ColumnSet;
30
39
use crate :: optimizer:: RelExpr ;
31
40
use crate :: optimizer:: SExpr ;
32
41
use crate :: plans:: BoundColumnRef ;
42
+ use crate :: plans:: CastExpr ;
43
+ use crate :: plans:: ComparisonOp ;
44
+ use crate :: plans:: ConstantExpr ;
33
45
use crate :: plans:: Filter ;
34
46
use crate :: plans:: FunctionCall ;
35
47
use crate :: plans:: Join ;
36
48
use crate :: plans:: JoinEquiCondition ;
37
49
use crate :: plans:: JoinType ;
38
50
use crate :: plans:: RelOp ;
51
+ use crate :: plans:: RelOperator ;
39
52
use crate :: plans:: ScalarExpr ;
40
53
use crate :: plans:: SubqueryExpr ;
41
54
use crate :: plans:: SubqueryType ;
@@ -53,8 +66,12 @@ use crate::MetadataRef;
53
66
/// Correlated exists subquery -> Marker join
54
67
///
55
68
/// More information can be found in the paper: Unnesting Arbitrary Queries
56
- pub fn decorrelate_subquery ( metadata : MetadataRef , s_expr : SExpr ) -> Result < SExpr > {
57
- let mut rewriter = SubqueryRewriter :: new ( metadata, None ) ;
69
+ pub fn decorrelate_subquery (
70
+ ctx : Arc < dyn TableContext > ,
71
+ metadata : MetadataRef ,
72
+ s_expr : SExpr ,
73
+ ) -> Result < SExpr > {
74
+ let mut rewriter = SubqueryRewriter :: new ( ctx, metadata, None ) ;
58
75
rewriter. rewrite ( & s_expr)
59
76
}
60
77
@@ -517,4 +534,286 @@ impl SubqueryRewriter {
517
534
true
518
535
} ) )
519
536
}
537
+
538
+ // Try folding the subquery into a constant value expression,
539
+ // which turns the join plan into a filter plan, so that the bloom filter
540
+ // can be used to reduce the amount of data that needs to be read.
541
+ pub fn try_fold_constant_subquery (
542
+ & self ,
543
+ subquery : & SubqueryExpr ,
544
+ ) -> Result < Option < ScalarExpr > > {
545
+ // (1) EvalScalar
546
+ // \
547
+ // DummyTableScan
548
+ //
549
+ // (2) EvalScalar
550
+ // \
551
+ // EvalScalar
552
+ // \
553
+ // ProjectSet
554
+ // \
555
+ // DummyTableScan
556
+ let matchers = vec ! [
557
+ Matcher :: MatchOp {
558
+ op_type: RelOp :: EvalScalar ,
559
+ children: vec![ Matcher :: MatchOp {
560
+ op_type: RelOp :: DummyTableScan ,
561
+ children: vec![ ] ,
562
+ } ] ,
563
+ } ,
564
+ Matcher :: MatchOp {
565
+ op_type: RelOp :: EvalScalar ,
566
+ children: vec![ Matcher :: MatchOp {
567
+ op_type: RelOp :: EvalScalar ,
568
+ children: vec![ Matcher :: MatchOp {
569
+ op_type: RelOp :: ProjectSet ,
570
+ children: vec![ Matcher :: MatchOp {
571
+ op_type: RelOp :: DummyTableScan ,
572
+ children: vec![ ] ,
573
+ } ] ,
574
+ } ] ,
575
+ } ] ,
576
+ } ,
577
+ ] ;
578
+
579
+ let mut matched = false ;
580
+ for matcher in matchers {
581
+ if matcher. matches ( & subquery. subquery ) {
582
+ matched = true ;
583
+ break ;
584
+ }
585
+ }
586
+ if !matched {
587
+ return Ok ( None ) ;
588
+ }
589
+
590
+ let child = subquery. subquery . child ( 0 ) ?;
591
+ if let RelOperator :: DummyTableScan ( _) = child. plan ( ) {
592
+ // subquery is a simple constant value.
593
+ // for example: `SELECT * FROM t WHERE id = (select 1);`
594
+ if let RelOperator :: EvalScalar ( eval) = subquery. subquery . plan ( ) {
595
+ if eval. items . len ( ) != 1 {
596
+ return Ok ( None ) ;
597
+ }
598
+ let Ok ( const_scalar) = ConstantExpr :: try_from ( eval. items [ 0 ] . scalar . clone ( ) ) else {
599
+ return Ok ( None ) ;
600
+ } ;
601
+ match ( & subquery. child_expr , subquery. compare_op ) {
602
+ ( Some ( child_expr) , Some ( compare_op) ) => {
603
+ let func_name = compare_op. to_func_name ( ) . to_string ( ) ;
604
+ let func = ScalarExpr :: FunctionCall ( FunctionCall {
605
+ span : subquery. span ,
606
+ func_name,
607
+ params : vec ! [ ] ,
608
+ arguments : vec ! [ * child_expr. clone( ) , const_scalar. into( ) ] ,
609
+ } ) ;
610
+ return Ok ( Some ( func) ) ;
611
+ }
612
+ ( None , None ) => match subquery. typ {
613
+ SubqueryType :: Scalar => {
614
+ return Ok ( Some ( const_scalar. into ( ) ) ) ;
615
+ }
616
+ SubqueryType :: Exists => {
617
+ return Ok ( Some ( ScalarExpr :: ConstantExpr ( ConstantExpr {
618
+ span : subquery. span ,
619
+ value : Scalar :: Boolean ( true ) ,
620
+ } ) ) ) ;
621
+ }
622
+ SubqueryType :: NotExists => {
623
+ return Ok ( Some ( ScalarExpr :: ConstantExpr ( ConstantExpr {
624
+ span : subquery. span ,
625
+ value : Scalar :: Boolean ( false ) ,
626
+ } ) ) ) ;
627
+ }
628
+ _ => { }
629
+ } ,
630
+ ( _, _) => { }
631
+ }
632
+ }
633
+ } else {
634
+ // subquery is a set returning function return constant values.
635
+ // for example: `SELECT * FROM t WHERE id IN (SELECT * FROM UNNEST(SPLIT('1,2,3', ',')) AS t1);`
636
+ let mut output_column_index = None ;
637
+ if let RelOperator :: EvalScalar ( eval) = subquery. subquery . plan ( ) {
638
+ if eval. items . len ( ) != 1 {
639
+ return Ok ( None ) ;
640
+ }
641
+ if let ScalarExpr :: BoundColumnRef ( bound_column) = & eval. items [ 0 ] . scalar {
642
+ output_column_index = Some ( bound_column. column . index ) ;
643
+ }
644
+ }
645
+ if output_column_index. is_none ( ) {
646
+ return Ok ( None ) ;
647
+ }
648
+ let output_column_index = output_column_index. unwrap ( ) ;
649
+
650
+ let mut srf_column_index = None ;
651
+ if let RelOperator :: EvalScalar ( eval) = child. plan ( ) {
652
+ if eval. items . len ( ) != 1 || eval. items [ 0 ] . index != output_column_index {
653
+ return Ok ( None ) ;
654
+ }
655
+ if let ScalarExpr :: FunctionCall ( get_func) = & eval. items [ 0 ] . scalar {
656
+ if get_func. func_name == "get"
657
+ && get_func. arguments . len ( ) == 1
658
+ && get_func. params . len ( ) == 1
659
+ && get_func. params [ 0 ] == Scalar :: Number ( NumberScalar :: Int64 ( 1 ) )
660
+ {
661
+ if let ScalarExpr :: BoundColumnRef ( bound_column) = & get_func. arguments [ 0 ] {
662
+ srf_column_index = Some ( bound_column. column . index ) ;
663
+ }
664
+ }
665
+ }
666
+ }
667
+ if srf_column_index. is_none ( ) {
668
+ return Ok ( None ) ;
669
+ }
670
+ let srf_column_index = srf_column_index. unwrap ( ) ;
671
+
672
+ let project_set_expr = child. child ( 0 ) ?;
673
+ if let RelOperator :: ProjectSet ( project_set) = project_set_expr. plan ( ) {
674
+ if project_set. srfs . len ( ) != 1
675
+ || project_set. srfs [ 0 ] . index != srf_column_index
676
+ || subquery. compare_op != Some ( ComparisonOp :: Equal )
677
+ || subquery. typ != SubqueryType :: Any
678
+ {
679
+ return Ok ( None ) ;
680
+ }
681
+ let Ok ( srf) = FunctionCall :: try_from ( project_set. srfs [ 0 ] . scalar . clone ( ) ) else {
682
+ return Ok ( None ) ;
683
+ } ;
684
+ if srf. arguments . len ( ) != 1 {
685
+ return Ok ( None ) ;
686
+ }
687
+ let Ok ( const_scalar) = ConstantExpr :: try_from ( srf. arguments [ 0 ] . clone ( ) ) else {
688
+ return Ok ( None ) ;
689
+ } ;
690
+ let Some ( child_expr) = & subquery. child_expr else {
691
+ return Ok ( None ) ;
692
+ } ;
693
+ match & const_scalar. value {
694
+ Scalar :: EmptyArray => {
695
+ return Ok ( Some ( ScalarExpr :: ConstantExpr ( ConstantExpr {
696
+ span : subquery. span ,
697
+ value : Scalar :: Null ,
698
+ } ) ) ) ;
699
+ }
700
+ Scalar :: Array ( array_column) => {
701
+ let mut values = BTreeSet :: new ( ) ;
702
+ for scalar in array_column. iter ( ) {
703
+ // Ignoring NULL values in equivalent filter
704
+ if scalar == ScalarRef :: Null {
705
+ continue ;
706
+ }
707
+ values. insert ( scalar. to_owned ( ) ) ;
708
+ }
709
+ // If there are no equivalent values, the filter condition does not match,
710
+ // return a NULL value.
711
+ if values. is_empty ( ) {
712
+ return Ok ( Some ( ScalarExpr :: ConstantExpr ( ConstantExpr {
713
+ span : subquery. span ,
714
+ value : Scalar :: Null ,
715
+ } ) ) ) ;
716
+ }
717
+ // If the number of values more than `inlist_to_join_threshold`, need convert to join.
718
+ if values. len ( ) >= self . ctx . get_settings ( ) . get_inlist_to_join_threshold ( ) ? {
719
+ return Ok ( None ) ;
720
+ }
721
+ // If the number of values more than `max_inlist_to_or`, use contains function instead of or.
722
+ if values. len ( ) > self . ctx . get_settings ( ) . get_max_inlist_to_or ( ) ? as usize {
723
+ let value_type = values. first ( ) . unwrap ( ) . as_ref ( ) . infer_data_type ( ) ;
724
+ let mut builder =
725
+ ColumnBuilder :: with_capacity ( & value_type, values. len ( ) ) ;
726
+ for value in values. into_iter ( ) {
727
+ builder. push ( value. as_ref ( ) ) ;
728
+ }
729
+ let array_value = ScalarExpr :: ConstantExpr ( ConstantExpr {
730
+ span : subquery. span ,
731
+ value : Scalar :: Array ( builder. build ( ) ) ,
732
+ } ) ;
733
+
734
+ let expr_type = child_expr. data_type ( ) ?;
735
+ let common_type = common_super_type (
736
+ value_type. clone ( ) ,
737
+ expr_type. clone ( ) ,
738
+ & BUILTIN_FUNCTIONS . default_cast_rules ,
739
+ )
740
+ . ok_or_else ( || {
741
+ ErrorCode :: IllegalDataType ( format ! (
742
+ "Cannot find common type for inlist subquery value {:?} and expr {:?}" ,
743
+ & array_value, & child_expr
744
+ ) )
745
+ } ) ?;
746
+
747
+ let mut arguments = Vec :: with_capacity ( 2 ) ;
748
+ if value_type != common_type {
749
+ arguments. push ( ScalarExpr :: CastExpr ( CastExpr {
750
+ span : subquery. span ,
751
+ is_try : false ,
752
+ argument : Box :: new ( array_value) ,
753
+ target_type : Box :: new ( DataType :: Array ( Box :: new (
754
+ common_type. clone ( ) ,
755
+ ) ) ) ,
756
+ } ) ) ;
757
+ } else {
758
+ arguments. push ( array_value) ;
759
+ }
760
+ if expr_type != common_type {
761
+ arguments. push ( ScalarExpr :: CastExpr ( CastExpr {
762
+ span : subquery. span ,
763
+ is_try : false ,
764
+ argument : Box :: new ( * child_expr. clone ( ) ) ,
765
+ target_type : Box :: new ( common_type. clone ( ) ) ,
766
+ } ) ) ;
767
+ } else {
768
+ arguments. push ( * child_expr. clone ( ) ) ;
769
+ }
770
+ let func = ScalarExpr :: FunctionCall ( FunctionCall {
771
+ span : subquery. span ,
772
+ func_name : "contains" . to_string ( ) ,
773
+ params : vec ! [ ] ,
774
+ arguments,
775
+ } ) ;
776
+ return Ok ( Some ( func) ) ;
777
+ }
778
+
779
+ let mut funcs = Vec :: with_capacity ( values. len ( ) ) ;
780
+ for value in values. into_iter ( ) {
781
+ let scalar_value = ScalarExpr :: ConstantExpr ( ConstantExpr {
782
+ span : subquery. span ,
783
+ value,
784
+ } ) ;
785
+ let func = ScalarExpr :: FunctionCall ( FunctionCall {
786
+ span : subquery. span ,
787
+ func_name : "eq" . to_string ( ) ,
788
+ params : vec ! [ ] ,
789
+ arguments : vec ! [ * child_expr. clone( ) , scalar_value] ,
790
+ } ) ;
791
+ funcs. push ( func) ;
792
+ }
793
+ let or_func = funcs
794
+ . into_iter ( )
795
+ . fold ( None , |mut acc, func| {
796
+ match acc. as_mut ( ) {
797
+ None => acc = Some ( func) ,
798
+ Some ( acc) => {
799
+ * acc = ScalarExpr :: FunctionCall ( FunctionCall {
800
+ span : subquery. span ,
801
+ func_name : "or" . to_string ( ) ,
802
+ params : vec ! [ ] ,
803
+ arguments : vec ! [ acc. clone( ) , func] ,
804
+ } ) ;
805
+ }
806
+ }
807
+ acc
808
+ } )
809
+ . unwrap ( ) ;
810
+ return Ok ( Some ( or_func) ) ;
811
+ }
812
+ _ => { }
813
+ }
814
+ }
815
+ }
816
+
817
+ Ok ( None )
818
+ }
520
819
}
0 commit comments