1212// See the License for the specific language governing permissions and
1313// limitations under the License.
1414
15+ use std:: collections:: BTreeSet ;
1516use std:: collections:: HashSet ;
1617use std:: sync:: Arc ;
1718
1819use databend_common_ast:: Span ;
20+ use databend_common_catalog:: table_context:: TableContext ;
21+ use databend_common_exception:: ErrorCode ;
1922use databend_common_exception:: Result ;
23+ use databend_common_expression:: type_check:: common_super_type;
2024use databend_common_expression:: types:: DataType ;
25+ use databend_common_expression:: types:: NumberScalar ;
26+ use databend_common_expression:: ColumnBuilder ;
27+ use databend_common_expression:: Scalar ;
28+ use databend_common_expression:: ScalarRef ;
29+ use databend_common_functions:: BUILTIN_FUNCTIONS ;
2130
2231use crate :: binder:: ColumnBindingBuilder ;
2332use crate :: binder:: JoinPredicate ;
@@ -30,12 +39,16 @@ use crate::optimizer::ColumnSet;
3039use crate :: optimizer:: RelExpr ;
3140use crate :: optimizer:: SExpr ;
3241use crate :: plans:: BoundColumnRef ;
42+ use crate :: plans:: CastExpr ;
43+ use crate :: plans:: ComparisonOp ;
44+ use crate :: plans:: ConstantExpr ;
3345use crate :: plans:: Filter ;
3446use crate :: plans:: FunctionCall ;
3547use crate :: plans:: Join ;
3648use crate :: plans:: JoinEquiCondition ;
3749use crate :: plans:: JoinType ;
3850use crate :: plans:: RelOp ;
51+ use crate :: plans:: RelOperator ;
3952use crate :: plans:: ScalarExpr ;
4053use crate :: plans:: SubqueryExpr ;
4154use crate :: plans:: SubqueryType ;
@@ -53,8 +66,12 @@ use crate::MetadataRef;
5366/// Correlated exists subquery -> Marker join
5467///
5568/// More information can be found in the paper: Unnesting Arbitrary Queries
56- pub fn decorrelate_subquery ( metadata : MetadataRef , s_expr : SExpr ) -> Result < SExpr > {
57- let mut rewriter = SubqueryRewriter :: new ( metadata, None ) ;
69+ pub fn decorrelate_subquery (
70+ ctx : Arc < dyn TableContext > ,
71+ metadata : MetadataRef ,
72+ s_expr : SExpr ,
73+ ) -> Result < SExpr > {
74+ let mut rewriter = SubqueryRewriter :: new ( ctx, metadata, None ) ;
5875 rewriter. rewrite ( & s_expr)
5976}
6077
@@ -517,4 +534,286 @@ impl SubqueryRewriter {
517534 true
518535 } ) )
519536 }
537+
538+ // Try folding the subquery into a constant value expression,
539+ // which turns the join plan into a filter plan, so that the bloom filter
540+ // can be used to reduce the amount of data that needs to be read.
541+ pub fn try_fold_constant_subquery (
542+ & self ,
543+ subquery : & SubqueryExpr ,
544+ ) -> Result < Option < ScalarExpr > > {
545+ // (1) EvalScalar
546+ // \
547+ // DummyTableScan
548+ //
549+ // (2) EvalScalar
550+ // \
551+ // EvalScalar
552+ // \
553+ // ProjectSet
554+ // \
555+ // DummyTableScan
556+ let matchers = vec ! [
557+ Matcher :: MatchOp {
558+ op_type: RelOp :: EvalScalar ,
559+ children: vec![ Matcher :: MatchOp {
560+ op_type: RelOp :: DummyTableScan ,
561+ children: vec![ ] ,
562+ } ] ,
563+ } ,
564+ Matcher :: MatchOp {
565+ op_type: RelOp :: EvalScalar ,
566+ children: vec![ Matcher :: MatchOp {
567+ op_type: RelOp :: EvalScalar ,
568+ children: vec![ Matcher :: MatchOp {
569+ op_type: RelOp :: ProjectSet ,
570+ children: vec![ Matcher :: MatchOp {
571+ op_type: RelOp :: DummyTableScan ,
572+ children: vec![ ] ,
573+ } ] ,
574+ } ] ,
575+ } ] ,
576+ } ,
577+ ] ;
578+
579+ let mut matched = false ;
580+ for matcher in matchers {
581+ if matcher. matches ( & subquery. subquery ) {
582+ matched = true ;
583+ break ;
584+ }
585+ }
586+ if !matched {
587+ return Ok ( None ) ;
588+ }
589+
590+ let child = subquery. subquery . child ( 0 ) ?;
591+ if let RelOperator :: DummyTableScan ( _) = child. plan ( ) {
592+ // subquery is a simple constant value.
593+ // for example: `SELECT * FROM t WHERE id = (select 1);`
594+ if let RelOperator :: EvalScalar ( eval) = subquery. subquery . plan ( ) {
595+ if eval. items . len ( ) != 1 {
596+ return Ok ( None ) ;
597+ }
598+ let Ok ( const_scalar) = ConstantExpr :: try_from ( eval. items [ 0 ] . scalar . clone ( ) ) else {
599+ return Ok ( None ) ;
600+ } ;
601+ match ( & subquery. child_expr , subquery. compare_op ) {
602+ ( Some ( child_expr) , Some ( compare_op) ) => {
603+ let func_name = compare_op. to_func_name ( ) . to_string ( ) ;
604+ let func = ScalarExpr :: FunctionCall ( FunctionCall {
605+ span : subquery. span ,
606+ func_name,
607+ params : vec ! [ ] ,
608+ arguments : vec ! [ * child_expr. clone( ) , const_scalar. into( ) ] ,
609+ } ) ;
610+ return Ok ( Some ( func) ) ;
611+ }
612+ ( None , None ) => match subquery. typ {
613+ SubqueryType :: Scalar => {
614+ return Ok ( Some ( const_scalar. into ( ) ) ) ;
615+ }
616+ SubqueryType :: Exists => {
617+ return Ok ( Some ( ScalarExpr :: ConstantExpr ( ConstantExpr {
618+ span : subquery. span ,
619+ value : Scalar :: Boolean ( true ) ,
620+ } ) ) ) ;
621+ }
622+ SubqueryType :: NotExists => {
623+ return Ok ( Some ( ScalarExpr :: ConstantExpr ( ConstantExpr {
624+ span : subquery. span ,
625+ value : Scalar :: Boolean ( false ) ,
626+ } ) ) ) ;
627+ }
628+ _ => { }
629+ } ,
630+ ( _, _) => { }
631+ }
632+ }
633+ } else {
634+ // subquery is a set returning function return constant values.
635+ // for example: `SELECT * FROM t WHERE id IN (SELECT * FROM UNNEST(SPLIT('1,2,3', ',')) AS t1);`
636+ let mut output_column_index = None ;
637+ if let RelOperator :: EvalScalar ( eval) = subquery. subquery . plan ( ) {
638+ if eval. items . len ( ) != 1 {
639+ return Ok ( None ) ;
640+ }
641+ if let ScalarExpr :: BoundColumnRef ( bound_column) = & eval. items [ 0 ] . scalar {
642+ output_column_index = Some ( bound_column. column . index ) ;
643+ }
644+ }
645+ if output_column_index. is_none ( ) {
646+ return Ok ( None ) ;
647+ }
648+ let output_column_index = output_column_index. unwrap ( ) ;
649+
650+ let mut srf_column_index = None ;
651+ if let RelOperator :: EvalScalar ( eval) = child. plan ( ) {
652+ if eval. items . len ( ) != 1 || eval. items [ 0 ] . index != output_column_index {
653+ return Ok ( None ) ;
654+ }
655+ if let ScalarExpr :: FunctionCall ( get_func) = & eval. items [ 0 ] . scalar {
656+ if get_func. func_name == "get"
657+ && get_func. arguments . len ( ) == 1
658+ && get_func. params . len ( ) == 1
659+ && get_func. params [ 0 ] == Scalar :: Number ( NumberScalar :: Int64 ( 1 ) )
660+ {
661+ if let ScalarExpr :: BoundColumnRef ( bound_column) = & get_func. arguments [ 0 ] {
662+ srf_column_index = Some ( bound_column. column . index ) ;
663+ }
664+ }
665+ }
666+ }
667+ if srf_column_index. is_none ( ) {
668+ return Ok ( None ) ;
669+ }
670+ let srf_column_index = srf_column_index. unwrap ( ) ;
671+
672+ let project_set_expr = child. child ( 0 ) ?;
673+ if let RelOperator :: ProjectSet ( project_set) = project_set_expr. plan ( ) {
674+ if project_set. srfs . len ( ) != 1
675+ || project_set. srfs [ 0 ] . index != srf_column_index
676+ || subquery. compare_op != Some ( ComparisonOp :: Equal )
677+ || subquery. typ != SubqueryType :: Any
678+ {
679+ return Ok ( None ) ;
680+ }
681+ let Ok ( srf) = FunctionCall :: try_from ( project_set. srfs [ 0 ] . scalar . clone ( ) ) else {
682+ return Ok ( None ) ;
683+ } ;
684+ if srf. arguments . len ( ) != 1 {
685+ return Ok ( None ) ;
686+ }
687+ let Ok ( const_scalar) = ConstantExpr :: try_from ( srf. arguments [ 0 ] . clone ( ) ) else {
688+ return Ok ( None ) ;
689+ } ;
690+ let Some ( child_expr) = & subquery. child_expr else {
691+ return Ok ( None ) ;
692+ } ;
693+ match & const_scalar. value {
694+ Scalar :: EmptyArray => {
695+ return Ok ( Some ( ScalarExpr :: ConstantExpr ( ConstantExpr {
696+ span : subquery. span ,
697+ value : Scalar :: Null ,
698+ } ) ) ) ;
699+ }
700+ Scalar :: Array ( array_column) => {
701+ let mut values = BTreeSet :: new ( ) ;
702+ for scalar in array_column. iter ( ) {
703+ // Ignoring NULL values in equivalent filter
704+ if scalar == ScalarRef :: Null {
705+ continue ;
706+ }
707+ values. insert ( scalar. to_owned ( ) ) ;
708+ }
709+ // If there are no equivalent values, the filter condition does not match,
710+ // return a NULL value.
711+ if values. is_empty ( ) {
712+ return Ok ( Some ( ScalarExpr :: ConstantExpr ( ConstantExpr {
713+ span : subquery. span ,
714+ value : Scalar :: Null ,
715+ } ) ) ) ;
716+ }
717+ // If the number of values more than `inlist_to_join_threshold`, need convert to join.
718+ if values. len ( ) >= self . ctx . get_settings ( ) . get_inlist_to_join_threshold ( ) ? {
719+ return Ok ( None ) ;
720+ }
721+ // If the number of values more than `max_inlist_to_or`, use contains function instead of or.
722+ if values. len ( ) > self . ctx . get_settings ( ) . get_max_inlist_to_or ( ) ? as usize {
723+ let value_type = values. first ( ) . unwrap ( ) . as_ref ( ) . infer_data_type ( ) ;
724+ let mut builder =
725+ ColumnBuilder :: with_capacity ( & value_type, values. len ( ) ) ;
726+ for value in values. into_iter ( ) {
727+ builder. push ( value. as_ref ( ) ) ;
728+ }
729+ let array_value = ScalarExpr :: ConstantExpr ( ConstantExpr {
730+ span : subquery. span ,
731+ value : Scalar :: Array ( builder. build ( ) ) ,
732+ } ) ;
733+
734+ let expr_type = child_expr. data_type ( ) ?;
735+ let common_type = common_super_type (
736+ value_type. clone ( ) ,
737+ expr_type. clone ( ) ,
738+ & BUILTIN_FUNCTIONS . default_cast_rules ,
739+ )
740+ . ok_or_else ( || {
741+ ErrorCode :: IllegalDataType ( format ! (
742+ "Cannot find common type for inlist subquery value {:?} and expr {:?}" ,
743+ & array_value, & child_expr
744+ ) )
745+ } ) ?;
746+
747+ let mut arguments = Vec :: with_capacity ( 2 ) ;
748+ if value_type != common_type {
749+ arguments. push ( ScalarExpr :: CastExpr ( CastExpr {
750+ span : subquery. span ,
751+ is_try : false ,
752+ argument : Box :: new ( array_value) ,
753+ target_type : Box :: new ( DataType :: Array ( Box :: new (
754+ common_type. clone ( ) ,
755+ ) ) ) ,
756+ } ) ) ;
757+ } else {
758+ arguments. push ( array_value) ;
759+ }
760+ if expr_type != common_type {
761+ arguments. push ( ScalarExpr :: CastExpr ( CastExpr {
762+ span : subquery. span ,
763+ is_try : false ,
764+ argument : Box :: new ( * child_expr. clone ( ) ) ,
765+ target_type : Box :: new ( common_type. clone ( ) ) ,
766+ } ) ) ;
767+ } else {
768+ arguments. push ( * child_expr. clone ( ) ) ;
769+ }
770+ let func = ScalarExpr :: FunctionCall ( FunctionCall {
771+ span : subquery. span ,
772+ func_name : "contains" . to_string ( ) ,
773+ params : vec ! [ ] ,
774+ arguments,
775+ } ) ;
776+ return Ok ( Some ( func) ) ;
777+ }
778+
779+ let mut funcs = Vec :: with_capacity ( values. len ( ) ) ;
780+ for value in values. into_iter ( ) {
781+ let scalar_value = ScalarExpr :: ConstantExpr ( ConstantExpr {
782+ span : subquery. span ,
783+ value,
784+ } ) ;
785+ let func = ScalarExpr :: FunctionCall ( FunctionCall {
786+ span : subquery. span ,
787+ func_name : "eq" . to_string ( ) ,
788+ params : vec ! [ ] ,
789+ arguments : vec ! [ * child_expr. clone( ) , scalar_value] ,
790+ } ) ;
791+ funcs. push ( func) ;
792+ }
793+ let or_func = funcs
794+ . into_iter ( )
795+ . fold ( None , |mut acc, func| {
796+ match acc. as_mut ( ) {
797+ None => acc = Some ( func) ,
798+ Some ( acc) => {
799+ * acc = ScalarExpr :: FunctionCall ( FunctionCall {
800+ span : subquery. span ,
801+ func_name : "or" . to_string ( ) ,
802+ params : vec ! [ ] ,
803+ arguments : vec ! [ acc. clone( ) , func] ,
804+ } ) ;
805+ }
806+ }
807+ acc
808+ } )
809+ . unwrap ( ) ;
810+ return Ok ( Some ( or_func) ) ;
811+ }
812+ _ => { }
813+ }
814+ }
815+ }
816+
817+ Ok ( None )
818+ }
520819}
0 commit comments