From 271d288c9f1dd3f3e7915716e57d51d36598fe46 Mon Sep 17 00:00:00 2001 From: Benjamin Owad Date: Tue, 10 Dec 2024 17:23:09 -0500 Subject: [PATCH] Subquery Unnesting: Exists + In Support (#259) - Support uncorrelated/correlated IN (ANY) and EXISTS clauses (This should effectively make subquery unnesting feature complete!) - TPC-H Q4, Q16, Q20, and Q22 working - The remaining queries mostly seem to have plans that are too bad (Q18 has some other issue, [fix in the pipeline](https://github.com/cmu-db/optd/pull/261) but it still seems like the plan is too slow). --- optd-datafusion-bridge/src/from_optd.rs | 25 ++- optd-datafusion-bridge/src/into_optd.rs | 89 ++++++-- .../src/adv_stats/filter.rs | 10 +- .../src/adv_stats/join.rs | 8 +- optd-datafusion-repr/src/explain.rs | 2 +- optd-datafusion-repr/src/memo_ext.rs | 4 +- optd-datafusion-repr/src/plan_nodes.rs | 8 +- optd-datafusion-repr/src/plan_nodes/join.rs | 1 + optd-datafusion-repr/src/plan_nodes/macros.rs | 8 +- .../src/plan_nodes/predicates/func_pred.rs | 3 + .../src/plan_nodes/subquery.rs | 26 ++- .../src/properties/column_ref.rs | 17 +- optd-datafusion-repr/src/properties/schema.rs | 26 ++- .../src/rules/filter_pushdown.rs | 2 +- optd-datafusion-repr/src/rules/macros.rs | 41 ++-- .../project_filter_transpose.rs | 5 +- .../src/rules/subquery/depjoin_pushdown.rs | 186 ++++++++++++----- optd-sqllogictest/slt/tpch-q16.slt | 68 ++++++ optd-sqllogictest/slt/tpch-q16.slt.disabled | 68 ------ .../{tpch-q20.slt.disabled => tpch-q20.slt} | 0 .../{tpch-q22.slt.disabled => tpch-q22.slt} | 0 .../slt/{tpch-q4.slt.disabled => tpch-q4.slt} | 0 optd-sqllogictest/slt/unnest-exists-2.slt | 27 +++ optd-sqllogictest/slt/unnest-exists-uncor.slt | 17 ++ optd-sqllogictest/slt/unnest-exists.slt | 18 ++ optd-sqllogictest/slt/unnest-in-exists.slt | 20 ++ optd-sqllogictest/slt/unnest-in-uncor.slt | 13 ++ optd-sqllogictest/slt/unnest-in.slt | 14 ++ optd-sqllogictest/slt/unnest-not-in-uncor.slt | 61 ++++++ .../subqueries/subquery_unnesting.planner.sql | 8 +- .../tests/tpch/q11.planner.sql | 2 +- .../tests/tpch/q15.planner.sql | 2 +- .../tests/tpch/q16.planner.sql | 106 ++++++++++ optd-sqlplannertest/tests/tpch/q16.yml | 35 ++++ .../tests/tpch/q17.planner.sql | 2 +- optd-sqlplannertest/tests/tpch/q2.planner.sql | 2 +- .../tests/tpch/q20.planner.sql | 194 ++++++++++++++++++ optd-sqlplannertest/tests/tpch/q20.yml | 42 ++++ .../tests/tpch/q22.planner.sql | 170 +++++++++++++++ optd-sqlplannertest/tests/tpch/q22.yml | 42 ++++ optd-sqlplannertest/tests/tpch/q4.planner.sql | 91 ++++++++ optd-sqlplannertest/tests/tpch/q4.yml | 26 +++ 42 files changed, 1306 insertions(+), 183 deletions(-) create mode 100644 optd-sqllogictest/slt/tpch-q16.slt delete mode 100644 optd-sqllogictest/slt/tpch-q16.slt.disabled rename optd-sqllogictest/slt/{tpch-q20.slt.disabled => tpch-q20.slt} (100%) rename optd-sqllogictest/slt/{tpch-q22.slt.disabled => tpch-q22.slt} (100%) rename optd-sqllogictest/slt/{tpch-q4.slt.disabled => tpch-q4.slt} (100%) create mode 100644 optd-sqllogictest/slt/unnest-exists-2.slt create mode 100644 optd-sqllogictest/slt/unnest-exists-uncor.slt create mode 100644 optd-sqllogictest/slt/unnest-exists.slt create mode 100644 optd-sqllogictest/slt/unnest-in-exists.slt create mode 100644 optd-sqllogictest/slt/unnest-in-uncor.slt create mode 100644 optd-sqllogictest/slt/unnest-in.slt create mode 100644 optd-sqllogictest/slt/unnest-not-in-uncor.slt create mode 100644 optd-sqlplannertest/tests/tpch/q16.planner.sql create mode 100644 optd-sqlplannertest/tests/tpch/q16.yml create mode 100644 optd-sqlplannertest/tests/tpch/q20.planner.sql create mode 100644 optd-sqlplannertest/tests/tpch/q20.yml create mode 100644 optd-sqlplannertest/tests/tpch/q22.planner.sql create mode 100644 optd-sqlplannertest/tests/tpch/q22.yml create mode 100644 optd-sqlplannertest/tests/tpch/q4.planner.sql create mode 100644 optd-sqlplannertest/tests/tpch/q4.yml diff --git a/optd-datafusion-bridge/src/from_optd.rs b/optd-datafusion-bridge/src/from_optd.rs index e0b0906d..6c4b47dd 100644 --- a/optd-datafusion-bridge/src/from_optd.rs +++ b/optd-datafusion-bridge/src/from_optd.rs @@ -195,6 +195,18 @@ impl OptdPlanContext<'_> { Some(else_expr), )?) } + FuncType::Not => { + let expr = args[0].clone(); + Ok(physical_expr::expressions::not(expr)?) + } + FuncType::IsNull => { + let expr = args[0].clone(); + Ok(physical_expr::expressions::is_null(expr)?) + } + FuncType::IsNotNull => { + let expr = args[0].clone(); + Ok(physical_expr::expressions::is_not_null(expr)?) + } _ => unreachable!(), } } @@ -464,14 +476,21 @@ impl OptdPlanContext<'_> { let physical_expr = self.conv_from_optd_expr(node.cond(), &Arc::new(filter_schema.clone()))?; - if node.join_type() == JoinType::Cross { + if *node.join_type() == JoinType::Cross { return Ok(Arc::new(CrossJoinExec::new(left_exec, right_exec)) as Arc); } let join_type = match node.join_type() { - JoinType::Inner => datafusion::logical_expr::JoinType::Inner, - JoinType::LeftOuter => datafusion::logical_expr::JoinType::Left, + JoinType::Inner => datafusion_expr::JoinType::Inner, + JoinType::FullOuter => datafusion_expr::JoinType::Full, + JoinType::LeftOuter => datafusion_expr::JoinType::Left, + JoinType::RightOuter => datafusion_expr::JoinType::Right, + JoinType::LeftSemi => datafusion_expr::JoinType::LeftSemi, + JoinType::RightSemi => datafusion_expr::JoinType::RightSemi, + JoinType::LeftAnti => datafusion_expr::JoinType::LeftAnti, + JoinType::RightAnti => datafusion_expr::JoinType::RightAnti, + JoinType::LeftMark => datafusion_expr::JoinType::LeftMark, _ => unimplemented!(), }; diff --git a/optd-datafusion-bridge/src/into_optd.rs b/optd-datafusion-bridge/src/into_optd.rs index b7cc9a51..0b8537a1 100644 --- a/optd-datafusion-bridge/src/into_optd.rs +++ b/optd-datafusion-bridge/src/into_optd.rs @@ -3,6 +3,8 @@ // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at // https://opensource.org/licenses/MIT. +use std::sync::Arc; + use anyhow::{bail, Result}; use datafusion::common::DFSchema; use datafusion::logical_expr::{self, logical_plan, LogicalPlan, Operator}; @@ -15,7 +17,7 @@ use optd_datafusion_repr::plan_nodes::{ ConstantPred, DfReprPlanNode, DfReprPredNode, ExternColumnRefPred, FuncPred, FuncType, InListPred, JoinType, LikePred, ListPred, LogOpPred, LogOpType, LogicalAgg, LogicalEmptyRelation, LogicalFilter, LogicalJoin, LogicalLimit, LogicalProjection, LogicalScan, - LogicalSort, RawDependentJoin, SortOrderPred, SortOrderType, + LogicalSort, RawDependentJoin, SortOrderPred, SortOrderType, SubqueryType, }; use optd_datafusion_repr::properties::schema::Schema as OptdSchema; @@ -24,15 +26,18 @@ use crate::OptdPlanContext; impl OptdPlanContext<'_> { fn subqueries_to_dependent_joins( &mut self, - subqueries: &[&Subquery], + subqueries: Vec<(&Subquery, SubqueryType)>, input: ArcDfPlanNode, input_schema: &DFSchema, ) -> Result { let mut node = input; - for Subquery { - subquery, - outer_ref_columns, - } in subqueries.iter() + for ( + Subquery { + subquery, + outer_ref_columns, + }, + sq_typ, + ) in subqueries.into_iter() { let subquery_root = self.conv_into_optd_plan_node(subquery, Some(input_schema))?; let dep_join = RawDependentJoin::new( @@ -56,7 +61,7 @@ impl OptdPlanContext<'_> { }) .collect(), ), - JoinType::Cross, + sq_typ, ); node = dep_join.into_plan_node(); } @@ -92,7 +97,7 @@ impl OptdPlanContext<'_> { expr: &'a logical_expr::Expr, context: &DFSchema, dep_ctx: Option<&DFSchema>, - subqueries: &mut Vec<&'a Subquery>, + subqueries: &mut Vec<(&'a Subquery, SubqueryType)>, ) -> Result { use logical_expr::Expr; match expr { @@ -257,6 +262,18 @@ impl OptdPlanContext<'_> { ) .into_pred_node()) } + Expr::Not(x) => { + let expr = self.conv_into_optd_expr(x.as_ref(), context, dep_ctx, subqueries)?; + Ok(FuncPred::new(FuncType::Not, ListPred::new(vec![expr])).into_pred_node()) + } + Expr::IsNull(x) => { + let expr = self.conv_into_optd_expr(x.as_ref(), context, dep_ctx, subqueries)?; + Ok(FuncPred::new(FuncType::IsNull, ListPred::new(vec![expr])).into_pred_node()) + } + Expr::IsNotNull(x) => { + let expr = self.conv_into_optd_expr(x.as_ref(), context, dep_ctx, subqueries)?; + Ok(FuncPred::new(FuncType::IsNotNull, ListPred::new(vec![expr])).into_pred_node()) + } Expr::Between(x) => { let expr = self.conv_into_optd_expr(x.expr.as_ref(), context, dep_ctx, subqueries)?; @@ -288,9 +305,53 @@ impl OptdPlanContext<'_> { // This relies on a left-deep tree of dependent joins being // generated below this node, in response to all pushed subqueries. let new_column_ref_idx = context.fields().len() + subqueries.len(); - subqueries.push(sq); + subqueries.push((sq, SubqueryType::Scalar)); Ok(ColumnRefPred::new(new_column_ref_idx).into_pred_node()) } + Expr::Exists(ex) => { + let sq = &ex.subquery; + let negated = ex.negated; + + let new_column_ref_idx = context.fields().len() + subqueries.len(); + subqueries.push((sq, SubqueryType::Exists)); + if negated { + Ok(FuncPred::new( + FuncType::Not, + ListPred::new( + vec![ColumnRefPred::new(new_column_ref_idx).into_pred_node()], + ), + ) + .into_pred_node()) + } else { + Ok(ColumnRefPred::new(new_column_ref_idx).into_pred_node()) + } + } + Expr::InSubquery(insq) => { + let sq = &insq.subquery; + let expr = + self.conv_into_optd_expr(insq.expr.as_ref(), context, dep_ctx, subqueries)?; + let negated = insq.negated; + + let new_column_ref_idx = context.fields().len() + subqueries.len(); + subqueries.push(( + sq, + SubqueryType::Any { + pred: Arc::unwrap_or_clone(expr), + op: BinOpType::Eq, + }, + )); + if negated { + Ok(FuncPred::new( + FuncType::Not, + ListPred::new( + vec![ColumnRefPred::new(new_column_ref_idx).into_pred_node()], + ), + ) + .into_pred_node()) + } else { + Ok(ColumnRefPred::new(new_column_ref_idx).into_pred_node()) + } + } _ => bail!("Unsupported expression: {:?}", expr), } } @@ -308,7 +369,7 @@ impl OptdPlanContext<'_> { dep_ctx, &mut subqueries, )?; - let input = self.subqueries_to_dependent_joins(&subqueries, input, node.input.schema())?; + let input = self.subqueries_to_dependent_joins(subqueries, input, node.input.schema())?; Ok(LogicalProjection::new(input, expr_list)) } @@ -326,7 +387,7 @@ impl OptdPlanContext<'_> { &mut subqueries, )?; - let input = self.subqueries_to_dependent_joins(&subqueries, input, node.input.schema())?; + let input = self.subqueries_to_dependent_joins(subqueries, input, node.input.schema())?; Ok(LogicalFilter::new(input, expr)) } @@ -336,7 +397,7 @@ impl OptdPlanContext<'_> { exprs: &'a [logical_expr::Expr], context: &DFSchema, dep_ctx: Option<&DFSchema>, - subqueries: &mut Vec<&'a Subquery>, + subqueries: &mut Vec<(&'a Subquery, SubqueryType)>, ) -> Result { let exprs = exprs .iter() @@ -350,7 +411,7 @@ impl OptdPlanContext<'_> { exprs: &'a [logical_expr::SortExpr], context: &DFSchema, dep_ctx: Option<&DFSchema>, - subqueries: &mut Vec<&'a Subquery>, + subqueries: &mut Vec<(&'a Subquery, SubqueryType)>, ) -> Result { let exprs = exprs .iter() @@ -453,7 +514,7 @@ impl OptdPlanContext<'_> { DFJoinType::RightAnti => JoinType::RightAnti, DFJoinType::LeftSemi => JoinType::LeftSemi, DFJoinType::RightSemi => JoinType::RightSemi, - _ => unimplemented!(), + DFJoinType::LeftMark => JoinType::LeftMark, }; let mut log_ops = Vec::with_capacity(node.on.len()); let mut subqueries = vec![]; diff --git a/optd-datafusion-repr-adv-cost/src/adv_stats/filter.rs b/optd-datafusion-repr-adv-cost/src/adv_stats/filter.rs index 5850d59c..a3d2ab6f 100644 --- a/optd-datafusion-repr-adv-cost/src/adv_stats/filter.rs +++ b/optd-datafusion-repr-adv-cost/src/adv_stats/filter.rs @@ -66,7 +66,10 @@ impl< ) -> f64 { match &expr_tree.typ { DfPredType::Constant(_) => Self::get_constant_selectivity(expr_tree), - DfPredType::ColumnRef => unimplemented!("check bool type or else panic"), + DfPredType::ColumnRef => { + // TODO: Check that field is of bool type + 0.5 // TODO: placeholder---how can we get the selectivity? + } DfPredType::UnOp(un_op_typ) => { assert!(expr_tree.children.len() == 1); let child = expr_tree.child(0); @@ -104,7 +107,10 @@ impl< DfPredType::LogOp(log_op_typ) => { self.get_log_op_selectivity(*log_op_typ, &expr_tree.children, schema, column_refs) } - DfPredType::Func(_) => unimplemented!("check bool type or else panic"), + DfPredType::Func(_) => { + // TODO: Check that field is of bool type + 0.5 // TODO: placeholder---how can we get the selectivity? + } DfPredType::SortOrder(_) => { panic!("the selectivity of sort order expressions is undefined") } diff --git a/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs b/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs index 5aa8fb6c..0f0053e0 100644 --- a/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs +++ b/optd-datafusion-repr-adv-cost/src/adv_stats/join.rs @@ -198,6 +198,8 @@ impl< ); join_filter_selectivity } + // TODO: Does this make sense? + JoinType::LeftMark => f64::max(inner_join_selectivity, 1.0 / right_row_cnt), _ => unimplemented!("join_typ={} is not implemented", join_typ), } } @@ -359,7 +361,11 @@ impl< &self, base_col_refs: HashSet, ) -> f64 { - assert!(base_col_refs.len() > 1); + // Hack to avoid issue w/ self joins...unsure if this is a good idea + if base_col_refs.len() <= 1 { + return 1.0; + } + let num_base_col_refs = base_col_refs.len(); base_col_refs .into_iter() diff --git a/optd-datafusion-repr/src/explain.rs b/optd-datafusion-repr/src/explain.rs index d7eaa686..068353b3 100644 --- a/optd-datafusion-repr/src/explain.rs +++ b/optd-datafusion-repr/src/explain.rs @@ -75,7 +75,7 @@ pub fn explain_plan_node( DfNodeType::RawDepJoin(_) => RawDependentJoin::from_plan_node(node) .unwrap() .explain(meta_map), - DfNodeType::DepJoin(_) => DependentJoin::from_plan_node(node) + DfNodeType::DepJoin => DependentJoin::from_plan_node(node) .unwrap() .explain(meta_map), DfNodeType::Scan => LogicalScan::from_plan_node(node).unwrap().explain(meta_map), diff --git a/optd-datafusion-repr/src/memo_ext.rs b/optd-datafusion-repr/src/memo_ext.rs index 642dd711..3c0f7bb5 100644 --- a/optd-datafusion-repr/src/memo_ext.rs +++ b/optd-datafusion-repr/src/memo_ext.rs @@ -41,7 +41,7 @@ fn enumerate_join_order_expr_inner + ?Sized>( visited: &mut HashMap>, ) -> Vec { let expr = memo.get_expr_memoed(current); - match expr.typ { + match &expr.typ { DfNodeType::Scan => { let table = memo.get_pred(expr.predicates[0]); // TODO: use unified repr let table = ConstantPred::from_pred_node(table) @@ -50,7 +50,7 @@ fn enumerate_join_order_expr_inner + ?Sized>( .as_str(); vec![LogicalJoinOrder::Table(table)] } - DfNodeType::Join(_) | DfNodeType::DepJoin(_) | DfNodeType::RawDepJoin(_) => { + DfNodeType::Join(_) | DfNodeType::DepJoin | DfNodeType::RawDepJoin(_) => { // Assume child 0 == left, child 1 == right let left = expr.children[0]; let right = expr.children[1]; diff --git a/optd-datafusion-repr/src/plan_nodes.rs b/optd-datafusion-repr/src/plan_nodes.rs index 72f1a766..e986f313 100644 --- a/optd-datafusion-repr/src/plan_nodes.rs +++ b/optd-datafusion-repr/src/plan_nodes.rs @@ -39,7 +39,7 @@ use pretty_xmlish::{Pretty, PrettyConfig}; pub use projection::{LogicalProjection, PhysicalProjection}; pub use scan::{LogicalScan, PhysicalScan}; pub use sort::{LogicalSort, PhysicalSort}; -pub use subquery::{DependentJoin, RawDependentJoin}; // Add missing import +pub use subquery::{DependentJoin, RawDependentJoin, SubqueryType}; use crate::explain::{explain_plan_node, explain_pred_node}; @@ -69,7 +69,7 @@ impl std::fmt::Display for DfPredType { /// DfNodeType FAQ: /// - The define_plan_node!() macro defines what the children of each join node are -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum DfNodeType { // Developers: update `is_logical` function after adding new plan nodes // Plan nodes @@ -77,8 +77,8 @@ pub enum DfNodeType { Filter, Scan, Join(JoinType), - RawDepJoin(JoinType), - DepJoin(JoinType), + RawDepJoin(SubqueryType), + DepJoin, Sort, Agg, EmptyRelation, diff --git a/optd-datafusion-repr/src/plan_nodes/join.rs b/optd-datafusion-repr/src/plan_nodes/join.rs index 84dbc033..d506449f 100644 --- a/optd-datafusion-repr/src/plan_nodes/join.rs +++ b/optd-datafusion-repr/src/plan_nodes/join.rs @@ -20,6 +20,7 @@ pub enum JoinType { RightSemi, LeftAnti, RightAnti, + LeftMark, } impl Display for JoinType { diff --git a/optd-datafusion-repr/src/plan_nodes/macros.rs b/optd-datafusion-repr/src/plan_nodes/macros.rs index a674ec1e..232e60d7 100644 --- a/optd-datafusion-repr/src/plan_nodes/macros.rs +++ b/optd-datafusion-repr/src/plan_nodes/macros.rs @@ -18,7 +18,7 @@ macro_rules! define_plan_node { fn from_plan_node(plan_node: ArcDfPlanNode) -> Option { #[allow(unused_variables)] - if let DfNodeType :: $variant $( ($inner_name) )? = plan_node.typ { + if let DfNodeType :: $variant $( ($inner_name) )? = &plan_node.typ { Some(Self(plan_node)) } else { None @@ -105,9 +105,9 @@ macro_rules! define_plan_node { )* $( - pub fn $inner_name(&self) -> JoinType { - if let DfNodeType :: $variant ($inner_name) = self.0 .typ { - return $inner_name; + pub fn $inner_name(&self) -> &$inner_typ { + if let DfNodeType :: $variant ($inner_name) = &self.0.typ { + return &$inner_name; } else { unreachable!(); } diff --git a/optd-datafusion-repr/src/plan_nodes/predicates/func_pred.rs b/optd-datafusion-repr/src/plan_nodes/predicates/func_pred.rs index 72f2c126..ccda1205 100644 --- a/optd-datafusion-repr/src/plan_nodes/predicates/func_pred.rs +++ b/optd-datafusion-repr/src/plan_nodes/predicates/func_pred.rs @@ -15,6 +15,9 @@ pub enum FuncType { Scalar(String, DataType), Agg(String), Case, + Not, + IsNull, + IsNotNull, } impl std::fmt::Display for FuncType { diff --git a/optd-datafusion-repr/src/plan_nodes/subquery.rs b/optd-datafusion-repr/src/plan_nodes/subquery.rs index abd400bc..a0c8d6c6 100644 --- a/optd-datafusion-repr/src/plan_nodes/subquery.rs +++ b/optd-datafusion-repr/src/plan_nodes/subquery.rs @@ -3,11 +3,31 @@ // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at // https://opensource.org/licenses/MIT. +use core::fmt; +use std::fmt::Display; + use super::macros::define_plan_node; use super::{ - ArcDfPlanNode, ArcDfPredNode, DfNodeType, DfPlanNode, DfReprPlanNode, JoinType, ListPred, + ArcDfPlanNode, ArcDfPredNode, BinOpType, DfNodeType, DfPlanNode, DfPredNode, DfReprPlanNode, + ListPred, }; +/// These are the only three fundamental types of subqueries. +/// Refer to the Unnesting Arbitrary Queries talk by Mark Raasveldt for +/// info on how to translate other subquery types to these three. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum SubqueryType { + Scalar, + Exists, + Any { pred: DfPredNode, op: BinOpType }, +} + +impl Display for SubqueryType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self) + } +} + #[derive(Clone, Debug)] pub struct RawDependentJoin(pub ArcDfPlanNode); @@ -19,7 +39,7 @@ define_plan_node!( ], [ { 0, cond: ArcDfPredNode }, { 1, extern_cols: ListPred } - ], { join_type: JoinType } + ], { sq_type: SubqueryType } ); #[derive(Clone, Debug)] @@ -33,5 +53,5 @@ define_plan_node!( ], [ { 0, cond: ArcDfPredNode }, { 1, extern_cols: ListPred } - ], { join_type: JoinType } + ] ); diff --git a/optd-datafusion-repr/src/properties/column_ref.rs b/optd-datafusion-repr/src/properties/column_ref.rs index 22fb602b..7846f2b7 100644 --- a/optd-datafusion-repr/src/properties/column_ref.rs +++ b/optd-datafusion-repr/src/properties/column_ref.rs @@ -16,7 +16,7 @@ use super::DEFAULT_NAME; use crate::{ plan_nodes::{ decode_empty_relation_schema, ArcDfPredNode, BinOpType, ConstantPred, DfNodeType, - DfPredType, DfReprPredNode, JoinType, LogOpType, + DfPredType, DfReprPredNode, JoinType, LogOpType, SubqueryType, }, utils::DisjointSets, }; @@ -451,9 +451,7 @@ impl LogicalPropertyBuilder for ColumnRefPropertyBuilder { GroupColumnRefs::new(column_refs, child.output_correlation.clone()) } // Should account for all physical join types. - DfNodeType::Join(join_type) - | DfNodeType::RawDepJoin(join_type) - | DfNodeType::DepJoin(join_type) => { + DfNodeType::Join(join_type) => { // Concatenate left and right children column refs. let column_refs = Self::concat_children_col_refs(&children[0..2]); // Merge the equal columns of two children as input correlation. @@ -502,6 +500,17 @@ impl LogicalPropertyBuilder for ColumnRefPropertyBuilder { }; GroupColumnRefs::new(column_refs, output_correlation) } + DfNodeType::RawDepJoin(sq_type) => match sq_type { + SubqueryType::Scalar => { + self.derive(DfNodeType::Join(JoinType::Inner), predicates, children) + } + SubqueryType::Exists | SubqueryType::Any { pred: _, op: _ } => { + self.derive(DfNodeType::Join(JoinType::LeftMark), predicates, children) + } + }, + DfNodeType::DepJoin => { + self.derive(DfNodeType::Join(JoinType::Inner), predicates, children) + } DfNodeType::Agg => { let child = children[0]; // Group by columns first. diff --git a/optd-datafusion-repr/src/properties/schema.rs b/optd-datafusion-repr/src/properties/schema.rs index 932da32f..de9d2261 100644 --- a/optd-datafusion-repr/src/properties/schema.rs +++ b/optd-datafusion-repr/src/properties/schema.rs @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize}; use super::DEFAULT_NAME; use crate::plan_nodes::{ decode_empty_relation_schema, ArcDfPredNode, ConstantPred, ConstantType, DfNodeType, - DfPredType, DfReprPredNode, FuncType, + DfPredType, DfReprPredNode, FuncType, JoinType, SubqueryType, }; #[derive(Clone, Debug, Serialize, Deserialize)] @@ -177,9 +177,7 @@ impl LogicalPropertyBuilder for SchemaPropertyBuilder { } DfNodeType::Projection => Self::derive_for_predicate(predicates[0].clone()), DfNodeType::Filter | DfNodeType::Limit | DfNodeType::Sort => children[0].clone(), - DfNodeType::RawDepJoin(join_type) - | DfNodeType::Join(join_type) - | DfNodeType::DepJoin(join_type) => { + DfNodeType::Join(join_type) => { use crate::plan_nodes::JoinType::*; match join_type { Inner | LeftOuter | RightOuter | FullOuter | Cross => { @@ -190,8 +188,28 @@ impl LogicalPropertyBuilder for SchemaPropertyBuilder { } LeftSemi | LeftAnti => children[0].clone(), RightSemi | RightAnti => children[1].clone(), + LeftMark => { + let mut schema = children[0].clone(); + schema.fields.push(Field { + name: "exists".to_string(), + typ: ConstantType::Bool, + nullable: false, + }); + schema + } } } + DfNodeType::RawDepJoin(sq_type) => match sq_type { + SubqueryType::Scalar => { + self.derive(DfNodeType::Join(JoinType::Inner), predicates, children) + } + SubqueryType::Exists | SubqueryType::Any { pred: _, op: _ } => { + self.derive(DfNodeType::Join(JoinType::LeftMark), predicates, children) + } + }, + DfNodeType::DepJoin => { + self.derive(DfNodeType::Join(JoinType::Inner), predicates, children) + } DfNodeType::EmptyRelation => decode_empty_relation_schema(&predicates[1]), x => unimplemented!("cannot derive schema property for {}", x), } diff --git a/optd-datafusion-repr/src/rules/filter_pushdown.rs b/optd-datafusion-repr/src/rules/filter_pushdown.rs index c576b844..439bd28a 100644 --- a/optd-datafusion-repr/src/rules/filter_pushdown.rs +++ b/optd-datafusion-repr/src/rules/filter_pushdown.rs @@ -270,7 +270,7 @@ fn filter_join_transpose( } _ => { // We don't support modifying the join condition for other join types yet - LogicalJoin::new_unchecked(new_left, new_right, join_cond, join_typ) + LogicalJoin::new_unchecked(new_left, new_right, join_cond, *join_typ) } }; diff --git a/optd-datafusion-repr/src/rules/macros.rs b/optd-datafusion-repr/src/rules/macros.rs index 47703e66..420e2963 100644 --- a/optd-datafusion-repr/src/rules/macros.rs +++ b/optd-datafusion-repr/src/rules/macros.rs @@ -4,21 +4,30 @@ // https://opensource.org/licenses/MIT. macro_rules! define_matcher { - ( ( $typ:expr $(, $children:tt )* ) ) => { - RuleMatcher::MatchNode { - typ: $typ, - children: vec![ - $( crate::rules::macros::define_matcher!($children) ),* - ], + ( $discriminant:expr, ( $typ:expr $(, $children:tt )* ) ) => { + if $discriminant { + RuleMatcher::MatchDiscriminant { + typ_discriminant: std::mem::discriminant(&$typ), + children: vec![ + $( crate::rules::macros::define_matcher!($discriminant, $children) ),* + ], + } + } else { + RuleMatcher::MatchNode { + typ: $typ, + children: vec![ + $( crate::rules::macros::define_matcher!($discriminant, $children) ),* + ], + } } }; - ( $pick_one:tt ) => { + ( $discriminant:expr, $pick_one:tt ) => { RuleMatcher::Any }; } macro_rules! define_rule_inner { - ($rule_type:expr, $name:ident, $apply:ident, $($matcher:tt)+) => { + ($rule_type:expr, $discriminant:expr, $name:ident, $apply:ident, $($matcher:tt)+) => { pub struct $name { matcher: RuleMatcher, } @@ -27,7 +36,7 @@ macro_rules! define_rule_inner { pub fn new() -> Self { #[allow(unused_imports)] use DfNodeType::*; - let matcher = crate::rules::macros::define_matcher!($($matcher)+); + let matcher = crate::rules::macros::define_matcher! { $discriminant, $($matcher)+ }; Self { matcher } } } @@ -60,14 +69,22 @@ macro_rules! define_rule_inner { macro_rules! define_rule { ($name:ident, $apply:ident, $($matcher:tt)+) => { - crate::rules::macros::define_rule_inner! { false, $name, $apply, $($matcher)+ } + crate::rules::macros::define_rule_inner! { false, false, $name, $apply, $($matcher)+ } + }; +} + +macro_rules! define_rule_discriminant { + ($name:ident, $apply:ident, $($matcher:tt)+) => { + crate::rules::macros::define_rule_inner! { false, true, $name, $apply, $($matcher)+ } }; } macro_rules! define_impl_rule { ($name:ident, $apply:ident, $($matcher:tt)+) => { - crate::rules::macros::define_rule_inner! { true, $name, $apply, $($matcher)+ } + crate::rules::macros::define_rule_inner! { true, false, $name, $apply, $($matcher)+ } }; } -pub(crate) use {define_impl_rule, define_matcher, define_rule, define_rule_inner}; +pub(crate) use { + define_impl_rule, define_matcher, define_rule, define_rule_discriminant, define_rule_inner, +}; diff --git a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs index a31da316..9485dde3 100644 --- a/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs +++ b/optd-datafusion-repr/src/rules/project_transpose/project_filter_transpose.rs @@ -100,7 +100,10 @@ fn apply_filter_project_transpose( let exprs = proj.exprs(); let cond = filter.cond(); - let proj_col_map = ProjectionMapping::build(&exprs).unwrap(); + let Some(proj_col_map) = ProjectionMapping::build(&exprs) else { + return vec![]; + }; + let rewritten_cond = proj_col_map.rewrite_filter_cond(cond, false); let new_filter_node = LogicalFilter::new_unchecked(child, rewritten_cond); diff --git a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs index e1dcb3ea..67a7164f 100644 --- a/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs +++ b/optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs @@ -11,9 +11,9 @@ use crate::plan_nodes::{ ArcDfPlanNode, ArcDfPredNode, BinOpPred, BinOpType, ColumnRefPred, ConstantPred, DependentJoin, DfNodeType, DfPredType, DfReprPlanNode, DfReprPredNode, ExternColumnRefPred, FuncPred, FuncType, JoinType, ListPred, LogOpPred, LogOpType, LogicalAgg, LogicalFilter, LogicalJoin, - LogicalProjection, PredExt, RawDependentJoin, + LogicalLimit, LogicalProjection, PredExt, RawDependentJoin, SubqueryType, }; -use crate::rules::macros::define_rule; +use crate::rules::macros::{define_rule, define_rule_discriminant}; use crate::OptimizerExt; /// Like rewrite_column_refs, except it translates ExternColumnRefs into ColumnRefs @@ -47,10 +47,10 @@ fn rewrite_extern_column_refs( ) } -define_rule!( +define_rule_discriminant!( DepInitialDistinct, apply_dep_initial_distinct, - (RawDepJoin(JoinType::Cross), left, right) + (RawDepJoin(SubqueryType::Scalar), left, right) ); /// Initial rule to generate a join above this dependent join, and push the dependent @@ -79,8 +79,70 @@ fn apply_dep_initial_distinct( .map(|x| ExternColumnRefPred::from_pred_node(x).unwrap().index()) .collect::>(); - // If we have no correlated columns, just emit a cross join instead + // If we have no correlated columns, we can skip the whole dependent join step if correlated_col_indices.is_empty() { + let res = match join.sq_type() { + SubqueryType::Scalar => LogicalJoin::new_unchecked( + left, + right, + ConstantPred::bool(true).into_pred_node(), + JoinType::Cross, + ) + .into_plan_node(), + SubqueryType::Exists => { + let right_lim_1 = LogicalLimit::new_unchecked( + right, + ConstantPred::int64(0).into_pred_node(), + ConstantPred::int64(1).into_pred_node(), + ) + .into_plan_node(); + let right_count_star = LogicalAgg::new( + right_lim_1, + ListPred::new(vec![FuncPred::new( + FuncType::Agg("count".to_string()), + ListPred::new(vec![ConstantPred::int64(1).into_pred_node()]), + ) + .into_pred_node()]), + ListPred::new(vec![]), + ) + .into_plan_node(); + + let count_star_to_bool_proj = LogicalProjection::new( + right_count_star, + ListPred::new(vec![BinOpPred::new( + ColumnRefPred::new(0).into_pred_node(), + ConstantPred::int64(0).into_pred_node(), + BinOpType::Gt, + ) + .into_pred_node()]), + ) + .into_plan_node(); + + LogicalJoin::new_unchecked( + left, + count_star_to_bool_proj, + ConstantPred::bool(true).into_pred_node(), + JoinType::Cross, + ) + .into_plan_node() + } + SubqueryType::Any { pred, op } => LogicalJoin::new_unchecked( + left, + right, + BinOpPred::new( + pred.clone().into(), + ColumnRefPred::new(left_schema_size).into_pred_node(), + *op, + ) + .into_pred_node(), + JoinType::LeftMark, + ) + .into_plan_node(), + }; + + return vec![res.into()]; + } + if correlated_col_indices.is_empty() && matches!(join.sq_type(), SubqueryType::Scalar) { let new_join = LogicalJoin::new_unchecked( left, right, @@ -104,13 +166,9 @@ fn apply_dep_initial_distinct( ), ); - let new_dep_join = DependentJoin::new_unchecked( - distinct_agg_node.into_plan_node(), - right, - cond, - extern_cols, - JoinType::Cross, - ); + let new_dep_join_schema_size = correlated_col_indices.len() + right_schema_size; + let new_dep_join = + DependentJoin::new_unchecked(distinct_agg_node.into_plan_node(), right, cond, extern_cols); // Our join condition is going to make sure that all of the correlated columns // in the right side are equal to their equivalent columns in the left side. @@ -120,53 +178,82 @@ fn apply_dep_initial_distinct( // // This is because the aggregate we install on the right side will map the // correlated columns to their respective indices as shown. - let join_cond = LogOpPred::new( - LogOpType::And, - correlated_col_indices - .iter() - .enumerate() - .map(|(i, x)| { - assert!(i + left_schema_size < left_schema_size + right_schema_size); - BinOpPred::new( - ColumnRefPred::new(*x).into_pred_node(), - ColumnRefPred::new(i + left_schema_size).into_pred_node(), - BinOpType::Eq, - ) - .into_pred_node() - }) - .collect(), - ); + debug_assert!(!correlated_col_indices.is_empty()); + let join_cond = match join.sq_type() { + SubqueryType::Scalar | SubqueryType::Exists => LogOpPred::new( + LogOpType::And, + correlated_col_indices + .iter() + .enumerate() + .map(|(i, x)| { + assert!(i + left_schema_size < left_schema_size + new_dep_join_schema_size); + BinOpPred::new( + ColumnRefPred::new(*x).into_pred_node(), + ColumnRefPred::new(i + left_schema_size).into_pred_node(), + BinOpType::Eq, + ) + .into_pred_node() + }) + .collect(), + ), + SubqueryType::Any { pred, op } => LogOpPred::new( + LogOpType::And, + correlated_col_indices + .iter() + .enumerate() + .map(|(i, _)| { + assert!(i + left_schema_size < left_schema_size + new_dep_join_schema_size); + BinOpPred::new( + pred.clone().into(), + ColumnRefPred::new(i + left_schema_size).into_pred_node(), + *op, + ) + .into_pred_node() + }) + .collect(), + ), + }; + + let join_type = match join.sq_type() { + SubqueryType::Scalar => JoinType::Inner, + SubqueryType::Exists | SubqueryType::Any { pred: _, op: _ } => JoinType::LeftMark, + }; let new_join = LogicalJoin::new_unchecked( left, new_dep_join.into_plan_node(), join_cond.into_pred_node(), - JoinType::Inner, + join_type, ); // Ensure that the schema above the new_join is the same as it was before // for correctness (Project the left side of the new join, // plus the *right side of the right side*) - let new_proj = LogicalProjection::new( - new_join.into_plan_node(), - ListPred::new( - (0..left_schema_size) - .chain( - (left_schema_size + correlated_col_indices.len()) - ..(left_schema_size + correlated_col_indices.len() + right_schema_size), - ) - .map(|x| ColumnRefPred::new(x).into_pred_node()) - .collect(), - ), - ); + let node = if matches!(join.sq_type(), SubqueryType::Scalar) { + LogicalProjection::new( + new_join.into_plan_node(), + ListPred::new( + (0..left_schema_size) + .chain( + (left_schema_size + correlated_col_indices.len()) + ..(left_schema_size + correlated_col_indices.len() + right_schema_size), + ) + .map(|x| ColumnRefPred::new(x).into_pred_node()) + .collect(), + ), + ) + .into_plan_node() + } else { + new_join.into_plan_node() + }; - vec![new_proj.into_plan_node().into()] + vec![node.into()] } define_rule!( DepJoinPastProj, apply_dep_join_past_proj, - (DepJoin(JoinType::Cross), left, (Projection, right)) + (DepJoin, left, (Projection, right)) ); /// Pushes a dependent join past a projection node. @@ -203,8 +290,7 @@ fn apply_dep_join_past_proj( .collect(), ); - let new_dep_join = - DependentJoin::new_unchecked(left, right, cond, extern_cols, JoinType::Cross); + let new_dep_join = DependentJoin::new_unchecked(left, right, cond, extern_cols); let new_proj = LogicalProjection::new(new_dep_join.into_plan_node(), new_proj_exprs); vec![new_proj.into_plan_node().into()] @@ -213,7 +299,7 @@ fn apply_dep_join_past_proj( define_rule!( DepJoinPastFilter, apply_dep_join_past_filter, - (DepJoin(JoinType::Cross), left, (Filter, right)) + (DepJoin, left, (Filter, right)) ); /// Pushes a dependent join past a projection node. @@ -266,7 +352,6 @@ fn apply_dep_join_past_filter( .map(|x| ExternColumnRefPred::new(x).into_pred_node()) .collect(), ), - JoinType::Cross, ); let new_filter = LogicalFilter::new(new_dep_join.into_plan_node(), rewritten_expr); @@ -277,7 +362,7 @@ fn apply_dep_join_past_filter( define_rule!( DepJoinPastAgg, apply_dep_join_past_agg, - (DepJoin(JoinType::Cross), left, (Agg, right)) + (DepJoin, left, (Agg, right)) ); /// Pushes a dependent join past an aggregation node @@ -341,8 +426,7 @@ fn apply_dep_join_past_agg( .collect(), ); - let new_dep_join = - DependentJoin::new_unchecked(left.clone(), right, cond, extern_cols, JoinType::Cross); + let new_dep_join = DependentJoin::new_unchecked(left.clone(), right, cond, extern_cols); let new_agg_exprs_size = new_exprs.len(); let new_agg_groups_size = new_groups.len(); @@ -434,7 +518,7 @@ fn apply_dep_join_past_agg( define_rule!( DepJoinEliminate, apply_dep_join_eliminate_at_scan, // TODO matching is all wrong - (DepJoin(JoinType::Cross), left, right) + (DepJoin, left, right) ); /// If we've gone all the way down to the scan node, we can swap the dependent join diff --git a/optd-sqllogictest/slt/tpch-q16.slt b/optd-sqllogictest/slt/tpch-q16.slt new file mode 100644 index 00000000..b2c6a9c4 --- /dev/null +++ b/optd-sqllogictest/slt/tpch-q16.slt @@ -0,0 +1,68 @@ +include _tpch_tables.slt.part + +query +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; +---- +Brand#11 PROMO ANODIZED TIN 45 4 +Brand#11 SMALL PLATED COPPER 45 4 +Brand#11 STANDARD POLISHED TIN 45 4 +Brand#13 MEDIUM ANODIZED STEEL 36 4 +Brand#13 SMALL BRUSHED NICKEL 19 4 +Brand#14 SMALL ANODIZED NICKEL 45 4 +Brand#15 LARGE ANODIZED BRASS 45 4 +Brand#21 LARGE BURNISHED COPPER 19 4 +Brand#23 ECONOMY BRUSHED COPPER 9 4 +Brand#24 MEDIUM PLATED STEEL 19 4 +Brand#25 MEDIUM PLATED BRASS 45 4 +Brand#25 SMALL BURNISHED COPPER 3 4 +Brand#31 ECONOMY PLATED STEEL 23 4 +Brand#31 PROMO POLISHED TIN 23 4 +Brand#32 MEDIUM BURNISHED BRASS 49 4 +Brand#33 LARGE BRUSHED TIN 36 4 +Brand#33 SMALL BURNISHED NICKEL 3 4 +Brand#34 LARGE PLATED BRASS 45 4 +Brand#34 MEDIUM BRUSHED COPPER 9 4 +Brand#34 SMALL PLATED BRASS 14 4 +Brand#35 STANDARD ANODIZED STEEL 23 4 +Brand#43 MEDIUM ANODIZED BRASS 14 4 +Brand#43 PROMO POLISHED BRASS 19 4 +Brand#43 SMALL BRUSHED NICKEL 9 4 +Brand#44 SMALL PLATED COPPER 19 4 +Brand#51 ECONOMY POLISHED STEEL 49 4 +Brand#52 MEDIUM BURNISHED TIN 45 4 +Brand#52 SMALL BURNISHED NICKEL 14 4 +Brand#53 LARGE BURNISHED NICKEL 23 4 +Brand#53 MEDIUM BRUSHED COPPER 3 4 +Brand#53 STANDARD PLATED STEEL 45 4 +Brand#54 ECONOMY ANODIZED BRASS 9 4 +Brand#55 STANDARD ANODIZED BRASS 36 4 +Brand#55 STANDARD BRUSHED COPPER 3 4 diff --git a/optd-sqllogictest/slt/tpch-q16.slt.disabled b/optd-sqllogictest/slt/tpch-q16.slt.disabled deleted file mode 100644 index f480ca1b..00000000 --- a/optd-sqllogictest/slt/tpch-q16.slt.disabled +++ /dev/null @@ -1,68 +0,0 @@ -include _tpch_tables.slt.part - -query -select - p_brand, - p_type, - p_size, - count(distinct ps_suppkey) as supplier_cnt -from - partsupp, - part -where - p_partkey = ps_partkey - and p_brand <> 'Brand#45' - and p_type not like 'MEDIUM POLISHED%' - and p_size in (49, 14, 23, 45, 19, 3, 36, 9) - and ps_suppkey not in ( - select - s_suppkey - from - supplier - where - s_comment like '%Customer%Complaints%' - ) -group by - p_brand, - p_type, - p_size -order by - supplier_cnt desc, - p_brand, - p_type, - p_size; ----- -Brand#11 PROMO ANODIZED TIN 45 4 -Brand#11 SMALL PLATED COPPER 45 4 -Brand#11 STANDARD POLISHED TIN 45 4 -Brand#13 MEDIUM ANODIZED STEEL 36 4 -Brand#14 SMALL ANODIZED NICKEL 45 4 -Brand#15 LARGE ANODIZED BRASS 45 4 -Brand#21 LARGE BURNISHED COPPER 19 4 -Brand#23 ECONOMY BRUSHED COPPER 9 4 -Brand#25 MEDIUM PLATED BRASS 45 4 -Brand#31 ECONOMY PLATED STEEL 23 4 -Brand#31 PROMO POLISHED TIN 23 4 -Brand#32 MEDIUM BURNISHED BRASS 49 4 -Brand#33 LARGE BRUSHED TIN 36 4 -Brand#33 SMALL BURNISHED NICKEL 3 4 -Brand#34 LARGE PLATED BRASS 45 4 -Brand#34 MEDIUM BRUSHED COPPER 9 4 -Brand#34 SMALL PLATED BRASS 14 4 -Brand#35 STANDARD ANODIZED STEEL 23 4 -Brand#43 PROMO POLISHED BRASS 19 4 -Brand#43 SMALL BRUSHED NICKEL 9 4 -Brand#44 SMALL PLATED COPPER 19 4 -Brand#52 MEDIUM BURNISHED TIN 45 4 -Brand#52 SMALL BURNISHED NICKEL 14 4 -Brand#53 MEDIUM BRUSHED COPPER 3 4 -Brand#55 STANDARD ANODIZED BRASS 36 4 -Brand#55 STANDARD BRUSHED COPPER 3 4 -Brand#13 SMALL BRUSHED NICKEL 19 2 -Brand#25 SMALL BURNISHED COPPER 3 2 -Brand#43 MEDIUM ANODIZED BRASS 14 2 -Brand#53 STANDARD PLATED STEEL 45 2 -Brand#24 MEDIUM PLATED STEEL 19 1 -Brand#51 ECONOMY POLISHED STEEL 49 1 -Brand#53 LARGE BURNISHED NICKEL 23 1 -Brand#54 ECONOMY ANODIZED BRASS 9 1 diff --git a/optd-sqllogictest/slt/tpch-q20.slt.disabled b/optd-sqllogictest/slt/tpch-q20.slt similarity index 100% rename from optd-sqllogictest/slt/tpch-q20.slt.disabled rename to optd-sqllogictest/slt/tpch-q20.slt diff --git a/optd-sqllogictest/slt/tpch-q22.slt.disabled b/optd-sqllogictest/slt/tpch-q22.slt similarity index 100% rename from optd-sqllogictest/slt/tpch-q22.slt.disabled rename to optd-sqllogictest/slt/tpch-q22.slt diff --git a/optd-sqllogictest/slt/tpch-q4.slt.disabled b/optd-sqllogictest/slt/tpch-q4.slt similarity index 100% rename from optd-sqllogictest/slt/tpch-q4.slt.disabled rename to optd-sqllogictest/slt/tpch-q4.slt diff --git a/optd-sqllogictest/slt/unnest-exists-2.slt b/optd-sqllogictest/slt/unnest-exists-2.slt new file mode 100644 index 00000000..f3e026a9 --- /dev/null +++ b/optd-sqllogictest/slt/unnest-exists-2.slt @@ -0,0 +1,27 @@ +include _tpch_tables.slt.part + +query +SELECT + c.c_custkey, + c.c_name +FROM + customer c +WHERE + EXISTS ( + SELECT 1 + FROM orders o + WHERE o.o_custkey = c.c_custkey + AND o.o_orderstatus = 'O' + AND o.o_orderdate > '1998-08-01' + ) +AND NOT EXISTS ( + SELECT 1 + FROM orders o + JOIN lineitem l ON o.o_orderkey = l.l_orderkey + WHERE o.o_custkey = c.c_custkey + AND o.o_orderstatus = 'R' + AND o.o_orderdate > '1998-08-01' + AND o.o_totalprice > 5000 +); +---- +88 Customer#000000088 diff --git a/optd-sqllogictest/slt/unnest-exists-uncor.slt b/optd-sqllogictest/slt/unnest-exists-uncor.slt new file mode 100644 index 00000000..932d5e5c --- /dev/null +++ b/optd-sqllogictest/slt/unnest-exists-uncor.slt @@ -0,0 +1,17 @@ +include _tpch_tables.slt.part + +query +SELECT c_name +FROM customer c +WHERE c_nationkey IN ( + SELECT n_nationkey + FROM nation + WHERE n_name = 'GERMANY' +); +---- +Customer#000000062 +Customer#000000071 +Customer#000000093 +Customer#000000119 +Customer#000000129 +Customer#000000136 diff --git a/optd-sqllogictest/slt/unnest-exists.slt b/optd-sqllogictest/slt/unnest-exists.slt new file mode 100644 index 00000000..8cefa55b --- /dev/null +++ b/optd-sqllogictest/slt/unnest-exists.slt @@ -0,0 +1,18 @@ +include _tpch_tables.slt.part + +query +SELECT + c_custkey, + c_name +FROM + customer c +WHERE + EXISTS ( + SELECT 1 + FROM orders o + WHERE o.o_custkey = c.c_custkey + AND o.o_orderstatus = 'O' + AND o.o_orderdate > '1998-08-01' + ); +---- +88 Customer#000000088 diff --git a/optd-sqllogictest/slt/unnest-in-exists.slt b/optd-sqllogictest/slt/unnest-in-exists.slt new file mode 100644 index 00000000..bf9d02a5 --- /dev/null +++ b/optd-sqllogictest/slt/unnest-in-exists.slt @@ -0,0 +1,20 @@ +include _tpch_tables.slt.part + +query +SELECT c.c_custkey, c.c_name +FROM customer c +WHERE c.c_custkey IN ( + SELECT o.o_custkey + FROM orders o + WHERE o.o_custkey = c.c_custkey AND o.o_totalprice > 250000 +) +AND EXISTS ( + SELECT 1 + FROM orders o + WHERE o.o_custkey = c.c_custkey + AND o.o_orderstatus = 'O' + ) +order by c.c_custkey; +---- +10 Customer#000000010 +70 Customer#000000070 diff --git a/optd-sqllogictest/slt/unnest-in-uncor.slt b/optd-sqllogictest/slt/unnest-in-uncor.slt new file mode 100644 index 00000000..ad4841e5 --- /dev/null +++ b/optd-sqllogictest/slt/unnest-in-uncor.slt @@ -0,0 +1,13 @@ +include _tpch_tables.slt.part + +query +SELECT c.c_custkey, c.c_name +FROM customer c +WHERE c.c_custkey IN ( + SELECT o.o_custkey + FROM orders o + WHERE o.o_totalprice > 250000 +) order by c.c_custkey; +---- +10 Customer#000000010 +70 Customer#000000070 diff --git a/optd-sqllogictest/slt/unnest-in.slt b/optd-sqllogictest/slt/unnest-in.slt new file mode 100644 index 00000000..dfd3cc7b --- /dev/null +++ b/optd-sqllogictest/slt/unnest-in.slt @@ -0,0 +1,14 @@ +include _tpch_tables.slt.part + +query +SELECT c.c_custkey, c.c_name +FROM customer c +WHERE c.c_custkey IN ( + SELECT o.o_custkey + FROM orders o + WHERE o.o_custkey = c.c_custkey AND o.o_totalprice > 250000 +) +ORDER BY c.c_custkey; +---- +10 Customer#000000010 +70 Customer#000000070 diff --git a/optd-sqllogictest/slt/unnest-not-in-uncor.slt b/optd-sqllogictest/slt/unnest-not-in-uncor.slt new file mode 100644 index 00000000..226c7ebf --- /dev/null +++ b/optd-sqllogictest/slt/unnest-not-in-uncor.slt @@ -0,0 +1,61 @@ +include _tpch_tables.slt.part + +query +SELECT c.c_custkey, c.c_name +FROM customer c +WHERE c.c_custkey NOT IN ( + SELECT o.o_custkey + FROM orders o + WHERE o.o_orderstatus = 'O' +) order by c.c_custkey; +---- +3 Customer#000000003 +6 Customer#000000006 +9 Customer#000000009 +12 Customer#000000012 +15 Customer#000000015 +18 Customer#000000018 +21 Customer#000000021 +24 Customer#000000024 +27 Customer#000000027 +30 Customer#000000030 +33 Customer#000000033 +36 Customer#000000036 +39 Customer#000000039 +42 Customer#000000042 +45 Customer#000000045 +48 Customer#000000048 +51 Customer#000000051 +54 Customer#000000054 +57 Customer#000000057 +60 Customer#000000060 +63 Customer#000000063 +66 Customer#000000066 +69 Customer#000000069 +72 Customer#000000072 +75 Customer#000000075 +78 Customer#000000078 +81 Customer#000000081 +84 Customer#000000084 +87 Customer#000000087 +90 Customer#000000090 +93 Customer#000000093 +96 Customer#000000096 +99 Customer#000000099 +102 Customer#000000102 +105 Customer#000000105 +108 Customer#000000108 +111 Customer#000000111 +114 Customer#000000114 +117 Customer#000000117 +120 Customer#000000120 +123 Customer#000000123 +126 Customer#000000126 +129 Customer#000000129 +132 Customer#000000132 +135 Customer#000000135 +138 Customer#000000138 +141 Customer#000000141 +144 Customer#000000144 +147 Customer#000000147 +150 Customer#000000150 diff --git a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql index 471f28a9..93f180ce 100644 --- a/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql +++ b/optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql @@ -16,7 +16,7 @@ LogicalProjection { exprs: [ #0, #1 ] } ├── cond:Gt │ ├── #2 │ └── 100(i64) - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0 ] } └── LogicalAgg @@ -102,7 +102,7 @@ LogicalProjection { exprs: [ #0, #1 ] } ├── cond:Gt │ ├── #2 │ └── 100(i64) - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0 ] } └── LogicalAgg @@ -220,7 +220,7 @@ select t1v1, (select sum(t2v3) from t2 where t2v1 = t1v1) as sum from t1; /* LogicalProjection { exprs: [ #0, #2 ] } -└── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] } +└── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0 ] } └── LogicalAgg @@ -296,7 +296,7 @@ LogicalProjection { exprs: [ #0, #1 ] } ├── cond:Gt │ ├── #2 │ └── 100(i64) - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalScan { table: t1 } └── LogicalProjection { exprs: [ #0 ] } └── LogicalAgg diff --git a/optd-sqlplannertest/tests/tpch/q11.planner.sql b/optd-sqlplannertest/tests/tpch/q11.planner.sql index 8c0fdf39..3e31bceb 100644 --- a/optd-sqlplannertest/tests/tpch/q11.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q11.planner.sql @@ -36,7 +36,7 @@ LogicalSort ├── cond:Gt │ ├── Cast { cast_to: Decimal128(38, 15), child: #1 } │ └── #2 - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [] } ├── LogicalAgg │ ├── exprs:Agg(Sum) │ │ └── Mul diff --git a/optd-sqlplannertest/tests/tpch/q15.planner.sql b/optd-sqlplannertest/tests/tpch/q15.planner.sql index 0b50b7fb..d66bc87d 100644 --- a/optd-sqlplannertest/tests/tpch/q15.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q15.planner.sql @@ -46,7 +46,7 @@ LogicalSort │ └── Eq │ ├── #8 │ └── #9 - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [] } ├── LogicalJoin { join_type: Cross, cond: true } │ ├── LogicalScan { table: supplier } │ └── LogicalProjection { exprs: [ #0, #1 ] } diff --git a/optd-sqlplannertest/tests/tpch/q16.planner.sql b/optd-sqlplannertest/tests/tpch/q16.planner.sql new file mode 100644 index 00000000..b1adf6f6 --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q16.planner.sql @@ -0,0 +1,106 @@ +-- TPC-H Q16 +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; + +/* +LogicalSort +├── exprs: +│ ┌── SortOrder { order: Desc } +│ │ └── #3 +│ ├── SortOrder { order: Asc } +│ │ └── #0 +│ ├── SortOrder { order: Asc } +│ │ └── #1 +│ └── SortOrder { order: Asc } +│ └── #2 +└── LogicalProjection { exprs: [ #0, #1, #2, #3 ] } + └── LogicalAgg + ├── exprs:Agg(Count) + │ └── [ #1 ] + ├── groups: [ #8, #9, #10 ] + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #5 + │ │ └── #0 + │ ├── Neq + │ │ ├── #8 + │ │ └── "Brand#45" + │ ├── Like { expr: #9, pattern: "MEDIUM POLISHED%", negated: true, case_insensitive: false } + │ ├── InList { expr: Cast { cast_to: Int64, child: #10 }, list: [ 49(i64), 14(i64), 23(i64), 45(i64), 19(i64), 3(i64), 36(i64), 9(i64) ], negated: false } + │ └── Not + │ └── [ #14 ] + └── RawDependentJoin { sq_type: Any { pred: PredNode { typ: ColumnRef, children: [], data: Some(UInt64(1)) }, op: Eq }, cond: true, extern_cols: [] } + ├── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalScan { table: partsupp } + │ └── LogicalScan { table: part } + └── LogicalProjection { exprs: [ #0 ] } + └── LogicalFilter { cond: Like { expr: #6, pattern: "%Customer%Complaints%", negated: false, case_insensitive: false } } + └── LogicalScan { table: supplier } +PhysicalSort +├── exprs: +│ ┌── SortOrder { order: Desc } +│ │ └── #3 +│ ├── SortOrder { order: Asc } +│ │ └── #0 +│ ├── SortOrder { order: Asc } +│ │ └── #1 +│ └── SortOrder { order: Asc } +│ └── #2 +└── PhysicalAgg + ├── aggrs:Agg(Count) + │ └── [ #1 ] + ├── groups: [ #8, #9, #10 ] + └── PhysicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #5 + │ │ └── #0 + │ ├── Neq + │ │ ├── #8 + │ │ └── "Brand#45" + │ ├── Like { expr: #9, pattern: "MEDIUM POLISHED%", negated: true, case_insensitive: false } + │ ├── InList { expr: Cast { cast_to: Int64, child: #10 }, list: [ 49(i64), 14(i64), 23(i64), 45(i64), 19(i64), 3(i64), 36(i64), 9(i64) ], negated: false } + │ └── Not + │ └── [ #14 ] + └── PhysicalNestedLoopJoin + ├── join_type: LeftMark + ├── cond:Eq + │ ├── #1 + │ └── #14 + ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalScan { table: partsupp } + │ └── PhysicalScan { table: part } + └── PhysicalProjection { exprs: [ #0 ] } + └── PhysicalFilter { cond: Like { expr: #6, pattern: "%Customer%Complaints%", negated: false, case_insensitive: false } } + └── PhysicalScan { table: supplier } +*/ + diff --git a/optd-sqlplannertest/tests/tpch/q16.yml b/optd-sqlplannertest/tests/tpch/q16.yml new file mode 100644 index 00000000..dfcb58ae --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q16.yml @@ -0,0 +1,35 @@ +- sql: | + select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt + from + partsupp, + part + where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) + group by + p_brand, + p_type, + p_size + order by + supplier_cnt desc, + p_brand, + p_type, + p_size; + desc: TPC-H Q16 + before: ["include_sql:schema.sql"] + tasks: + - explain:logical_optd,physical_optd diff --git a/optd-sqlplannertest/tests/tpch/q17.planner.sql b/optd-sqlplannertest/tests/tpch/q17.planner.sql index 72de706f..c2aa8009 100644 --- a/optd-sqlplannertest/tests/tpch/q17.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q17.planner.sql @@ -43,7 +43,7 @@ LogicalProjection │ └── Lt │ ├── Cast { cast_to: Decimal128(30, 15), child: #4 } │ └── #25 - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#16) ] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#16) ] } ├── LogicalJoin { join_type: Cross, cond: true } │ ├── LogicalScan { table: lineitem } │ └── LogicalScan { table: part } diff --git a/optd-sqlplannertest/tests/tpch/q2.planner.sql b/optd-sqlplannertest/tests/tpch/q2.planner.sql index 7e500b90..90fb0d2c 100644 --- a/optd-sqlplannertest/tests/tpch/q2.planner.sql +++ b/optd-sqlplannertest/tests/tpch/q2.planner.sql @@ -81,7 +81,7 @@ LogicalLimit { skip: 0(i64), fetch: 100(i64) } │ └── Eq │ ├── #19 │ └── #28 - └── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] } + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0) ] } ├── LogicalJoin { join_type: Cross, cond: true } │ ├── LogicalJoin { join_type: Cross, cond: true } │ │ ├── LogicalJoin { join_type: Cross, cond: true } diff --git a/optd-sqlplannertest/tests/tpch/q20.planner.sql b/optd-sqlplannertest/tests/tpch/q20.planner.sql new file mode 100644 index 00000000..e5e6065d --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q20.planner.sql @@ -0,0 +1,194 @@ +-- TPC-H Q20 +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'indian%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'IRAQ' +order by + s_name; + +/* +LogicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── LogicalProjection { exprs: [ #1, #2 ] } + └── LogicalFilter + ├── cond:And + │ ├── #11 + │ ├── Eq + │ │ ├── #3 + │ │ └── #7 + │ └── Eq + │ ├── #8 + │ └── "IRAQ" + └── RawDependentJoin { sq_type: Any { pred: PredNode { typ: ColumnRef, children: [], data: Some(UInt64(0)) }, op: Eq }, cond: true, extern_cols: [] } + ├── LogicalJoin { join_type: Cross, cond: true } + │ ├── LogicalScan { table: supplier } + │ └── LogicalScan { table: nation } + └── LogicalProjection { exprs: [ #1 ] } + └── LogicalFilter + ├── cond:And + │ ├── #5 + │ └── Gt + │ ├── Cast { cast_to: Float64, child: #2 } + │ └── #6 + └── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [ Extern(#0), Extern(#1) ] } + ├── RawDependentJoin { sq_type: Any { pred: PredNode { typ: ColumnRef, children: [], data: Some(UInt64(0)) }, op: Eq }, cond: true, extern_cols: [] } + │ ├── LogicalScan { table: partsupp } + │ └── LogicalProjection { exprs: [ #0 ] } + │ └── LogicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } + │ └── LogicalScan { table: part } + └── LogicalProjection + ├── exprs:Mul + │ ├── 0.5(float) + │ └── Cast { cast_to: Float64, child: #0 } + └── LogicalAgg + ├── exprs:Agg(Sum) + │ └── [ #4 ] + ├── groups: [] + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #1 + │ │ └── Extern(#0) + │ ├── Eq + │ │ ├── #2 + │ │ └── Extern(#1) + │ ├── Geq + │ │ ├── #10 + │ │ └── Cast { cast_to: Date32, child: "1996-01-01" } + │ └── Lt + │ ├── #10 + │ └── Add + │ ├── Cast { cast_to: Date32, child: "1996-01-01" } + │ └── INTERVAL_MONTH_DAY_NANO (12, 0, 0) + └── LogicalScan { table: lineitem } +PhysicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── PhysicalProjection { exprs: [ #1, #2 ] } + └── PhysicalFilter + ├── cond:And + │ ├── #11 + │ ├── Eq + │ │ ├── #3 + │ │ └── #7 + │ └── Eq + │ ├── #8 + │ └── "IRAQ" + └── PhysicalNestedLoopJoin + ├── join_type: LeftMark + ├── cond:Eq + │ ├── #0 + │ └── #11 + ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalScan { table: supplier } + │ └── PhysicalScan { table: nation } + └── PhysicalProjection { exprs: [ #4 ] } + └── PhysicalFilter + ├── cond:And + │ ├── #8 + │ └── Gt + │ ├── Cast { cast_to: Float64, child: #5 } + │ └── #2 + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #1 ], right_keys: [ #0, #1 ] } + ├── PhysicalProjection + │ ├── exprs: + │ │ ┌── #0 + │ │ ├── #1 + │ │ └── Mul + │ │ ├── 0.5(float) + │ │ └── Cast { cast_to: Float64, child: #2 } + │ └── PhysicalProjection { exprs: [ #0, #1, #4 ] } + │ └── PhysicalNestedLoopJoin + │ ├── join_type: LeftOuter + │ ├── cond:And + │ │ ├── Eq + │ │ │ ├── #0 + │ │ │ └── #2 + │ │ └── Eq + │ │ ├── #1 + │ │ └── #3 + │ ├── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] } + │ │ └── PhysicalNestedLoopJoin + │ │ ├── join_type: LeftMark + │ │ ├── cond:Eq + │ │ │ ├── #0 + │ │ │ └── #5 + │ │ ├── PhysicalScan { table: partsupp } + │ │ └── PhysicalProjection { exprs: [ #0 ] } + │ │ └── PhysicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } + │ │ └── PhysicalScan { table: part } + │ └── PhysicalAgg + │ ├── aggrs:Agg(Sum) + │ │ └── [ #6 ] + │ ├── groups: [ #0, #1 ] + │ └── PhysicalFilter + │ ├── cond:And + │ │ ├── Eq + │ │ │ ├── #3 + │ │ │ └── #0 + │ │ ├── Eq + │ │ │ ├── #4 + │ │ │ └── #1 + │ │ ├── Geq + │ │ │ ├── #12 + │ │ │ └── Cast { cast_to: Date32, child: "1996-01-01" } + │ │ └── Lt + │ │ ├── #12 + │ │ └── Add + │ │ ├── Cast { cast_to: Date32, child: "1996-01-01" } + │ │ └── INTERVAL_MONTH_DAY_NANO (12, 0, 0) + │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] } + │ │ └── PhysicalNestedLoopJoin + │ │ ├── join_type: LeftMark + │ │ ├── cond:Eq + │ │ │ ├── #0 + │ │ │ └── #5 + │ │ ├── PhysicalScan { table: partsupp } + │ │ └── PhysicalProjection { exprs: [ #0 ] } + │ │ └── PhysicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } + │ │ └── PhysicalScan { table: part } + │ └── PhysicalScan { table: lineitem } + └── PhysicalNestedLoopJoin + ├── join_type: LeftMark + ├── cond:Eq + │ ├── #0 + │ └── #5 + ├── PhysicalScan { table: partsupp } + └── PhysicalProjection { exprs: [ #0 ] } + └── PhysicalFilter { cond: Like { expr: #1, pattern: "indian%", negated: false, case_insensitive: false } } + └── PhysicalScan { table: part } +*/ + diff --git a/optd-sqlplannertest/tests/tpch/q20.yml b/optd-sqlplannertest/tests/tpch/q20.yml new file mode 100644 index 00000000..e94ebb5e --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q20.yml @@ -0,0 +1,42 @@ +- sql: | + select + s_name, + s_address + from + supplier, + nation + where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'indian%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'IRAQ' + order by + s_name; + desc: TPC-H Q20 + before: ["include_sql:schema.sql"] + tasks: + - explain:logical_optd,physical_optd diff --git a/optd-sqlplannertest/tests/tpch/q22.planner.sql b/optd-sqlplannertest/tests/tpch/q22.planner.sql new file mode 100644 index 00000000..89f93534 --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q22.planner.sql @@ -0,0 +1,170 @@ +-- TPC-H Q22 +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substring(c_phone from 1 for 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone from 1 for 2) in + ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone from 1 for 2) in + ('13', '31', '23', '29', '30', '18', '17') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode; + +/* +LogicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── LogicalProjection { exprs: [ #0, #1, #2 ] } + └── LogicalAgg + ├── exprs: + │ ┌── Agg(Count) + │ │ └── [ 1(i64) ] + │ └── Agg(Sum) + │ └── [ #1 ] + ├── groups: [ #0 ] + └── LogicalProjection + ├── exprs: + │ ┌── Scalar(Substr) + │ │ └── [ #4, 1(i64), 2(i64) ] + │ └── #5 + └── LogicalFilter + ├── cond:And + │ ├── InList + │ │ ├── expr:Scalar(Substr) + │ │ │ └── [ #4, 1(i64), 2(i64) ] + │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] + │ │ ├── negated: false + + │ ├── Gt + │ │ ├── Cast { cast_to: Decimal128(19, 6), child: #5 } + │ │ └── #8 + │ └── Not + │ └── [ #9 ] + └── RawDependentJoin { sq_type: Exists, cond: true, extern_cols: [ Extern(#0) ] } + ├── RawDependentJoin { sq_type: Scalar, cond: true, extern_cols: [] } + │ ├── LogicalScan { table: customer } + │ └── LogicalProjection { exprs: [ #0 ] } + │ └── LogicalAgg + │ ├── exprs:Agg(Avg) + │ │ └── [ #5 ] + │ ├── groups: [] + │ └── LogicalFilter + │ ├── cond:And + │ │ ├── Gt + │ │ │ ├── Cast { cast_to: Decimal128(30, 15), child: #5 } + │ │ │ └── Cast { cast_to: Decimal128(30, 15), child: 0(float) } + │ │ └── InList + │ │ ├── expr:Scalar(Substr) + │ │ │ └── [ #4, 1(i64), 2(i64) ] + │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] + │ │ ├── negated: false + + │ └── LogicalScan { table: customer } + └── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8 ] } + └── LogicalFilter + ├── cond:Eq + │ ├── #1 + │ └── Extern(#0) + └── LogicalScan { table: orders } +PhysicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── PhysicalAgg + ├── aggrs: + │ ┌── Agg(Count) + │ │ └── [ 1(i64) ] + │ └── Agg(Sum) + │ └── [ #1 ] + ├── groups: [ #0 ] + └── PhysicalProjection + ├── exprs: + │ ┌── Scalar(Substr) + │ │ └── [ #4, 1(i64), 2(i64) ] + │ └── #5 + └── PhysicalFilter + ├── cond:And + │ ├── InList + │ │ ├── expr:Scalar(Substr) + │ │ │ └── [ #4, 1(i64), 2(i64) ] + │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] + │ │ ├── negated: false + + │ ├── Gt + │ │ ├── Cast { cast_to: Decimal128(19, 6), child: #5 } + │ │ └── #8 + │ └── Not + │ └── [ #9 ] + └── PhysicalNestedLoopJoin + ├── join_type: LeftMark + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #9 + ├── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalScan { table: customer } + │ └── PhysicalAgg + │ ├── aggrs:Agg(Avg) + │ │ └── [ #5 ] + │ ├── groups: [] + │ └── PhysicalFilter + │ ├── cond:And + │ │ ├── Gt + │ │ │ ├── Cast { cast_to: Decimal128(30, 15), child: #5 } + │ │ │ └── Cast { cast_to: Decimal128(30, 15), child: 0(float) } + │ │ └── InList + │ │ ├── expr:Scalar(Substr) + │ │ │ └── [ #4, 1(i64), 2(i64) ] + │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] + │ │ ├── negated: false + + │ └── PhysicalScan { table: customer } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] } + ├── PhysicalAgg { aggrs: [], groups: [ #0 ] } + │ └── PhysicalNestedLoopJoin { join_type: Cross, cond: true } + │ ├── PhysicalScan { table: customer } + │ └── PhysicalAgg + │ ├── aggrs:Agg(Avg) + │ │ └── [ #5 ] + │ ├── groups: [] + │ └── PhysicalFilter + │ ├── cond:And + │ │ ├── Gt + │ │ │ ├── Cast { cast_to: Decimal128(30, 15), child: #5 } + │ │ │ └── Cast { cast_to: Decimal128(30, 15), child: 0(float) } + │ │ └── InList + │ │ ├── expr:Scalar(Substr) + │ │ │ └── [ #4, 1(i64), 2(i64) ] + │ │ ├── list: [ "13", "31", "23", "29", "30", "18", "17" ] + │ │ ├── negated: false + + │ └── PhysicalScan { table: customer } + └── PhysicalScan { table: orders } +*/ + diff --git a/optd-sqlplannertest/tests/tpch/q22.yml b/optd-sqlplannertest/tests/tpch/q22.yml new file mode 100644 index 00000000..55678824 --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q22.yml @@ -0,0 +1,42 @@ +- sql: | + select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal + from + ( + select + substring(c_phone from 1 for 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone from 1 for 2) in + ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone from 1 for 2) in + ('13', '31', '23', '29', '30', '18', '17') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale + group by + cntrycode + order by + cntrycode; + desc: TPC-H Q22 + before: ["include_sql:schema.sql"] + tasks: + - explain:logical_optd,physical_optd diff --git a/optd-sqlplannertest/tests/tpch/q4.planner.sql b/optd-sqlplannertest/tests/tpch/q4.planner.sql new file mode 100644 index 00000000..99bc78dc --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q4.planner.sql @@ -0,0 +1,91 @@ +-- TPC-H Q4 +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority; + +/* +LogicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── LogicalProjection { exprs: [ #0, #1 ] } + └── LogicalAgg + ├── exprs:Agg(Count) + │ └── [ 1(i64) ] + ├── groups: [ #5 ] + └── LogicalFilter + ├── cond:And + │ ├── Geq + │ │ ├── #4 + │ │ └── Cast { cast_to: Date32, child: "1993-07-01" } + │ ├── Lt + │ │ ├── #4 + │ │ └── Add + │ │ ├── Cast { cast_to: Date32, child: "1993-07-01" } + │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) + │ └── #9 + └── RawDependentJoin { sq_type: Exists, cond: true, extern_cols: [ Extern(#0) ] } + ├── LogicalScan { table: orders } + └── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15 ] } + └── LogicalFilter + ├── cond:And + │ ├── Eq + │ │ ├── #0 + │ │ └── Extern(#0) + │ └── Lt + │ ├── #11 + │ └── #12 + └── LogicalScan { table: lineitem } +PhysicalSort +├── exprs:SortOrder { order: Asc } +│ └── #0 +└── PhysicalAgg + ├── aggrs:Agg(Count) + │ └── [ 1(i64) ] + ├── groups: [ #5 ] + └── PhysicalFilter + ├── cond:And + │ ├── Geq + │ │ ├── #4 + │ │ └── Cast { cast_to: Date32, child: "1993-07-01" } + │ ├── Lt + │ │ ├── #4 + │ │ └── Add + │ │ ├── Cast { cast_to: Date32, child: "1993-07-01" } + │ │ └── INTERVAL_MONTH_DAY_NANO (3, 0, 0) + │ └── #9 + └── PhysicalNestedLoopJoin + ├── join_type: LeftMark + ├── cond:And + │ └── Eq + │ ├── #0 + │ └── #9 + ├── PhysicalScan { table: orders } + └── PhysicalProjection { exprs: [ #16, #0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15 ] } + └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } + ├── PhysicalFilter + │ ├── cond:Lt + │ │ ├── #11 + │ │ └── #12 + │ └── PhysicalScan { table: lineitem } + └── PhysicalAgg { aggrs: [], groups: [ #0 ] } + └── PhysicalScan { table: orders } +*/ + diff --git a/optd-sqlplannertest/tests/tpch/q4.yml b/optd-sqlplannertest/tests/tpch/q4.yml new file mode 100644 index 00000000..a30999b2 --- /dev/null +++ b/optd-sqlplannertest/tests/tpch/q4.yml @@ -0,0 +1,26 @@ +- sql: | + select + o_orderpriority, + count(*) as order_count + from + orders + where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) + group by + o_orderpriority + order by + o_orderpriority; + desc: TPC-H Q4 + before: ["include_sql:schema.sql"] + tasks: + - explain:logical_optd,physical_optd