Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit e3c8f2f

Browse files
authored
Merge pull request #49 from cmu-db/logical_optimizer_rules
feat: [Logical Optimizer]Eliminate Join Rule
2 parents d36dfd5 + 08791ad commit e3c8f2f

File tree

8 files changed

+82
-45
lines changed

8 files changed

+82
-45
lines changed

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

optd-datafusion-bridge/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ edition = "2021"
88
[dependencies]
99
arrow-schema = "*"
1010
datafusion = "32.0.0"
11+
datafusion-expr = "32.0.0"
1112
async-trait = "0.1"
1213
optd-core = { path = "../optd-core" }
1314
optd-datafusion-repr = { path = "../optd-datafusion-repr" }

optd-datafusion-bridge/src/into_optd.rs

+29-7
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use datafusion::{
44
logical_expr::{self, logical_plan, LogicalPlan, Operator},
55
scalar::ScalarValue,
66
};
7+
use datafusion_expr::Expr as DFExpr;
78
use optd_core::rel_node::RelNode;
89
use optd_datafusion_repr::plan_nodes::{
910
BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, Expr, ExprList, FuncExpr, FuncType,
@@ -117,7 +118,7 @@ impl OptdPlanContext<'_> {
117118
expr,
118119
)
119120
.into_expr())
120-
}
121+
}
121122
_ => bail!("Unsupported expression: {:?}", expr),
122123
}
123124
}
@@ -215,12 +216,33 @@ impl OptdPlanContext<'_> {
215216
}
216217

217218
if log_ops.is_empty() {
218-
Ok(LogicalJoin::new(
219-
left,
220-
right,
221-
ConstantExpr::bool(true).into_expr(),
222-
join_type,
223-
))
219+
// optd currently only supports
220+
// 1. normal equal condition join
221+
// select * from a join b on a.id = b.id
222+
// 2. join on false/true
223+
// select * from a join b on false/true
224+
// Joins on any other literal or filter expression are not supported:
225+
// instead of converting them to a join on true, we bail out.
226+
227+
match node.filter {
228+
Some(DFExpr::Literal(ScalarValue::Boolean(Some(val)))) => {
229+
return Ok(LogicalJoin::new(
230+
left,
231+
right,
232+
ConstantExpr::bool(val).into_expr(),
233+
join_type,
234+
));
235+
}
236+
None => {
237+
return Ok(LogicalJoin::new(
238+
left,
239+
right,
240+
ConstantExpr::bool(true).into_expr(),
241+
join_type,
242+
));
243+
}
244+
_ => bail!("unsupported join filter: {:?}", node.filter),
245+
}
224246
} else if log_ops.len() == 1 {
225247
Ok(LogicalJoin::new(left, right, log_ops.remove(0), join_type))
226248
} else {

optd-datafusion-repr/src/lib.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ use optd_core::cascades::{CascadesOptimizer, GroupId, OptimizerProperties};
88
use plan_nodes::{OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode};
99
use properties::schema::{Catalog, SchemaPropertyBuilder};
1010
use rules::{
11-
HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule, ProjectionPullUpJoin,
11+
EliminateJoinRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule,
12+
ProjectionPullUpJoin,
1213
};
1314

1415
pub use adaptive::PhysicalCollector;
@@ -46,6 +47,8 @@ impl DatafusionOptimizer {
4647
rules.push(Arc::new(JoinCommuteRule::new()));
4748
rules.push(Arc::new(JoinAssocRule::new()));
4849
rules.push(Arc::new(ProjectionPullUpJoin::new()));
50+
rules.push(Arc::new(EliminateJoinRule::new()));
51+
4952
let cost_model = AdaptiveCostModel::new(50);
5053
Self {
5154
runtime_statistics: cost_model.get_runtime_map(),

optd-datafusion-repr/src/rules.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,7 @@ mod macros;
44
mod physical;
55

66
// pub use filter_join::FilterJoinPullUpRule;
7-
pub use joins::{HashJoinRule, JoinAssocRule, JoinCommuteRule, ProjectionPullUpJoin};
7+
pub use joins::{
8+
EliminateJoinRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, ProjectionPullUpJoin,
9+
};
810
pub use physical::PhysicalConversionRule;

optd-datafusion-repr/src/rules/joins.rs

+42-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::collections::HashMap;
22
use std::sync::Arc;
3+
use std::vec;
34

45
use itertools::Itertools;
56
use optd_core::optimizer::Optimizer;
@@ -8,8 +9,9 @@ use optd_core::rules::{Rule, RuleMatcher};
89

910
use super::macros::{define_impl_rule, define_rule};
1011
use crate::plan_nodes::{
11-
BinOpExpr, BinOpType, ColumnRefExpr, Expr, ExprList, JoinType, LogicalJoin, LogicalProjection,
12-
OptRelNode, OptRelNodeTyp, PhysicalHashJoin, PlanNode,
12+
BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ConstantType, Expr, ExprList, JoinType,
13+
LogicalEmptyRelation, LogicalJoin, LogicalProjection, OptRelNode, OptRelNodeTyp,
14+
PhysicalHashJoin, PlanNode,
1315
};
1416
use crate::properties::schema::SchemaPropertyBuilder;
1517

@@ -78,6 +80,44 @@ fn apply_join_commute(
7880
vec![node.as_ref().clone()]
7981
}
8082

83+
// Declares `EliminateJoinRule`, handled by `apply_eliminate_join`.
// The pattern targets `Join(JoinType::Inner)`; `left`, `right` and `cond`
// are the names the handler destructures from `EliminateJoinRulePicks`.
// NOTE(review): exact matcher semantics come from `define_rule!` in
// `super::macros`, which is not visible here — confirm there.
define_rule!(
84+
EliminateJoinRule,
85+
apply_eliminate_join,
86+
(Join(JoinType::Inner), left, right, [cond])
87+
);
88+
89+
/// Eliminates a logical join whose condition is a constant boolean.
90+
/// A `true` condition becomes a `LogicalJoin` with `JoinType::Cross`.
91+
/// A `false` condition becomes a `LogicalEmptyRelation`.
///
/// Returns an empty `Vec` (no rewrite) when `cond` is not a constant, is a
/// constant of a type other than `ConstantType::Bool`, or carries no data.
92+
// `optimizer` is not referenced in the body below, hence the lint allowance.
#[allow(unused_variables)]
93+
fn apply_eliminate_join(
94+
optimizer: &impl Optimizer<OptRelNodeTyp>,
95+
EliminateJoinRulePicks { left, right, cond }: EliminateJoinRulePicks,
96+
) -> Vec<RelNode<OptRelNodeTyp>> {
97+
// Only constant conditions are candidates for elimination.
if let OptRelNodeTyp::Constant(const_type) = cond.typ {
98+
if const_type == ConstantType::Bool {
99+
if let Some(data) = cond.data {
100+
if data.as_bool() {
101+
// change it to cross join if filter is always true
102+
let node = LogicalJoin::new(
103+
PlanNode::from_group(left.into()),
104+
PlanNode::from_group(right.into()),
105+
ConstantExpr::bool(true).into_expr(),
106+
JoinType::Cross,
107+
);
108+
return vec![node.into_rel_node().as_ref().clone()];
109+
} else {
110+
// No need to handle schema here, as all exprs in the same group
111+
// will have same logical properties
// NOTE(review): the `false` argument is presumably `produce_one_row`
// (the planner test prints `produce_one_row: false`) — confirm
// against `LogicalEmptyRelation::new`.
112+
let node = LogicalEmptyRelation::new(false);
113+
return vec![node.into_rel_node().as_ref().clone()];
114+
}
115+
}
116+
}
117+
}
118+
// Anything else is left untouched: no alternative plan is produced.
vec![]
119+
}
120+
81121
// (A join B) join C -> A join (B join C)
82122
define_rule!(
83123
JoinAssocRule,

optd-sqlplannertest/tests/empty_relation.planner.sql

+2-33
Original file line numberDiff line numberDiff line change
@@ -33,44 +33,13 @@ PhysicalProjection
3333
-- Test whether the optimizer eliminates join to empty relation
3434
select * from t1 inner join t2 on false;
3535
select 64+1 from t1 inner join t2 on false;
36-
select 64+1 from t1 inner join t2 on 1=0;
3736

3837
/*
3938
LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
40-
└── LogicalJoin { join_type: Inner, cond: true }
39+
└── LogicalJoin { join_type: Inner, cond: false }
4140
├── LogicalScan { table: t1 }
4241
└── LogicalScan { table: t2 }
4342
PhysicalProjection { exprs: [ #0, #1, #2, #3 ] }
44-
└── PhysicalProjection { exprs: [ #2, #3, #0, #1 ] }
45-
└── PhysicalNestedLoopJoin { join_type: Inner, cond: true }
46-
├── PhysicalScan { table: t2 }
47-
└── PhysicalScan { table: t1 }
48-
0 0 0 200
49-
0 0 1 201
50-
0 0 2 202
51-
1 1 0 200
52-
1 1 1 201
53-
1 1 2 202
54-
2 2 0 200
55-
2 2 1 201
56-
2 2 2 202
57-
65
58-
65
59-
65
60-
65
61-
65
62-
65
63-
65
64-
65
65-
65
66-
65
67-
65
68-
65
69-
65
70-
65
71-
65
72-
65
73-
65
74-
65
43+
└── PhysicalEmptyRelation { produce_one_row: false }
7544
*/
7645

optd-sqlplannertest/tests/empty_relation.yml

-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
- sql: |
1616
select * from t1 inner join t2 on false;
1717
select 64+1 from t1 inner join t2 on false;
18-
select 64+1 from t1 inner join t2 on 1=0;
1918
desc: Test whether the optimizer eliminates join to empty relation
2019
tasks:
2120
- explain:logical_optd,physical_optd

0 commit comments

Comments
 (0)