Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 1da620f

Browse files
authored
Feat: add simplify join cond rule and accept all kinds of inner join conditions, including non-equal ones (#115)
# Major Changes - SimplifyJoinCondRule, which works just like SimplifyFilterRule in PR https://github.com/cmu-db/optd/pull/108 - Accept all kinds of inner join conditions from DataFusion - Before this PR, optd only supported inner joins on equality conditions and inner joins on True/False - After this PR, optd also supports queries like `select * from x inner join y on x.a=y.a and x.a<5`. These are good test cases for pushing filters into joins and for filter pushdown. P.S.: the core fix for merging groups in this PR is based on PR https://github.com/cmu-db/optd/pull/114 --------- Signed-off-by: AveryQi115 <[email protected]>
1 parent 195626d commit 1da620f

File tree

7 files changed

+235
-34
lines changed

7 files changed

+235
-34
lines changed

optd-datafusion-bridge/src/into_optd.rs

+13-24
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ use datafusion::{
44
logical_expr::{self, logical_plan, LogicalPlan, Operator},
55
scalar::ScalarValue,
66
};
7-
use datafusion_expr::Expr as DFExpr;
87
use optd_core::rel_node::RelNode;
98
use optd_datafusion_repr::plan_nodes::{
109
BetweenExpr, BinOpExpr, BinOpType, CastExpr, ColumnRefExpr, ConstantExpr, Expr, ExprList,
@@ -314,35 +313,25 @@ impl OptdPlanContext<'_> {
314313
let expr = BinOpExpr::new(left, right, op).into_expr();
315314
log_ops.push(expr);
316315
}
316+
if node.filter.is_some() {
317+
let filter =
318+
self.conv_into_optd_expr(node.filter.as_ref().unwrap(), node.schema.as_ref())?;
319+
log_ops.push(filter);
320+
}
317321

318322
if log_ops.is_empty() {
319-
// optd currently only supports
320-
// 1. normal equal condition join
321-
// select * from a join b on a.id = b.id
322-
// 2. join on false/true
323-
// select * from a join b on false/true
324-
// 3. join on other literals or other filters are not supported
325-
// instead of converting them to a join on true, we bail out
326-
327-
match node.filter {
328-
Some(DFExpr::Literal(ScalarValue::Boolean(Some(val)))) => Ok(LogicalJoin::new(
329-
left,
330-
right,
331-
ConstantExpr::bool(val).into_expr(),
332-
join_type,
333-
)),
334-
None => Ok(LogicalJoin::new(
335-
left,
336-
right,
337-
ConstantExpr::bool(true).into_expr(),
338-
join_type,
339-
)),
340-
_ => bail!("unsupported join filter: {:?}", node.filter),
341-
}
323+
Ok(LogicalJoin::new(
324+
left,
325+
right,
326+
ConstantExpr::bool(true).into_expr(),
327+
join_type,
328+
))
342329
} else if log_ops.len() == 1 {
343330
Ok(LogicalJoin::new(left, right, log_ops.remove(0), join_type))
344331
} else {
345332
let expr_list = ExprList::new(log_ops);
333+
// the expr from filter is already flattened in conv_into_optd_expr
334+
let expr_list = flatten_nested_logical(LogOpType::And, expr_list);
346335
Ok(LogicalJoin::new(
347336
left,
348337
right,

optd-datafusion-repr/src/lib.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use properties::{
1818
use rules::{
1919
EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, EliminateFilterRule,
2020
EliminateJoinRule, EliminateLimitRule, HashJoinRule, JoinAssocRule, JoinCommuteRule,
21-
PhysicalConversionRule, ProjectionPullUpJoin, SimplifyFilterRule,
21+
PhysicalConversionRule, ProjectionPullUpJoin, SimplifyFilterRule, SimplifyJoinCondRule,
2222
};
2323

2424
pub use optd_core::rel_node::Value;
@@ -57,6 +57,7 @@ impl DatafusionOptimizer {
5757
let rules = PhysicalConversionRule::all_conversions();
5858
let mut rule_wrappers = vec![
5959
RuleWrapper::new_heuristic(Arc::new(SimplifyFilterRule::new())),
60+
RuleWrapper::new_heuristic(Arc::new(SimplifyJoinCondRule::new())),
6061
RuleWrapper::new_heuristic(Arc::new(EliminateFilterRule::new())),
6162
RuleWrapper::new_heuristic(Arc::new(EliminateJoinRule::new())),
6263
RuleWrapper::new_heuristic(Arc::new(EliminateLimitRule::new())),
@@ -66,8 +67,8 @@ impl DatafusionOptimizer {
6667
for rule in rules {
6768
rule_wrappers.push(RuleWrapper::new_cascades(rule));
6869
}
69-
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(HashJoinRule::new())));
70-
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(JoinCommuteRule::new())));
70+
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(HashJoinRule::new()))); // 17
71+
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(JoinCommuteRule::new()))); // 18
7172
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(JoinAssocRule::new())));
7273
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
7374
ProjectionPullUpJoin::new(),

optd-datafusion-repr/src/rules.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ pub use eliminate_duplicated_expr::{
1111
EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule,
1212
};
1313
pub use eliminate_limit::EliminateLimitRule;
14-
pub use filter::{EliminateFilterRule, SimplifyFilterRule};
14+
pub use filter::{EliminateFilterRule, SimplifyFilterRule, SimplifyJoinCondRule};
1515
pub use joins::{
1616
EliminateJoinRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, ProjectionPullUpJoin,
1717
};

optd-datafusion-repr/src/rules/filter.rs

+34-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ use std::sync::Arc;
33

44
use super::macros::define_rule;
55
use crate::plan_nodes::{
6-
ConstantExpr, ConstantType, Expr, ExprList, LogOpExpr, LogOpType, LogicalEmptyRelation,
7-
OptRelNode, OptRelNodeTyp,
6+
ConstantExpr, ConstantType, Expr, ExprList, JoinType, LogOpExpr, LogOpType,
7+
LogicalEmptyRelation, LogicalJoin, OptRelNode, OptRelNodeTyp, PlanNode,
88
};
99
use crate::properties::schema::SchemaPropertyBuilder;
1010
use crate::OptRelNodeRef;
@@ -120,6 +120,38 @@ fn apply_simplify_filter(
120120
}
121121
}
122122

123+
// Same as SimplifyFilterRule, but for innerJoin conditions
124+
define_rule!(
125+
SimplifyJoinCondRule,
126+
apply_simplify_join_cond,
127+
(Join(JoinType::Inner), left, right, [cond])
128+
);
129+
130+
fn apply_simplify_join_cond(
131+
_optimizer: &impl Optimizer<OptRelNodeTyp>,
132+
SimplifyJoinCondRulePicks { left, right, cond }: SimplifyJoinCondRulePicks,
133+
) -> Vec<RelNode<OptRelNodeTyp>> {
134+
match cond.typ {
135+
OptRelNodeTyp::LogOp(_) => {
136+
let mut changed = false;
137+
let new_log_expr = simplify_log_expr(Arc::new(cond), &mut changed);
138+
if changed {
139+
let join_node = LogicalJoin::new(
140+
PlanNode::from_group(left.into()),
141+
PlanNode::from_group(right.into()),
142+
Expr::from_rel_node(new_log_expr).unwrap(),
143+
JoinType::Inner,
144+
);
145+
return vec![join_node.into_rel_node().as_ref().clone()];
146+
}
147+
vec![]
148+
}
149+
_ => {
150+
vec![]
151+
}
152+
}
153+
}
154+
123155
define_rule!(
124156
EliminateFilterRule,
125157
apply_eliminate_filter,

optd-sqlplannertest/tests/filter.planner.sql

+137
Original file line numberDiff line numberDiff line change
@@ -183,3 +183,140 @@ PhysicalProjection { exprs: [ #0, #1, #2, #3 ] }
183183
2 2 2 202
184184
*/
185185

186+
-- Test SimplifyJoinCondRule and EliminateJoinRule (false filter to empty relation)
187+
select * from t1 inner join t2 on t1v1 = t2v1 and false;
188+
189+
/*
190+
LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
191+
└── LogicalJoin
192+
├── join_type: Inner
193+
├── cond:And
194+
│ ├── Eq
195+
│ │ ├── #0
196+
│ │ └── #2
197+
│ └── false
198+
├── LogicalScan { table: t1 }
199+
└── LogicalScan { table: t2 }
200+
PhysicalProjection { exprs: [ #0, #1, #2, #3 ] }
201+
└── PhysicalEmptyRelation { produce_one_row: false }
202+
*/
203+
204+
-- Test SimplifyJoinCondRule (skip true filter for and)
205+
select * from t1 inner join t2 on t1v1 = t2v1 and t1v1 = t2v3 and true;
206+
207+
/*
208+
LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
209+
└── LogicalJoin
210+
├── join_type: Inner
211+
├── cond:And
212+
│ ├── Eq
213+
│ │ ├── #0
214+
│ │ └── #2
215+
│ ├── Eq
216+
│ │ ├── #0
217+
│ │ └── #3
218+
│ └── true
219+
├── LogicalScan { table: t1 }
220+
└── LogicalScan { table: t2 }
221+
PhysicalProjection { exprs: [ #0, #1, #2, #3 ] }
222+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #0 ], right_keys: [ #0, #1 ] }
223+
├── PhysicalScan { table: t1 }
224+
└── PhysicalScan { table: t2 }
225+
*/
226+
227+
-- Test SimplifyJoinCondRule (skip true filter for and)
228+
select * from t1 inner join t2 on t1v1 = t2v1 or t1v1 = t2v3 and true;
229+
230+
/*
231+
LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
232+
└── LogicalJoin
233+
├── join_type: Inner
234+
├── cond:Or
235+
│ ├── Eq
236+
│ │ ├── #0
237+
│ │ └── #2
238+
│ └── And
239+
│ ├── Eq
240+
│ │ ├── #0
241+
│ │ └── #3
242+
│ └── true
243+
├── LogicalScan { table: t1 }
244+
└── LogicalScan { table: t2 }
245+
PhysicalProjection { exprs: [ #0, #1, #2, #3 ] }
246+
└── PhysicalNestedLoopJoin
247+
├── join_type: Inner
248+
├── cond:Or
249+
│ ├── Eq
250+
│ │ ├── #0
251+
│ │ └── #2
252+
│ └── Eq
253+
│ ├── #0
254+
│ └── #3
255+
├── PhysicalScan { table: t1 }
256+
└── PhysicalScan { table: t2 }
257+
0 0 0 200
258+
1 1 1 201
259+
2 2 2 202
260+
*/
261+
262+
-- Test SimplifyJoinCondRule, EliminateFilter (replace true filter for or)
263+
select * from t1 inner join t2 on t1v1 = t2v1 or t1v1 = t2v3 or true;
264+
265+
/*
266+
LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
267+
└── LogicalJoin
268+
├── join_type: Inner
269+
├── cond:Or
270+
│ ├── Eq
271+
│ │ ├── #0
272+
│ │ └── #2
273+
│ ├── Eq
274+
│ │ ├── #0
275+
│ │ └── #3
276+
│ └── true
277+
├── LogicalScan { table: t1 }
278+
└── LogicalScan { table: t2 }
279+
PhysicalProjection { exprs: [ #0, #1, #2, #3 ] }
280+
└── PhysicalNestedLoopJoin { join_type: Cross, cond: true }
281+
├── PhysicalScan { table: t1 }
282+
└── PhysicalScan { table: t2 }
283+
0 0 0 200
284+
0 0 1 201
285+
0 0 2 202
286+
1 1 0 200
287+
1 1 1 201
288+
1 1 2 202
289+
2 2 0 200
290+
2 2 1 201
291+
2 2 2 202
292+
*/
293+
294+
-- Test SimplifyJoinCondRule (remove duplicates)
295+
select * from t1 inner join t2 on t1v1 = t2v1 or t1v1 = t2v1 and t1v1 = t2v1;
296+
297+
/*
298+
LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
299+
└── LogicalJoin
300+
├── join_type: Inner
301+
├── cond:Or
302+
│ ├── Eq
303+
│ │ ├── #0
304+
│ │ └── #2
305+
│ └── And
306+
│ ├── Eq
307+
│ │ ├── #0
308+
│ │ └── #2
309+
│ └── Eq
310+
│ ├── #0
311+
│ └── #2
312+
├── LogicalScan { table: t1 }
313+
└── LogicalScan { table: t2 }
314+
PhysicalProjection { exprs: [ #0, #1, #2, #3 ] }
315+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
316+
├── PhysicalScan { table: t1 }
317+
└── PhysicalScan { table: t2 }
318+
0 0 0 200
319+
1 1 1 201
320+
2 2 2 202
321+
*/
322+

optd-sqlplannertest/tests/filter.yml

+30
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,36 @@
4444
- sql: |
4545
select * from t1, t2 where t1v1 = t2v1 or t1v1 = t2v1 and t1v1 = t2v1;
4646
desc: Test SimplifyFilterRule (remove duplicates)
47+
tasks:
48+
- explain:logical_optd,physical_optd
49+
- execute
50+
- sql: |
51+
select * from t1 inner join t2 on t1v1 = t2v1 and false;
52+
desc: Test SimplifyJoinCondRule and EliminateJoinRule (false filter to empty relation)
53+
tasks:
54+
- explain:logical_optd,physical_optd
55+
- execute
56+
- sql: |
57+
select * from t1 inner join t2 on t1v1 = t2v1 and t1v1 = t2v3 and true;
58+
desc: Test SimplifyJoinCondRule (skip true filter for and)
59+
tasks:
60+
- explain:logical_optd,physical_optd
61+
- execute
62+
- sql: |
63+
select * from t1 inner join t2 on t1v1 = t2v1 or t1v1 = t2v3 and true;
64+
desc: Test SimplifyJoinCondRule (skip true filter for and)
65+
tasks:
66+
- explain:logical_optd,physical_optd
67+
- execute
68+
- sql: |
69+
select * from t1 inner join t2 on t1v1 = t2v1 or t1v1 = t2v3 or true;
70+
desc: Test SimplifyJoinCondRule, EliminateFilter (replace true filter for or)
71+
tasks:
72+
- explain:logical_optd,physical_optd
73+
- execute
74+
- sql: |
75+
select * from t1 inner join t2 on t1v1 = t2v1 or t1v1 = t2v1 and t1v1 = t2v1;
76+
desc: Test SimplifyJoinCondRule (remove duplicates)
4777
tasks:
4878
- explain:logical_optd,physical_optd
4979
- execute

optd-sqlplannertest/tests/tpch.planner.sql

+16-4
Original file line numberDiff line numberDiff line change
@@ -1937,9 +1937,13 @@ LogicalProjection
19371937
└── LogicalProjection { exprs: [ #1 ] }
19381938
└── LogicalJoin
19391939
├── join_type: Inner
1940-
├── cond:Eq
1941-
│ ├── #2
1942-
│ └── #4
1940+
├── cond:And
1941+
│ ├── Eq
1942+
│ │ ├── #2
1943+
│ │ └── #4
1944+
│ └── Lt
1945+
│ ├── Cast { cast_to: Decimal128(30, 15), expr: #0 }
1946+
│ └── #3
19431947
├── LogicalProjection { exprs: [ #1, #2, #3 ] }
19441948
│ └── LogicalJoin
19451949
│ ├── join_type: Inner
@@ -1986,7 +1990,15 @@ PhysicalProjection
19861990
│ └── [ #0 ]
19871991
├── groups: []
19881992
└── PhysicalProjection { exprs: [ #1 ] }
1989-
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #1 ] }
1993+
└── PhysicalNestedLoopJoin
1994+
├── join_type: Inner
1995+
├── cond:And
1996+
│ ├── Eq
1997+
│ │ ├── #2
1998+
│ │ └── #0
1999+
│ └── Lt
2000+
│ ├── Cast { cast_to: Decimal128(30, 15), expr: #0 }
2001+
│ └── #3
19902002
├── PhysicalProjection { exprs: [ #1, #2, #3 ] }
19912003
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
19922004
│ ├── PhysicalProjection { exprs: [ #1, #4, #5 ] }

0 commit comments

Comments
 (0)