Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 5360b89

Browse files
committed
fix(df-repr): join assoc rule expr, rm exploding rules (#223)
Signed-off-by: Alex Chi <[email protected]>
1 parent a516e0d commit 5360b89

13 files changed

+338
-341
lines changed

optd-datafusion-repr/src/lib.rs

+1-6
Original file line numberDiff line numberDiff line change
@@ -121,15 +121,9 @@ impl DatafusionOptimizer {
121121
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
122122
rules::JoinCommuteRule::new(),
123123
)));
124-
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
125-
rules::InnerCrossJoinRule::new(),
126-
)));
127124
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
128125
rules::JoinAssocRule::new(),
129126
)));
130-
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
131-
rules::JoinAbsorbFilterRule::new(),
132-
)));
133127
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
134128
rules::ProjectionPullUpJoin::new(),
135129
)));
@@ -186,6 +180,7 @@ impl DatafusionOptimizer {
186180
panic_on_budget: false,
187181
partial_explore_iter: Some(1 << 20),
188182
partial_explore_space: Some(1 << 10),
183+
disable_pruning: false,
189184
},
190185
),
191186
heuristic_optimizer: HeuristicsOptimizer::new_with_rules(

optd-datafusion-repr/src/rules/joins.rs

+12-45
Original file line numberDiff line numberDiff line change
@@ -8,52 +8,11 @@ use super::macros::{define_impl_rule, define_rule};
88
use crate::plan_nodes::{
99
ArcDfPlanNode, BinOpPred, BinOpType, ColumnRefPred, ConstantPred, ConstantType, DfNodeType,
1010
DfPredType, DfReprPlanNode, DfReprPredNode, JoinType, ListPred, LogOpType,
11-
LogicalEmptyRelation, LogicalFilter, LogicalJoin, LogicalProjection, PhysicalHashJoin, PredExt,
11+
LogicalEmptyRelation, LogicalJoin, LogicalProjection, PhysicalHashJoin, PredExt,
1212
};
1313
use crate::properties::schema::Schema;
1414
use crate::OptimizerExt;
1515

16-
// A cross join B -> A inner join B
17-
define_rule!(
18-
InnerCrossJoinRule,
19-
apply_inner_cross_join,
20-
(Join(JoinType::Cross), left, right)
21-
);
22-
23-
fn apply_inner_cross_join(
24-
_: &impl Optimizer<DfNodeType>,
25-
binding: ArcDfPlanNode,
26-
) -> Vec<PlanNodeOrGroup<DfNodeType>> {
27-
let join = LogicalJoin::from_plan_node(binding).unwrap();
28-
let node = LogicalJoin::new_unchecked(join.left(), join.right(), join.cond(), JoinType::Inner);
29-
vec![node.into_plan_node().into()]
30-
}
31-
32-
// Filter (A inner join B on true) cond -> A inner join B on cond
33-
define_rule!(
34-
JoinAbsorbFilterRule,
35-
apply_join_absorb_filter,
36-
(Filter, (Join(JoinType::Inner), left, right))
37-
);
38-
39-
fn apply_join_absorb_filter(
40-
_: &impl Optimizer<DfNodeType>,
41-
binding: ArcDfPlanNode,
42-
) -> Vec<PlanNodeOrGroup<DfNodeType>> {
43-
let filter = LogicalFilter::from_plan_node(binding).unwrap();
44-
let join = LogicalJoin::from_plan_node(filter.child().unwrap_plan_node()).unwrap();
45-
let join_cond = join.cond();
46-
let filter_cond = filter.cond();
47-
if let Some(constant) = ConstantPred::from_pred_node(join_cond) {
48-
if constant.value().as_bool() {
49-
let node =
50-
LogicalJoin::new_unchecked(join.left(), join.right(), filter_cond, JoinType::Inner);
51-
return vec![node.into_plan_node().into()];
52-
}
53-
}
54-
vec![]
55-
}
56-
5716
// A join B -> B join A
5817
define_rule!(
5918
JoinCommuteRule,
@@ -112,7 +71,15 @@ fn apply_eliminate_join(
11271
if let DfPredType::Constant(const_type) = cond.typ {
11372
if const_type == ConstantType::Bool {
11473
if let Some(ref data) = cond.data {
115-
if !data.as_bool() {
74+
if data.as_bool() {
75+
let node = LogicalJoin::new_unchecked(
76+
left,
77+
right,
78+
ConstantPred::bool(true).into_pred_node(),
79+
JoinType::Cross,
80+
);
81+
return vec![node.into_plan_node().into()];
82+
} else {
11683
// No need to handle schema here, as all exprs in the same group
11784
// will have the same logical properties
11885
let mut left_fields = optimizer.get_schema_of(left.clone()).fields;
@@ -146,9 +113,9 @@ fn apply_join_assoc(
146113
let join2 = LogicalJoin::from_plan_node(join1.left().unwrap_plan_node()).unwrap();
147114
let a = join2.left();
148115
let b = join2.right();
149-
let cond2 = join2.cond();
116+
let cond1 = join2.cond();
150117
let a_schema = optimizer.get_schema_of(a.clone());
151-
let cond1 = join1.cond();
118+
let cond2 = join1.cond();
152119

153120
let Some(cond2) = cond2.rewrite_column_refs(&mut |idx| {
154121
if idx < a_schema.len() {

optd-sqlplannertest/tests/basic/cross_product.planner.sql

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ LogicalProjection { exprs: [ #0, #1 ] }
1717
└── LogicalJoin { join_type: Cross, cond: true }
1818
├── LogicalScan { table: t1 }
1919
└── LogicalScan { table: t2 }
20-
PhysicalNestedLoopJoin { join_type: Inner, cond: true }
20+
PhysicalNestedLoopJoin { join_type: Cross, cond: true }
2121
├── PhysicalScan { table: t1 }
2222
└── PhysicalScan { table: t2 }
2323
0 0

optd-sqlplannertest/tests/basic/filter.planner.sql

+3-3
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ PhysicalFilter
9797
│ └── Eq
9898
│ ├── #0
9999
│ └── #3
100-
└── PhysicalNestedLoopJoin { join_type: Inner, cond: true }
100+
└── PhysicalNestedLoopJoin { join_type: Cross, cond: true }
101101
├── PhysicalScan { table: t1 }
102102
└── PhysicalScan { table: t2 }
103103
0 0 0 200
@@ -122,7 +122,7 @@ LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
122122
└── LogicalJoin { join_type: Cross, cond: true }
123123
├── LogicalScan { table: t1 }
124124
└── LogicalScan { table: t2 }
125-
PhysicalNestedLoopJoin { join_type: Inner, cond: true }
125+
PhysicalNestedLoopJoin { join_type: Cross, cond: true }
126126
├── PhysicalScan { table: t1 }
127127
└── PhysicalScan { table: t2 }
128128
0 0 0 200
@@ -254,7 +254,7 @@ LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
254254
│ └── true
255255
├── LogicalScan { table: t1 }
256256
└── LogicalScan { table: t2 }
257-
PhysicalNestedLoopJoin { join_type: Inner, cond: true }
257+
PhysicalNestedLoopJoin { join_type: Cross, cond: true }
258258
├── PhysicalScan { table: t1 }
259259
└── PhysicalScan { table: t2 }
260260
0 0 0 200

optd-sqlplannertest/tests/joins/join_enumerate.planner.sql

+20-10
Original file line numberDiff line numberDiff line change
@@ -24,44 +24,54 @@ select * from t2, t1 where t1v1 = t2v1;
2424
2 202 2 2
2525
*/
2626

27-
-- Test whether the optimizer enumerates all 3-join orders.
27+
-- Test whether the optimizer enumerates all 3-join orders. (It should)
28+
select * from t2, t1, t3 where t1v1 = t2v1 and t1v1 = t3v2;
29+
30+
/*
31+
(Join t2 (Join t1 t3))
32+
(Join t2 (Join t3 t1))
33+
(Join t3 (Join t1 t2))
34+
(Join t3 (Join t2 t1))
35+
(Join (Join t1 t2) t3)
36+
(Join (Join t1 t3) t2)
37+
(Join (Join t2 t1) t3)
38+
(Join (Join t3 t1) t2)
39+
40+
0 200 0 0 0 300
41+
1 201 1 1 1 301
42+
2 202 2 2 2 302
43+
*/
44+
45+
-- Test whether the optimizer enumerates all 3-join orders. (It don't currently)
2846
select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2;
2947

3048
/*
31-
(Join t1 (Join t2 t3))
32-
(Join t1 (Join t3 t2))
3349
(Join t2 (Join t1 t3))
3450
(Join t2 (Join t3 t1))
3551
(Join t3 (Join t1 t2))
3652
(Join t3 (Join t2 t1))
3753
(Join (Join t1 t2) t3)
3854
(Join (Join t1 t3) t2)
3955
(Join (Join t2 t1) t3)
40-
(Join (Join t2 t3) t1)
4156
(Join (Join t3 t1) t2)
42-
(Join (Join t3 t2) t1)
4357
4458
0 200 0 0 0 300
4559
1 201 1 1 1 301
4660
2 202 2 2 2 302
4761
*/
4862

49-
-- Test whether the optimizer enumerates all 3-join orders.
63+
-- Test whether the optimizer enumerates all 3-join orders. (It don't currently)
5064
select * from t1, t2, t3 where t1v1 = t2v1 and t1v2 = t3v2;
5165

5266
/*
53-
(Join t1 (Join t2 t3))
54-
(Join t1 (Join t3 t2))
5567
(Join t2 (Join t1 t3))
5668
(Join t2 (Join t3 t1))
5769
(Join t3 (Join t1 t2))
5870
(Join t3 (Join t2 t1))
5971
(Join (Join t1 t2) t3)
6072
(Join (Join t1 t3) t2)
6173
(Join (Join t2 t1) t3)
62-
(Join (Join t2 t3) t1)
6374
(Join (Join t3 t1) t2)
64-
(Join (Join t3 t2) t1)
6575
6676
0 0 0 200 0 300
6777
1 1 1 201 1 301

optd-sqlplannertest/tests/joins/join_enumerate.yml

+12-5
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,24 @@
1111
select * from t2, t1 where t1v1 = t2v1;
1212
desc: Test whether the optimizer enumerates all 2-join orders.
1313
tasks:
14-
- explain:logical_join_orders
14+
# Pruning does not actually matter here because join order is a logical property, but we are now missing join orders with t1 as the outer table.
15+
- explain[disable_pruning]:logical_join_orders
16+
- execute
17+
- sql: |
18+
select * from t2, t1, t3 where t1v1 = t2v1 and t1v1 = t3v2;
19+
desc: Test whether the optimizer enumerates all 3-join orders. (It should)
20+
tasks:
21+
- explain[disable_pruning]:logical_join_orders
1522
- execute
1623
- sql: |
1724
select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2;
18-
desc: Test whether the optimizer enumerates all 3-join orders.
25+
desc: Test whether the optimizer enumerates all 3-join orders. (It don't currently)
1926
tasks:
20-
- explain:logical_join_orders
27+
- explain[disable_pruning]:logical_join_orders
2128
- execute
2229
- sql: |
2330
select * from t1, t2, t3 where t1v1 = t2v1 and t1v2 = t3v2;
24-
desc: Test whether the optimizer enumerates all 3-join orders.
31+
desc: Test whether the optimizer enumerates all 3-join orders. (It don't currently)
2532
tasks:
26-
- explain:logical_join_orders
33+
- explain[disable_pruning]:logical_join_orders
2734
- execute
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
-- (no id or description)
2+
create table t1(a int, b int);
3+
create table t2(c int, d int);
4+
create table t3(e int, f int);
5+
create table t4(g int, h int);
6+
7+
/*
8+
9+
*/
10+
11+
-- test 3-way join
12+
select * from t1, t2, t3 where a = c AND d = e;
13+
14+
/*
15+
LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5 ] }
16+
└── LogicalFilter
17+
├── cond:And
18+
│ ├── Eq
19+
│ │ ├── #0
20+
│ │ └── #2
21+
│ └── Eq
22+
│ ├── #3
23+
│ └── #4
24+
└── LogicalJoin { join_type: Cross, cond: true }
25+
├── LogicalJoin { join_type: Cross, cond: true }
26+
│ ├── LogicalScan { table: t1 }
27+
│ └── LogicalScan { table: t2 }
28+
└── LogicalScan { table: t3 }
29+
PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
30+
├── PhysicalScan { table: t1 }
31+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] }
32+
├── PhysicalScan { table: t2 }
33+
└── PhysicalScan { table: t3 }
34+
*/
35+
36+
-- test 3-way join
37+
select * from t1, t2, t3 where a = c AND b = e;
38+
39+
/*
40+
LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5 ] }
41+
└── LogicalFilter
42+
├── cond:And
43+
│ ├── Eq
44+
│ │ ├── #0
45+
│ │ └── #2
46+
│ └── Eq
47+
│ ├── #1
48+
│ └── #4
49+
└── LogicalJoin { join_type: Cross, cond: true }
50+
├── LogicalJoin { join_type: Cross, cond: true }
51+
│ ├── LogicalScan { table: t1 }
52+
│ └── LogicalScan { table: t2 }
53+
└── LogicalScan { table: t3 }
54+
PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] }
55+
├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
56+
│ ├── PhysicalScan { table: t1 }
57+
│ └── PhysicalScan { table: t2 }
58+
└── PhysicalScan { table: t3 }
59+
*/
60+
61+
-- test 4-way join
62+
select * from t1, t2, t3, t4 where a = c AND b = e AND f = g;
63+
64+
/*
65+
LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7 ] }
66+
└── LogicalFilter
67+
├── cond:And
68+
│ ├── Eq
69+
│ │ ├── #0
70+
│ │ └── #2
71+
│ ├── Eq
72+
│ │ ├── #1
73+
│ │ └── #4
74+
│ └── Eq
75+
│ ├── #5
76+
│ └── #6
77+
└── LogicalJoin { join_type: Cross, cond: true }
78+
├── LogicalJoin { join_type: Cross, cond: true }
79+
│ ├── LogicalJoin { join_type: Cross, cond: true }
80+
│ │ ├── LogicalScan { table: t1 }
81+
│ │ └── LogicalScan { table: t2 }
82+
│ └── LogicalScan { table: t3 }
83+
└── LogicalScan { table: t4 }
84+
PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] }
85+
├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
86+
│ ├── PhysicalScan { table: t1 }
87+
│ └── PhysicalScan { table: t2 }
88+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] }
89+
├── PhysicalScan { table: t3 }
90+
└── PhysicalScan { table: t4 }
91+
*/
92+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
- sql: |
2+
create table t1(a int, b int);
3+
create table t2(c int, d int);
4+
create table t3(e int, f int);
5+
create table t4(g int, h int);
6+
tasks:
7+
- execute
8+
- sql: |
9+
select * from t1, t2, t3 where a = c AND d = e;
10+
desc: test 3-way join
11+
tasks:
12+
- explain:logical_optd,physical_optd
13+
- sql: |
14+
select * from t1, t2, t3 where a = c AND b = e;
15+
desc: test 3-way join
16+
tasks:
17+
- explain:logical_optd,physical_optd
18+
- sql: |
19+
select * from t1, t2, t3, t4 where a = c AND b = e AND f = g;
20+
desc: test 4-way join
21+
tasks:
22+
- explain:logical_optd,physical_optd

optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql

+16-16
Original file line numberDiff line numberDiff line change
@@ -135,27 +135,27 @@ LogicalProjection { exprs: [ #0, #1 ] }
135135
└── LogicalJoin { join_type: Cross, cond: true }
136136
├── LogicalScan { table: t2 }
137137
└── LogicalScan { table: t3 }
138-
PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=9023,io=4000}, stat: {row_cnt=1} }
139-
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=9020,io=4000}, stat: {row_cnt=1} }
138+
PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=9021,io=4000}, stat: {row_cnt=1} }
139+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=9018,io=4000}, stat: {row_cnt=1} }
140140
├── PhysicalAgg
141141
│ ├── aggrs:Agg(Sum)
142142
│ │ └── [ Cast { cast_to: Int64, child: #2 } ]
143143
│ ├── groups: [ #1 ]
144-
│ ├── cost: {compute=8018,io=3000}
144+
│ ├── cost: {compute=8016,io=3000}
145145
│ ├── stat: {row_cnt=1}
146-
│ └── PhysicalProjection { exprs: [ #2, #0, #1, #3, #4 ], cost: {compute=8010,io=3000}, stat: {row_cnt=1} }
147-
── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ], cost: {compute=8004,io=3000}, stat: {row_cnt=1} }
148-
── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=7002,io=2000}, stat: {row_cnt=1} }
149-
├── PhysicalFilter
150-
│ ├── cond:Gt
151-
│ │ ├── #0
152-
│ │ └── 100(i64)
153-
│ ├── cost: {compute=3000,io=1000}
154-
│ ├── stat: {row_cnt=1}
155-
│ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
156-
└── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
157-
└── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
158-
└── PhysicalScan { table: t3, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
146+
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ], cost: {compute=8008,io=3000}, stat: {row_cnt=1} }
147+
── PhysicalProjection { exprs: [ #2, #0, #1 ], cost: {compute=7006,io=2000}, stat: {row_cnt=1} }
148+
── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=7002,io=2000}, stat: {row_cnt=1} }
149+
├── PhysicalFilter
150+
│ ├── cond:Gt
151+
│ │ ├── #0
152+
│ │ └── 100(i64)
153+
│ ├── cost: {compute=3000,io=1000}
154+
│ ├── stat: {row_cnt=1}
155+
│ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
156+
└── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
157+
└── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
158+
│ └── PhysicalScan { table: t3, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
159159
└── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
160160
*/
161161

0 commit comments

Comments
 (0)