Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit d340bbb

Browse files
authored
feat(df-repr): add project elimination rule, deprecate join order enumeration (#192)
Also removed a major perf issue on enumerating join orders. All queries run super fast with this functionality disabled. We need to find a more efficient way to enumerate join orders. --------- Signed-off-by: Alex Chi <[email protected]>
1 parent ce8e1ad commit d340bbb

23 files changed

+788
-693
lines changed

optd-datafusion-bridge/src/lib.rs

+37-32
Original file line numberDiff line numberDiff line change
@@ -256,40 +256,45 @@ impl OptdQueryPlanner {
256256
.unwrap()
257257
.explain_to_string(if verbose { Some(&meta) } else { None }),
258258
));
259-
let join_order = get_join_order(optimized_rel.clone());
260-
explains.push(StringifiedPlan::new(
261-
PlanType::OptimizedPhysicalPlan {
262-
optimizer_name: "optd-join-order".to_string(),
263-
},
264-
if let Some(join_order) = join_order {
265-
join_order.to_string()
266-
} else {
267-
"None".to_string()
268-
},
269-
));
270-
let bindings = optimizer
271-
.optd_cascades_optimizer()
272-
.get_all_group_bindings(group_id, true);
273-
let mut join_orders = BTreeSet::new();
274-
let mut logical_join_orders = BTreeSet::new();
275-
for binding in bindings {
276-
if let Some(join_order) = get_join_order(binding) {
277-
logical_join_orders.insert(join_order.conv_into_logical_join_order());
278-
join_orders.insert(join_order);
259+
260+
const ENABLE_JOIN_ORDER: bool = false;
261+
262+
if ENABLE_JOIN_ORDER {
263+
let join_order = get_join_order(optimized_rel.clone());
264+
explains.push(StringifiedPlan::new(
265+
PlanType::OptimizedPhysicalPlan {
266+
optimizer_name: "optd-join-order".to_string(),
267+
},
268+
if let Some(join_order) = join_order {
269+
join_order.to_string()
270+
} else {
271+
"None".to_string()
272+
},
273+
));
274+
let bindings = optimizer
275+
.optd_cascades_optimizer()
276+
.get_all_group_bindings(group_id, true);
277+
let mut join_orders = BTreeSet::new();
278+
let mut logical_join_orders = BTreeSet::new();
279+
for binding in bindings {
280+
if let Some(join_order) = get_join_order(binding) {
281+
logical_join_orders.insert(join_order.conv_into_logical_join_order());
282+
join_orders.insert(join_order);
283+
}
279284
}
285+
explains.push(StringifiedPlan::new(
286+
PlanType::OptimizedPhysicalPlan {
287+
optimizer_name: "optd-all-join-orders".to_string(),
288+
},
289+
join_orders.iter().map(|x| x.to_string()).join("\n"),
290+
));
291+
explains.push(StringifiedPlan::new(
292+
PlanType::OptimizedPhysicalPlan {
293+
optimizer_name: "optd-all-logical-join-orders".to_string(),
294+
},
295+
logical_join_orders.iter().map(|x| x.to_string()).join("\n"),
296+
));
280297
}
281-
explains.push(StringifiedPlan::new(
282-
PlanType::OptimizedPhysicalPlan {
283-
optimizer_name: "optd-all-join-orders".to_string(),
284-
},
285-
join_orders.iter().map(|x| x.to_string()).join("\n"),
286-
));
287-
explains.push(StringifiedPlan::new(
288-
PlanType::OptimizedPhysicalPlan {
289-
optimizer_name: "optd-all-logical-join-orders".to_string(),
290-
},
291-
logical_join_orders.iter().map(|x| x.to_string()).join("\n"),
292-
));
293298
}
294299
// println!(
295300
// "{} cost={}",

optd-datafusion-repr/src/lib.rs

+25-16
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@ use properties::{
2323
};
2424
use rules::{
2525
EliminateDuplicatedAggExprRule, EliminateDuplicatedSortExprRule, EliminateFilterRule,
26-
EliminateJoinRule, EliminateLimitRule, FilterAggTransposeRule, FilterCrossJoinTransposeRule,
27-
FilterInnerJoinTransposeRule, FilterMergeRule, FilterProjectTransposeRule,
28-
FilterSortTransposeRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule,
29-
ProjectFilterTransposeRule, ProjectMergeRule, ProjectionPullUpJoin, SimplifyFilterRule,
30-
SimplifyJoinCondRule,
26+
EliminateJoinRule, EliminateLimitRule, EliminateProjectRule, FilterAggTransposeRule,
27+
FilterCrossJoinTransposeRule, FilterInnerJoinTransposeRule, FilterMergeRule,
28+
FilterProjectTransposeRule, FilterSortTransposeRule, HashJoinRule, JoinAssocRule,
29+
JoinCommuteRule, PhysicalConversionRule, ProjectMergeRule, ProjectionPullUpJoin,
30+
SimplifyFilterRule, SimplifyJoinCondRule,
3131
};
3232

3333
pub use optd_core::rel_node::Value;
@@ -46,7 +46,7 @@ mod testing;
4646
// mod expand;
4747

4848
pub struct DatafusionOptimizer {
49-
hueristic_optimizer: HeuristicsOptimizer<OptRelNodeTyp>,
49+
heuristic_optimizer: HeuristicsOptimizer<OptRelNodeTyp>,
5050
cascades_optimizer: CascadesOptimizer<OptRelNodeTyp>,
5151
pub runtime_statistics: RuntimeAdaptionStorage,
5252
enable_adaptive: bool,
@@ -75,7 +75,7 @@ impl DatafusionOptimizer {
7575
}
7676

7777
pub fn optd_hueristic_optimizer(&self) -> &HeuristicsOptimizer<OptRelNodeTyp> {
78-
&self.hueristic_optimizer
78+
&self.heuristic_optimizer
7979
}
8080

8181
pub fn optd_optimizer_mut(&mut self) -> &mut CascadesOptimizer<OptRelNodeTyp> {
@@ -99,6 +99,8 @@ impl DatafusionOptimizer {
9999
Arc::new(DepJoinPastAgg::new()),
100100
Arc::new(ProjectMergeRule::new()),
101101
Arc::new(FilterMergeRule::new()),
102+
// disabled due to logical properties are not implemented in heuristics
103+
// Arc::new(EliminateProjectRule::new()),
102104
]
103105
}
104106

@@ -110,9 +112,10 @@ impl DatafusionOptimizer {
110112
rule_wrappers.push(RuleWrapper::new_cascades(rule));
111113
}
112114
// project transpose rules
113-
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
114-
ProjectFilterTransposeRule::new(),
115-
)));
115+
// only do filter-project one way for now to reduce search space
116+
// rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
117+
// ProjectFilterTransposeRule::new(),
118+
// )));
116119
// add all filter pushdown rules as heuristic rules
117120
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
118121
FilterProjectTransposeRule::new(),
@@ -129,13 +132,19 @@ impl DatafusionOptimizer {
129132
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
130133
FilterAggTransposeRule::new(),
131134
)));
132-
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(HashJoinRule::new()))); // 17
133-
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(JoinCommuteRule::new()))); // 18
135+
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(HashJoinRule::new())));
136+
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(JoinCommuteRule::new())));
134137
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(JoinAssocRule::new())));
135138
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
136139
ProjectionPullUpJoin::new(),
137140
)));
138-
141+
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
142+
EliminateProjectRule::new(),
143+
)));
144+
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(ProjectMergeRule::new())));
145+
rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(
146+
EliminateFilterRule::new(),
147+
)));
139148
rule_wrappers
140149
}
141150

@@ -167,7 +176,7 @@ impl DatafusionOptimizer {
167176
partial_explore_space: Some(1 << 10),
168177
},
169178
),
170-
hueristic_optimizer: HeuristicsOptimizer::new_with_rules(
179+
heuristic_optimizer: HeuristicsOptimizer::new_with_rules(
171180
heuristic_rules,
172181
ApplyOrder::TopDown, // uhh TODO reconsider
173182
property_builders.clone(),
@@ -215,7 +224,7 @@ impl DatafusionOptimizer {
215224
cascades_optimizer: optimizer,
216225
enable_adaptive: true,
217226
enable_heuristic: false,
218-
hueristic_optimizer: HeuristicsOptimizer::new_with_rules(
227+
heuristic_optimizer: HeuristicsOptimizer::new_with_rules(
219228
vec![],
220229
ApplyOrder::BottomUp,
221230
Arc::new([]),
@@ -224,7 +233,7 @@ impl DatafusionOptimizer {
224233
}
225234

226235
pub fn heuristic_optimize(&mut self, root_rel: OptRelNodeRef) -> OptRelNodeRef {
227-
self.hueristic_optimizer
236+
self.heuristic_optimizer
228237
.optimize(root_rel)
229238
.expect("heuristics returns error")
230239
}

optd-datafusion-repr/src/rules.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ pub use physical::PhysicalConversionRule;
2424
pub use project_transpose::{
2525
project_filter_transpose::{FilterProjectTransposeRule, ProjectFilterTransposeRule},
2626
project_join_transpose::ProjectionPullUpJoin,
27-
project_merge::ProjectMergeRule,
27+
project_merge::{EliminateProjectRule, ProjectMergeRule},
2828
};
2929
pub use subquery::{
3030
DepInitialDistinct, DepJoinEliminateAtScan, DepJoinPastAgg, DepJoinPastFilter, DepJoinPastProj,

optd-datafusion-repr/src/rules/project_transpose/project_merge.rs

+36-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@ use std::collections::HashMap;
33
use optd_core::rules::{Rule, RuleMatcher};
44
use optd_core::{optimizer::Optimizer, rel_node::RelNode};
55

6-
use crate::plan_nodes::{ExprList, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode};
6+
use crate::plan_nodes::{
7+
ColumnRefExpr, ExprList, LogicalProjection, OptRelNode, OptRelNodeTyp, PlanNode,
8+
};
9+
use crate::properties::schema::SchemaPropertyBuilder;
710
use crate::rules::macros::define_rule;
811

912
use super::project_transpose_common::ProjectionMapping;
@@ -41,6 +44,38 @@ fn apply_projection_merge(
4144
vec![node.into_rel_node().as_ref().clone()]
4245
}
4346

47+
// Proj child [identical columns] -> eliminate
48+
define_rule!(
49+
EliminateProjectRule,
50+
apply_eliminate_project,
51+
(Projection, child, [expr])
52+
);
53+
54+
fn apply_eliminate_project(
55+
optimizer: &impl Optimizer<OptRelNodeTyp>,
56+
EliminateProjectRulePicks { child, expr }: EliminateProjectRulePicks,
57+
) -> Vec<RelNode<OptRelNodeTyp>> {
58+
let exprs = ExprList::from_rel_node(expr.into()).unwrap();
59+
let child_columns = optimizer
60+
.get_property::<SchemaPropertyBuilder>(child.clone().into(), 0)
61+
.len();
62+
if child_columns != exprs.len() {
63+
return Vec::new();
64+
}
65+
for i in 0..exprs.len() {
66+
let child_expr = exprs.child(i);
67+
if child_expr.typ() == OptRelNodeTyp::ColumnRef {
68+
let child_expr = ColumnRefExpr::from_rel_node(child_expr.into_rel_node()).unwrap();
69+
if child_expr.index() != i {
70+
return Vec::new();
71+
}
72+
} else {
73+
return Vec::new();
74+
}
75+
}
76+
vec![child]
77+
}
78+
4479
#[cfg(test)]
4580
mod tests {
4681
use std::sync::Arc;

optd-sqlplannertest/README.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,11 @@ The `explain` and `execute` task will be run with datafusion's logical optimizer
3232

3333
#### Flags
3434

35-
| Name | Description |
36-
| -------------- | --------------------------------------- |
37-
| use_df_logical | Enable Datafusion's logical optimizer |
38-
| verbose | Display estimated cost in physical plan |
39-
| logical_rules | Only enable these logical rules |
35+
| Name | Description |
36+
| -------------- | ------------------------------------------------------------------ |
37+
| use_df_logical | Enable Datafusion's logical optimizer |
38+
| verbose | Display estimated cost in physical plan |
39+
| logical_rules | Only enable these logical rules (also disable heuristic optimizer) |
4040

4141
Currently we have the following options for the explain task:
4242

optd-sqlplannertest/src/lib.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ impl DatafusionDBMS {
106106
for r in 0..rules.len() {
107107
optimizer.enable_rule(r);
108108
}
109+
guard.as_mut().unwrap().enable_heuristic(true);
109110
} else {
110111
for (rule_id, rule) in rules.as_ref().iter().enumerate() {
111112
if rule.rule.is_impl_rule() {
@@ -127,6 +128,7 @@ impl DatafusionDBMS {
127128
if !rules_to_enable.is_empty() {
128129
bail!("Unknown logical rule: {:?}", rules_to_enable);
129130
}
131+
guard.as_mut().unwrap().enable_heuristic(false);
130132
}
131133
}
132134
let sql = unescape_input(sql)?;
@@ -335,8 +337,11 @@ fn extract_flags(task: &str) -> Result<TestFlags> {
335337
} else if flag == "use_df_logical" {
336338
options.enable_df_logical = true;
337339
} else if flag.starts_with("logical_rules") {
338-
options.enable_logical_rules =
339-
flag.split('+').skip(1).map(|x| x.to_string()).collect();
340+
if let Some((_, flag)) = flag.split_once(':') {
341+
options.enable_logical_rules = flag.split('+').map(|x| x.to_string()).collect();
342+
} else {
343+
bail!("Failed to parse logical_rules flag: {}", flag);
344+
}
340345
} else if flag == "disable_explore_limit" {
341346
options.disable_explore_limit = true;
342347
} else {

optd-sqlplannertest/tests/basic/basic_nodes.planner.sql

+1-2
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@ LogicalLimit { skip: 0(u64), fetch: 1(u64) }
1919
└── LogicalProjection { exprs: [ #0, #1 ] }
2020
└── LogicalScan { table: t1 }
2121
PhysicalLimit { skip: 0(u64), fetch: 1(u64) }
22-
└── PhysicalProjection { exprs: [ #0, #1 ] }
23-
└── PhysicalScan { table: t1 }
22+
└── PhysicalScan { table: t1 }
2423
0 0
2524
0 0
2625
1 1

optd-sqlplannertest/tests/basic/cross_product.planner.sql

+3-4
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,9 @@ LogicalProjection { exprs: [ #0, #1 ] }
1717
└── LogicalJoin { join_type: Cross, cond: true }
1818
├── LogicalScan { table: t1 }
1919
└── LogicalScan { table: t2 }
20-
PhysicalProjection { exprs: [ #0, #1 ] }
21-
└── PhysicalNestedLoopJoin { join_type: Cross, cond: true }
22-
├── PhysicalScan { table: t1 }
23-
└── PhysicalScan { table: t2 }
20+
PhysicalNestedLoopJoin { join_type: Cross, cond: true }
21+
├── PhysicalScan { table: t1 }
22+
└── PhysicalScan { table: t2 }
2423
0 0
2524
0 1
2625
0 2

optd-sqlplannertest/tests/basic/eliminate_duplicated_expr.planner.sql

+6-10
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ select * from t1;
1212
/*
1313
LogicalProjection { exprs: [ #0, #1 ] }
1414
└── LogicalScan { table: t1 }
15-
PhysicalProjection { exprs: [ #0, #1 ] }
16-
└── PhysicalScan { table: t1 }
15+
PhysicalScan { table: t1 }
1716
0 0
1817
1 1
1918
5 2
@@ -45,8 +44,7 @@ PhysicalSort
4544
│ │ └── #0
4645
│ └── SortOrder { order: Asc }
4746
│ └── #1
48-
└── PhysicalProjection { exprs: [ #0, #1 ] }
49-
└── PhysicalScan { table: t1 }
47+
└── PhysicalScan { table: t1 }
5048
0 0
5149
0 2
5250
1 1
@@ -61,9 +59,8 @@ select * from t1 group by v1, v2, v1;
6159
LogicalProjection { exprs: [ #0, #1 ] }
6260
└── LogicalAgg { exprs: [], groups: [ #0, #1, #0 ] }
6361
└── LogicalScan { table: t1 }
64-
PhysicalProjection { exprs: [ #0, #1 ] }
65-
└── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] }
66-
└── PhysicalScan { table: t1 }
62+
PhysicalAgg { aggrs: [], groups: [ #0, #1 ] }
63+
└── PhysicalScan { table: t1 }
6764
0 0
6865
1 1
6966
5 2
@@ -96,9 +93,8 @@ PhysicalSort
9693
│ │ └── #0
9794
│ └── SortOrder { order: Asc }
9895
│ └── #1
99-
└── PhysicalProjection { exprs: [ #0, #1 ] }
100-
└── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] }
101-
└── PhysicalScan { table: t1 }
96+
└── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] }
97+
└── PhysicalScan { table: t1 }
10298
0 0
10399
0 2
104100
1 1

0 commit comments

Comments
 (0)