Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit b3288bc

Browse files
committed
tune plan space budget
Signed-off-by: Alex Chi Z <[email protected]>
1 parent 3140324 commit b3288bc

File tree

4 files changed

+63
-16
lines changed

4 files changed

+63
-16
lines changed

optd-core/src/cascades/optimizer.rs

+41-8
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ pub type RuleId = usize;
2727

2828
#[derive(Default, Clone, Debug)]
2929
pub struct OptimizerContext {
30-
pub budget_used: bool,
30+
pub budget_used_logical: bool,
31+
pub budget_used_all: bool,
3132
pub rules_applied: usize,
3233
}
3334

@@ -42,13 +43,20 @@ pub struct OptimizerProperties {
4243
pub disable_pruning: bool,
4344
}
4445

46+
#[derive(Debug, Default)]
47+
pub struct CascadesStats {
48+
pub rule_match_count: HashMap<usize, usize>,
49+
pub rule_total_bindings: HashMap<usize, usize>,
50+
}
51+
4552
pub struct CascadesOptimizer<T: NodeType, M: Memo<T> = NaiveMemo<T>> {
4653
memo: M,
4754
pub(super) tasks: VecDeque<Box<dyn Task<T, M>>>,
4855
explored_group: HashSet<GroupId>,
4956
explored_expr: HashSet<ExprId>,
5057
fired_rules: HashMap<ExprId, HashSet<RuleId>>,
5158
rules: Arc<[Arc<dyn Rule<T, Self>>]>,
59+
pub stats: CascadesStats,
5260
disabled_rules: HashSet<usize>,
5361
cost: Arc<dyn CostModel<T, M>>,
5462
property_builders: Arc<[Box<dyn LogicalPropertyBuilderAny<T>>]>,
@@ -123,6 +131,7 @@ impl<T: NodeType> CascadesOptimizer<T, NaiveMemo<T>> {
123131
property_builders,
124132
prop,
125133
disabled_rules: HashSet::new(),
134+
stats: CascadesStats::default(),
126135
}
127136
}
128137

@@ -250,39 +259,63 @@ impl<T: NodeType, M: Memo<T>> CascadesOptimizer<T, M> {
250259
self.tasks
251260
.push_back(Box::new(OptimizeGroupTask::new(group_id, None)));
252261
// get the task from the stack
253-
self.ctx.budget_used = false;
262+
self.ctx.budget_used_logical = false;
263+
self.ctx.budget_used_all = false;
254264
let plan_space_begin = self.memo.estimated_plan_space();
255265
let mut iter = 0;
256266
while let Some(task) = self.tasks.pop_back() {
257267
let new_tasks = task.execute(self)?;
258268
self.tasks.extend(new_tasks);
259269
iter += 1;
260-
if !self.ctx.budget_used {
270+
if !self.ctx.budget_used_logical {
261271
let plan_space = self.memo.estimated_plan_space();
262272
if let Some(partial_explore_space) = self.prop.partial_explore_space {
263273
if plan_space - plan_space_begin > partial_explore_space {
264274
println!(
265275
"plan space size budget used, not applying logical rules any more. current plan space: {}",
266276
plan_space
267277
);
268-
self.ctx.budget_used = true;
278+
self.ctx.budget_used_logical = true;
269279
if self.prop.panic_on_budget {
270280
panic!("plan space size budget used");
271281
}
272282
}
273-
} else if let Some(partial_explore_iter) = self.prop.partial_explore_iter {
283+
}
284+
}
285+
if !self.ctx.budget_used_all {
286+
if let Some(partial_explore_iter) = self.prop.partial_explore_iter {
274287
if iter >= partial_explore_iter {
275288
println!(
276-
"plan explore iter budget used, not applying logical rules any more. current plan space: {}",
277-
plan_space
289+
"plan explore iter budget used, not applying physical/logical rules any more if there's no winner. current iter: {}",
290+
iter
278291
);
279-
self.ctx.budget_used = true;
292+
self.ctx.budget_used_all = true;
280293
if self.prop.panic_on_budget {
281294
panic!("plan space size budget used");
282295
}
283296
}
284297
}
285298
}
299+
if iter > 100000 && iter % 10000 == 0 {
300+
println!("iter={}", iter);
301+
println!("plan_space={}", self.memo.estimated_plan_space());
302+
for (id, rule) in self.rules.iter().enumerate() {
303+
println!(
304+
"{}: matched={}, bindings={}",
305+
rule.name(),
306+
self.stats
307+
.rule_match_count
308+
.get(&id)
309+
.copied()
310+
.unwrap_or_default(),
311+
self.stats
312+
.rule_total_bindings
313+
.get(&id)
314+
.copied()
315+
.unwrap_or_default()
316+
);
317+
}
318+
}
286319
}
287320
Ok(())
288321
}

optd-core/src/cascades/tasks/apply_rule.rs

+4
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,11 @@ impl<T: NodeType, M: Memo<T>> Task<T, M> for ApplyRuleTask {
177177
let group_id = optimizer.get_group_id(self.expr_id);
178178
let mut tasks = vec![];
179179
let binding_exprs = match_and_pick_expr(rule.matcher(), self.expr_id, optimizer);
180+
if !binding_exprs.is_empty() {
181+
*optimizer.stats.rule_match_count.entry(self.rule_id).or_default() += 1;
182+
}
180183
for binding in binding_exprs {
184+
*optimizer.stats.rule_total_bindings.entry(self.rule_id).or_default() += 1;
181185
trace!(event = "before_apply_rule", task = "apply_rule", input_binding=%binding);
182186
let applied = rule.apply(optimizer, binding);
183187

optd-core/src/cascades/tasks/optimize_expression.rs

+17-6
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ fn top_matches<T: NodeType>(matcher: &RuleMatcher<T>, match_typ: T) -> bool {
4242
impl<T: NodeType, M: Memo<T>> Task<T, M> for OptimizeExpressionTask {
4343
fn execute(&self, optimizer: &mut CascadesOptimizer<T, M>) -> Result<Vec<Box<dyn Task<T, M>>>> {
4444
let expr = optimizer.get_expr_memoed(self.expr_id);
45+
let group_id = optimizer.get_group_id(self.expr_id);
4546
trace!(event = "task_begin", task = "optimize_expr", expr_id = %self.expr_id, expr = %expr);
4647
let mut tasks = vec![];
4748
for (rule_id, rule) in optimizer.rules().iter().enumerate() {
@@ -53,17 +54,27 @@ impl<T: NodeType, M: Memo<T>> Task<T, M> for OptimizeExpressionTask {
5354
continue;
5455
}
5556
// Skip transformation rules when budget is used
56-
if optimizer.ctx.budget_used && !rule.is_impl_rule() {
57+
if (optimizer.ctx.budget_used_logical || optimizer.ctx.budget_used_all)
58+
&& !rule.is_impl_rule()
59+
{
5760
continue;
5861
}
62+
if optimizer.ctx.budget_used_all
63+
&& optimizer.get_group_info(group_id).winner.has_full_winner()
64+
{
65+
break;
66+
}
5967
if top_matches(rule.matcher(), expr.typ.clone()) {
60-
tasks.push(
61-
Box::new(ApplyRuleTask::new(rule_id, self.expr_id, self.exploring, self.upper_bound))
62-
as Box<dyn Task<T, M>>,
63-
);
68+
tasks.push(Box::new(ApplyRuleTask::new(
69+
rule_id,
70+
self.expr_id,
71+
self.exploring,
72+
self.upper_bound,
73+
)) as Box<dyn Task<T, M>>);
6474
for &input_group_id in &expr.children {
6575
tasks.push(
66-
Box::new(ExploreGroupTask::new(input_group_id, self.upper_bound)) as Box<dyn Task<T, M>>
76+
Box::new(ExploreGroupTask::new(input_group_id, self.upper_bound))
77+
as Box<dyn Task<T, M>>,
6778
);
6879
}
6980
}

optd-datafusion-repr/src/lib.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,6 @@ impl DatafusionOptimizer {
102102
rule_wrappers.push(rule);
103103
}
104104
rule_wrappers.push(Arc::new(rules::FilterProjectTransposeRule::new()));
105-
rule_wrappers.push(Arc::new(rules::FilterCrossJoinTransposeRule::new()));
106105
rule_wrappers.push(Arc::new(rules::FilterInnerJoinTransposeRule::new()));
107106
rule_wrappers.push(Arc::new(rules::FilterSortTransposeRule::new()));
108107
rule_wrappers.push(Arc::new(rules::FilterAggTransposeRule::new()));
@@ -150,7 +149,7 @@ impl DatafusionOptimizer {
150149
OptimizerProperties {
151150
panic_on_budget: false,
152151
partial_explore_iter: Some(1 << 20),
153-
partial_explore_space: Some(1 << 10),
152+
partial_explore_space: None, // remove this in the future
154153
disable_pruning: false,
155154
},
156155
),

0 commit comments

Comments
 (0)