Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit ef3beae

Browse files
committed
refactor(core): prune based on upper bound (#265)
Signed-off-by: Alex Chi Z <[email protected]>

tune plan space budget

Signed-off-by: Alex Chi Z <[email protected]>
1 parent 5aa9209 commit ef3beae

File tree

8 files changed

+124
-35
lines changed

8 files changed

+124
-35
lines changed

Diff for: optd-core/src/cascades/memo.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ impl<T: NodeType> NaiveMemo<T> {
352352
}
353353

354354
fn verify_integrity(&self) {
355-
if cfg!(debug_assertions) {
355+
if false {
356356
let num_of_exprs = self.expr_id_to_expr_node.len();
357357
assert_eq!(num_of_exprs, self.expr_node_to_expr_id.len());
358358
assert_eq!(num_of_exprs, self.expr_id_to_group_id.len());

Diff for: optd-core/src/cascades/optimizer.rs

+42-9
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ pub type RuleId = usize;
2727

2828
#[derive(Default, Clone, Debug)]
2929
pub struct OptimizerContext {
30-
pub budget_used: bool,
30+
pub budget_used_logical: bool,
31+
pub budget_used_all: bool,
3132
pub rules_applied: usize,
3233
}
3334

@@ -42,13 +43,20 @@ pub struct OptimizerProperties {
4243
pub disable_pruning: bool,
4344
}
4445

46+
#[derive(Debug, Default)]
47+
pub struct CascadesStats {
48+
pub rule_match_count: HashMap<usize, usize>,
49+
pub rule_total_bindings: HashMap<usize, usize>,
50+
}
51+
4552
pub struct CascadesOptimizer<T: NodeType, M: Memo<T> = NaiveMemo<T>> {
4653
memo: M,
4754
pub(super) tasks: VecDeque<Box<dyn Task<T, M>>>,
4855
explored_group: HashSet<GroupId>,
4956
explored_expr: HashSet<ExprId>,
5057
fired_rules: HashMap<ExprId, HashSet<RuleId>>,
5158
rules: Arc<[Arc<dyn Rule<T, Self>>]>,
59+
pub stats: CascadesStats,
5260
disabled_rules: HashSet<usize>,
5361
cost: Arc<dyn CostModel<T, M>>,
5462
property_builders: Arc<[Box<dyn LogicalPropertyBuilderAny<T>>]>,
@@ -123,6 +131,7 @@ impl<T: NodeType> CascadesOptimizer<T, NaiveMemo<T>> {
123131
property_builders,
124132
prop,
125133
disabled_rules: HashSet::new(),
134+
stats: CascadesStats::default(),
126135
}
127136
}
128137

@@ -248,41 +257,65 @@ impl<T: NodeType, M: Memo<T>> CascadesOptimizer<T, M> {
248257
fn fire_optimize_tasks(&mut self, group_id: GroupId) -> Result<()> {
249258
trace!(event = "fire_optimize_tasks", root_group_id = %group_id);
250259
self.tasks
251-
.push_back(Box::new(OptimizeGroupTask::new(group_id)));
260+
.push_back(Box::new(OptimizeGroupTask::new(group_id, None)));
252261
// get the task from the stack
253-
self.ctx.budget_used = false;
262+
self.ctx.budget_used_logical = false;
263+
self.ctx.budget_used_all = false;
254264
let plan_space_begin = self.memo.estimated_plan_space();
255265
let mut iter = 0;
256266
while let Some(task) = self.tasks.pop_back() {
257267
let new_tasks = task.execute(self)?;
258268
self.tasks.extend(new_tasks);
259269
iter += 1;
260-
if !self.ctx.budget_used {
270+
if !self.ctx.budget_used_logical {
261271
let plan_space = self.memo.estimated_plan_space();
262272
if let Some(partial_explore_space) = self.prop.partial_explore_space {
263273
if plan_space - plan_space_begin > partial_explore_space {
264274
println!(
265275
"plan space size budget used, not applying logical rules any more. current plan space: {}",
266276
plan_space
267277
);
268-
self.ctx.budget_used = true;
278+
self.ctx.budget_used_logical = true;
269279
if self.prop.panic_on_budget {
270280
panic!("plan space size budget used");
271281
}
272282
}
273-
} else if let Some(partial_explore_iter) = self.prop.partial_explore_iter {
283+
}
284+
}
285+
if !self.ctx.budget_used_all {
286+
if let Some(partial_explore_iter) = self.prop.partial_explore_iter {
274287
if iter >= partial_explore_iter {
275288
println!(
276-
"plan explore iter budget used, not applying logical rules any more. current plan space: {}",
277-
plan_space
289+
"plan explore iter budget used, not applying physical/logical rules any more if there's no winner. current iter: {}",
290+
iter
278291
);
279-
self.ctx.budget_used = true;
292+
self.ctx.budget_used_all = true;
280293
if self.prop.panic_on_budget {
281294
panic!("plan space size budget used");
282295
}
283296
}
284297
}
285298
}
299+
if iter > 100000 && iter % 10000 == 0 {
300+
println!("iter={}", iter);
301+
println!("plan_space={}", self.memo.estimated_plan_space());
302+
for (id, rule) in self.rules.iter().enumerate() {
303+
println!(
304+
"{}: matched={}, bindings={}",
305+
rule.name(),
306+
self.stats
307+
.rule_match_count
308+
.get(&id)
309+
.copied()
310+
.unwrap_or_default(),
311+
self.stats
312+
.rule_total_bindings
313+
.get(&id)
314+
.copied()
315+
.unwrap_or_default()
316+
);
317+
}
318+
}
286319
}
287320
Ok(())
288321
}

Diff for: optd-core/src/cascades/tasks/apply_rule.rs

+14-2
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,21 @@ pub struct ApplyRuleTask {
2121
rule_id: RuleId,
2222
expr_id: ExprId,
2323
exploring: bool,
24+
upper_bound: Option<f64>,
2425
}
2526

2627
impl ApplyRuleTask {
27-
pub fn new(rule_id: RuleId, expr_id: ExprId, exploring: bool) -> Self {
28+
pub fn new(
29+
rule_id: RuleId,
30+
expr_id: ExprId,
31+
exploring: bool,
32+
upper_bound: Option<f64>,
33+
) -> Self {
2834
Self {
2935
rule_id,
3036
expr_id,
3137
exploring,
38+
upper_bound,
3239
}
3340
}
3441
}
@@ -170,7 +177,11 @@ impl<T: NodeType, M: Memo<T>> Task<T, M> for ApplyRuleTask {
170177
let group_id = optimizer.get_group_id(self.expr_id);
171178
let mut tasks = vec![];
172179
let binding_exprs = match_and_pick_expr(rule.matcher(), self.expr_id, optimizer);
180+
if !binding_exprs.is_empty() {
181+
*optimizer.stats.rule_match_count.entry(self.rule_id).or_default() += 1;
182+
}
173183
for binding in binding_exprs {
184+
*optimizer.stats.rule_total_bindings.entry(self.rule_id).or_default() += 1;
174185
trace!(event = "before_apply_rule", task = "apply_rule", input_binding=%binding);
175186
let applied = rule.apply(optimizer, binding);
176187

@@ -181,13 +192,14 @@ impl<T: NodeType, M: Memo<T>> Task<T, M> for ApplyRuleTask {
181192
let typ = expr.unwrap_typ();
182193
if typ.is_logical() {
183194
tasks.push(
184-
Box::new(OptimizeExpressionTask::new(expr_id, self.exploring))
195+
Box::new(OptimizeExpressionTask::new(expr_id, self.exploring, self.upper_bound))
185196
as Box<dyn Task<T, M>>,
186197
);
187198
} else {
188199
tasks.push(Box::new(OptimizeInputsTask::new(
189200
expr_id,
190201
!optimizer.prop.disable_pruning,
202+
self.upper_bound
191203
)) as Box<dyn Task<T, M>>);
192204
}
193205
optimizer.unmark_expr_explored(expr_id);

Diff for: optd-core/src/cascades/tasks/explore_group.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,15 @@ use crate::nodes::NodeType;
1414

1515
pub struct ExploreGroupTask {
1616
group_id: GroupId,
17+
upper_bound: Option<f64>,
1718
}
1819

1920
impl ExploreGroupTask {
20-
pub fn new(group_id: GroupId) -> Self {
21-
Self { group_id }
21+
pub fn new(group_id: GroupId, upper_bound: Option<f64>) -> Self {
22+
Self {
23+
group_id,
24+
upper_bound,
25+
}
2226
}
2327
}
2428

@@ -36,7 +40,7 @@ impl<T: NodeType, M: Memo<T>> Task<T, M> for ExploreGroupTask {
3640
let typ = optimizer.get_expr_memoed(expr).typ.clone();
3741
if typ.is_logical() {
3842
tasks
39-
.push(Box::new(OptimizeExpressionTask::new(expr, true)) as Box<dyn Task<T, M>>);
43+
.push(Box::new(OptimizeExpressionTask::new(expr, true, self.upper_bound)) as Box<dyn Task<T, M>>);
4044
}
4145
}
4246
optimizer.mark_group_explored(self.group_id);

Diff for: optd-core/src/cascades/tasks/optimize_expression.rs

+24-8
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,16 @@ use crate::rules::RuleMatcher;
1616
pub struct OptimizeExpressionTask {
1717
expr_id: ExprId,
1818
exploring: bool,
19+
upper_bound: Option<f64>,
1920
}
2021

2122
impl OptimizeExpressionTask {
22-
pub fn new(expr_id: ExprId, exploring: bool) -> Self {
23-
Self { expr_id, exploring }
23+
pub fn new(expr_id: ExprId, exploring: bool, upper_bound: Option<f64>) -> Self {
24+
Self {
25+
expr_id,
26+
exploring,
27+
upper_bound,
28+
}
2429
}
2530
}
2631

@@ -37,6 +42,7 @@ fn top_matches<T: NodeType>(matcher: &RuleMatcher<T>, match_typ: T) -> bool {
3742
impl<T: NodeType, M: Memo<T>> Task<T, M> for OptimizeExpressionTask {
3843
fn execute(&self, optimizer: &mut CascadesOptimizer<T, M>) -> Result<Vec<Box<dyn Task<T, M>>>> {
3944
let expr = optimizer.get_expr_memoed(self.expr_id);
45+
let group_id = optimizer.get_group_id(self.expr_id);
4046
trace!(event = "task_begin", task = "optimize_expr", expr_id = %self.expr_id, expr = %expr);
4147
let mut tasks = vec![];
4248
for (rule_id, rule) in optimizer.rules().iter().enumerate() {
@@ -48,17 +54,27 @@ impl<T: NodeType, M: Memo<T>> Task<T, M> for OptimizeExpressionTask {
4854
continue;
4955
}
5056
// Skip transformation rules when budget is used
51-
if optimizer.ctx.budget_used && !rule.is_impl_rule() {
57+
if (optimizer.ctx.budget_used_logical || optimizer.ctx.budget_used_all)
58+
&& !rule.is_impl_rule()
59+
{
5260
continue;
5361
}
62+
if optimizer.ctx.budget_used_all
63+
&& optimizer.get_group_info(group_id).winner.has_full_winner()
64+
{
65+
break;
66+
}
5467
if top_matches(rule.matcher(), expr.typ.clone()) {
55-
tasks.push(
56-
Box::new(ApplyRuleTask::new(rule_id, self.expr_id, self.exploring))
57-
as Box<dyn Task<T, M>>,
58-
);
68+
tasks.push(Box::new(ApplyRuleTask::new(
69+
rule_id,
70+
self.expr_id,
71+
self.exploring,
72+
self.upper_bound,
73+
)) as Box<dyn Task<T, M>>);
5974
for &input_group_id in &expr.children {
6075
tasks.push(
61-
Box::new(ExploreGroupTask::new(input_group_id)) as Box<dyn Task<T, M>>
76+
Box::new(ExploreGroupTask::new(input_group_id, self.upper_bound))
77+
as Box<dyn Task<T, M>>,
6278
);
6379
}
6480
}

Diff for: optd-core/src/cascades/tasks/optimize_group.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,15 @@ use crate::nodes::NodeType;
1515

1616
pub struct OptimizeGroupTask {
1717
group_id: GroupId,
18+
upper_bound: Option<f64>,
1819
}
1920

2021
impl OptimizeGroupTask {
21-
pub fn new(group_id: GroupId) -> Self {
22-
Self { group_id }
22+
pub fn new(group_id: GroupId, upper_bound: Option<f64>) -> Self {
23+
Self {
24+
group_id,
25+
upper_bound,
26+
}
2327
}
2428
}
2529

@@ -37,7 +41,7 @@ impl<T: NodeType, M: Memo<T>> Task<T, M> for OptimizeGroupTask {
3741
for &expr in &exprs {
3842
let typ = optimizer.get_expr_memoed(expr).typ.clone();
3943
if typ.is_logical() {
40-
tasks.push(Box::new(OptimizeExpressionTask::new(expr, false)) as Box<dyn Task<T, M>>);
44+
tasks.push(Box::new(OptimizeExpressionTask::new(expr, false, self.upper_bound)) as Box<dyn Task<T, M>>);
4145
}
4246
}
4347
for &expr in &exprs {
@@ -46,6 +50,7 @@ impl<T: NodeType, M: Memo<T>> Task<T, M> for OptimizeGroupTask {
4650
tasks.push(Box::new(OptimizeInputsTask::new(
4751
expr,
4852
!optimizer.prop.disable_pruning,
53+
self.upper_bound
4954
)) as Box<dyn Task<T, M>>);
5055
}
5156
}

0 commit comments

Comments
 (0)