Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 4a8da7e

Browse files
authored
Feat: two stage heuristic (#139)
This pr adds infer properties for heuristic optimizer in optd core. It splits the two passes for heuristic and cascades. Now heuristic rules can be used on the first heuristic optimizer as registering them as default_heuristic_rule and these rules are able to traverse the whole plan tree as there are no placeholder in heuristic stage. The properties are not shared among heuristic pass and cascades pass though. It is recalculated in cascades. The pr is for enabling more complicated heuristic rules and one can register their rules as either heuristic or cascade to test its usability and performance. The logic for the rule creation is the same as previous heuristic rule wrapper: heuristic rule either return 0 or 1 node. Returning 1 node means that the rule is successfully applied. Returning 0 node means it fails some constraints for the rule and heuristic optimizer will use the original node. --------- Signed-off-by: AveryQi115 <[email protected]>
1 parent 91a7879 commit 4a8da7e

File tree

4 files changed

+145
-38
lines changed

4 files changed

+145
-38
lines changed

optd-core/src/heuristics/optimizer.rs

+62-8
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
use std::{collections::HashMap, sync::Arc};
22

33
use anyhow::Result;
4+
use itertools::Itertools;
5+
use std::any::Any;
46

57
use crate::{
68
optimizer::Optimizer,
9+
property::PropertyBuilderAny,
710
rel_node::{RelNode, RelNodeRef, RelNodeTyp},
811
rules::{Rule, RuleMatcher},
912
};
@@ -16,6 +19,8 @@ pub enum ApplyOrder {
1619
pub struct HeuristicsOptimizer<T: RelNodeTyp> {
1720
rules: Arc<[Arc<dyn Rule<T, Self>>]>,
1821
apply_order: ApplyOrder,
22+
property_builders: Arc<[Box<dyn PropertyBuilderAny<T>>]>,
23+
properties: HashMap<RelNodeRef<T>, Arc<[Box<dyn Any + Send + Sync + 'static>]>>,
1924
}
2025

2126
fn match_node<T: RelNodeTyp>(
@@ -102,10 +107,16 @@ fn match_and_pick<T: RelNodeTyp>(
102107
}
103108

104109
impl<T: RelNodeTyp> HeuristicsOptimizer<T> {
105-
pub fn new_with_rules(rules: Vec<Arc<dyn Rule<T, Self>>>, apply_order: ApplyOrder) -> Self {
110+
pub fn new_with_rules(
111+
rules: Vec<Arc<dyn Rule<T, Self>>>,
112+
apply_order: ApplyOrder,
113+
property_builders: Arc<[Box<dyn PropertyBuilderAny<T>>]>,
114+
) -> Self {
106115
Self {
107116
rules: rules.into(),
108117
apply_order,
118+
property_builders,
119+
properties: HashMap::new(),
109120
}
110121
}
111122

@@ -122,8 +133,10 @@ impl<T: RelNodeTyp> HeuristicsOptimizer<T> {
122133
let matcher = rule.matcher();
123134
if let Some(picks) = match_and_pick(matcher, root_rel.clone()) {
124135
let mut results = rule.apply(self, picks);
125-
assert_eq!(results.len(), 1);
126-
root_rel = results.remove(0).into();
136+
assert!(results.len() <= 1);
137+
if !results.is_empty() {
138+
root_rel = results.remove(0).into();
139+
}
127140
}
128141
}
129142
Ok(root_rel)
@@ -141,20 +154,61 @@ impl<T: RelNodeTyp> HeuristicsOptimizer<T> {
141154
}
142155
.into(),
143156
)?;
157+
self.infer_properties(root_rel.clone());
158+
self.properties.insert(
159+
node.clone(),
160+
self.properties.get(&root_rel.clone()).unwrap().clone(),
161+
);
144162
Ok(node)
145163
}
146164
ApplyOrder::TopDown => {
165+
self.infer_properties(root_rel.clone());
147166
let root_rel = self.apply_rules(root_rel)?;
148167
let optimized_children = self.optimize_inputs(&root_rel.children)?;
149-
Ok(RelNode {
168+
let node: Arc<RelNode<T>> = RelNode {
150169
typ: root_rel.typ.clone(),
151170
children: optimized_children,
152171
data: root_rel.data.clone(),
153172
}
154-
.into())
173+
.into();
174+
self.infer_properties(root_rel.clone());
175+
self.properties.insert(
176+
node.clone(),
177+
self.properties.get(&root_rel.clone()).unwrap().clone(),
178+
);
179+
Ok(node)
155180
}
156181
}
157182
}
183+
184+
fn infer_properties(&mut self, root_rel: RelNodeRef<T>) {
185+
if self.properties.contains_key(&root_rel) {
186+
return;
187+
}
188+
189+
let child_properties = root_rel
190+
.children
191+
.iter()
192+
.map(|child| {
193+
self.infer_properties((*child).clone());
194+
self.properties.get(child).unwrap().clone()
195+
})
196+
.collect_vec();
197+
let mut props = Vec::with_capacity(self.property_builders.len());
198+
for (id, builder) in self.property_builders.iter().enumerate() {
199+
let child_properties = child_properties
200+
.iter()
201+
.map(|x| x[id].as_ref() as &dyn std::any::Any)
202+
.collect::<Vec<_>>();
203+
let prop = builder.derive_any(
204+
root_rel.typ.clone(),
205+
root_rel.data.clone(),
206+
child_properties.as_slice(),
207+
);
208+
props.push(prop);
209+
}
210+
self.properties.insert(root_rel.clone(), props.into());
211+
}
158212
}
159213

160214
impl<T: RelNodeTyp> Optimizer<T> for HeuristicsOptimizer<T> {
@@ -167,8 +221,8 @@ impl<T: RelNodeTyp> Optimizer<T> for HeuristicsOptimizer<T> {
167221
root_rel: RelNodeRef<T>,
168222
idx: usize,
169223
) -> P::Prop {
170-
let _ = root_rel;
171-
let _ = idx;
172-
unimplemented!()
224+
let props = self.properties.get(&root_rel).unwrap();
225+
let prop = props[idx].as_ref();
226+
prop.downcast_ref::<P::Prop>().unwrap().clone()
173227
}
174228
}

optd-datafusion-bridge/src/lib.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ impl OptdQueryPlanner {
218218
optimizer_name: "datafusion".to_string(),
219219
}));
220220
}
221-
let optd_rel = ctx.conv_into_optd(logical_plan)?;
221+
let mut optd_rel = ctx.conv_into_optd(logical_plan)?;
222222
if let Some(explains) = &mut explains {
223223
explains.push(StringifiedPlan::new(
224224
PlanType::OptimizedLogicalPlan {
@@ -230,7 +230,12 @@ impl OptdQueryPlanner {
230230
));
231231
}
232232
let mut optimizer = self.optimizer.lock().unwrap().take().unwrap();
233-
let (group_id, optimized_rel, meta) = optimizer.optimize(optd_rel)?;
233+
234+
if optimizer.is_heuristic_enabled() {
235+
optd_rel = optimizer.heuristic_optimize(optd_rel);
236+
}
237+
238+
let (group_id, optimized_rel, meta) = optimizer.cascades_optimize(optd_rel)?;
234239

235240
if let Some(explains) = &mut explains {
236241
explains.push(StringifiedPlan::new(
@@ -253,7 +258,7 @@ impl OptdQueryPlanner {
253258
},
254259
));
255260
let bindings = optimizer
256-
.optd_optimizer()
261+
.optd_cascades_optimizer()
257262
.get_all_group_bindings(group_id, true);
258263
let mut join_orders = BTreeSet::new();
259264
let mut logical_join_orders = BTreeSet::new();

optd-datafusion-repr/src/bin/test_optimize.rs

+1
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ pub fn main() {
9292
Arc::new(HashJoinRule::new()),
9393
],
9494
optd_core::heuristics::ApplyOrder::BottomUp,
95+
Arc::new([]),
9596
);
9697
let node = optimizer.optimize(fnal.0.into_rel_node()).unwrap();
9798
println!(

optd-datafusion-repr/src/lib.rs

+74-27
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,11 @@ use cost::{
99
};
1010
use optd_core::{
1111
cascades::{CascadesOptimizer, GroupId, OptimizerProperties},
12+
heuristics::{ApplyOrder, HeuristicsOptimizer},
13+
optimizer::Optimizer,
14+
property::PropertyBuilderAny,
1215
rel_node::RelNodeMetaMap,
13-
rules::RuleWrapper,
16+
rules::{Rule, RuleWrapper},
1417
};
1518

1619
use plan_nodes::{OptRelNodeRef, OptRelNodeTyp};
@@ -33,9 +36,11 @@ pub mod properties;
3336
pub mod rules;
3437

3538
pub struct DatafusionOptimizer {
36-
optimizer: CascadesOptimizer<OptRelNodeTyp>,
39+
hueristic_optimizer: HeuristicsOptimizer<OptRelNodeTyp>,
40+
cascades_optimizer: CascadesOptimizer<OptRelNodeTyp>,
3741
pub runtime_statistics: RuntimeAdaptionStorage,
3842
enable_adaptive: bool,
43+
enable_heuristic: bool,
3944
}
4045

4146
impl DatafusionOptimizer {
@@ -47,26 +52,43 @@ impl DatafusionOptimizer {
4752
self.enable_adaptive
4853
}
4954

50-
pub fn optd_optimizer(&self) -> &CascadesOptimizer<OptRelNodeTyp> {
51-
&self.optimizer
55+
pub fn enable_heuristic(&mut self, enable: bool) {
56+
self.enable_heuristic = enable;
57+
}
58+
59+
pub fn is_heuristic_enabled(&self) -> bool {
60+
self.enable_heuristic
61+
}
62+
63+
pub fn optd_cascades_optimizer(&self) -> &CascadesOptimizer<OptRelNodeTyp> {
64+
&self.cascades_optimizer
65+
}
66+
67+
pub fn optd_hueristic_optimizer(&self) -> &HeuristicsOptimizer<OptRelNodeTyp> {
68+
&self.hueristic_optimizer
5269
}
5370

5471
pub fn optd_optimizer_mut(&mut self) -> &mut CascadesOptimizer<OptRelNodeTyp> {
55-
&mut self.optimizer
72+
&mut self.cascades_optimizer
5673
}
5774

58-
pub fn default_rules() -> Vec<Arc<RuleWrapper<OptRelNodeTyp, CascadesOptimizer<OptRelNodeTyp>>>>
59-
{
75+
pub fn default_heuristic_rules(
76+
) -> Vec<Arc<dyn Rule<OptRelNodeTyp, HeuristicsOptimizer<OptRelNodeTyp>>>> {
77+
vec![
78+
Arc::new(SimplifyFilterRule::new()),
79+
Arc::new(SimplifyJoinCondRule::new()),
80+
Arc::new(EliminateFilterRule::new()),
81+
Arc::new(EliminateJoinRule::new()),
82+
Arc::new(EliminateLimitRule::new()),
83+
Arc::new(EliminateDuplicatedSortExprRule::new()),
84+
Arc::new(EliminateDuplicatedAggExprRule::new()),
85+
]
86+
}
87+
88+
pub fn default_cascades_rules(
89+
) -> Vec<Arc<RuleWrapper<OptRelNodeTyp, CascadesOptimizer<OptRelNodeTyp>>>> {
6090
let rules = PhysicalConversionRule::all_conversions();
61-
let mut rule_wrappers = vec![
62-
RuleWrapper::new_heuristic(Arc::new(SimplifyFilterRule::new())),
63-
RuleWrapper::new_heuristic(Arc::new(SimplifyJoinCondRule::new())),
64-
RuleWrapper::new_heuristic(Arc::new(EliminateFilterRule::new())),
65-
RuleWrapper::new_heuristic(Arc::new(EliminateJoinRule::new())),
66-
RuleWrapper::new_heuristic(Arc::new(EliminateLimitRule::new())),
67-
RuleWrapper::new_heuristic(Arc::new(EliminateDuplicatedSortExprRule::new())),
68-
RuleWrapper::new_heuristic(Arc::new(EliminateDuplicatedAggExprRule::new())),
69-
];
91+
let mut rule_wrappers = vec![];
7092
for rule in rules {
7193
rule_wrappers.push(RuleWrapper::new_cascades(rule));
7294
}
@@ -86,23 +108,34 @@ impl DatafusionOptimizer {
86108
stats: DataFusionBaseTableStats,
87109
enable_adaptive: bool,
88110
) -> Self {
89-
let rules = Self::default_rules();
111+
let cascades_rules = Self::default_cascades_rules();
112+
let heuristic_rules = Self::default_heuristic_rules();
113+
let property_builders: Arc<[Box<dyn PropertyBuilderAny<OptRelNodeTyp>>]> = Arc::new([
114+
Box::new(SchemaPropertyBuilder::new(catalog.clone())),
115+
Box::new(ColumnRefPropertyBuilder::new(catalog.clone())),
116+
]);
90117
let cost_model = AdaptiveCostModel::new(DEFAULT_DECAY, stats);
91118
Self {
92119
runtime_statistics: cost_model.get_runtime_map(),
93-
optimizer: CascadesOptimizer::new_with_prop(
94-
rules,
120+
cascades_optimizer: CascadesOptimizer::new_with_prop(
121+
cascades_rules,
95122
Box::new(cost_model),
96123
vec![
97124
Box::new(SchemaPropertyBuilder::new(catalog.clone())),
98-
Box::new(ColumnRefPropertyBuilder::new(catalog)),
125+
Box::new(ColumnRefPropertyBuilder::new(catalog.clone())),
99126
],
100127
OptimizerProperties {
101128
partial_explore_iter: Some(1 << 20),
102129
partial_explore_space: Some(1 << 10),
103130
},
104131
),
132+
hueristic_optimizer: HeuristicsOptimizer::new_with_rules(
133+
heuristic_rules,
134+
ApplyOrder::BottomUp,
135+
property_builders.clone(),
136+
),
105137
enable_adaptive,
138+
enable_heuristic: true,
106139
}
107140
}
108141

@@ -140,31 +173,45 @@ impl DatafusionOptimizer {
140173
);
141174
Self {
142175
runtime_statistics,
143-
optimizer,
176+
cascades_optimizer: optimizer,
144177
enable_adaptive: true,
178+
enable_heuristic: false,
179+
hueristic_optimizer: HeuristicsOptimizer::new_with_rules(
180+
vec![],
181+
ApplyOrder::BottomUp,
182+
Arc::new([]),
183+
),
145184
}
146185
}
147186

148-
pub fn optimize(
187+
pub fn heuristic_optimize(&mut self, root_rel: OptRelNodeRef) -> OptRelNodeRef {
188+
self.hueristic_optimizer
189+
.optimize(root_rel)
190+
.expect("heuristics returns error")
191+
}
192+
193+
pub fn cascades_optimize(
149194
&mut self,
150195
root_rel: OptRelNodeRef,
151196
) -> Result<(GroupId, OptRelNodeRef, RelNodeMetaMap)> {
152197
if self.enable_adaptive {
153198
self.runtime_statistics.lock().unwrap().iter_cnt += 1;
154-
self.optimizer.step_clear_winner();
199+
self.cascades_optimizer.step_clear_winner();
155200
} else {
156-
self.optimizer.step_clear();
201+
self.cascades_optimizer.step_clear();
157202
}
158203

159-
let group_id = self.optimizer.step_optimize_rel(root_rel)?;
204+
let group_id = self.cascades_optimizer.step_optimize_rel(root_rel)?;
160205

161206
let mut meta = Some(HashMap::new());
162-
let optimized_rel = self.optimizer.step_get_optimize_rel(group_id, &mut meta)?;
207+
let optimized_rel = self
208+
.cascades_optimizer
209+
.step_get_optimize_rel(group_id, &mut meta)?;
163210

164211
Ok((group_id, optimized_rel, meta.unwrap()))
165212
}
166213

167214
pub fn dump(&self, group_id: Option<GroupId>) {
168-
self.optimizer.dump(group_id)
215+
self.cascades_optimizer.dump(group_id)
169216
}
170217
}

0 commit comments

Comments
 (0)