Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit c77a719

Browse files
authored
refactor(core): remove rule wrapper (#229)
yet another thing we can remove after the predicate refactor. --------- Signed-off-by: Alex Chi <[email protected]>
1 parent 75f9fe6 commit c77a719

File tree

13 files changed

+76
-163
lines changed

13 files changed

+76
-163
lines changed

Cargo.lock

+9-8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ members = [
1313
resolver = "2"
1414

1515
[workspace.package]
16-
version = "0.1.0"
16+
version = "0.1.1"
1717
edition = "2021"
1818
homepage = "https://github.com/cmu-db/optd"
1919
keywords = ["sql", "database", "optimizer", "datafusion"]

datafusion-optd-cli/Cargo.toml

+5-6
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,11 @@
1919
name = "datafusion-optd-cli"
2020
description = "Command Line Client for DataFusion query engine."
2121
version = "32.0.0"
22-
authors = ["Apache Arrow <[email protected]>"]
2322
edition = "2021"
2423
keywords = ["arrow", "datafusion", "query", "sql"]
2524
license = "Apache-2.0"
26-
homepage = "https://github.com/apache/arrow-datafusion"
27-
repository = "https://github.com/apache/arrow-datafusion"
25+
homepage = "https://github.com/cmu-db/optd"
26+
repository = "https://github.com/cmu-db/optd"
2827
rust-version = "1.70"
2928
readme = "README.md"
3029

@@ -57,9 +56,9 @@ tokio = { version = "1.24", features = [
5756
"parking_lot",
5857
] }
5958
url = "2.2"
60-
optd-datafusion-bridge = { path = "../optd-datafusion-bridge" }
61-
optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost" }
62-
optd-datafusion-repr = { path = "../optd-datafusion-repr" }
59+
optd-datafusion-bridge = { path = "../optd-datafusion-bridge", version = "0.1" }
60+
optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" }
61+
optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" }
6362
tracing-subscriber = "0.3"
6463
tracing = "0.1"
6564

optd-core/src/cascades/optimizer.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use crate::nodes::{
2020
};
2121
use crate::optimizer::Optimizer;
2222
use crate::property::{PropertyBuilder, PropertyBuilderAny};
23-
use crate::rules::RuleWrapper;
23+
use crate::rules::Rule;
2424

2525
pub type RuleId = usize;
2626

@@ -47,7 +47,7 @@ pub struct CascadesOptimizer<T: NodeType, M: Memo<T> = NaiveMemo<T>> {
4747
explored_group: HashSet<GroupId>,
4848
explored_expr: HashSet<ExprId>,
4949
fired_rules: HashMap<ExprId, HashSet<RuleId>>,
50-
rules: Arc<[Arc<RuleWrapper<T, Self>>]>,
50+
rules: Arc<[Arc<dyn Rule<T, Self>>]>,
5151
disabled_rules: HashSet<usize>,
5252
cost: Arc<dyn CostModel<T, M>>,
5353
property_builders: Arc<[Box<dyn PropertyBuilderAny<T>>]>,
@@ -94,15 +94,15 @@ impl Display for PredId {
9494

9595
impl<T: NodeType> CascadesOptimizer<T, NaiveMemo<T>> {
9696
pub fn new(
97-
rules: Vec<Arc<RuleWrapper<T, Self>>>,
97+
rules: Vec<Arc<dyn Rule<T, Self>>>,
9898
cost: Box<dyn CostModel<T, NaiveMemo<T>>>,
9999
property_builders: Vec<Box<dyn PropertyBuilderAny<T>>>,
100100
) -> Self {
101101
Self::new_with_prop(rules, cost, property_builders, Default::default())
102102
}
103103

104104
pub fn new_with_prop(
105-
rules: Vec<Arc<RuleWrapper<T, Self>>>,
105+
rules: Vec<Arc<dyn Rule<T, Self>>>,
106106
cost: Box<dyn CostModel<T, NaiveMemo<T>>>,
107107
property_builders: Vec<Box<dyn PropertyBuilderAny<T>>>,
108108
prop: OptimizerProperties,
@@ -153,7 +153,7 @@ impl<T: NodeType, M: Memo<T>> CascadesOptimizer<T, M> {
153153
self.cost.clone()
154154
}
155155

156-
pub fn rules(&self) -> Arc<[Arc<RuleWrapper<T, Self>>]> {
156+
pub fn rules(&self) -> Arc<[Arc<dyn Rule<T, Self>>]> {
157157
self.rules.clone()
158158
}
159159

optd-core/src/cascades/tasks/apply_rule.rs

+3-8
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use crate::cascades::optimizer::{CascadesOptimizer, ExprId, RuleId};
1515
use crate::cascades::tasks::{OptimizeExpressionTask, OptimizeInputsTask};
1616
use crate::cascades::{GroupId, Memo};
1717
use crate::nodes::{ArcPlanNode, NodeType, PlanNode, PlanNodeOrGroup};
18-
use crate::rules::{OptimizeType, RuleMatcher};
18+
use crate::rules::RuleMatcher;
1919

2020
pub struct ApplyRuleTask {
2121
rule_id: RuleId,
@@ -164,21 +164,16 @@ impl<T: NodeType, M: Memo<T>> Task<T, M> for ApplyRuleTask {
164164
return Ok(vec![]);
165165
}
166166

167-
let rule_wrapper = optimizer.rules()[self.rule_id].clone();
168-
let rule = rule_wrapper.rule();
167+
let rule = optimizer.rules()[self.rule_id].clone();
169168

170-
trace!(event = "task_begin", task = "apply_rule", expr_id = %self.expr_id, rule_id = %self.rule_id, rule = %rule.name(), optimize_type=%rule_wrapper.optimize_type());
169+
trace!(event = "task_begin", task = "apply_rule", expr_id = %self.expr_id, rule_id = %self.rule_id, rule = %rule.name());
171170
let group_id = optimizer.get_group_id(self.expr_id);
172171
let mut tasks = vec![];
173172
let binding_exprs = match_and_pick_expr(rule.matcher(), self.expr_id, optimizer);
174173
for binding in binding_exprs {
175174
trace!(event = "before_apply_rule", task = "apply_rule", input_binding=%binding);
176175
let applied = rule.apply(optimizer, binding);
177176

178-
if rule_wrapper.optimize_type() == OptimizeType::Heuristics {
179-
panic!("no more heuristics rule in cascades");
180-
}
181-
182177
for expr in applied {
183178
trace!(event = "after_apply_rule", task = "apply_rule", output_binding=%expr);
184179
// TODO: remove clone in the below line

optd-core/src/cascades/tasks/optimize_expression.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ impl<T: NodeType, M: Memo<T>> Task<T, M> for OptimizeExpressionTask {
3939
let expr = optimizer.get_expr_memoed(self.expr_id);
4040
trace!(event = "task_begin", task = "optimize_expr", expr_id = %self.expr_id, expr = %expr);
4141
let mut tasks = vec![];
42-
for (rule_id, rule_wrapper) in optimizer.rules().iter().enumerate() {
43-
let rule = rule_wrapper.rule();
42+
for (rule_id, rule) in optimizer.rules().iter().enumerate() {
4443
if optimizer.is_rule_fired(self.expr_id, rule_id) {
4544
continue;
4645
}

optd-core/src/rules.rs

-50
Original file line numberDiff line numberDiff line change
@@ -5,61 +5,11 @@
55

66
mod ir;
77

8-
use std::fmt::{Display, Formatter};
9-
use std::sync::Arc;
10-
118
pub use ir::RuleMatcher;
129

1310
use crate::nodes::{ArcPlanNode, NodeType, PlanNodeOrGroup};
1411
use crate::optimizer::Optimizer;
1512

16-
#[derive(Clone, Copy, Debug, PartialEq)]
17-
pub enum OptimizeType {
18-
Cascades,
19-
Heuristics,
20-
}
21-
22-
impl Display for OptimizeType {
23-
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
24-
match self {
25-
Self::Cascades => write!(f, "cascades"),
26-
Self::Heuristics => write!(f, "heuristics"),
27-
}
28-
}
29-
}
30-
31-
pub struct RuleWrapper<T: NodeType, O: Optimizer<T>> {
32-
pub rule: Arc<dyn Rule<T, O>>,
33-
pub optimize_type: OptimizeType,
34-
}
35-
36-
impl<T: NodeType, O: Optimizer<T>> RuleWrapper<T, O> {
37-
pub fn new(rule: Arc<dyn Rule<T, O>>, optimizer_type: OptimizeType) -> Self {
38-
Self {
39-
rule,
40-
optimize_type: optimizer_type,
41-
}
42-
}
43-
pub fn new_cascades(rule: Arc<dyn Rule<T, O>>) -> Arc<Self> {
44-
Arc::new(Self {
45-
rule,
46-
optimize_type: OptimizeType::Cascades,
47-
})
48-
}
49-
pub fn new_heuristic(rule: Arc<dyn Rule<T, O>>) -> Arc<Self> {
50-
Arc::new(Self {
51-
rule,
52-
optimize_type: OptimizeType::Heuristics,
53-
})
54-
}
55-
pub fn rule(&self) -> Arc<dyn Rule<T, O>> {
56-
self.rule.clone()
57-
}
58-
pub fn optimize_type(&self) -> OptimizeType {
59-
self.optimize_type
60-
}
61-
}
62-
6313
// TODO: docs, possible renames.
6414
// TODO: Why do we have all of these match types? Seems like possible overkill.
6515
pub trait Rule<T: NodeType, O: Optimizer<T>>: 'static + Send + Sync {

optd-datafusion-bridge/Cargo.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ datafusion = "32.0.0"
1515
datafusion-expr = "32.0.0"
1616
async-trait = "0.1"
1717
itertools = "0.13"
18-
optd-core = { path = "../optd-core" }
19-
optd-datafusion-repr = { path = "../optd-datafusion-repr" }
18+
optd-core = { path = "../optd-core", version = "0.1" }
19+
optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" }
2020
anyhow = "1"
2121
async-recursion = "1"
2222
futures-lite = "2"
+10-7
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,26 @@
11
[package]
22
name = "optd-datafusion-repr-adv-cost"
3-
version = "0.1.0"
4-
edition = "2021"
5-
6-
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
3+
description = "datafusion plan representation for optd"
4+
version = { workspace = true }
5+
edition = { workspace = true }
6+
homepage = { workspace = true }
7+
keywords = { workspace = true }
8+
license = { workspace = true }
9+
repository = { workspace = true }
710

811
[dependencies]
912
anyhow = "1"
1013
arrow-schema = "47.0.0"
1114
assert_approx_eq = "1.1.0"
1215
datafusion = "32.0.0"
1316
ordered-float = "4"
14-
optd-datafusion-repr = { path = "../optd-datafusion-repr" }
15-
optd-core = { path = "../optd-core" }
17+
optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" }
18+
optd-core = { path = "../optd-core", version = "0.1" }
1619
serde = { version = "1.0", features = ["derive"] }
1720
rayon = "1.10"
1821
itertools = "0.13"
1922
test-case = "3.3"
2023
tracing = "0.1"
2124
tracing-subscriber = "0.3"
22-
optd-gungnir = { path = "../optd-gungnir" }
25+
optd-gungnir = { path = "../optd-gungnir", version = "0.1" }
2326
serde_with = { version = "3.7.0", features = ["json"] }

optd-datafusion-repr/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ tracing = "0.1"
1717
tracing-subscriber = "0.3"
1818
pretty-xmlish = "0.1"
1919
itertools = "0.13"
20-
optd-core = { path = "../optd-core" }
20+
optd-core = { path = "../optd-core", version = "0.1" }
2121
camelpaste = "0.1"
2222
datafusion-expr = "32.0.0"
2323
serde = { version = "1.0", features = ["derive"] }

0 commit comments

Comments
 (0)