From c77a719d013bae480609bd06b35cbe90c0dd5af0 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Thu, 7 Nov 2024 22:39:12 -0500 Subject: [PATCH] refactor(core): remove rule wrapper (#229) yet another thing we can remove after the predicate refactor. --------- Signed-off-by: Alex Chi --- Cargo.lock | 17 ++-- Cargo.toml | 2 +- datafusion-optd-cli/Cargo.toml | 11 ++- optd-core/src/cascades/optimizer.rs | 10 +-- optd-core/src/cascades/tasks/apply_rule.rs | 11 +-- .../src/cascades/tasks/optimize_expression.rs | 3 +- optd-core/src/rules.rs | 50 ----------- optd-datafusion-bridge/Cargo.toml | 4 +- optd-datafusion-repr-adv-cost/Cargo.toml | 17 ++-- optd-datafusion-repr/Cargo.toml | 2 +- optd-datafusion-repr/src/lib.rs | 89 ++++++------------- optd-sqlplannertest/Cargo.toml | 19 ++-- optd-sqlplannertest/src/lib.rs | 4 +- 13 files changed, 76 insertions(+), 163 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eb1f6c11..1e21b9bc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2924,7 +2924,7 @@ dependencies = [ [[package]] name = "optd-core" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "arrow-schema", @@ -2938,7 +2938,7 @@ dependencies = [ [[package]] name = "optd-datafusion-bridge" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "async-recursion", @@ -2955,7 +2955,7 @@ dependencies = [ [[package]] name = "optd-datafusion-repr" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "arrow-schema", @@ -2972,7 +2972,7 @@ dependencies = [ [[package]] name = "optd-datafusion-repr-adv-cost" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "arrow-schema", @@ -2993,7 +2993,7 @@ dependencies = [ [[package]] name = "optd-gungnir" -version = "0.1.0" +version = "0.1.1" dependencies = [ "crossbeam", "hashbrown 0.14.5", @@ -3039,7 +3039,7 @@ dependencies = [ [[package]] name = "optd-sqlplannertest" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "async-trait", @@ -4042,8 +4042,9 @@ dependencies = [ [[package]] name = "sqlplannertest" -version = "0.1.0" -source = "git+https://github.com/risinglightdb/sqlplannertest-rs?branch=main#6122d5be20383c0a6a50327258ea9da8e2d72df5" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40df613c24d7066362c37ef5047373532700efb87856088212a47f7b273c7424" dependencies = [ "anyhow", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index d23689f7..75a08d58 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ members = [ resolver = "2" [workspace.package] -version = "0.1.0" +version = "0.1.1" edition = "2021" homepage = "https://github.com/cmu-db/optd" keywords = ["sql", "database", "optimizer", "datafusion"] diff --git a/datafusion-optd-cli/Cargo.toml b/datafusion-optd-cli/Cargo.toml index 5ddd83c7..b97b1f2b 100644 --- a/datafusion-optd-cli/Cargo.toml +++ b/datafusion-optd-cli/Cargo.toml @@ -19,12 +19,11 @@ name = "datafusion-optd-cli" description = "Command Line Client for DataFusion query engine." version = "32.0.0" -authors = ["Apache Arrow "] edition = "2021" keywords = ["arrow", "datafusion", "query", "sql"] license = "Apache-2.0" -homepage = "https://github.com/apache/arrow-datafusion" -repository = "https://github.com/apache/arrow-datafusion" +homepage = "https://github.com/cmu-db/optd" +repository = "https://github.com/cmu-db/optd" rust-version = "1.70" readme = "README.md" @@ -57,9 +56,9 @@ tokio = { version = "1.24", features = [ "parking_lot", ] } url = "2.2" -optd-datafusion-bridge = { path = "../optd-datafusion-bridge" } -optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost" } -optd-datafusion-repr = { path = "../optd-datafusion-repr" } +optd-datafusion-bridge = { path = "../optd-datafusion-bridge", version = "0.1" } +optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" } +optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } tracing-subscriber = "0.3" tracing = "0.1" diff --git a/optd-core/src/cascades/optimizer.rs b/optd-core/src/cascades/optimizer.rs index dd6d5f34..fc3767dd 100644 --- a/optd-core/src/cascades/optimizer.rs +++ b/optd-core/src/cascades/optimizer.rs @@ -20,7 +20,7 @@ use crate::nodes::{ }; use crate::optimizer::Optimizer; use crate::property::{PropertyBuilder, PropertyBuilderAny}; -use crate::rules::RuleWrapper; +use crate::rules::Rule; pub type RuleId = usize; @@ -47,7 +47,7 @@ pub struct CascadesOptimizer = NaiveMemo> { explored_group: HashSet, explored_expr: HashSet, fired_rules: HashMap>, - rules: Arc<[Arc>]>, + rules: Arc<[Arc>]>, disabled_rules: HashSet, cost: Arc>, property_builders: Arc<[Box>]>, @@ -94,7 +94,7 @@ impl Display for PredId { impl CascadesOptimizer> { pub fn new( - rules: Vec>>, + rules: Vec>>, cost: Box>>, property_builders: Vec>>, ) -> Self { @@ -102,7 +102,7 @@ impl CascadesOptimizer> { } pub fn new_with_prop( - rules: Vec>>, + rules: Vec>>, cost: Box>>, property_builders: Vec>>, prop: OptimizerProperties, @@ -153,7 +153,7 @@ impl> CascadesOptimizer { self.cost.clone() } - pub fn rules(&self) -> Arc<[Arc>]> { + pub fn rules(&self) -> Arc<[Arc>]> { self.rules.clone() } diff --git a/optd-core/src/cascades/tasks/apply_rule.rs b/optd-core/src/cascades/tasks/apply_rule.rs index 2db820ff..b6e62efb 100644 --- a/optd-core/src/cascades/tasks/apply_rule.rs +++ b/optd-core/src/cascades/tasks/apply_rule.rs @@ -15,7 +15,7 @@ use crate::cascades::optimizer::{CascadesOptimizer, ExprId, RuleId}; use crate::cascades::tasks::{OptimizeExpressionTask, OptimizeInputsTask}; use crate::cascades::{GroupId, Memo}; use crate::nodes::{ArcPlanNode, NodeType, PlanNode, PlanNodeOrGroup}; -use crate::rules::{OptimizeType, RuleMatcher}; +use crate::rules::RuleMatcher; pub struct ApplyRuleTask { rule_id: RuleId, @@ -164,10 +164,9 @@ impl> Task for ApplyRuleTask { return Ok(vec![]); } - let rule_wrapper = optimizer.rules()[self.rule_id].clone(); - let rule = rule_wrapper.rule(); + let rule = optimizer.rules()[self.rule_id].clone(); - trace!(event = "task_begin", task = "apply_rule", expr_id = %self.expr_id, rule_id = %self.rule_id, rule = %rule.name(), optimize_type=%rule_wrapper.optimize_type()); + trace!(event = "task_begin", task = "apply_rule", expr_id = %self.expr_id, rule_id = %self.rule_id, rule = %rule.name()); let group_id = optimizer.get_group_id(self.expr_id); let mut tasks = vec![]; let binding_exprs = match_and_pick_expr(rule.matcher(), self.expr_id, optimizer); @@ -175,10 +174,6 @@ impl> Task for ApplyRuleTask { trace!(event = "before_apply_rule", task = "apply_rule", input_binding=%binding); let applied = rule.apply(optimizer, binding); - if rule_wrapper.optimize_type() == OptimizeType::Heuristics { - panic!("no more heuristics rule in cascades"); - } - for expr in applied { trace!(event = "after_apply_rule", task = "apply_rule", output_binding=%expr); // TODO: remove clone in the below line diff --git a/optd-core/src/cascades/tasks/optimize_expression.rs b/optd-core/src/cascades/tasks/optimize_expression.rs index 9c900e90..a47f01a7 100644 --- a/optd-core/src/cascades/tasks/optimize_expression.rs +++ b/optd-core/src/cascades/tasks/optimize_expression.rs @@ -39,8 +39,7 @@ impl> Task for OptimizeExpressionTask { let expr = optimizer.get_expr_memoed(self.expr_id); trace!(event = "task_begin", task = "optimize_expr", expr_id = %self.expr_id, expr = %expr); let mut tasks = vec![]; - for (rule_id, rule_wrapper) in optimizer.rules().iter().enumerate() { - let rule = rule_wrapper.rule(); + for (rule_id, rule) in optimizer.rules().iter().enumerate() { if optimizer.is_rule_fired(self.expr_id, rule_id) { continue; } diff --git a/optd-core/src/rules.rs b/optd-core/src/rules.rs index 0442eca1..4d8e71d2 100644 --- a/optd-core/src/rules.rs +++ b/optd-core/src/rules.rs @@ -5,61 +5,11 @@ mod ir; -use std::fmt::{Display, Formatter}; -use std::sync::Arc; - pub use ir::RuleMatcher; use crate::nodes::{ArcPlanNode, NodeType, PlanNodeOrGroup}; use crate::optimizer::Optimizer; -#[derive(Clone, Copy, Debug, PartialEq)] -pub enum OptimizeType { - Cascades, - Heuristics, -} - -impl Display for OptimizeType { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Self::Cascades => write!(f, "cascades"), - Self::Heuristics => write!(f, "heuristics"), - } - } -} - -pub struct RuleWrapper> { - pub rule: Arc>, - pub optimize_type: OptimizeType, -} - -impl> RuleWrapper { - pub fn new(rule: Arc>, optimizer_type: OptimizeType) -> Self { - Self { - rule, - optimize_type: optimizer_type, - } - } - pub fn new_cascades(rule: Arc>) -> Arc { - Arc::new(Self { - rule, - optimize_type: OptimizeType::Cascades, - }) - } - pub fn new_heuristic(rule: Arc>) -> Arc { - Arc::new(Self { - rule, - optimize_type: OptimizeType::Heuristics, - }) - } - pub fn rule(&self) -> Arc> { - self.rule.clone() - } - pub fn optimize_type(&self) -> OptimizeType { - self.optimize_type - } -} - // TODO: docs, possible renames. // TODO: Why do we have all of these match types? Seems like possible overkill. pub trait Rule>: 'static + Send + Sync { diff --git a/optd-datafusion-bridge/Cargo.toml b/optd-datafusion-bridge/Cargo.toml index 51370608..047523d2 100644 --- a/optd-datafusion-bridge/Cargo.toml +++ b/optd-datafusion-bridge/Cargo.toml @@ -15,8 +15,8 @@ datafusion = "32.0.0" datafusion-expr = "32.0.0" async-trait = "0.1" itertools = "0.13" -optd-core = { path = "../optd-core" } -optd-datafusion-repr = { path = "../optd-datafusion-repr" } +optd-core = { path = "../optd-core", version = "0.1" } +optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } anyhow = "1" async-recursion = "1" futures-lite = "2" diff --git a/optd-datafusion-repr-adv-cost/Cargo.toml b/optd-datafusion-repr-adv-cost/Cargo.toml index d36a7d29..4d84a0e8 100644 --- a/optd-datafusion-repr-adv-cost/Cargo.toml +++ b/optd-datafusion-repr-adv-cost/Cargo.toml @@ -1,9 +1,12 @@ [package] name = "optd-datafusion-repr-adv-cost" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +description = "datafusion plan representation for optd" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +repository = { workspace = true } [dependencies] anyhow = "1" @@ -11,13 +14,13 @@ arrow-schema = "47.0.0" assert_approx_eq = "1.1.0" datafusion = "32.0.0" ordered-float = "4" -optd-datafusion-repr = { path = "../optd-datafusion-repr" } -optd-core = { path = "../optd-core" } +optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } +optd-core = { path = "../optd-core", version = "0.1" } serde = { version = "1.0", features = ["derive"] } rayon = "1.10" itertools = "0.13" test-case = "3.3" tracing = "0.1" tracing-subscriber = "0.3" -optd-gungnir = { path = "../optd-gungnir" } +optd-gungnir = { path = "../optd-gungnir", version = "0.1" } serde_with = { version = "3.7.0", features = ["json"] } diff --git a/optd-datafusion-repr/Cargo.toml b/optd-datafusion-repr/Cargo.toml index bb83fe80..b9ea8e9a 100644 --- a/optd-datafusion-repr/Cargo.toml +++ b/optd-datafusion-repr/Cargo.toml @@ -17,7 +17,7 @@ tracing = "0.1" tracing-subscriber = "0.3" pretty-xmlish = "0.1" itertools = "0.13" -optd-core = { path = "../optd-core" } +optd-core = { path = "../optd-core", version = "0.1" } camelpaste = "0.1" datafusion-expr = "32.0.0" serde = { version = "1.0", features = ["derive"] } diff --git a/optd-datafusion-repr/src/lib.rs b/optd-datafusion-repr/src/lib.rs index 84789f6b..2e80ff19 100644 --- a/optd-datafusion-repr/src/lib.rs +++ b/optd-datafusion-repr/src/lib.rs @@ -18,7 +18,7 @@ use optd_core::nodes::PlanNodeMetaMap; pub use optd_core::nodes::Value; use optd_core::optimizer::Optimizer; use optd_core::property::PropertyBuilderAny; -use optd_core::rules::{Rule, RuleWrapper}; +use optd_core::rules::Rule; pub use optimizer_ext::OptimizerExt; use plan_nodes::{ArcDfPlanNode, DfNodeType}; use properties::column_ref::ColumnRefPropertyBuilder; @@ -94,58 +94,28 @@ impl DatafusionOptimizer { ] } - pub fn default_cascades_rules( - ) -> Vec>>> { + pub fn default_cascades_rules() -> Vec>>> + { let rules = rules::PhysicalConversionRule::all_conversions(); let mut rule_wrappers = vec![]; for rule in rules { - rule_wrappers.push(RuleWrapper::new_cascades(rule)); + rule_wrappers.push(rule); } - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::FilterProjectTransposeRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::FilterCrossJoinTransposeRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::FilterInnerJoinTransposeRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::FilterSortTransposeRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::FilterAggTransposeRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::HashJoinRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::JoinCommuteRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::JoinAssocRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::ProjectionPullUpJoin::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::EliminateProjectRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::ProjectMergeRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::EliminateLimitRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::EliminateJoinRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::EliminateFilterRule::new(), - ))); - rule_wrappers.push(RuleWrapper::new_cascades(Arc::new( - rules::ProjectFilterTransposeRule::new(), - ))); + rule_wrappers.push(Arc::new(rules::FilterProjectTransposeRule::new())); + rule_wrappers.push(Arc::new(rules::FilterCrossJoinTransposeRule::new())); + rule_wrappers.push(Arc::new(rules::FilterInnerJoinTransposeRule::new())); + rule_wrappers.push(Arc::new(rules::FilterSortTransposeRule::new())); + rule_wrappers.push(Arc::new(rules::FilterAggTransposeRule::new())); + rule_wrappers.push(Arc::new(rules::HashJoinRule::new())); + rule_wrappers.push(Arc::new(rules::JoinCommuteRule::new())); + rule_wrappers.push(Arc::new(rules::JoinAssocRule::new())); + rule_wrappers.push(Arc::new(rules::ProjectionPullUpJoin::new())); + rule_wrappers.push(Arc::new(rules::EliminateProjectRule::new())); + rule_wrappers.push(Arc::new(rules::ProjectMergeRule::new())); + rule_wrappers.push(Arc::new(rules::EliminateLimitRule::new())); + rule_wrappers.push(Arc::new(rules::EliminateJoinRule::new())); + rule_wrappers.push(Arc::new(rules::EliminateFilterRule::new())); + rule_wrappers.push(Arc::new(rules::ProjectFilterTransposeRule::new())); rule_wrappers } @@ -199,22 +169,13 @@ impl DatafusionOptimizer { let rules = rules::PhysicalConversionRule::all_conversions(); let mut rule_wrappers = Vec::new(); for rule in rules { - rule_wrappers.push(RuleWrapper::new_cascades(rule)); + rule_wrappers.push(rule); } - // rule_wrappers.push(RuleWrapper::new_cascades(Arc::new(HashJoinRule::new()))); - // rule_wrappers.insert( - // 0, - // RuleWrapper::new_cascades(Arc::new(JoinCommuteRule::new())), - // ); - // rule_wrappers.insert(1, RuleWrapper::new_cascades(Arc::new(JoinAssocRule::new()))); - // rule_wrappers.insert( - // 2, - // RuleWrapper::new_cascades(Arc::new(ProjectionPullUpJoin::new())), - // ); - // rule_wrappers.insert( - // 3, - // RuleWrapper::new_heuristic(Arc::new(EliminateFilterRule::new())), - // ); + rule_wrappers.push(Arc::new(rules::HashJoinRule::new())); + rule_wrappers.insert(0, Arc::new(rules::JoinCommuteRule::new())); + rule_wrappers.insert(1, Arc::new(rules::JoinAssocRule::new())); + rule_wrappers.insert(2, Arc::new(rules::ProjectionPullUpJoin::new())); + rule_wrappers.insert(3, Arc::new(rules::EliminateFilterRule::new())); let cost_model = AdaptiveCostModel::new(1000); let runtime_statistics = cost_model.get_runtime_map(); diff --git a/optd-sqlplannertest/Cargo.toml b/optd-sqlplannertest/Cargo.toml index bff901ca..f80ffb93 100644 --- a/optd-sqlplannertest/Cargo.toml +++ b/optd-sqlplannertest/Cargo.toml @@ -1,17 +1,22 @@ [package] name = "optd-sqlplannertest" -version = "0.1.0" -edition = "2021" +description = "sqlplannertest for optd" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +repository = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] clap = { version = "4.5.4", features = ["derive"] } anyhow = { version = "1", features = ["backtrace"] } -sqlplannertest = { git = "https://github.com/risinglightdb/sqlplannertest-rs", branch = "main" } +sqlplannertest = "0.2" async-trait = "0.1" -datafusion-optd-cli = { path = "../datafusion-optd-cli" } -optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost" } +datafusion-optd-cli = { path = "../datafusion-optd-cli", version = "32.0.0" } +optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" } datafusion = { version = "32.0.0", features = [ "avro", "crypto_expressions", @@ -30,8 +35,8 @@ tokio = { version = "1.24", features = [ "sync", "parking_lot", ] } -optd-datafusion-bridge = { path = "../optd-datafusion-bridge" } -optd-datafusion-repr = { path = "../optd-datafusion-repr" } +optd-datafusion-bridge = { path = "../optd-datafusion-bridge", version = "0.1" } +optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } itertools = "0.13" lazy_static = "1.4.0" diff --git a/optd-sqlplannertest/src/lib.rs b/optd-sqlplannertest/src/lib.rs index d96a6a66..c556d9d0 100644 --- a/optd-sqlplannertest/src/lib.rs +++ b/optd-sqlplannertest/src/lib.rs @@ -141,7 +141,7 @@ impl DatafusionDBMS { guard.as_mut().unwrap().enable_heuristic(true); } else { for (rule_id, rule) in rules.as_ref().iter().enumerate() { - if rule.rule.is_impl_rule() { + if rule.is_impl_rule() { optimizer.enable_rule(rule_id); } else { optimizer.disable_rule(rule_id); @@ -153,7 +153,7 @@ impl DatafusionDBMS { .map(|x| x.as_str()) .collect::>(); for (rule_id, rule) in rules.as_ref().iter().enumerate() { - if rules_to_enable.remove(rule.rule.name()) { + if rules_to_enable.remove(rule.name()) { optimizer.enable_rule(rule_id); } }