Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 75f9fe6

Browse files
authored
refactor(core): drop value-bag dependency for statistics (#227)
value-bag was added for easy serialization/deserialization of the statistics. However, on second thought, this approach cannot be made compatible with the serde API: to deserialize a value, the caller needs to know the underlying concrete type, and that information is erased behind `dyn`. Therefore, the correct approach to enable serialization for the properties is to add two new methods to the PropertyBuilder / CostModel trait: `serialize_stats(&self, &dyn Any) -> serde_json::Value` and `deserialize_stats(&self, serde_json::Value) -> Box<dyn Any>`. --------- Signed-off-by: Alex Chi <[email protected]>
1 parent 4f84645 commit 75f9fe6

File tree

8 files changed

+23
-156
lines changed

8 files changed

+23
-156
lines changed

Cargo.lock

+9-143
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

optd-core/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@ itertools = "0.13"
1818
serde = { version = "1.0", features = ["derive", "rc"] }
1919
arrow-schema = "47.0.0"
2020
chrono = "0.4"
21-
value-bag = { version = "1", features = ["owned"] }
21+
erased-serde = "0.4"

optd-core/src/cascades/memo.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ impl<T: NodeType> std::fmt::Display for MemoPlanNode<T> {
4141
}
4242
}
4343

44-
#[derive(Debug, Clone)]
44+
#[derive(Clone)]
4545
pub struct WinnerInfo {
4646
pub expr_id: ExprId,
4747
pub total_weighted_cost: f64,
@@ -51,7 +51,7 @@ pub struct WinnerInfo {
5151
pub statistics: Arc<Statistics>,
5252
}
5353

54-
#[derive(Debug, Clone)]
54+
#[derive(Clone)]
5555
pub enum Winner {
5656
Unknown,
5757
Impossible,
@@ -81,7 +81,7 @@ impl Default for Winner {
8181
}
8282
}
8383

84-
#[derive(Default, Debug, Clone)]
84+
#[derive(Default, Clone)]
8585
pub struct GroupInfo {
8686
pub winner: Winner,
8787
}

optd-core/src/cost.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@ use crate::cascades::{CascadesOptimizer, Memo, RelNodeContext};
77
use crate::nodes::{ArcPredNode, NodeType};
88

99
/// The statistics of a group.
10-
#[derive(Clone, Debug)]
11-
pub struct Statistics(pub value_bag::OwnedValueBag);
10+
pub struct Statistics(pub Box<dyn std::any::Any + Send + Sync + 'static>);
1211

1312
/// The cost of an operation. The cost is represented as a vector of double values.
1413
/// For example, it can be represented as `[compute_cost, io_cost]`.

optd-core/src/nodes.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ impl<T: NodeType> PredNode<T> {
358358
}
359359

360360
/// Metadata for a rel node.
361-
#[derive(Clone, Debug)]
361+
#[derive(Clone)]
362362
pub struct PlanNodeMeta {
363363
/// The group (id) of the `RelNode`
364364
pub group_id: GroupId,

optd-datafusion-repr/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,3 @@ camelpaste = "0.1"
2222
datafusion-expr = "32.0.0"
2323
serde = { version = "1.0", features = ["derive"] }
2424
bincode = "1.3.3"
25-
value-bag = { version = "1", features = ["owned"] }

optd-datafusion-repr/src/cost/base_cost.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,14 @@ use std::collections::HashMap;
88
use itertools::Itertools;
99
use optd_core::cascades::{CascadesOptimizer, NaiveMemo, RelNodeContext};
1010
use optd_core::cost::{Cost, CostModel, Statistics};
11-
use value_bag::ValueBag;
1211

1312
use crate::plan_nodes::{ArcDfPredNode, ConstantPred, DfNodeType, DfReprPredNode};
1413

14+
#[derive(Debug, Clone)]
15+
pub struct DfStatistics {
16+
row_cnt: f64,
17+
}
18+
1519
pub struct DfCostModel {
1620
table_stat: HashMap<String, usize>,
1721
}
@@ -31,15 +35,15 @@ impl DfCostModel {
3135
}
3236

3337
pub fn row_cnt(Statistics(stat): &Statistics) -> f64 {
34-
stat.by_ref().as_f64()
38+
stat.downcast_ref::<DfStatistics>().unwrap().row_cnt
3539
}
3640

3741
pub fn cost(compute_cost: f64, io_cost: f64) -> Cost {
3842
Cost(vec![compute_cost, io_cost])
3943
}
4044

4145
pub fn stat(row_cnt: f64) -> Statistics {
42-
Statistics(ValueBag::from_f64(row_cnt).to_owned())
46+
Statistics(Box::new(DfStatistics { row_cnt }))
4347
}
4448

4549
pub fn cost_tuple(Cost(cost): &Cost) -> (f64, f64) {

optd-datafusion-repr/src/testing/dummy_cost.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
use optd_core::cascades::{CascadesOptimizer, NaiveMemo, RelNodeContext};
77
use optd_core::cost::{Cost, CostModel, Statistics};
8-
use value_bag::ValueBag;
98

109
use crate::plan_nodes::{ArcDfPredNode, DfNodeType};
1110

@@ -36,7 +35,7 @@ impl CostModel<DfNodeType, NaiveMemo<DfNodeType>> for DummyCostModel {
3635
_: Option<RelNodeContext>,
3736
_: Option<&CascadesOptimizer<DfNodeType>>,
3837
) -> Statistics {
39-
Statistics(ValueBag::empty().to_owned())
38+
Statistics(Box::new(()))
4039
}
4140

4241
fn explain_cost(&self, _: &Cost) -> String {

0 commit comments

Comments
 (0)