Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 7915fb9

Browse files
authored
Fix: fix empty relation schema maintainence across passes (#118)
# Why we need this pr? Previously. EmptyRelation's schema is derived from its logically equivalent peer exprs. For example, the schema for `select * from empty_relation` is deducted from `select * from t1 inner join t2 on false`. And the empty relation expression will replace the original inner join. It works fine in the first pass as empty relation is always transformed from expressions having proper schema(except for queries like `select 64;`, it is directly converted from datafusion and has empty schema). But it will break adaptive design as the next time query is executed, there's only empty relation left in the optimization tree and there's no way to deduct schema properties from pure empty relation. # Design we serialize schema as emptyRelation's data value and it is stored in its relnode, once we need the schema we fetch it from empty relation's data. # Major Changes - schema prop maintains the type for ColumnRef typ and List typ - adds convertion from Value to ConstantType to convert data to schema field - adds serialized data to EmptyRelation data so schema is keeped - change EmptyRelation initialization method - change columnExpr derivation rules for emptyRelation --------- Signed-off-by: AveryQi115 <[email protected]>
1 parent ee02c09 commit 7915fb9

File tree

14 files changed

+163
-48
lines changed

14 files changed

+163
-48
lines changed

Diff for: Cargo.lock

+11
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: optd-core/src/rel_node.rs

+7
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,13 @@ impl Value {
151151
_ => panic!("Value is not a string"),
152152
}
153153
}
154+
155+
pub fn as_slice(&self) -> Arc<[u8]> {
156+
match self {
157+
Value::Serialized(i) => i.clone(),
158+
_ => panic!("Value is not a serialized"),
159+
}
160+
}
154161
}
155162

156163
/// A RelNode is consisted of a plan node type and some children.

Diff for: optd-datafusion-bridge/src/from_optd.rs

+1-4
Original file line numberDiff line numberDiff line change
@@ -548,10 +548,6 @@ impl OptdPlanContext<'_> {
548548
node: PlanNode,
549549
meta: &RelNodeMetaMap,
550550
) -> Result<Arc<dyn ExecutionPlan>> {
551-
let mut schema = OptdSchema { fields: vec![] };
552-
if node.typ() == OptRelNodeTyp::PhysicalEmptyRelation {
553-
schema = node.schema(self.optimizer.unwrap().optd_optimizer());
554-
}
555551
let rel_node = node.into_rel_node();
556552

557553
let group_id = meta
@@ -599,6 +595,7 @@ impl OptdPlanContext<'_> {
599595
}
600596
OptRelNodeTyp::PhysicalEmptyRelation => {
601597
let physical_node = PhysicalEmptyRelation::from_rel_node(rel_node).unwrap();
598+
let schema = physical_node.empty_relation_schema();
602599
let datafusion_schema: Schema = from_optd_schema(schema);
603600
Arc::new(datafusion::physical_plan::empty::EmptyExec::new(
604601
physical_node.produce_one_row(),

Diff for: optd-datafusion-bridge/src/into_optd.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use optd_datafusion_repr::plan_nodes::{
1212
LogicalEmptyRelation, LogicalFilter, LogicalJoin, LogicalLimit, LogicalProjection, LogicalScan,
1313
LogicalSort, OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode, SortOrderExpr, SortOrderType,
1414
};
15+
use optd_datafusion_repr::properties::schema::Schema as OPTDSchema;
1516

1617
use crate::OptdPlanContext;
1718

@@ -366,7 +367,12 @@ impl OptdPlanContext<'_> {
366367
&mut self,
367368
node: &logical_plan::EmptyRelation,
368369
) -> Result<LogicalEmptyRelation> {
369-
Ok(LogicalEmptyRelation::new(node.produce_one_row))
370+
// empty_relation from datafusion always have an empty schema
371+
let empty_schema = OPTDSchema { fields: vec![] };
372+
Ok(LogicalEmptyRelation::new(
373+
node.produce_one_row,
374+
empty_schema,
375+
))
370376
}
371377

372378
fn conv_into_optd_limit(&mut self, node: &logical_plan::Limit) -> Result<LogicalLimit> {

Diff for: optd-datafusion-repr/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ datafusion-expr = "32.0.0"
2222
async-trait = "0.1"
2323
datafusion = "32.0.0"
2424
assert_approx_eq = "1.1.0"
25+
serde = { version = "1.0", features = ["derive"] }
26+
bincode = "1.3.3"

Diff for: optd-datafusion-repr/src/plan_nodes.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use optd_core::{
2323

2424
pub use agg::{LogicalAgg, PhysicalAgg};
2525
pub use apply::{ApplyType, LogicalApply};
26-
pub use empty_relation::{LogicalEmptyRelation, PhysicalEmptyRelation};
26+
pub use empty_relation::{EmptyRelationData, LogicalEmptyRelation, PhysicalEmptyRelation};
2727
pub use expr::{
2828
BetweenExpr, BinOpExpr, BinOpType, CastExpr, ColumnRefExpr, ConstantExpr, ConstantType,
2929
DataTypeExpr, ExprList, FuncExpr, FuncType, InListExpr, LikeExpr, LogOpExpr, LogOpType,

Diff for: optd-datafusion-repr/src/plan_nodes/empty_relation.rs

+50-8
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
use pretty_xmlish::Pretty;
22

3+
use bincode;
34
use optd_core::rel_node::{RelNode, RelNodeMetaMap, Value};
5+
use serde::{Deserialize, Serialize};
6+
use std::sync::Arc;
47

58
use crate::explain::Insertable;
69

710
use super::{replace_typ, OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode};
811

12+
use crate::properties::schema::Schema;
13+
914
#[derive(Clone, Debug)]
1015
pub struct LogicalEmptyRelation(pub PlanNode);
1116

@@ -29,25 +34,49 @@ impl OptRelNode for LogicalEmptyRelation {
2934
}
3035
}
3136

37+
#[derive(Clone, Debug, Serialize, Deserialize)]
38+
pub struct EmptyRelationData {
39+
pub produce_one_row: bool,
40+
pub schema: Schema,
41+
}
42+
3243
impl LogicalEmptyRelation {
33-
pub fn new(produce_one_row: bool) -> LogicalEmptyRelation {
44+
pub fn new(produce_one_row: bool, schema: Schema) -> LogicalEmptyRelation {
45+
let data = EmptyRelationData {
46+
produce_one_row,
47+
schema,
48+
};
49+
let serialized_data: Arc<[u8]> = bincode::serialize(&data).unwrap().into_iter().collect();
3450
LogicalEmptyRelation(PlanNode(
3551
RelNode {
3652
typ: OptRelNodeTyp::EmptyRelation,
3753
children: vec![],
38-
data: Some(Value::Bool(produce_one_row)),
54+
data: Some(Value::Serialized(serialized_data)),
3955
}
4056
.into(),
4157
))
4258
}
4359

44-
pub fn produce_one_row(&self) -> bool {
45-
self.clone()
60+
fn get_data(&self) -> EmptyRelationData {
61+
let serialized_data = self
62+
.clone()
4663
.into_rel_node()
4764
.data
4865
.as_ref()
4966
.unwrap()
50-
.as_bool()
67+
.as_slice();
68+
69+
bincode::deserialize(serialized_data.as_ref()).unwrap()
70+
}
71+
72+
pub fn empty_relation_schema(&self) -> Schema {
73+
let data = self.get_data();
74+
data.schema
75+
}
76+
77+
pub fn produce_one_row(&self) -> bool {
78+
let data = self.get_data();
79+
data.produce_one_row
5180
}
5281
}
5382

@@ -80,12 +109,25 @@ impl PhysicalEmptyRelation {
80109
Self(node)
81110
}
82111

83-
pub fn produce_one_row(&self) -> bool {
84-
self.clone()
112+
fn get_data(&self) -> EmptyRelationData {
113+
let serialized_data = self
114+
.clone()
85115
.into_rel_node()
86116
.data
87117
.as_ref()
88118
.unwrap()
89-
.as_bool()
119+
.as_slice();
120+
121+
bincode::deserialize(serialized_data.as_ref()).unwrap()
122+
}
123+
124+
pub fn produce_one_row(&self) -> bool {
125+
let data = self.get_data();
126+
data.produce_one_row
127+
}
128+
129+
pub fn empty_relation_schema(&self) -> Schema {
130+
let data = self.get_data();
131+
data.schema
90132
}
91133
}

Diff for: optd-datafusion-repr/src/plan_nodes/expr.rs

+15-8
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::{fmt::Display, sync::Arc};
33
use arrow_schema::DataType;
44
use itertools::Itertools;
55
use pretty_xmlish::Pretty;
6+
use serde::{Deserialize, Serialize};
67

78
use optd_core::rel_node::{RelNode, RelNodeMetaMap, Value};
89

@@ -65,7 +66,7 @@ impl OptRelNode for ExprList {
6566
}
6667
}
6768

68-
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
69+
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)]
6970
pub enum ConstantType {
7071
Bool,
7172
Utf8String,
@@ -84,12 +85,9 @@ pub enum ConstantType {
8485
Any,
8586
}
8687

87-
#[derive(Clone, Debug)]
88-
pub struct ConstantExpr(pub Expr);
89-
90-
impl ConstantExpr {
91-
pub fn new(value: Value) -> Self {
92-
let typ = match &value {
88+
impl ConstantType {
89+
pub fn get_data_type_from_value(value: &Value) -> Self {
90+
match value {
9391
Value::Bool(_) => ConstantType::Bool,
9492
Value::String(_) => ConstantType::Utf8String,
9593
Value::UInt8(_) => ConstantType::UInt8,
@@ -102,7 +100,16 @@ impl ConstantExpr {
102100
Value::Int64(_) => ConstantType::Int64,
103101
Value::Float(_) => ConstantType::Float64,
104102
_ => unimplemented!(),
105-
};
103+
}
104+
}
105+
}
106+
107+
#[derive(Clone, Debug)]
108+
pub struct ConstantExpr(pub Expr);
109+
110+
impl ConstantExpr {
111+
pub fn new(value: Value) -> Self {
112+
let typ = ConstantType::get_data_type_from_value(&value);
106113
Self::new_with_type(value, typ)
107114
}
108115

Diff for: optd-datafusion-repr/src/properties.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
pub mod column_ref;
22
pub mod schema;
3+
4+
const DEFAULT_NAME: &str = "unnamed";

Diff for: optd-datafusion-repr/src/properties/column_ref.rs

+15-2
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@ use std::{ops::Deref, sync::Arc};
22

33
use optd_core::property::PropertyBuilder;
44

5-
use crate::plan_nodes::OptRelNodeTyp;
5+
use crate::plan_nodes::{EmptyRelationData, OptRelNodeTyp};
66

77
use super::schema::Catalog;
8+
use super::DEFAULT_NAME;
89

910
#[derive(Clone, Debug)]
1011
pub enum ColumnRef {
@@ -55,6 +56,19 @@ impl PropertyBuilder<OptRelNodeTyp> for ColumnRefPropertyBuilder {
5556
})
5657
.collect()
5758
}
59+
OptRelNodeTyp::EmptyRelation => {
60+
let data = data.unwrap().as_slice();
61+
let empty_relation_data: EmptyRelationData =
62+
bincode::deserialize(data.as_ref()).unwrap();
63+
let schema = empty_relation_data.schema;
64+
let column_cnt = schema.fields.len();
65+
(0..column_cnt)
66+
.map(|i| ColumnRef::BaseTableColumnRef {
67+
table: DEFAULT_NAME.to_string(),
68+
col_idx: i,
69+
})
70+
.collect()
71+
}
5872
OptRelNodeTyp::ColumnRef => {
5973
let col_ref_idx = data.unwrap().as_u64();
6074
// this is always safe since col_ref_idx was initially a usize in ColumnRefExpr::new()
@@ -114,7 +128,6 @@ impl PropertyBuilder<OptRelNodeTyp> for ColumnRefPropertyBuilder {
114128
| OptRelNodeTyp::BinOp(_)
115129
| OptRelNodeTyp::DataType(_)
116130
| OptRelNodeTyp::Between
117-
| OptRelNodeTyp::EmptyRelation
118131
| OptRelNodeTyp::Like
119132
| OptRelNodeTyp::InList => {
120133
vec![ColumnRef::Derived]

Diff for: optd-datafusion-repr/src/properties/schema.rs

+26-14
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1+
use serde::{Deserialize, Serialize};
12
use std::sync::Arc;
23

34
use optd_core::property::PropertyBuilder;
45

5-
use crate::plan_nodes::{ConstantType, OptRelNodeTyp};
6+
use super::DEFAULT_NAME;
7+
use crate::plan_nodes::{ConstantType, EmptyRelationData, OptRelNodeTyp};
68

7-
#[derive(Clone, Debug)]
9+
#[derive(Clone, Debug, Serialize, Deserialize)]
810
pub struct Field {
911
pub name: String,
1012
pub typ: ConstantType,
1113
pub nullable: bool,
1214
}
13-
#[derive(Clone, Debug)]
15+
#[derive(Clone, Debug, Serialize, Deserialize)]
1416
pub struct Schema {
1517
pub fields: Vec<Field>,
1618
}
@@ -61,23 +63,33 @@ impl PropertyBuilder<OptRelNodeTyp> for SchemaPropertyBuilder {
6163
schema.fields.extend(schema2.fields);
6264
schema
6365
}
64-
OptRelNodeTyp::List => {
65-
// TODO: calculate real is_nullable for aggregations
66+
OptRelNodeTyp::EmptyRelation => {
67+
let data = data.unwrap().as_slice();
68+
let empty_relation_data: EmptyRelationData =
69+
bincode::deserialize(data.as_ref()).unwrap();
70+
empty_relation_data.schema
71+
}
72+
OptRelNodeTyp::ColumnRef => {
73+
let data_typ = ConstantType::get_data_type_from_value(&data.unwrap());
6674
Schema {
67-
fields: vec![
68-
Field {
69-
name: "unnamed".to_string(),
70-
typ: ConstantType::Any,
71-
nullable: true
72-
};
73-
children.len()
74-
],
75+
fields: vec![Field {
76+
name: DEFAULT_NAME.to_string(),
77+
typ: data_typ,
78+
nullable: true,
79+
}],
80+
}
81+
}
82+
OptRelNodeTyp::List => {
83+
let mut fields = vec![];
84+
for child in children {
85+
fields.extend(child.fields.clone());
7586
}
87+
Schema { fields }
7688
}
7789
OptRelNodeTyp::LogOp(_) => Schema {
7890
fields: vec![
7991
Field {
80-
name: "unnamed".to_string(),
92+
name: DEFAULT_NAME.to_string(),
8193
typ: ConstantType::Any,
8294
nullable: true
8395
};

0 commit comments

Comments
 (0)