Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 2694b9a

Browse files
authored
Merge pull request #52 from AveryQi115/empty_relation
feat: [LogicalOptimizer] add support for empty relation
2 parents 4f313ba + 12b64d1 commit 2694b9a

File tree

11 files changed

+231
-32
lines changed

11 files changed

+231
-32
lines changed

datafusion-optd-cli/src/main.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,10 @@ pub async fn main() -> Result<()> {
158158
};
159159

160160
let mut session_config = SessionConfig::from_env()?.with_information_schema(true);
161-
161+
162162
if !args.enable_logical {
163163
session_config.options_mut().optimizer.max_passes = 0;
164164
}
165-
166165

167166
if let Some(batch_size) = args.batch_size {
168167
session_config = session_config.with_batch_size(batch_size);

optd-datafusion-bridge/src/from_optd.rs

+41-5
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::{collections::HashMap, sync::Arc};
33
use anyhow::{bail, Context, Result};
44
use async_recursion::async_recursion;
55
use datafusion::{
6-
arrow::datatypes::{Schema, SchemaRef},
6+
arrow::datatypes::{DataType, Field, Schema, SchemaRef},
77
datasource::source_as_provider,
88
logical_expr::Operator,
99
physical_expr,
@@ -12,7 +12,8 @@ use datafusion::{
1212
aggregates::AggregateMode,
1313
expressions::create_aggregate_expr,
1414
joins::{
15-
utils::{ColumnIndex, JoinFilter}, CrossJoinExec, PartitionMode
15+
utils::{ColumnIndex, JoinFilter},
16+
CrossJoinExec, PartitionMode,
1617
},
1718
projection::ProjectionExec,
1819
AggregateExpr, ExecutionPlan, PhysicalExpr,
@@ -23,14 +24,36 @@ use optd_datafusion_repr::{
2324
plan_nodes::{
2425
BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ConstantType, Expr, FuncExpr, FuncType,
2526
JoinType, LogOpExpr, LogOpType, OptRelNode, OptRelNodeRef, OptRelNodeTyp, PhysicalAgg,
26-
PhysicalFilter, PhysicalHashJoin, PhysicalNestedLoopJoin, PhysicalProjection, PhysicalScan,
27-
PhysicalSort, PlanNode, SortOrderExpr, SortOrderType,
27+
PhysicalEmptyRelation, PhysicalFilter, PhysicalHashJoin, PhysicalNestedLoopJoin,
28+
PhysicalProjection, PhysicalScan, PhysicalSort, PlanNode, SortOrderExpr, SortOrderType,
2829
},
30+
properties::schema::Schema as OptdSchema,
2931
PhysicalCollector,
3032
};
3133

3234
use crate::{physical_collector::CollectorExec, OptdPlanContext};
3335

36+
// TODO: current DataType and ConstantType are not 1 to 1 mapping
37+
// optd schema stores constantType from data type in catalog.get
38+
// for decimal128, the precision is lost
39+
fn from_optd_schema(optd_schema: &OptdSchema) -> Schema {
40+
let match_type = |typ: &ConstantType| match typ {
41+
ConstantType::Any => unimplemented!(),
42+
ConstantType::Bool => DataType::Boolean,
43+
ConstantType::Int => DataType::Int64,
44+
ConstantType::Date => DataType::Date32,
45+
ConstantType::Decimal => DataType::Float64,
46+
ConstantType::Utf8String => DataType::Utf8,
47+
};
48+
let fields: Vec<_> = optd_schema
49+
.0
50+
.iter()
51+
.enumerate()
52+
.map(|(i, typ)| Field::new(&format!("c{}", i), match_type(typ), false))
53+
.collect();
54+
Schema::new(fields)
55+
}
56+
3457
impl OptdPlanContext<'_> {
3558
#[async_recursion]
3659
async fn from_optd_table_scan(
@@ -317,7 +340,8 @@ impl OptdPlanContext<'_> {
317340
let physical_expr = self.from_optd_expr(node.cond(), &Arc::new(filter_schema.clone()))?;
318341

319342
if let JoinType::Cross = node.join_type() {
320-
return Ok(Arc::new(CrossJoinExec::new(left_exec, right_exec)) as Arc<dyn ExecutionPlan + 'static>);
343+
return Ok(Arc::new(CrossJoinExec::new(left_exec, right_exec))
344+
as Arc<dyn ExecutionPlan + 'static>);
321345
}
322346

323347
let join_type = match node.join_type() {
@@ -398,6 +422,10 @@ impl OptdPlanContext<'_> {
398422

399423
#[async_recursion]
400424
async fn from_optd_plan_node(&mut self, node: PlanNode) -> Result<Arc<dyn ExecutionPlan>> {
425+
let mut schema = OptdSchema(vec![]);
426+
if node.typ() == OptRelNodeTyp::PhysicalEmptyRelation {
427+
schema = node.schema(self.optimizer.unwrap().optd_optimizer());
428+
}
401429
let rel_node = node.into_rel_node();
402430
let rel_node_dbg = rel_node.clone();
403431
let result = match &rel_node.typ {
@@ -440,6 +468,14 @@ impl OptdPlanContext<'_> {
440468
self.optimizer.as_ref().unwrap().runtime_statistics.clone(),
441469
)) as Arc<dyn ExecutionPlan>)
442470
}
471+
OptRelNodeTyp::PhysicalEmptyRelation => {
472+
let physical_node = PhysicalEmptyRelation::from_rel_node(rel_node).unwrap();
473+
let datafusion_schema: Schema = from_optd_schema(&schema);
474+
Ok(Arc::new(datafusion::physical_plan::empty::EmptyExec::new(
475+
physical_node.produce_one_row(),
476+
Arc::new(datafusion_schema),
477+
)) as Arc<dyn ExecutionPlan>)
478+
}
443479
typ => unimplemented!("{}", typ),
444480
};
445481
result.with_context(|| format!("when processing {}", rel_node_dbg))

optd-datafusion-bridge/src/into_optd.rs

+24-5
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ use datafusion::{
77
use optd_core::rel_node::RelNode;
88
use optd_datafusion_repr::plan_nodes::{
99
BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, Expr, ExprList, FuncExpr, FuncType,
10-
JoinType, LogOpExpr, LogOpType, LogicalAgg, LogicalFilter, LogicalJoin, LogicalProjection,
11-
LogicalScan, LogicalSort, OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode, SortOrderExpr,
12-
SortOrderType,
10+
JoinType, LogOpExpr, LogOpType, LogicalAgg, LogicalEmptyRelation, LogicalFilter, LogicalJoin,
11+
LogicalProjection, LogicalScan, LogicalSort, OptRelNode, OptRelNodeRef, OptRelNodeTyp,
12+
PlanNode, SortOrderExpr, SortOrderType,
1313
};
1414

1515
use crate::OptdPlanContext;
@@ -69,6 +69,10 @@ impl OptdPlanContext<'_> {
6969
let x = x.as_ref().unwrap();
7070
Ok(ConstantExpr::string(x).into_expr())
7171
}
72+
ScalarValue::Int64(x) => {
73+
let x = x.as_ref().unwrap();
74+
Ok(ConstantExpr::int(*x as i64).into_expr())
75+
}
7276
ScalarValue::Date32(x) => {
7377
let x = x.as_ref().unwrap();
7478
Ok(ConstantExpr::date(*x as i64).into_expr())
@@ -113,7 +117,7 @@ impl OptdPlanContext<'_> {
113117
expr,
114118
)
115119
.into_expr())
116-
}
120+
}
117121
_ => bail!("Unsupported expression: {:?}", expr),
118122
}
119123
}
@@ -233,7 +237,19 @@ impl OptdPlanContext<'_> {
233237
fn into_optd_cross_join(&mut self, node: &logical_plan::CrossJoin) -> Result<LogicalJoin> {
234238
let left = self.into_optd_plan_node(node.left.as_ref())?;
235239
let right = self.into_optd_plan_node(node.right.as_ref())?;
236-
Ok(LogicalJoin::new(left, right, ConstantExpr::bool(true).into_expr(), JoinType::Cross))
240+
Ok(LogicalJoin::new(
241+
left,
242+
right,
243+
ConstantExpr::bool(true).into_expr(),
244+
JoinType::Cross,
245+
))
246+
}
247+
248+
fn into_optd_empty_relation(
249+
&mut self,
250+
node: &logical_plan::EmptyRelation,
251+
) -> Result<LogicalEmptyRelation> {
252+
Ok(LogicalEmptyRelation::new(node.produce_one_row))
237253
}
238254

239255
fn into_optd_plan_node(&mut self, node: &LogicalPlan) -> Result<PlanNode> {
@@ -246,6 +262,9 @@ impl OptdPlanContext<'_> {
246262
LogicalPlan::Join(node) => self.into_optd_join(node)?.into_plan_node(),
247263
LogicalPlan::Filter(node) => self.into_optd_filter(node)?.into_plan_node(),
248264
LogicalPlan::CrossJoin(node) => self.into_optd_cross_join(node)?.into_plan_node(),
265+
LogicalPlan::EmptyRelation(node) => {
266+
self.into_optd_empty_relation(node)?.into_plan_node()
267+
}
249268
_ => bail!(
250269
"unsupported plan node: {}",
251270
format!("{:?}", node).split('\n').next().unwrap()

optd-datafusion-repr/src/cost/base_cost.rs

+1
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ impl CostModel<OptRelNodeTyp> for OptCostModel {
106106
.unwrap_or(1) as f64;
107107
Self::cost(row_cnt, 0.0, row_cnt)
108108
}
109+
OptRelNodeTyp::PhysicalEmptyRelation => Self::cost(0.5, 0.01, 0.0),
109110
OptRelNodeTyp::PhysicalFilter => {
110111
let (row_cnt, _, _) = Self::cost_tuple(&children[0]);
111112
let (_, compute_cost, _) = Self::cost_tuple(&children[1]);

optd-datafusion-repr/src/plan_nodes.rs

+14-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
mod agg;
44
mod apply;
5+
mod empty_relation;
56
mod expr;
67
mod filter;
78
mod join;
@@ -19,6 +20,7 @@ use optd_core::{
1920

2021
pub use agg::{LogicalAgg, PhysicalAgg};
2122
pub use apply::{ApplyType, LogicalApply};
23+
pub use empty_relation::{LogicalEmptyRelation, PhysicalEmptyRelation};
2224
pub use expr::{
2325
BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ConstantType, ExprList, FuncExpr, FuncType,
2426
LogOpExpr, LogOpType, SortOrderExpr, SortOrderType, UnOpExpr, UnOpType,
@@ -48,6 +50,7 @@ pub enum OptRelNodeTyp {
4850
Sort,
4951
Agg,
5052
Apply(ApplyType),
53+
EmptyRelation,
5154
// Physical plan nodes
5255
PhysicalProjection,
5356
PhysicalFilter,
@@ -56,6 +59,7 @@ pub enum OptRelNodeTyp {
5659
PhysicalAgg,
5760
PhysicalHashJoin(JoinType),
5861
PhysicalNestedLoopJoin(JoinType),
62+
PhysicalEmptyRelation,
5963
PhysicalCollector(GroupId), // only produced after optimization is done
6064
// Expressions
6165
Constant(ConstantType),
@@ -78,6 +82,7 @@ impl OptRelNodeTyp {
7882
| Self::Apply(_)
7983
| Self::Sort
8084
| Self::Agg
85+
| Self::EmptyRelation
8186
| Self::PhysicalProjection
8287
| Self::PhysicalFilter
8388
| Self::PhysicalNestedLoopJoin(_)
@@ -86,6 +91,7 @@ impl OptRelNodeTyp {
8691
| Self::PhysicalAgg
8792
| Self::PhysicalHashJoin(_)
8893
| Self::PhysicalCollector(_)
94+
| Self::PhysicalEmptyRelation
8995
)
9096
}
9197

@@ -120,6 +126,7 @@ impl RelNodeTyp for OptRelNodeTyp {
120126
| Self::Apply(_)
121127
| Self::Sort
122128
| Self::Agg
129+
| Self::EmptyRelation
123130
)
124131
}
125132

@@ -194,7 +201,7 @@ impl PlanNode {
194201
self.0.typ.clone()
195202
}
196203

197-
pub fn schema(&self, optimizer: CascadesOptimizer<OptRelNodeTyp>) -> Schema {
204+
pub fn schema(&self, optimizer: &CascadesOptimizer<OptRelNodeTyp>) -> Schema {
198205
let group_id = optimizer.resolve_group_id(self.0.clone());
199206
optimizer.get_property_by_group::<SchemaPropertyBuilder>(group_id, 0)
200207
}
@@ -300,6 +307,9 @@ pub fn explain(rel_node: OptRelNodeRef) -> Pretty<'static> {
300307
OptRelNodeTyp::Apply(_) => LogicalApply::from_rel_node(rel_node)
301308
.unwrap()
302309
.dispatch_explain(),
310+
OptRelNodeTyp::EmptyRelation => LogicalEmptyRelation::from_rel_node(rel_node)
311+
.unwrap()
312+
.dispatch_explain(),
303313
OptRelNodeTyp::PhysicalFilter => PhysicalFilter::from_rel_node(rel_node)
304314
.unwrap()
305315
.dispatch_explain(),
@@ -345,6 +355,9 @@ pub fn explain(rel_node: OptRelNodeRef) -> Pretty<'static> {
345355
OptRelNodeTyp::PhysicalCollector(group_id) => PhysicalCollector::from_rel_node(rel_node)
346356
.unwrap()
347357
.dispatch_explain(),
358+
OptRelNodeTyp::PhysicalEmptyRelation => PhysicalEmptyRelation::from_rel_node(rel_node)
359+
.unwrap()
360+
.dispatch_explain(),
348361
}
349362
}
350363

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
use pretty_xmlish::Pretty;
2+
3+
use optd_core::rel_node::{RelNode, Value};
4+
5+
use super::{replace_typ, OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode};
6+
7+
#[derive(Clone, Debug)]
8+
pub struct LogicalEmptyRelation(pub PlanNode);
9+
10+
impl OptRelNode for LogicalEmptyRelation {
11+
fn into_rel_node(self) -> OptRelNodeRef {
12+
self.0.into_rel_node()
13+
}
14+
15+
fn from_rel_node(rel_node: OptRelNodeRef) -> Option<Self> {
16+
if rel_node.typ != OptRelNodeTyp::EmptyRelation {
17+
return None;
18+
}
19+
PlanNode::from_rel_node(rel_node).map(Self)
20+
}
21+
22+
fn dispatch_explain(&self) -> Pretty<'static> {
23+
Pretty::childless_record(
24+
"LogicalEmptyRelation",
25+
vec![("produce_one_row", self.produce_one_row().to_string().into())],
26+
)
27+
}
28+
}
29+
30+
impl LogicalEmptyRelation {
31+
pub fn new(produce_one_row: bool) -> LogicalEmptyRelation {
32+
LogicalEmptyRelation(PlanNode(
33+
RelNode {
34+
typ: OptRelNodeTyp::EmptyRelation,
35+
children: vec![],
36+
data: Some(Value::Bool(produce_one_row)),
37+
}
38+
.into(),
39+
))
40+
}
41+
42+
pub fn produce_one_row(&self) -> bool {
43+
self.clone()
44+
.into_rel_node()
45+
.data
46+
.as_ref()
47+
.unwrap()
48+
.as_bool()
49+
}
50+
}
51+
52+
#[derive(Clone, Debug)]
53+
pub struct PhysicalEmptyRelation(pub PlanNode);
54+
55+
impl OptRelNode for PhysicalEmptyRelation {
56+
fn into_rel_node(self) -> OptRelNodeRef {
57+
replace_typ(self.0.into_rel_node(), OptRelNodeTyp::PhysicalEmptyRelation)
58+
}
59+
60+
fn from_rel_node(rel_node: OptRelNodeRef) -> Option<Self> {
61+
if rel_node.typ != OptRelNodeTyp::PhysicalEmptyRelation {
62+
return None;
63+
}
64+
PlanNode::from_rel_node(rel_node).map(Self)
65+
}
66+
67+
fn dispatch_explain(&self) -> Pretty<'static> {
68+
Pretty::childless_record(
69+
"PhysicalEmptyRelation",
70+
vec![("produce_one_row", self.produce_one_row().to_string().into())],
71+
)
72+
}
73+
}
74+
75+
impl PhysicalEmptyRelation {
76+
pub fn new(node: PlanNode) -> PhysicalEmptyRelation {
77+
Self(node)
78+
}
79+
80+
pub fn produce_one_row(&self) -> bool {
81+
self.clone()
82+
.into_rel_node()
83+
.data
84+
.as_ref()
85+
.unwrap()
86+
.as_bool()
87+
}
88+
}

optd-datafusion-repr/src/properties/schema.rs

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,11 @@
1-
use std::{
2-
collections::HashMap,
3-
sync::{Arc, Mutex},
4-
};
5-
61
use optd_core::property::PropertyBuilder;
72

83
use crate::plan_nodes::{ConstantType, OptRelNodeTyp};
94

105
#[derive(Clone, Debug)]
116
pub struct Schema(pub Vec<ConstantType>);
127

8+
// TODO: add column names and nullability to the schema
139
impl Schema {
1410
pub fn len(&self) -> usize {
1511
self.0.len()

optd-datafusion-repr/src/rules/physical.rs

+9
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ impl PhysicalConversionRule {
4040
Arc::new(PhysicalConversionRule::new(OptRelNodeTyp::Filter)),
4141
Arc::new(PhysicalConversionRule::new(OptRelNodeTyp::Sort)),
4242
Arc::new(PhysicalConversionRule::new(OptRelNodeTyp::Agg)),
43+
Arc::new(PhysicalConversionRule::new(OptRelNodeTyp::EmptyRelation)),
4344
]
4445
}
4546
}
@@ -117,6 +118,14 @@ impl<O: Optimizer<OptRelNodeTyp>> Rule<OptRelNodeTyp, O> for PhysicalConversionR
117118
};
118119
vec![node]
119120
}
121+
OptRelNodeTyp::EmptyRelation => {
122+
let node = RelNode {
123+
typ: OptRelNodeTyp::PhysicalEmptyRelation,
124+
children,
125+
data,
126+
};
127+
vec![node]
128+
}
120129
_ => vec![],
121130
}
122131
}

0 commit comments

Comments
 (0)