Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 4c8f4b4

Browse files
authored
fix(cost): use 1000 as default table row cnt (#208)
Helps us generate better plans across all regression tests, otherwise row_cnt=1 does not make much sense. This also fixes NLJ within subquery unnesting test case. Call simplify expr when applying join pushdown rules, which in the future should be done automatically in the core. Signed-off-by: Alex Chi <[email protected]>
1 parent bdf638a commit 4c8f4b4

File tree

13 files changed

+256
-324
lines changed

13 files changed

+256
-324
lines changed

optd-datafusion-repr/src/cost/adaptive_cost.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ use optd_core::{
1010
rel_node::{RelNode, Value},
1111
};
1212

13+
use super::base_cost::DEFAULT_TABLE_ROW_CNT;
14+
1315
pub type RuntimeAdaptionStorage = Arc<Mutex<RuntimeAdaptionStorageInner>>;
1416

1517
#[derive(Default, Debug)]
@@ -52,10 +54,10 @@ impl CostModel<OptRelNodeTyp> for AdaptiveCostModel {
5254
let runtime_row_cnt = (*runtime_row_cnt).max(1) as f64;
5355
return OptCostModel::cost(runtime_row_cnt, 0.0, runtime_row_cnt);
5456
} else {
55-
return OptCostModel::cost(1.0, 0.0, 1.0);
57+
return OptCostModel::cost(DEFAULT_TABLE_ROW_CNT as f64, 0.0, 1.0);
5658
}
5759
} else {
58-
return OptCostModel::cost(1.0, 0.0, 1.0);
60+
return OptCostModel::cost(DEFAULT_TABLE_ROW_CNT as f64, 0.0, 1.0);
5961
}
6062
}
6163
let (mut row_cnt, compute_cost, io_cost) = OptCostModel::cost_tuple(

optd-datafusion-repr/src/cost/base_cost.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ pub const ROW_COUNT: usize = 1;
3131
pub const COMPUTE_COST: usize = 2;
3232
pub const IO_COST: usize = 3;
3333

34+
pub(crate) const DEFAULT_TABLE_ROW_CNT: usize = 1000;
35+
3436
impl OptCostModel {
3537
pub fn row_cnt(Cost(cost): &Cost) -> f64 {
3638
cost[ROW_COUNT]
@@ -104,7 +106,7 @@ impl CostModel<OptRelNodeTyp> for OptCostModel {
104106
.table_stat
105107
.get(table_name.as_ref())
106108
.copied()
107-
.unwrap_or(1) as f64;
109+
.unwrap_or(DEFAULT_TABLE_ROW_CNT) as f64;
108110
Self::cost(row_cnt, 0.0, row_cnt)
109111
}
110112
OptRelNodeTyp::PhysicalLimit => {

optd-datafusion-repr/src/rules/filter.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ define_rule!(
2222
// - Replaces the Or operator with True if any operand is True
2323
// - Replaces the And operator with False if any operand is False
2424
// - Removes Duplicates
25-
fn simplify_log_expr(log_expr: OptRelNodeRef, changed: &mut bool) -> OptRelNodeRef {
25+
pub(crate) fn simplify_log_expr(log_expr: OptRelNodeRef, changed: &mut bool) -> OptRelNodeRef {
2626
let log_expr = LogOpExpr::from_rel_node(log_expr).unwrap();
2727
let op = log_expr.op_type();
2828
// we need a new children vec to output deterministic order

optd-datafusion-repr/src/rules/filter_join.rs

Lines changed: 0 additions & 79 deletions
This file was deleted.

optd-datafusion-repr/src/rules/filter_pushdown.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use crate::plan_nodes::{
2121
};
2222
use crate::properties::schema::SchemaPropertyBuilder;
2323

24+
use super::filter::simplify_log_expr;
2425
use super::macros::define_rule;
2526

2627
/// Emits a LogOpExpr AND if the list has more than one element
@@ -36,7 +37,11 @@ fn and_expr_list_to_expr(exprs: Vec<Expr>) -> Expr {
3637
fn merge_conds(first: Expr, second: Expr) -> Expr {
3738
let new_expr_list = ExprList::new(vec![first, second]);
3839
// Flatten nested logical expressions if possible
39-
LogOpExpr::new_flattened_nested_logical(LogOpType::And, new_expr_list).into_expr()
40+
let flattened =
41+
LogOpExpr::new_flattened_nested_logical(LogOpType::And, new_expr_list).into_expr();
42+
let mut changed = false;
43+
// TODO: such simplifications should be invoked from optd-core, instead of ad-hoc
44+
Expr::from_rel_node(simplify_log_expr(flattened.into_rel_node(), &mut changed)).unwrap()
4045
}
4146

4247
#[derive(Debug, Clone, Copy)]

optd-sqlplannertest/tests/basic/filter.planner.sql

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,17 +89,17 @@ LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
8989
└── LogicalJoin { join_type: Cross, cond: true }
9090
├── LogicalScan { table: t1 }
9191
└── LogicalScan { table: t2 }
92-
PhysicalNestedLoopJoin
93-
├── join_type: Inner
92+
PhysicalFilter
9493
├── cond:Or
9594
│ ├── Eq
9695
│ │ ├── #0
9796
│ │ └── #2
9897
│ └── Eq
9998
│ ├── #0
10099
│ └── #3
101-
├── PhysicalScan { table: t1 }
102-
└── PhysicalScan { table: t2 }
100+
└── PhysicalNestedLoopJoin { join_type: Cross, cond: true }
101+
├── PhysicalScan { table: t1 }
102+
└── PhysicalScan { table: t2 }
103103
0 0 0 200
104104
1 1 1 201
105105
2 2 2 202

optd-sqlplannertest/tests/basic/verbose.planner.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ PhysicalScan { table: t1 }
1717
select * from t1;
1818

1919
/*
20-
PhysicalScan { table: t1, cost: weighted=1.00,row_cnt=1.00,compute=0.00,io=1.00 }
20+
PhysicalScan { table: t1, cost: weighted=1.00,row_cnt=1000.00,compute=0.00,io=1.00 }
2121
*/
2222

2323
-- Test verbose explain with aggregation
@@ -28,7 +28,7 @@ PhysicalAgg
2828
├── aggrs:Agg(Count)
2929
│ └── [ 1(u8) ]
3030
├── groups: []
31-
├── cost: weighted=21.12,row_cnt=1.00,compute=20.12,io=1.00
32-
└── PhysicalScan { table: t1, cost: weighted=1.00,row_cnt=1.00,compute=0.00,io=1.00 }
31+
├── cost: weighted=10071.06,row_cnt=1000.00,compute=10070.06,io=1.00
32+
└── PhysicalScan { table: t1, cost: weighted=1.00,row_cnt=1000.00,compute=0.00,io=1.00 }
3333
*/
3434

optd-sqlplannertest/tests/pushdowns/fliter_transpose.planner.sql

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@ LogicalProjection { exprs: [ #0, #1, #3 ] }
4747
└── LogicalJoin { join_type: Cross, cond: true }
4848
├── LogicalScan { table: t1 }
4949
└── LogicalScan { table: t2 }
50-
PhysicalFilter
51-
├── cond:Eq
52-
│ ├── #0
53-
│ └── #2
54-
└── PhysicalProjection { exprs: [ #0, #1, #3 ] }
50+
PhysicalProjection { exprs: [ #0, #1, #3 ] }
51+
└── PhysicalFilter
52+
├── cond:Eq
53+
│ ├── #0
54+
│ └── #3
5555
└── PhysicalNestedLoopJoin { join_type: Cross, cond: true }
5656
├── PhysicalScan { table: t1 }
5757
└── PhysicalScan { table: t2 }
@@ -72,11 +72,11 @@ LogicalProjection { exprs: [ #0, #1, #2 ] }
7272
└── LogicalJoin { join_type: Cross, cond: true }
7373
├── LogicalScan { table: t1 }
7474
└── LogicalScan { table: t2 }
75-
PhysicalFilter
76-
├── cond:Eq
77-
│ ├── #0
78-
│ └── #2
79-
└── PhysicalProjection { exprs: [ #0, #1, #3 ] }
75+
PhysicalProjection { exprs: [ #0, #1, #3 ] }
76+
└── PhysicalFilter
77+
├── cond:Eq
78+
│ ├── #0
79+
│ └── #3
8080
└── PhysicalNestedLoopJoin { join_type: Cross, cond: true }
8181
├── PhysicalScan { table: t1 }
8282
└── PhysicalScan { table: t2 }

optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -53,24 +53,21 @@ LogicalProjection { exprs: [ #0, #1 ] }
5353
├── LogicalAgg { exprs: [], groups: [ #0 ] }
5454
│ └── LogicalScan { table: t1 }
5555
└── LogicalScan { table: t2 }
56-
PhysicalProjection { exprs: [ #0, #1 ] }
56+
PhysicalProjection { exprs: [ #2, #3 ] }
5757
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
58-
├── PhysicalScan { table: t1 }
59-
└── PhysicalFilter
60-
├── cond:Gt
61-
│ ├── #1
62-
│ └── 100(i64)
63-
└── PhysicalAgg
64-
├── aggrs:Agg(Sum)
65-
│ └── [ Cast { cast_to: Int64, expr: #2 } ]
66-
├── groups: [ #1 ]
67-
└── PhysicalFilter
68-
├── cond:Eq
69-
│ ├── #1
70-
│ └── #0
71-
└── PhysicalNestedLoopJoin { join_type: Inner, cond: true }
72-
├── PhysicalAgg { aggrs: [], groups: [ #0 ] }
73-
│ └── PhysicalScan { table: t1 }
74-
└── PhysicalScan { table: t2 }
58+
├── PhysicalAgg
59+
│ ├── aggrs:Agg(Sum)
60+
│ │ └── [ Cast { cast_to: Int64, expr: #2 } ]
61+
│ ├── groups: [ #1 ]
62+
│ └── PhysicalProjection { exprs: [ #2, #0, #1 ] }
63+
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
64+
│ ├── PhysicalFilter
65+
│ │ ├── cond:Gt
66+
│ │ │ ├── #0
67+
│ │ │ └── 100(i64)
68+
│ │ └── PhysicalScan { table: t2 }
69+
│ └── PhysicalAgg { aggrs: [], groups: [ #0 ] }
70+
│ └── PhysicalScan { table: t1 }
71+
└── PhysicalScan { table: t1 }
7572
*/
7673

optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql

Lines changed: 38 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -362,21 +362,21 @@ PhysicalLimit { skip: 0(u64), fetch: 100(u64) }
362362
│ │ └── #1
363363
│ └── SortOrder { order: Asc }
364364
│ └── #3
365-
└── PhysicalProjection { exprs: [ #5, #1, #22, #7, #9, #2, #4, #6 ] }
366-
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #7, #19 ], right_keys: [ #1, #0 ] }
365+
└── PhysicalProjection { exprs: [ #19, #15, #22, #0, #2, #16, #18, #20 ] }
366+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #12 ], right_keys: [ #1, #0 ] }
367367
├── PhysicalHashJoin { join_type: Inner, left_keys: [ #23 ], right_keys: [ #0 ] }
368-
│ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] }
369-
│ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #10 ] }
370-
│ │ │ ├── PhysicalScan { table: supplier }
371-
│ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
372-
│ │ │ ├── PhysicalFilter
373-
│ │ │ │ ├── cond:And
374-
│ │ │ │ │ ├── Eq
375-
│ │ │ │ │ │ ├── #5
376-
│ │ │ │ │ │ └── 4(i32)
377-
│ │ │ │ │ └── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false }
378-
│ │ │ │ └── PhysicalScan { table: part }
379-
│ │ │ └── PhysicalScan { table: partsupp }
368+
│ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #17 ], right_keys: [ #0 ] }
369+
│ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #10 ], right_keys: [ #0 ] }
370+
│ │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
371+
│ │ │ │ ├── PhysicalFilter
372+
│ │ │ │ │ ├── cond:And
373+
│ │ │ │ │ │ ├── Eq
374+
│ │ │ │ │ │ │ ├── #5
375+
│ │ │ │ │ │ │ └── 4(i32)
376+
│ │ │ │ │ │ └── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false }
377+
│ │ │ │ │ └── PhysicalScan { table: part }
378+
│ │ │ │ └── PhysicalScan { table: partsupp }
379+
│ │ │ └── PhysicalScan { table: supplier }
380380
│ │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] }
381381
│ │ └── PhysicalScan { table: nation }
382382
│ └── PhysicalProjection { exprs: [ #0 ] }
@@ -609,28 +609,29 @@ PhysicalSort
609609
│ ├── Cast { cast_to: Decimal128(20, 0), expr: 1(i64) }
610610
│ └── #23
611611
├── groups: [ #41 ]
612-
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #19, #3 ], right_keys: [ #0, #3 ] }
613-
├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #1 ] }
614-
│ ├── PhysicalScan { table: customer }
615-
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
616-
│ ├── PhysicalFilter
617-
│ │ ├── cond:And
618-
│ │ │ ├── Geq
619-
│ │ │ │ ├── #4
620-
│ │ │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" }
621-
│ │ │ └── Lt
622-
│ │ │ ├── #4
623-
│ │ │ └── Cast { cast_to: Date32, expr: "2024-01-01" }
624-
│ │ └── PhysicalScan { table: orders }
625-
│ └── PhysicalScan { table: lineitem }
626-
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #9 ], right_keys: [ #0 ] }
627-
├── PhysicalHashJoin { join_type: Inner, left_keys: [ #3 ], right_keys: [ #0 ] }
628-
│ ├── PhysicalScan { table: supplier }
629-
│ └── PhysicalScan { table: nation }
630-
└── PhysicalFilter
631-
├── cond:Eq
632-
│ ├── #1
633-
│ └── "Asia"
634-
└── PhysicalScan { table: region }
612+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #42 ], right_keys: [ #0 ] }
613+
├── PhysicalHashJoin { join_type: Inner, left_keys: [ #36 ], right_keys: [ #0 ] }
614+
│ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #19, #3 ], right_keys: [ #0, #3 ] }
615+
│ │ ├── PhysicalProjection { exprs: [ #25, #26, #27, #28, #29, #30, #31, #32, #0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21, #22, #23, #24 ] }
616+
│ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] }
617+
│ │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
618+
│ │ │ │ ├── PhysicalFilter
619+
│ │ │ │ │ ├── cond:And
620+
│ │ │ │ │ │ ├── Geq
621+
│ │ │ │ │ │ │ ├── #4
622+
│ │ │ │ │ │ │ └── Cast { cast_to: Date32, expr: "2023-01-01" }
623+
│ │ │ │ │ │ └── Lt
624+
│ │ │ │ │ │ ├── #4
625+
│ │ │ │ │ │ └── Cast { cast_to: Date32, expr: "2024-01-01" }
626+
│ │ │ │ │ └── PhysicalScan { table: orders }
627+
│ │ │ │ └── PhysicalScan { table: lineitem }
628+
│ │ │ └── PhysicalScan { table: customer }
629+
│ │ └── PhysicalScan { table: supplier }
630+
│ └── PhysicalScan { table: nation }
631+
└── PhysicalFilter
632+
├── cond:Eq
633+
│ ├── #1
634+
│ └── "Asia"
635+
└── PhysicalScan { table: region }
635636
*/
636637

0 commit comments

Comments
 (0)