Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit fc187cf

Browse files
committed
refactor(df-repr): adjust filter cost to prefer hash join (#265)
Signed-off-by: Alex Chi Z <[email protected]>
1 parent 583adab commit fc187cf

File tree

10 files changed

+189
-202
lines changed

10 files changed

+189
-202
lines changed

optd-core/src/cascades/memo.rs

+2-1
Original file line number | Diff line number | Diff line change
@@ -357,7 +357,8 @@ impl<T: NodeType> NaiveMemo<T> {
357357
}
358358

359359
fn verify_integrity(&self) {
360-
if false {
360+
const ENABLE_INTEGRITY_CHECK: bool = false;
361+
if ENABLE_INTEGRITY_CHECK {
361362
let num_of_exprs = self.expr_id_to_expr_node.len();
362363
assert_eq!(num_of_exprs, self.expr_node_to_expr_id.len());
363364
assert_eq!(num_of_exprs, self.expr_id_to_group_id.len());

optd-datafusion-repr/src/cost/base_cost.rs

+2-3
Original file line number | Diff line number | Diff line change
@@ -99,13 +99,12 @@ impl CostModel<DfNodeType, NaiveMemo<DfNodeType>> for DfCostModel {
9999
}
100100
DfNodeType::PhysicalLimit => {
101101
let row_cnt = Self::row_cnt(children[0]);
102-
let selectivity = 0.001;
103-
Self::stat((row_cnt * selectivity).max(1.0))
102+
Self::stat(row_cnt.max(1.0))
104103
}
105104
DfNodeType::PhysicalEmptyRelation => Self::stat(0.01),
106105
DfNodeType::PhysicalFilter => {
107106
let row_cnt = Self::row_cnt(children[0]);
108-
let selectivity = 0.001;
107+
let selectivity = 0.01;
109108
Self::stat((row_cnt * selectivity).max(1.0))
110109
}
111110
DfNodeType::PhysicalNestedLoopJoin(_) => {

optd-datafusion-repr/src/memo_ext.rs

+1-2
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,6 @@ fn enumerate_join_order_expr_inner<M: Memo<DfNodeType> + ?Sized>(
101101
.take(MAX_JOIN_ORDER_OUTPUT)
102102
.map(|x| (*x).clone())
103103
.collect_vec()
104-
.into()
105104
}
106105
_ => Vec::new(),
107106
}
@@ -139,7 +138,7 @@ impl<M: Memo<DfNodeType>> MemoExt for M {
139138
let mut visited = HashMap::new();
140139
enumerate_join_order_group_inner(self, entry, &mut visited, &mut false)
141140
.iter()
142-
.map(|x| x.clone())
141+
.cloned()
143142
.collect()
144143
}
145144
}

optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql

+9-9
Original file line number | Diff line number | Diff line change
@@ -62,14 +62,14 @@ LogicalProjection { exprs: [ #0, #1 ] }
6262
├── LogicalAgg { exprs: [], groups: [ #0 ] }
6363
│ └── LogicalScan { table: t1 }
6464
└── LogicalScan { table: t2 }
65-
PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4033008,io=4000}, stat: {row_cnt=1} }
66-
└── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=4033005,io=4000}, stat: {row_cnt=1} }
65+
PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4033080,io=4000}, stat: {row_cnt=10} }
66+
└── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=4033050,io=4000}, stat: {row_cnt=10} }
6767
└── PhysicalFilter
6868
├── cond:Gt
6969
│ ├── #4
7070
│ └── 100(i64)
7171
├── cost: {compute=4033000,io=4000}
72-
├── stat: {row_cnt=1}
72+
├── stat: {row_cnt=10}
7373
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4030000,io=4000}, stat: {row_cnt=1000} }
7474
├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
7575
└── PhysicalNestedLoopJoin
@@ -168,14 +168,14 @@ LogicalProjection { exprs: [ #0, #1 ] }
168168
├── LogicalAgg { exprs: [], groups: [ #0 ] }
169169
│ └── LogicalScan { table: t1 }
170170
└── LogicalScan { table: t2 }
171-
PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=44228008,io=5000}, stat: {row_cnt=1} }
172-
└── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=44228005,io=5000}, stat: {row_cnt=1} }
171+
PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=44228080,io=5000}, stat: {row_cnt=10} }
172+
└── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=44228050,io=5000}, stat: {row_cnt=10} }
173173
└── PhysicalFilter
174174
├── cond:Gt
175175
│ ├── #4
176176
│ └── 100(i64)
177177
├── cost: {compute=44228000,io=5000}
178-
├── stat: {row_cnt=1}
178+
├── stat: {row_cnt=10}
179179
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=44225000,io=5000}, stat: {row_cnt=1000} }
180180
├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
181181
└── PhysicalNestedLoopJoin
@@ -359,14 +359,14 @@ LogicalProjection { exprs: [ #0, #1 ] }
359359
└── LogicalJoin { join_type: Inner, cond: true }
360360
├── LogicalScan { table: t2 }
361361
└── LogicalScan { table: t3 }
362-
PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4036008,io=5000}, stat: {row_cnt=1} }
363-
└── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=4036005,io=5000}, stat: {row_cnt=1} }
362+
PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=4036080,io=5000}, stat: {row_cnt=10} }
363+
└── PhysicalProjection { exprs: [ #0, #1, #2, #4 ], cost: {compute=4036050,io=5000}, stat: {row_cnt=10} }
364364
└── PhysicalFilter
365365
├── cond:Gt
366366
│ ├── #4
367367
│ └── 100(i64)
368368
├── cost: {compute=4036000,io=5000}
369-
├── stat: {row_cnt=1}
369+
├── stat: {row_cnt=10}
370370
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4033000,io=5000}, stat: {row_cnt=1000} }
371371
├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
372372
└── PhysicalNestedLoopJoin

optd-sqlplannertest/tests/tpch/q11.planner.sql

+40-40
Original file line number | Diff line number | Diff line change
@@ -92,49 +92,49 @@ PhysicalSort
9292
├── exprs:SortOrder { order: Desc }
9393
│ └── #1
9494
└── PhysicalProjection { exprs: [ #0, #1 ] }
95-
└── PhysicalNestedLoopJoin
96-
├── join_type: Inner
95+
└── PhysicalFilter
9796
├── cond:Gt
9897
│ ├── Cast { cast_to: Decimal128(38, 15), child: #1 }
9998
│ └── #2
100-
├── PhysicalAgg
101-
│ ├── aggrs:Agg(Sum)
102-
│ │ └── Mul
103-
│ │ ├── #3
104-
│ │ └── Cast { cast_to: Decimal128(10, 0), child: #2 }
105-
│ ├── groups: [ #0 ]
106-
│ └── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #4, #5, #6, #7, #8, #9, #10, #0, #1, #2, #3 ] }
107-
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] }
108-
│ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] }
109-
│ │ ├── PhysicalFilter
110-
│ │ │ ├── cond:Eq
111-
│ │ │ │ ├── #1
112-
│ │ │ │ └── "CHINA"
113-
│ │ │ └── PhysicalScan { table: nation }
114-
│ │ └── PhysicalScan { table: supplier }
115-
│ └── PhysicalScan { table: partsupp }
116-
└── PhysicalProjection
117-
├── exprs:Cast
118-
│ ├── cast_to: Decimal128(38, 15)
119-
│ ├── child:Mul
120-
│ │ ├── Cast { cast_to: Float64, child: #0 }
121-
│ │ └── 0.0001(float)
99+
└── PhysicalNestedLoopJoin { join_type: Inner, cond: true }
100+
├── PhysicalAgg
101+
│ ├── aggrs:Agg(Sum)
102+
│ │ └── Mul
103+
│ │ ├── #3
104+
│ │ └── Cast { cast_to: Decimal128(10, 0), child: #2 }
105+
│ ├── groups: [ #0 ]
106+
│ └── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #4, #5, #6, #7, #8, #9, #10, #0, #1, #2, #3 ] }
107+
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] }
108+
│ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] }
109+
│ │ ├── PhysicalFilter
110+
│ │ │ ├── cond:Eq
111+
│ │ │ │ ├── #1
112+
│ │ │ │ └── "CHINA"
113+
│ │ │ └── PhysicalScan { table: nation }
114+
│ │ └── PhysicalScan { table: supplier }
115+
│ └── PhysicalScan { table: partsupp }
116+
└── PhysicalProjection
117+
├── exprs:Cast
118+
│ ├── cast_to: Decimal128(38, 15)
119+
│ ├── child:Mul
120+
│ │ ├── Cast { cast_to: Float64, child: #0 }
121+
│ │ └── 0.0001(float)
122122
123-
└── PhysicalAgg
124-
├── aggrs:Agg(Sum)
125-
│ └── Mul
126-
│ ├── #3
127-
│ └── Cast { cast_to: Decimal128(10, 0), child: #2 }
128-
├── groups: []
129-
└── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #4, #5, #6, #7, #8, #9, #10, #0, #1, #2, #3 ] }
130-
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] }
131-
├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] }
132-
│ ├── PhysicalFilter
133-
│ │ ├── cond:Eq
134-
│ │ │ ├── #1
135-
│ │ │ └── "CHINA"
136-
│ │ └── PhysicalScan { table: nation }
137-
│ └── PhysicalScan { table: supplier }
138-
└── PhysicalScan { table: partsupp }
123+
└── PhysicalAgg
124+
├── aggrs:Agg(Sum)
125+
│ └── Mul
126+
│ ├── #3
127+
│ └── Cast { cast_to: Decimal128(10, 0), child: #2 }
128+
├── groups: []
129+
└── PhysicalProjection { exprs: [ #11, #12, #13, #14, #15, #4, #5, #6, #7, #8, #9, #10, #0, #1, #2, #3 ] }
130+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #1 ] }
131+
├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #3 ] }
132+
│ ├── PhysicalFilter
133+
│ │ ├── cond:Eq
134+
│ │ │ ├── #1
135+
│ │ │ └── "CHINA"
136+
│ │ └── PhysicalScan { table: nation }
137+
│ └── PhysicalScan { table: supplier }
138+
└── PhysicalScan { table: partsupp }
139139
*/
140140

optd-sqlplannertest/tests/tpch/q2.planner.sql

+12-11
Original file line number | Diff line number | Diff line change
@@ -243,17 +243,18 @@ PhysicalLimit { skip: 0(i64), fetch: 100(i64) }
243243
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #19, #0 ], right_keys: [ #1, #0 ] }
244244
├── PhysicalHashJoin { join_type: Inner, left_keys: [ #23 ], right_keys: [ #0 ] }
245245
│ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #12 ], right_keys: [ #0 ] }
246-
│ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0, #9 ], right_keys: [ #0, #1 ] }
247-
│ │ │ ├── PhysicalNestedLoopJoin { join_type: Inner, cond: true }
248-
│ │ │ │ ├── PhysicalFilter
249-
│ │ │ │ │ ├── cond:And
250-
│ │ │ │ │ │ ├── Eq
251-
│ │ │ │ │ │ │ ├── Cast { cast_to: Int64, child: #5 }
252-
│ │ │ │ │ │ │ └── 4(i64)
253-
│ │ │ │ │ │ └── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false }
254-
│ │ │ │ │ └── PhysicalScan { table: part }
255-
│ │ │ │ └── PhysicalScan { table: supplier }
256-
│ │ │ └── PhysicalScan { table: partsupp }
246+
│ │ ├── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8, #14, #15, #16, #17, #18, #19, #20, #9, #10, #11, #12, #13 ] }
247+
│ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #10 ], right_keys: [ #0 ] }
248+
│ │ │ ├── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
249+
│ │ │ │ ├── PhysicalFilter
250+
│ │ │ │ │ ├── cond:And
251+
│ │ │ │ │ │ ├── Eq
252+
│ │ │ │ │ │ │ ├── Cast { cast_to: Int64, child: #5 }
253+
│ │ │ │ │ │ │ └── 4(i64)
254+
│ │ │ │ │ │ └── Like { expr: #4, pattern: "%TIN", negated: false, case_insensitive: false }
255+
│ │ │ │ │ └── PhysicalScan { table: part }
256+
│ │ │ │ └── PhysicalScan { table: partsupp }
257+
│ │ │ └── PhysicalScan { table: supplier }
257258
│ │ └── PhysicalScan { table: nation }
258259
│ └── PhysicalFilter
259260
│ ├── cond:Eq

0 commit comments

Comments
 (0)