Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 9d785e4

Browse files
committed
fix(cost): return full selectivity for self joins
Signed-off-by: Alex Chi <[email protected]>
1 parent 1000e13 commit 9d785e4

File tree

4 files changed

+55
-3
lines changed

4 files changed

+55
-3
lines changed

optd-datafusion-repr/src/cost/base_cost/join.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,10 @@ impl<
450450
predicate: &EqPredicate,
451451
past_eq_columns: &mut EqBaseTableColumnSets,
452452
) -> f64 {
453+
if predicate.left == predicate.right {
454+
// Self-join: both sides of the predicate compare equal, so return full selectivity (1.0). TODO: confirm this is correct.
455+
return 1.0;
456+
}
453457
// To find the adjustment, we need to know the selectivity of the graph before `predicate` is added.
454458
//
455459
// There are two cases: (1) adding `predicate` does not change the number of connected components, and

optd-datafusion-repr/src/properties/column_ref.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,9 @@ impl PropertyBuilder<OptRelNodeTyp> for ColumnRefPropertyBuilder {
362362
GroupColumnRefs::new(column_refs, child.output_correlation.clone())
363363
}
364364
// Should account for all physical join types.
365-
OptRelNodeTyp::Join(join_type) | OptRelNodeTyp::RawDepJoin(join_type) | OptRelNodeTyp::DepJoin(join_type)=> {
365+
OptRelNodeTyp::Join(join_type)
366+
| OptRelNodeTyp::RawDepJoin(join_type)
367+
| OptRelNodeTyp::DepJoin(join_type) => {
366368
// Concatenate left and right children column refs.
367369
let column_refs = Self::concat_children_col_refs(&children[0..2]);
368370
// Merge the equal columns of two children as input correlation.
@@ -465,12 +467,14 @@ impl PropertyBuilder<OptRelNodeTyp> for ColumnRefPropertyBuilder {
465467
GroupColumnRefs::new(column_refs, correlation)
466468
}
467469
OptRelNodeTyp::Constant(_)
468-
| OptRelNodeTyp::ExternColumnRef // TODO Possibly very very wrong---consult cost model team
469470
| OptRelNodeTyp::Func(_)
470471
| OptRelNodeTyp::DataType(_)
471472
| OptRelNodeTyp::Between
472473
| OptRelNodeTyp::Like
473-
| OptRelNodeTyp::InList => GroupColumnRefs::new(vec![ColumnRef::Derived], None),
474+
| OptRelNodeTyp::InList
475+
| OptRelNodeTyp::ExternColumnRef => {
476+
GroupColumnRefs::new(vec![ColumnRef::Derived], None)
477+
}
474478
_ => unimplemented!("Unsupported rel node type {:?}", typ),
475479
}
476480
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
-- (no id or description)
2+
create table t1(t1v1 int, t1v2 int);
3+
create table t2(t2v1 int, t2v3 int);
4+
insert into t1 values (0, 0), (1, 1), (2, 2);
5+
insert into t2 values (0, 200), (1, 201), (2, 202);
6+
7+
/*
8+
3
9+
3
10+
*/
11+
12+
-- test self join
13+
select * from t1 as a, t1 as b where a.t1v1 = b.t1v1;
14+
15+
/*
16+
LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
17+
└── LogicalFilter
18+
├── cond:Eq
19+
│ ├── #0
20+
│ └── #2
21+
└── LogicalJoin { join_type: Cross, cond: true }
22+
├── LogicalScan { table: t1 }
23+
└── LogicalScan { table: t1 }
24+
PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
25+
├── PhysicalScan { table: t1 }
26+
└── PhysicalScan { table: t1 }
27+
0 0 0 0
28+
1 1 1 1
29+
2 2 2 2
30+
*/
31+
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
- sql: |
2+
create table t1(t1v1 int, t1v2 int);
3+
create table t2(t2v1 int, t2v3 int);
4+
insert into t1 values (0, 0), (1, 1), (2, 2);
5+
insert into t2 values (0, 200), (1, 201), (2, 202);
6+
tasks:
7+
- execute
8+
- sql: |
9+
select * from t1 as a, t1 as b where a.t1v1 = b.t1v1;
10+
desc: test self join
11+
tasks:
12+
- explain:logical_optd,physical_optd
13+
- execute

0 commit comments

Comments
 (0)