Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 93aa6be

Browse files
authored
Fixes for TPC-H Q17 (#255)
Two bugs: - `dep_initial_distinct` did not work if the correlated columns were not `[#0, #1, #2]` and so on. - `dep_join_past_agg` was pushing the actual correlated column number instead of the *index* of the correlated column number. This works in the same cases the above bug did (`[#0, #1, #2]` correlated columns), and so it broke for the same reason afaict.
1 parent 228e7ba commit 93aa6be

File tree

5 files changed

+50
-22
lines changed

5 files changed

+50
-22
lines changed

optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs

+24-16
Original file line numberDiff line numberDiff line change
@@ -116,14 +116,21 @@ fn apply_dep_initial_distinct(
116116

117117
// Our join condition is going to make sure that all of the correlated columns
118118
// in the right side are equal to their equivalent columns in the left side.
119-
// (they will have the same index, just shifted over)
119+
//
120+
// If we have correlated columns [#16, #17], we want our condition to be:
121+
// #16 = #0 AND #17 = #1
122+
//
123+
// This is because the aggregate we install on the right side will map the
124+
// correlated columns to their respective indices as shown.
120125
let join_cond = LogOpPred::new(
121126
LogOpType::And,
122-
(0..correlated_col_indices.len())
123-
.map(|i| {
127+
correlated_col_indices
128+
.iter()
129+
.enumerate()
130+
.map(|(i, x)| {
124131
assert!(i + left_schema_size < left_schema_size + right_schema_size);
125132
BinOpPred::new(
126-
ColumnRefPred::new(i).into_pred_node(),
133+
ColumnRefPred::new(*x).into_pred_node(),
127134
ColumnRefPred::new(i + left_schema_size).into_pred_node(),
128135
BinOpType::Eq,
129136
)
@@ -177,16 +184,18 @@ fn apply_dep_join_past_proj(
177184
let cond = join.cond();
178185
let extern_cols = join.extern_cols();
179186
let proj = LogicalProjection::from_plan_node(right.unwrap_plan_node()).unwrap();
187+
let proj_exprs = proj.exprs();
180188
let right = proj.child();
181189

182190
// TODO: can we have external columns in projection node? I don't think so?
183191
// Cross join should always have true cond
184192
assert!(cond == ConstantPred::bool(true).into_pred_node());
185193
let left_schema_len = optimizer.get_schema_of(left.clone()).len();
186-
let right_schema_len = optimizer.get_schema_of(right.clone()).len();
187194

188-
let right_cols_proj =
189-
(0..right_schema_len).map(|x| ColumnRefPred::new(x + left_schema_len).into_pred_node());
195+
let right_cols_proj = proj_exprs.to_vec().into_iter().map(|x| {
196+
x.rewrite_column_refs(|col| Some(col + left_schema_len))
197+
.unwrap()
198+
});
190199

191200
let left_cols_proj = (0..left_schema_len).map(|x| ColumnRefPred::new(x).into_pred_node());
192201
let new_proj_exprs = ListPred::new(
@@ -281,7 +290,7 @@ define_rule!(
281290
/// talk by Mark Raasveldt. The correlated columns are covered in the original paper.
282291
///
283292
/// TODO: the outer join is not implemented yet, so some edge cases won't work.
284-
/// Run SQList tests to catch these, I guess.
293+
/// Run SQLite tests to catch these, I guess.
285294
fn apply_dep_join_past_agg(
286295
_optimizer: &impl Optimizer<DfNodeType>,
287296
binding: ArcDfPlanNode,
@@ -310,15 +319,14 @@ fn apply_dep_join_past_agg(
310319
})
311320
.collect::<Vec<_>>();
312321

322+
// We need to group by all correlated columns.
323+
// In our initial distinct step, we installed an agg node that groups by all correlated columns.
324+
// Keeping this in mind, we only need to append a sequential number for each correlated column,
325+
// as these will correspond to the outputs of the agg node.
313326
let new_groups = ListPred::new(
314-
groups
315-
.to_vec()
316-
.into_iter()
317-
.map(|x| {
318-
x.rewrite_column_refs(|col| Some(col + correlated_col_indices.len()))
319-
.unwrap()
320-
})
321-
.chain(correlated_col_indices.iter().map(|x| {
327+
(0..correlated_col_indices.len())
328+
.map(|x| ColumnRefPred::new(x).into_pred_node())
329+
.chain(groups.to_vec().into_iter().map(|x| {
322330
x.rewrite_column_refs(|col| Some(col + correlated_col_indices.len()))
323331
.unwrap()
324332
}))

optd-sqllogictest/slt/tpch-q17.slt.disabled renamed to optd-sqllogictest/slt/tpch-q17.slt

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@ where
1919
l_partkey = p_partkey
2020
);
2121
----
22-
863.2285714285714285714285714
22+
863.2285714285715
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
include _tpch_tables.slt.part
2+
3+
# A query with a correlated subquery that retrieves columns out of order
4+
# i.e. the extern columns are not of the format [#0, #1, ...]
5+
# This query has extern columns [#1]
6+
query
7+
select
8+
l_orderkey,
9+
l_partkey,
10+
l_extendedprice,
11+
(
12+
select avg(p_size)
13+
from part
14+
where p_partkey = l_partkey
15+
) as avg_extendedprice
16+
from lineitem
17+
where l_extendedprice > 55000;
18+
----
19+
1121 200 55010.00 22.0
20+
4931 200 55010.00 22.0

optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql

+4-4
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ LogicalProjection { exprs: [ #0, #1 ] }
4444
└── LogicalAgg
4545
├── exprs:Agg(Sum)
4646
│ └── [ Cast { cast_to: Int64, child: #2 } ]
47-
├── groups: [ #1 ]
47+
├── groups: [ #0 ]
4848
└── LogicalFilter
4949
├── cond:Eq
5050
│ ├── #1
@@ -64,7 +64,7 @@ PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=18005,io=3000}, stat: {ro
6464
│ └── PhysicalAgg
6565
│ ├── aggrs:Agg(Sum)
6666
│ │ └── [ Cast { cast_to: Int64, child: #2 } ]
67-
│ ├── groups: [ #1 ]
67+
│ ├── groups: [ #0 ]
6868
│ ├── cost: {compute=14000,io=2000}
6969
│ ├── stat: {row_cnt=1000}
7070
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} }
@@ -118,7 +118,7 @@ LogicalProjection { exprs: [ #0, #1 ] }
118118
└── LogicalAgg
119119
├── exprs:Agg(Sum)
120120
│ └── [ Cast { cast_to: Int64, child: #2 } ]
121-
├── groups: [ #1 ]
121+
├── groups: [ #0 ]
122122
└── LogicalProjection { exprs: [ #0, #1, #2, #3, #4 ] }
123123
└── LogicalFilter
124124
├── cond:And
@@ -145,7 +145,7 @@ PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=21005,io=4000}, stat: {ro
145145
│ └── PhysicalAgg
146146
│ ├── aggrs:Agg(Sum)
147147
│ │ └── [ Cast { cast_to: Int64, child: #2 } ]
148-
│ ├── groups: [ #1 ]
148+
│ ├── groups: [ #0 ]
149149
│ ├── cost: {compute=17000,io=3000}
150150
│ ├── stat: {row_cnt=1000}
151151
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ], cost: {compute=9000,io=3000}, stat: {row_cnt=1000} }

optd-sqlplannertest/tests/tpch/tpch-01-05.planner.sql

+1-1
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ PhysicalLimit { skip: 0(u64), fetch: 100(u64) }
358358
└── PhysicalAgg
359359
├── aggrs:Agg(Min)
360360
│ └── [ #4 ]
361-
├── groups: [ #1 ]
361+
├── groups: [ #0 ]
362362
└── PhysicalFilter
363363
├── cond:And
364364
│ ├── Eq

0 commit comments

Comments
 (0)