Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 3fd39a8

Browse files
authored
refactor(plannertest): separate each TPC-H query (#256)
Signed-off-by: Alex Chi Z <[email protected]>
1 parent 93aa6be commit 3fd39a8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+2657
-3174
lines changed

Cargo.lock

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

optd-sqlplannertest/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ repository = { workspace = true }
1313
[dependencies]
1414
clap = { version = "4.5.4", features = ["derive"] }
1515
anyhow = { version = "1", features = ["backtrace"] }
16-
sqlplannertest = "0.2"
16+
sqlplannertest = "0.3"
1717
async-trait = "0.1"
1818
datafusion-optd-cli = { path = "../datafusion-optd-cli", version = "32.0.0" }
1919
optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" }

optd-sqlplannertest/src/lib.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -321,12 +321,12 @@ impl DatafusionDBMS {
321321
#[async_trait]
322322
impl sqlplannertest::PlannerTestRunner for DatafusionDBMS {
323323
async fn run(&mut self, test_case: &sqlplannertest::ParsedTestCase) -> Result<String> {
324-
if !test_case.before_sql.is_empty() {
325-
panic!("before is not supported in optd-sqlplannertest, always specify the task type to run");
326-
}
327-
328324
let mut result = String::new();
329325
let r = &mut result;
326+
for sql in &test_case.before_sql {
327+
// The output of `before` statements is intentionally discarded.
328+
self.execute(sql, &TestFlags::default()).await?;
329+
}
330330
for task in &test_case.tasks {
331331
let flags = extract_flags(task)?;
332332
if task.starts_with("execute") {

optd-sqlplannertest/tests/joins/join_enumerate.planner.sql

+30
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ select * from t2, t1 where t1v1 = t2v1;
1919
(Join t1 t2)
2020
(Join t2 t1)
2121
22+
(Join t1 t2)
23+
(Join t2 t1)
24+
2225
0 200 0 0
2326
1 201 1 1
2427
2 202 2 2
@@ -37,6 +40,15 @@ select * from t2, t1, t3 where t1v1 = t2v1 and t1v1 = t3v2;
3740
(Join (Join t2 t1) t3)
3841
(Join (Join t3 t1) t2)
3942
43+
(Join t2 (Join t1 t3))
44+
(Join t2 (Join t3 t1))
45+
(Join t3 (Join t1 t2))
46+
(Join t3 (Join t2 t1))
47+
(Join (Join t1 t2) t3)
48+
(Join (Join t1 t3) t2)
49+
(Join (Join t2 t1) t3)
50+
(Join (Join t3 t1) t2)
51+
4052
0 200 0 0 0 300
4153
1 201 1 1 1 301
4254
2 202 2 2 2 302
@@ -55,6 +67,15 @@ select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2;
5567
(Join (Join t2 t1) t3)
5668
(Join (Join t3 t1) t2)
5769
70+
(Join t2 (Join t1 t3))
71+
(Join t2 (Join t3 t1))
72+
(Join t3 (Join t1 t2))
73+
(Join t3 (Join t2 t1))
74+
(Join (Join t1 t2) t3)
75+
(Join (Join t1 t3) t2)
76+
(Join (Join t2 t1) t3)
77+
(Join (Join t3 t1) t2)
78+
5879
0 200 0 0 0 300
5980
1 201 1 1 1 301
6081
2 202 2 2 2 302
@@ -73,6 +94,15 @@ select * from t1, t2, t3 where t1v1 = t2v1 and t1v2 = t3v2;
7394
(Join (Join t2 t1) t3)
7495
(Join (Join t3 t1) t2)
7596
97+
(Join t2 (Join t1 t3))
98+
(Join t2 (Join t3 t1))
99+
(Join t3 (Join t1 t2))
100+
(Join t3 (Join t2 t1))
101+
(Join (Join t1 t2) t3)
102+
(Join (Join t1 t3) t2)
103+
(Join (Join t2 t1) t3)
104+
(Join (Join t3 t1) t2)
105+
76106
0 0 0 200 0 300
77107
1 1 1 201 1 301
78108
2 2 2 202 2 302

optd-sqlplannertest/tests/joins/join_enumerate.yml

+4-1
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,27 @@
1111
select * from t2, t1 where t1v1 = t2v1;
1212
desc: Test whether the optimizer enumerates all 2-join orders.
1313
tasks:
14-
# well actually pruning doesn't matter b/c join order is logical, but we are now missing join orders with t1 as the outer table
1514
- explain[disable_pruning]:logical_join_orders
15+
- explain:logical_join_orders
1616
- execute
1717
- sql: |
1818
select * from t2, t1, t3 where t1v1 = t2v1 and t1v1 = t3v2;
1919
desc: Test whether the optimizer enumerates all 3-join orders. (It should)
2020
tasks:
2121
- explain[disable_pruning]:logical_join_orders
22+
- explain:logical_join_orders
2223
- execute
2324
- sql: |
2425
select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2;
2526
desc: Test whether the optimizer enumerates all 3-join orders. (It don't currently)
2627
tasks:
2728
- explain[disable_pruning]:logical_join_orders
29+
- explain:logical_join_orders
2830
- execute
2931
- sql: |
3032
select * from t1, t2, t3 where t1v1 = t2v1 and t1v2 = t3v2;
3133
desc: Test whether the optimizer enumerates all 3-join orders. (It don't currently)
3234
tasks:
3335
- explain[disable_pruning]:logical_join_orders
36+
- explain:logical_join_orders
3437
- execute

optd-sqlplannertest/tests/joins/self-join.planner.sql

+18-12
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,28 @@ insert into t2 values (0, 200), (1, 201), (2, 202);
1010
*/
1111

1212
-- test self join
13-
select * from t1 as a, t1 as b where a.t1v1 = b.t1v1;
13+
select * from t1 as a, t1 as b where a.t1v1 = b.t1v1 order by a.t1v1;
1414

1515
/*
1616
(Join t1 t1)
1717
18-
LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
19-
└── LogicalFilter
20-
├── cond:Eq
21-
│ ├── #0
22-
│ └── #2
23-
└── LogicalJoin { join_type: Cross, cond: true }
24-
├── LogicalScan { table: t1 }
25-
└── LogicalScan { table: t1 }
26-
PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
27-
├── PhysicalScan { table: t1 }
28-
└── PhysicalScan { table: t1 }
18+
LogicalSort
19+
├── exprs:SortOrder { order: Asc }
20+
│ └── #0
21+
└── LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
22+
└── LogicalFilter
23+
├── cond:Eq
24+
│ ├── #0
25+
│ └── #2
26+
└── LogicalJoin { join_type: Cross, cond: true }
27+
├── LogicalScan { table: t1 }
28+
└── LogicalScan { table: t1 }
29+
PhysicalSort
30+
├── exprs:SortOrder { order: Asc }
31+
│ └── #0
32+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
33+
├── PhysicalScan { table: t1 }
34+
└── PhysicalScan { table: t1 }
2935
0 0 0 0
3036
1 1 1 1
3137
2 2 2 2

optd-sqlplannertest/tests/joins/self-join.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
tasks:
77
- execute
88
- sql: |
9-
select * from t1 as a, t1 as b where a.t1v1 = b.t1v1;
9+
select * from t1 as a, t1 as b where a.t1v1 = b.t1v1 order by a.t1v1;
1010
desc: test self join
1111
tasks:
1212
- explain:logical_join_orders,logical_optd,physical_optd

optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql

+137-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ create table t3(t3v2 int, t3v4 int);
77
88
*/
99

10-
-- Test whether the optimizer can unnest correlated subqueries.
10+
-- Test whether the optimizer can unnest correlated subqueries with (scalar op agg)
1111
select * from t1 where (select sum(t2v3) from t2 where t2v1 = t1v1) > 100;
1212

1313
/*
@@ -74,6 +74,142 @@ PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=18005,io=3000}, stat: {ro
7474
└── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
7575
*/
7676

77+
-- Test whether the optimizer can unnest correlated subqueries with (scalar op group agg)
78+
select * from t1 where (select sum(sumt2v3) from (select t2v1, sum(t2v3) as sumt2v3 from t2 where t2v1 = t1v1 group by t2v1)) > 100;
79+
80+
/*
81+
LogicalProjection { exprs: [ #0, #1 ] }
82+
└── LogicalFilter
83+
├── cond:Gt
84+
│ ├── #2
85+
│ └── 100(i64)
86+
└── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] }
87+
├── LogicalScan { table: t1 }
88+
└── LogicalProjection { exprs: [ #0 ] }
89+
└── LogicalAgg
90+
├── exprs:Agg(Sum)
91+
│ └── [ #1 ]
92+
├── groups: []
93+
└── LogicalProjection { exprs: [ #0, #1 ] }
94+
└── LogicalAgg
95+
├── exprs:Agg(Sum)
96+
│ └── [ Cast { cast_to: Int64, child: #1 } ]
97+
├── groups: [ #0 ]
98+
└── LogicalFilter
99+
├── cond:Eq
100+
│ ├── #0
101+
│ └── Extern(#0)
102+
└── LogicalScan { table: t2 }
103+
LogicalProjection { exprs: [ #0, #1 ] }
104+
└── LogicalFilter
105+
├── cond:Gt
106+
│ ├── #2
107+
│ └── 100(i64)
108+
└── LogicalProjection { exprs: [ #0, #1, #3 ] }
109+
└── LogicalJoin
110+
├── join_type: Inner
111+
├── cond:Eq
112+
│ ├── #0
113+
│ └── #2
114+
├── LogicalScan { table: t1 }
115+
└── LogicalProjection { exprs: [ #0, #1 ] }
116+
└── LogicalAgg
117+
├── exprs:Agg(Sum)
118+
│ └── [ #2 ]
119+
├── groups: [ #0 ]
120+
└── LogicalProjection { exprs: [ #0, #1, #2 ] }
121+
└── LogicalAgg
122+
├── exprs:Agg(Sum)
123+
│ └── [ Cast { cast_to: Int64, child: #2 } ]
124+
├── groups: [ #0, #1 ]
125+
└── LogicalFilter
126+
├── cond:Eq
127+
│ ├── #1
128+
│ └── #0
129+
└── LogicalJoin { join_type: Inner, cond: true }
130+
├── LogicalAgg { exprs: [], groups: [ #0 ] }
131+
│ └── LogicalScan { table: t1 }
132+
└── LogicalScan { table: t2 }
133+
PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=25005,io=3000}, stat: {row_cnt=1} }
134+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=25002,io=3000}, stat: {row_cnt=1} }
135+
├── PhysicalFilter
136+
│ ├── cond:Gt
137+
│ │ ├── #1
138+
│ │ └── 100(i64)
139+
│ ├── cost: {compute=24000,io=2000}
140+
│ ├── stat: {row_cnt=1}
141+
│ └── PhysicalAgg
142+
│ ├── aggrs:Agg(Sum)
143+
│ │ └── [ #2 ]
144+
│ ├── groups: [ #0 ]
145+
│ ├── cost: {compute=21000,io=2000}
146+
│ ├── stat: {row_cnt=1000}
147+
│ └── PhysicalAgg
148+
│ ├── aggrs:Agg(Sum)
149+
│ │ └── [ Cast { cast_to: Int64, child: #2 } ]
150+
│ ├── groups: [ #0, #1 ]
151+
│ ├── cost: {compute=15000,io=2000}
152+
│ ├── stat: {row_cnt=1000}
153+
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} }
154+
│ ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
155+
│ │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
156+
│ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
157+
└── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
158+
*/
159+
160+
-- Test whether the optimizer can unnest correlated subqueries with scalar agg in select list
161+
select t1v1, (select sum(t2v3) from t2 where t2v1 = t1v1) as sum from t1;
162+
163+
/*
164+
LogicalProjection { exprs: [ #0, #2 ] }
165+
└── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] }
166+
├── LogicalScan { table: t1 }
167+
└── LogicalProjection { exprs: [ #0 ] }
168+
└── LogicalAgg
169+
├── exprs:Agg(Sum)
170+
│ └── [ Cast { cast_to: Int64, child: #1 } ]
171+
├── groups: []
172+
└── LogicalFilter
173+
├── cond:Eq
174+
│ ├── #0
175+
│ └── Extern(#0)
176+
└── LogicalScan { table: t2 }
177+
LogicalProjection { exprs: [ #0, #2 ] }
178+
└── LogicalProjection { exprs: [ #0, #1, #3 ] }
179+
└── LogicalJoin
180+
├── join_type: Inner
181+
├── cond:Eq
182+
│ ├── #0
183+
│ └── #2
184+
├── LogicalScan { table: t1 }
185+
└── LogicalProjection { exprs: [ #0, #1 ] }
186+
└── LogicalAgg
187+
├── exprs:Agg(Sum)
188+
│ └── [ Cast { cast_to: Int64, child: #2 } ]
189+
├── groups: [ #0 ]
190+
└── LogicalFilter
191+
├── cond:Eq
192+
│ ├── #1
193+
│ └── #0
194+
└── LogicalJoin { join_type: Inner, cond: true }
195+
├── LogicalAgg { exprs: [], groups: [ #0 ] }
196+
│ └── LogicalScan { table: t1 }
197+
└── LogicalScan { table: t2 }
198+
PhysicalProjection { exprs: [ #0, #3 ], cost: {compute=20000,io=3000}, stat: {row_cnt=1000} }
199+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=17000,io=3000}, stat: {row_cnt=1000} }
200+
├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
201+
└── PhysicalAgg
202+
├── aggrs:Agg(Sum)
203+
│ └── [ Cast { cast_to: Int64, child: #2 } ]
204+
├── groups: [ #0 ]
205+
├── cost: {compute=14000,io=2000}
206+
├── stat: {row_cnt=1000}
207+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} }
208+
├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
209+
│ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
210+
└── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
211+
*/
212+
77213
-- Test whether the optimizer can unnest correlated subqueries.
78214
select * from t1 where (select sum(t2v3) from (select * from t2, t3 where t2v1 = t1v1 and t2v3 = t3v2)) > 100;
79215

optd-sqlplannertest/tests/subqueries/subquery_unnesting.yml

+17-1
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,25 @@
1111
# - explain_logical
1212
- sql: |
1313
select * from t1 where (select sum(t2v3) from t2 where t2v1 = t1v1) > 100;
14-
desc: Test whether the optimizer can unnest correlated subqueries.
14+
desc: Test whether the optimizer can unnest correlated subqueries with (scalar op agg)
1515
tasks:
1616
- explain[verbose]:logical_optd,optimized_logical_optd,physical_optd
17+
- sql: |
18+
select * from t1 where (select sum(sumt2v3) from (select t2v1, sum(t2v3) as sumt2v3 from t2 where t2v1 = t1v1 group by t2v1)) > 100;
19+
desc: Test whether the optimizer can unnest correlated subqueries with (scalar op group agg)
20+
tasks:
21+
- explain[verbose]:logical_optd,optimized_logical_optd,physical_optd
22+
- sql: |
23+
select t1v1, (select sum(t2v3) from t2 where t2v1 = t1v1) as sum from t1;
24+
desc: Test whether the optimizer can unnest correlated subqueries with scalar agg in select list
25+
tasks:
26+
- explain[verbose]:logical_optd,optimized_logical_optd,physical_optd
27+
# - sql: |
28+
# select * from t1 where exists (select * from t2 where t2v1 = t1v1);
29+
# desc: Test whether the optimizer can unnest correlated subqueries with exists
30+
# tasks:
31+
# - explain[verbose]:logical_optd,optimized_logical_optd,physical_optd
32+
# TODO: add a test case for quantified subqueries (ANY/ALL)
1733
- sql: |
1834
select * from t1 where (select sum(t2v3) from (select * from t2, t3 where t2v1 = t1v1 and t2v3 = t3v2)) > 100;
1935
desc: Test whether the optimizer can unnest correlated subqueries.

0 commit comments

Comments
 (0)