Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit d36dfd5

Browse files
authored
test: add cross-product test and enable more explain options (#50)
* test: add cross-product test and enable more options
  Signed-off-by: Yuchen Liang <[email protected]>
* add tasks to use datafusion optimizer
  Signed-off-by: Yuchen Liang <[email protected]>
* change join_enumerate task types
  Signed-off-by: Yuchen Liang <[email protected]>
* restore planner tests
  Signed-off-by: Yuchen Liang <[email protected]>
---------
Signed-off-by: Yuchen Liang <[email protected]>
1 parent 2694b9a commit d36dfd5

File tree

7 files changed

+277
-45
lines changed

7 files changed

+277
-45
lines changed

optd-sqlplannertest/README.md

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Usage
2+
3+
**Update the test cases**
4+
5+
```shell
6+
cargo run -p optd-sqlplannertest --bin planner_test_apply
7+
```
8+
9+
**Verify the test cases**
10+
11+
```shell
12+
cargo test -p optd-sqlplannertest
13+
# or use nextest
14+
cargo nextest run -p optd-sqlplannertest
15+
```
16+
17+
The `explain` and `execute` tasks run with DataFusion's logical optimizer disabled. To keep DataFusion's logical optimizer enabled, use the `explain_with_logical` and `execute_with_logical` tasks instead.
18+
19+
Currently we have the following options for the explain task:
20+
21+
- `logical_datafusion`: datafusion's logical plan.
22+
- `logical_optd`: optd's logical plan before optimization.
23+
- `physical_optd`: optd's physical plan after optimization.
24+
- `physical_datafusion`: datafusion's physical plan.
25+
- `join_orders`: physical join orders.
26+
- `logical_join_orders`: logical join orders.
27+

optd-sqlplannertest/src/lib.rs

+137-40
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,26 @@ use async_trait::async_trait;
2020
#[derive(Default)]
2121
pub struct DatafusionDb {
2222
ctx: SessionContext,
23+
/// Context enabling datafusion's logical optimizer.
24+
with_logical_ctx: SessionContext,
2325
}
2426

2527
impl DatafusionDb {
2628
pub async fn new() -> Result<Self> {
29+
let ctx = DatafusionDb::new_session_ctx(false).await?;
30+
let with_logical_ctx = DatafusionDb::new_session_ctx(true).await?;
31+
Ok(Self {
32+
ctx,
33+
with_logical_ctx,
34+
})
35+
}
36+
37+
/// Creates a new session context. If the `with_logical` flag is set, datafusion's logical optimizer will be used.
38+
async fn new_session_ctx(with_logical: bool) -> Result<SessionContext> {
2739
let mut session_config = SessionConfig::from_env()?.with_information_schema(true);
28-
session_config.options_mut().optimizer.max_passes = 0;
40+
if !with_logical {
41+
session_config.options_mut().optimizer.max_passes = 0;
42+
}
2943

3044
let rn_config = RuntimeConfig::new();
3145
let runtime_env = RuntimeEnv::new(rn_config.clone())?;
@@ -36,26 +50,37 @@ impl DatafusionDb {
3650
let optimizer = DatafusionOptimizer::new_physical(Box::new(DatafusionCatalog::new(
3751
state.catalog_list(),
3852
)));
39-
// clean up optimizer rules so that we can plug in our own optimizer
40-
state = state.with_optimizer_rules(vec![]);
41-
state = state.with_physical_optimizer_rules(vec![]);
53+
if !with_logical {
54+
// clean up optimizer rules so that we can plug in our own optimizer
55+
state = state.with_optimizer_rules(vec![]);
56+
state = state.with_physical_optimizer_rules(vec![]);
57+
}
4258
// use optd-bridge query planner
4359
state = state.with_query_planner(Arc::new(OptdQueryPlanner::new(optimizer)));
4460
SessionContext::new_with_state(state)
4561
};
4662
ctx.refresh_catalogs().await?;
47-
Ok(Self { ctx })
63+
Ok(ctx)
4864
}
4965

50-
async fn execute(&self, sql: &str) -> Result<Vec<Vec<String>>> {
66+
async fn execute(&self, sql: &str, with_logical: bool) -> Result<Vec<Vec<String>>> {
5167
let sql = unescape_input(sql)?;
5268
let dialect = Box::new(GenericDialect);
5369
let statements = DFParser::parse_sql_with_dialect(&sql, dialect.as_ref())?;
5470
let mut result = Vec::new();
5571
for statement in statements {
56-
let plan = self.ctx.state().statement_to_plan(statement).await?;
72+
let df = if with_logical {
73+
let plan = self
74+
.with_logical_ctx
75+
.state()
76+
.statement_to_plan(statement)
77+
.await?;
78+
self.with_logical_ctx.execute_logical_plan(plan).await?
79+
} else {
80+
let plan = self.ctx.state().statement_to_plan(statement).await?;
81+
self.ctx.execute_logical_plan(plan).await?
82+
};
5783

58-
let df = self.ctx.execute_logical_plan(plan).await?;
5984
let batches = df.collect().await?;
6085

6186
let options = FormatOptions::default();
@@ -79,53 +104,125 @@ impl DatafusionDb {
79104
}
80105
Ok(result)
81106
}
107+
108+
/// Executes the `execute` task.
109+
async fn task_execute(&mut self, r: &mut String, sql: &str, with_logical: bool) -> Result<()> {
110+
use std::fmt::Write;
111+
let result = self.execute(&sql, with_logical).await?;
112+
writeln!(r, "{}", result.into_iter().map(|x| x.join(" ")).join("\n"))?;
113+
writeln!(r)?;
114+
Ok(())
115+
}
116+
117+
/// Executes the `explain` task.
118+
async fn task_explain(
119+
&mut self,
120+
r: &mut String,
121+
sql: &str,
122+
task: &str,
123+
with_logical: bool,
124+
) -> Result<()> {
125+
use std::fmt::Write;
126+
127+
let result = self
128+
.execute(&format!("explain {}", &sql), with_logical)
129+
.await?;
130+
let subtask_start_pos = if with_logical {
131+
"explain_with_logical:".len()
132+
} else {
133+
"explain:".len()
134+
};
135+
for subtask in task[subtask_start_pos..].split(",") {
136+
let subtask = subtask.trim();
137+
if subtask == "logical_datafusion" {
138+
writeln!(
139+
r,
140+
"{}",
141+
result
142+
.iter()
143+
.find(|x| x[0] == "logical_plan after datafusion")
144+
.map(|x| &x[1])
145+
.unwrap()
146+
)?;
147+
} else if subtask == "logical_optd" {
148+
writeln!(
149+
r,
150+
"{}",
151+
result
152+
.iter()
153+
.find(|x| x[0] == "logical_plan after optd")
154+
.map(|x| &x[1])
155+
.unwrap()
156+
)?;
157+
} else if subtask == "physical_optd" {
158+
writeln!(
159+
r,
160+
"{}",
161+
result
162+
.iter()
163+
.find(|x| x[0] == "physical_plan after optd")
164+
.map(|x| &x[1])
165+
.unwrap()
166+
)?;
167+
} else if subtask == "join_orders" {
168+
writeln!(
169+
r,
170+
"{}",
171+
result
172+
.iter()
173+
.find(|x| x[0] == "physical_plan after optd-all-join-orders")
174+
.map(|x| &x[1])
175+
.unwrap()
176+
)?;
177+
writeln!(r)?;
178+
} else if subtask == "logical_join_orders" {
179+
writeln!(
180+
r,
181+
"{}",
182+
result
183+
.iter()
184+
.find(|x| x[0] == "physical_plan after optd-all-logical-join-orders")
185+
.map(|x| &x[1])
186+
.unwrap()
187+
)?;
188+
writeln!(r)?;
189+
} else if subtask == "physical_datafusion" {
190+
writeln!(
191+
r,
192+
"{}",
193+
result
194+
.iter()
195+
.find(|x| x[0] == "physical_plan")
196+
.map(|x| &x[1])
197+
.unwrap()
198+
)?;
199+
}
200+
}
201+
202+
Ok(())
203+
}
82204
}
83205

84206
#[async_trait]
85207
impl sqlplannertest::PlannerTestRunner for DatafusionDb {
86208
async fn run(&mut self, test_case: &sqlplannertest::ParsedTestCase) -> Result<String> {
87209
for before in &test_case.before_sql {
88-
self.execute(before)
210+
self.execute(before, true)
89211
.await
90212
.context("before execution error")?;
91213
}
92214

93-
use std::fmt::Write;
94215
let mut result = String::new();
95216
let r = &mut result;
96217
for task in &test_case.tasks {
97218
if task == "execute" {
98-
let result = self.execute(&test_case.sql).await?;
99-
writeln!(r, "{}", result.into_iter().map(|x| x.join(" ")).join("\n"))?;
100-
writeln!(r)?;
219+
self.task_execute(r, &test_case.sql, false).await?;
220+
} else if task == "execute_with_logical" {
221+
self.task_execute(r, &test_case.sql, true).await?;
101222
} else if task.starts_with("explain:") {
102-
let result = self.execute(&format!("explain {}", test_case.sql)).await?;
103-
for subtask in task["explain:".len()..].split(",") {
104-
let subtask = subtask.trim();
105-
if subtask == "join_orders" {
106-
writeln!(
107-
r,
108-
"{}",
109-
result
110-
.iter()
111-
.find(|x| x[0] == "physical_plan after optd-all-join-orders")
112-
.map(|x| &x[1])
113-
.unwrap()
114-
)?;
115-
writeln!(r)?;
116-
} else if subtask == "logical_join_orders" {
117-
writeln!(
118-
r,
119-
"{}",
120-
result
121-
.iter()
122-
.find(|x| x[0] == "physical_plan after optd-all-logical-join-orders")
123-
.map(|x| &x[1])
124-
.unwrap()
125-
)?;
126-
writeln!(r)?;
127-
}
128-
}
223+
self.task_explain(r, &test_case.sql, task, false).await?;
224+
} else if task.starts_with("explain_with_logical:") {
225+
self.task_explain(r, &test_case.sql, task, true).await?;
129226
}
130227
}
131228
Ok(result)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
-- (no id or description)
2+
create table t1(t1v1 int);
3+
create table t2(t2v1 int);
4+
insert into t1 values (0), (1), (2);
5+
insert into t2 values (0), (1), (2);
6+
7+
/*
8+
3
9+
3
10+
*/
11+
12+
-- Test optimizer logical for a cross product.
13+
select * from t1, t2;
14+
15+
/*
16+
LogicalProjection { exprs: [ #0, #1 ] }
17+
└── LogicalJoin { join_type: Cross, cond: true }
18+
├── LogicalScan { table: t1 }
19+
└── LogicalScan { table: t2 }
20+
PhysicalProjection { exprs: [ #0, #1 ] }
21+
└── PhysicalNestedLoopJoin { join_type: Cross, cond: true }
22+
├── PhysicalScan { table: t1 }
23+
└── PhysicalScan { table: t2 }
24+
0 0
25+
0 1
26+
0 2
27+
1 0
28+
1 1
29+
1 2
30+
2 0
31+
2 1
32+
2 2
33+
*/
34+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
- sql: |
2+
create table t1(t1v1 int);
3+
create table t2(t2v1 int);
4+
insert into t1 values (0), (1), (2);
5+
insert into t2 values (0), (1), (2);
6+
tasks:
7+
- execute
8+
- sql: |
9+
select * from t1, t2;
10+
desc: Test optimizer logical for a cross product.
11+
tasks:
12+
- explain:logical_optd,physical_optd
13+
- execute
14+

optd-sqlplannertest/tests/empty_relation.planner.sql

+46
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,16 @@ select 64 + 1;
1414
select 64 + 1 from t1;
1515

1616
/*
17+
LogicalProjection
18+
├── exprs:Add
19+
│ ├── 64
20+
│ └── 1
21+
└── LogicalEmptyRelation { produce_one_row: true }
22+
PhysicalProjection
23+
├── exprs:Add
24+
│ ├── 64
25+
│ └── 1
26+
└── PhysicalEmptyRelation { produce_one_row: true }
1727
65
1828
65
1929
65
@@ -26,5 +36,41 @@ select 64+1 from t1 inner join t2 on false;
2636
select 64+1 from t1 inner join t2 on 1=0;
2737

2838
/*
39+
LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
40+
└── LogicalJoin { join_type: Inner, cond: true }
41+
├── LogicalScan { table: t1 }
42+
└── LogicalScan { table: t2 }
43+
PhysicalProjection { exprs: [ #0, #1, #2, #3 ] }
44+
└── PhysicalProjection { exprs: [ #2, #3, #0, #1 ] }
45+
└── PhysicalNestedLoopJoin { join_type: Inner, cond: true }
46+
├── PhysicalScan { table: t2 }
47+
└── PhysicalScan { table: t1 }
48+
0 0 0 200
49+
0 0 1 201
50+
0 0 2 202
51+
1 1 0 200
52+
1 1 1 201
53+
1 1 2 202
54+
2 2 0 200
55+
2 2 1 201
56+
2 2 2 202
57+
65
58+
65
59+
65
60+
65
61+
65
62+
65
63+
65
64+
65
65+
65
66+
65
67+
65
68+
65
69+
65
70+
65
71+
65
72+
65
73+
65
74+
65
2975
*/
3076

optd-sqlplannertest/tests/join_enumerate.planner.sql

+14
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,14 @@ insert into t3 values (0, 300), (1, 301), (2, 302);
1616
select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2;
1717

1818
/*
19+
(Join t2 (Join t1 t3))
20+
(Join t2 (Join t3 t1))
21+
(Join t3 (Join t1 t2))
22+
(Join t3 (Join t2 t1))
23+
(Join (Join t1 t2) t3)
24+
(Join (Join t1 t3) t2)
1925
(Join (Join t2 t1) t3)
26+
(Join (Join t3 t1) t2)
2027
2128
0 200 0 0 0 300
2229
1 201 1 1 1 301
@@ -27,7 +34,14 @@ select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2;
2734
select * from t1, t2, t3 where t1v1 = t2v1 and t1v2 = t3v2;
2835

2936
/*
37+
(Join t2 (Join t1 t3))
38+
(Join t2 (Join t3 t1))
39+
(Join t3 (Join t1 t2))
40+
(Join t3 (Join t2 t1))
3041
(Join (Join t1 t2) t3)
42+
(Join (Join t1 t3) t2)
43+
(Join (Join t2 t1) t3)
44+
(Join (Join t3 t1) t2)
3145
3246
0 0 0 200 0 300
3347
1 1 1 201 1 301

0 commit comments

Comments
 (0)