Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 9633315

Browse files
authored
refactor(sqlplannertest): split directories, allow subdirectory selection, rename with_logical (#189)
* with_logical is now with_df_logical (which means that with datafusion logical optimizer) * apply planner test now supports subdirectories, so that we don't run all tests, that's super slow * apply planner test now shows time spent on a test case Signed-off-by: Alex Chi <[email protected]>
1 parent 5065c42 commit 9633315

29 files changed

+113
-78
lines changed

Cargo.lock

+12-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

optd-perfbench/Cargo.toml

+1-3
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,7 @@ tokio = { version = "1.24", features = [
3131
shlex = "1.3"
3232
tokio-postgres = "0.7"
3333
regex = "1.10"
34-
clap = { version = "4.5.4", features = [
35-
"derive",
36-
] }
34+
clap = { version = "4.5.4", features = ["derive"] }
3735
log = "0.4"
3836
env_logger = "0.11"
3937
lazy_static = "1.4.0"

optd-perfbench/src/datafusion_dbms.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -103,22 +103,22 @@ impl DatafusionDBMS {
103103
stats: Option<DataFusionBaseTableStats>,
104104
benchmark: &Benchmark,
105105
) -> anyhow::Result<()> {
106-
let with_logical = match benchmark {
106+
let use_df_logical = match benchmark {
107107
Benchmark::Tpch(_) => WITH_LOGICAL_FOR_TPCH,
108108
Benchmark::Job(_) | Benchmark::Joblight(_) => WITH_LOGICAL_FOR_JOB,
109109
};
110-
self.ctx = Some(Self::new_session_ctx(stats, self.adaptive, with_logical).await?);
110+
self.ctx = Some(Self::new_session_ctx(stats, self.adaptive, use_df_logical).await?);
111111
Ok(())
112112
}
113113

114114
async fn new_session_ctx(
115115
stats: Option<DataFusionBaseTableStats>,
116116
adaptive: bool,
117-
with_logical: bool,
117+
use_df_logical: bool,
118118
) -> anyhow::Result<SessionContext> {
119119
let mut session_config = SessionConfig::from_env()?.with_information_schema(true);
120120

121-
if !with_logical {
121+
if !use_df_logical {
122122
session_config.options_mut().optimizer.max_passes = 0;
123123
}
124124

optd-sqlplannertest/Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@ edition = "2021"
66
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
77

88
[dependencies]
9+
clap = { version = "4.5.4", features = ["derive"] }
910
anyhow = { version = "1", features = ["backtrace"] }
10-
sqlplannertest = { git = "https://github.com/risinglightdb/sqlplannertest-rs" }
11+
sqlplannertest = { git = "https://github.com/risinglightdb/sqlplannertest-rs", branch = "main" }
1112
async-trait = "0.1"
1213
datafusion-optd-cli = { path = "../datafusion-optd-cli" }
1314
rand = "0.8"

optd-sqlplannertest/README.md

+10-7
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
```shell
66
cargo run -p optd-sqlplannertest --bin planner_test_apply
7+
# or, supply a list of directories to scan from
8+
cargo run -p optd-sqlplannertest --bin planner_test_apply -- subqueries
79
```
810

911
## Verify the test cases
@@ -22,23 +24,24 @@ The `explain` and `execute` task will be run with datafusion's logical optimizer
2224

2325
#### Flags
2426

25-
| Name | Description |
26-
| -- | -- |
27-
| with_logical | Enable Datafusion's logical optimizer |
27+
| Name | Description |
28+
| -------------- | ------------------------------------- |
29+
| use_df_logical | Enable Datafusion's logical optimizer |
2830

2931
### Explain Task
3032

3133
#### Flags
3234

33-
| Name | Description |
34-
| -- | -- |
35-
| with_logical | Enable Datafusion's logical optimizer |
36-
| verbose | Display estimated cost in physical plan |
35+
| Name | Description |
36+
| -------------- | --------------------------------------- |
37+
| use_df_logical | Enable Datafusion's logical optimizer |
38+
| verbose | Display estimated cost in physical plan |
3739

3840
Currently we have the following options for the explain task:
3941

4042
- `logical_datafusion`: datafusion's logical plan.
4143
- `logical_optd`: optd's logical plan before optimization.
44+
- `optimized_logical_optd`: optd's logical plan after heuristics optimization and before cascades optimization.
4245
- `physical_optd`: optd's physical plan after optimization.
4346
- `physical_datafusion`: datafusion's physical plan.
4447
- `join_orders`: physical join orders.

optd-sqlplannertest/src/bin/planner_test_apply.rs

+30-5
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,37 @@ use std::path::Path;
22

33
use anyhow::Result;
44

5+
use clap::Parser;
6+
7+
#[derive(Parser)]
8+
#[command(version, about, long_about = None)]
9+
struct Cli {
10+
/// Optional list of directories to apply the test; if empty, apply all tests
11+
directories: Vec<String>,
12+
}
13+
514
#[tokio::main]
615
async fn main() -> Result<()> {
7-
sqlplannertest::planner_test_apply(
8-
Path::new(env!("CARGO_MANIFEST_DIR")).join("tests"),
9-
|| async { optd_sqlplannertest::DatafusionDBMS::new().await },
10-
)
11-
.await?;
16+
let cli = Cli::parse();
17+
18+
if cli.directories.is_empty() {
19+
println!("Running all tests");
20+
sqlplannertest::planner_test_apply(
21+
Path::new(env!("CARGO_MANIFEST_DIR")).join("tests"),
22+
|| async { optd_sqlplannertest::DatafusionDBMS::new().await },
23+
)
24+
.await?;
25+
} else {
26+
for directory in cli.directories {
27+
println!("Running tests in {}", directory);
28+
sqlplannertest::planner_test_apply(
29+
Path::new(env!("CARGO_MANIFEST_DIR"))
30+
.join("tests")
31+
.join(directory),
32+
|| async { optd_sqlplannertest::DatafusionDBMS::new().await },
33+
)
34+
.await?;
35+
}
36+
}
1237
Ok(())
1338
}

optd-sqlplannertest/src/lib.rs

+16-16
Original file line numberDiff line numberDiff line change
@@ -25,27 +25,27 @@ use async_trait::async_trait;
2525
pub struct DatafusionDBMS {
2626
ctx: SessionContext,
2727
/// Context enabling datafusion's logical optimizer.
28-
with_logical_ctx: SessionContext,
28+
use_df_logical_ctx: SessionContext,
2929
}
3030

3131
impl DatafusionDBMS {
3232
pub async fn new() -> Result<Self> {
3333
let ctx = DatafusionDBMS::new_session_ctx(false, None).await?;
34-
let with_logical_ctx =
34+
let use_df_logical_ctx =
3535
DatafusionDBMS::new_session_ctx(true, Some(ctx.state().catalog_list().clone())).await?;
3636
Ok(Self {
3737
ctx,
38-
with_logical_ctx,
38+
use_df_logical_ctx,
3939
})
4040
}
4141

42-
/// Creates a new session context. If the `with_logical` flag is set, datafusion's logical optimizer will be used.
42+
/// Creates a new session context. If the `use_df_logical` flag is set, datafusion's logical optimizer will be used.
4343
async fn new_session_ctx(
44-
with_logical: bool,
44+
use_df_logical: bool,
4545
catalog: Option<Arc<dyn CatalogList>>,
4646
) -> Result<SessionContext> {
4747
let mut session_config = SessionConfig::from_env()?.with_information_schema(true);
48-
if !with_logical {
48+
if !use_df_logical {
4949
session_config.options_mut().optimizer.max_passes = 0;
5050
}
5151

@@ -67,7 +67,7 @@ impl DatafusionDBMS {
6767
BaseTableStats::default(),
6868
false,
6969
);
70-
if !with_logical {
70+
if !use_df_logical {
7171
// clean up optimizer rules so that we can plug in our own optimizer
7272
state = state.with_optimizer_rules(vec![]);
7373
}
@@ -80,19 +80,19 @@ impl DatafusionDBMS {
8080
Ok(ctx)
8181
}
8282

83-
pub async fn execute(&self, sql: &str, with_logical: bool) -> Result<Vec<Vec<String>>> {
83+
pub async fn execute(&self, sql: &str, use_df_logical: bool) -> Result<Vec<Vec<String>>> {
8484
let sql = unescape_input(sql)?;
8585
let dialect = Box::new(GenericDialect);
8686
let statements = DFParser::parse_sql_with_dialect(&sql, dialect.as_ref())?;
8787
let mut result = Vec::new();
8888
for statement in statements {
89-
let df = if with_logical {
89+
let df = if use_df_logical {
9090
let plan = self
91-
.with_logical_ctx
91+
.use_df_logical_ctx
9292
.state()
9393
.statement_to_plan(statement)
9494
.await?;
95-
self.with_logical_ctx.execute_logical_plan(plan).await?
95+
self.use_df_logical_ctx.execute_logical_plan(plan).await?
9696
} else {
9797
let plan = self.ctx.state().statement_to_plan(statement).await?;
9898
self.ctx.execute_logical_plan(plan).await?
@@ -125,8 +125,8 @@ impl DatafusionDBMS {
125125
/// Executes the `execute` task.
126126
async fn task_execute(&mut self, r: &mut String, sql: &str, flags: &[String]) -> Result<()> {
127127
use std::fmt::Write;
128-
let with_logical = flags.contains(&"with_logical".to_string());
129-
let result = self.execute(sql, with_logical).await?;
128+
let use_df_logical = flags.contains(&"use_df_logical".to_string());
129+
let result = self.execute(sql, use_df_logical).await?;
130130
writeln!(r, "{}", result.into_iter().map(|x| x.join(" ")).join("\n"))?;
131131
writeln!(r)?;
132132
Ok(())
@@ -142,14 +142,14 @@ impl DatafusionDBMS {
142142
) -> Result<()> {
143143
use std::fmt::Write;
144144

145-
let with_logical = flags.contains(&"with_logical".to_string());
145+
let use_df_logical = flags.contains(&"use_df_logical".to_string());
146146
let verbose = flags.contains(&"verbose".to_string());
147147
let explain_sql = if verbose {
148148
format!("explain verbose {}", &sql)
149149
} else {
150150
format!("explain {}", &sql)
151151
};
152-
let result = self.execute(&explain_sql, with_logical).await?;
152+
let result = self.execute(&explain_sql, use_df_logical).await?;
153153
let subtask_start_pos = task.find(':').unwrap() + 1;
154154
for subtask in task[subtask_start_pos..].split(',') {
155155
let subtask = subtask.trim();
@@ -260,7 +260,7 @@ lazy_static! {
260260
}
261261

262262
/// Extract the flags from a task. The flags are specified in square brackets.
263-
/// For example, the flags for the task `explain[with_logical, verbose]` are `["with_logical", "verbose"]`.
263+
/// For example, the flags for the task `explain[use_df_logical, verbose]` are `["use_df_logical", "verbose"]`.
264264
fn extract_flags(task: &str) -> Result<Vec<String>> {
265265
if let Some(captures) = FLAGS_REGEX.captures(task) {
266266
Ok(captures

optd-sqlplannertest/tests/constant_predicate.yml renamed to optd-sqlplannertest/tests/basic/constant_predicate.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,19 @@
22
create table t1(t1v1 int, t1v2 int);
33
insert into t1 values (0, 0), (1, 1), (2, 2);
44
tasks:
5-
- execute[with_logical]
5+
- execute[use_df_logical]
66
- sql: |
77
select * from t1 where t1v1 = 0;
88
desc: Test whether the optimizer handles integer equality predicates correctly.
99
tasks:
10-
- execute[with_logical]
10+
- execute[use_df_logical]
1111
- sql: |
1212
select * from t1 where t1v1 = 0 and t1v2 = 1;
1313
desc: Test whether the optimizer handles multiple integer equality predicates correctly.
1414
tasks:
15-
- execute[with_logical]
15+
- execute[use_df_logical]
1616
- sql: |
1717
select * from t1 where t1v1 = 0 and t1v2 != 1;
1818
desc: Test whether the optimizer handles multiple integer inequality predicates correctly.
1919
tasks:
20-
- execute[with_logical]
20+
- execute[use_df_logical]

optd-sqlplannertest/tests/join_enumerate.yml renamed to optd-sqlplannertest/tests/basic/join_enumerate.yml

+5-5
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@
66
insert into t2 values (0, 200), (1, 201), (2, 202);
77
insert into t3 values (0, 300), (1, 301), (2, 302);
88
tasks:
9-
- execute[with_logical]
9+
- execute[use_df_logical]
1010
- sql: |
1111
select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2;
1212
desc: Test whether the optimizer enumerates all join orders.
1313
tasks:
14-
- explain[with_logical]:logical_join_orders
15-
- execute[with_logical]
14+
- explain[use_df_logical]:logical_join_orders
15+
- execute[use_df_logical]
1616
- sql: |
1717
select * from t1, t2, t3 where t1v1 = t2v1 and t1v2 = t3v2;
1818
desc: Test whether the optimizer enumerates all join orders.
1919
tasks:
20-
- explain[with_logical]:logical_join_orders
21-
- execute[with_logical]
20+
- explain[use_df_logical]:logical_join_orders
21+
- execute[use_df_logical]

optd-sqlplannertest/tests/subquery_unnesting.yml renamed to optd-sqlplannertest/tests/subqueries/subquery_unnesting.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
create table t2(t2v1 int, t2v3 int);
44
create table t3(t3v2 int, t3v4 int);
55
tasks:
6-
- execute[with_logical]
6+
- execute[use_df_logical]
77
# - sql: |
88
# select * from t1 where t1v1 in (select t2v1 from t2);
99
# desc: Test whether the optimizer can unnest "in" subqueries. -- failing with unsupported expression
@@ -13,4 +13,4 @@
1313
select * from t1 where (select sum(t2v3) from t2 where t2v1 = t1v1) > 100;
1414
desc: Test whether the optimizer can unnest correlated subqueries.
1515
tasks:
16-
- explain:logical_optd,physical_optd
16+
- explain:logical_optd,optimized_logical_optd,physical_optd

0 commit comments

Comments
 (0)