Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit d73c26c

Browse files
authored
refactor(sqlplannertest): further split test cases, support specify rules enabled (#190)
* Split TPC-H test cases * Add the test case that causes cycles Signed-off-by: Alex Chi <[email protected]>
1 parent 9633315 commit d73c26c

22 files changed

+3583
-2889
lines changed

datafusion-optd-cli/src/main.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ struct Args {
137137
maxrows: MaxRows,
138138

139139
#[clap(long, help = "Turn on datafusion logical optimizer before optd")]
140-
enable_logical: bool,
140+
enable_df_logical: bool,
141141

142142
#[clap(long, help = "Turn on adaptive optimization")]
143143
enable_adaptive: bool,
@@ -164,7 +164,7 @@ pub async fn main() -> Result<()> {
164164

165165
let mut session_config = SessionConfig::from_env()?.with_information_schema(true);
166166

167-
if !args.enable_logical {
167+
if !args.enable_df_logical {
168168
session_config.options_mut().optimizer.max_passes = 0;
169169
}
170170

@@ -198,7 +198,7 @@ pub async fn main() -> Result<()> {
198198
let mut ctx = {
199199
let mut state =
200200
SessionState::new_with_config_rt(session_config.clone(), Arc::new(runtime_env));
201-
if !args.enable_logical {
201+
if !args.enable_df_logical {
202202
// clean up optimizer rules so that we can plug in our own optimizer
203203
state = state.with_optimizer_rules(vec![]);
204204
state = state.with_physical_optimizer_rules(vec![]);

datafusion-optd-cli/tests/cli_integration.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ fn cli_test_tpch() {
5656
let mut cmd = Command::cargo_bin("datafusion-optd-cli").unwrap();
5757
cmd.current_dir(".."); // all paths in `test.sql` assume we're in the base dir of the repo
5858
cmd.args([
59-
"--enable-logical",
59+
"--enable-df-logical",
6060
"--file",
6161
"datafusion-optd-cli/tpch-sf0_01/test.sql",
6262
]);

datafusion-optd-cli/tpch-sf0_01/simple_manual_test.sql

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
-- This is just used if you want to run really simple manual tests on the CLI. Feel free to delete the whole thing and write your own manual tests
2-
-- Command: `cargo run --bin datafusion-optd-cli -- --enable-logical -f datafusion-optd-cli/tpch-sf0_01/simple_manual_test.sql`
2+
-- Command: `cargo run --bin datafusion-optd-cli -- --enable-df-logical -f datafusion-optd-cli/tpch-sf0_01/simple_manual_test.sql`
33
CREATE TABLE NATION (
44
N_NATIONKEY INT NOT NULL,
55
N_NAME CHAR(25) NOT NULL,
@@ -10,4 +10,4 @@ CREATE TABLE NATION (
1010
CREATE EXTERNAL TABLE nation_tbl STORED AS CSV DELIMITER '|' LOCATION 'datafusion-optd-cli/tpch-sf0_01/nation.tbl';
1111
insert into nation select column_1, column_2, column_3, column_4 from nation_tbl;
1212

13-
SELECT * FROM nation where nation.n_nationkey = 1 OR nation.n_nationkey = 2 OR nation.n_nationkey = 5;
13+
SELECT * FROM nation where nation.n_nationkey = 1 OR nation.n_nationkey = 2 OR nation.n_nationkey = 5;

optd-core/src/cascades/optimizer.rs

+11-2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ pub struct OptimizerContext {
3131

3232
#[derive(Default, Clone, Debug)]
3333
pub struct OptimizerProperties {
34+
pub partial_explore_temporarily_disabled: bool,
3435
/// If the number of rules applied exceeds this number, we stop applying logical rules.
3536
pub partial_explore_iter: Option<usize>,
3637
/// Plan space can be expanded by this number of times before we stop applying logical rules.
@@ -86,6 +87,14 @@ impl<T: RelNodeTyp> CascadesOptimizer<T> {
8687
Self::new_with_prop(rules, cost, property_builders, Default::default())
8788
}
8889

90+
/// Sets `prop.partial_explore_temporarily_disabled` to `true`.
///
/// While the flag is set, the optimization loop skips its plan-space
/// budget check (the `!self.ctx.budget_used && ...` branch).
pub fn disable_explore_limit(&mut self) {
91+
self.prop.partial_explore_temporarily_disabled = true;
92+
}
93+
94+
/// Sets `prop.partial_explore_temporarily_disabled` back to `false`, so the
/// optimization loop performs its plan-space budget check again.
pub fn enable_explore_limit(&mut self) {
95+
self.prop.partial_explore_temporarily_disabled = false;
96+
}
97+
8998
pub fn new_with_prop(
9099
rules: Vec<Arc<RuleWrapper<T, Self>>>,
91100
cost: Box<dyn CostModel<T>>,
@@ -113,7 +122,7 @@ impl<T: RelNodeTyp> CascadesOptimizer<T> {
113122
self.cost.clone()
114123
}
115124

116-
pub(super) fn rules(&self) -> Arc<[Arc<RuleWrapper<T, Self>>]> {
125+
/// Returns the optimizer's rule list as a shared `Arc` slice.
///
/// Only the `Arc` handle is cloned; the position of a rule in the returned
/// slice is the id callers pass to `enable_rule` / `disable_rule`.
pub fn rules(&self) -> Arc<[Arc<RuleWrapper<T, Self>>]> {
117126
self.rules.clone()
118127
}
119128

@@ -229,7 +238,7 @@ impl<T: RelNodeTyp> CascadesOptimizer<T> {
229238
let new_tasks = task.execute(self)?;
230239
self.tasks.extend(new_tasks);
231240
iter += 1;
232-
if !self.ctx.budget_used {
241+
if !self.ctx.budget_used && !self.prop.partial_explore_temporarily_disabled {
233242
let plan_space = self.memo.compute_plan_space();
234243
if let Some(partial_explore_space) = self.prop.partial_explore_space {
235244
if plan_space - plan_space_begin > partial_explore_space {

optd-datafusion-repr/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ impl DatafusionOptimizer {
162162
Box::new(ColumnRefPropertyBuilder::new(catalog.clone())),
163163
],
164164
OptimizerProperties {
165+
partial_explore_temporarily_disabled: false,
165166
partial_explore_iter: Some(1 << 20),
166167
partial_explore_space: Some(1 << 10),
167168
},

optd-sqlplannertest/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ The `explain` and `execute` task will be run with datafusion's logical optimizer
3636
| -------------- | --------------------------------------- |
3737
| use_df_logical | Enable Datafusion's logical optimizer |
3838
| verbose | Display estimated cost in physical plan |
39+
| logical_rules | Only enable these logical rules |
3940

4041
Currently we have the following options for the explain task:
4142

optd-sqlplannertest/src/lib.rs

+99-25
Original file line numberDiff line numberDiff line change
@@ -13,44 +13,49 @@ use optd_datafusion_bridge::{DatafusionCatalog, OptdQueryPlanner};
1313
use optd_datafusion_repr::cost::BaseTableStats;
1414
use optd_datafusion_repr::DatafusionOptimizer;
1515
use regex::Regex;
16+
use std::collections::HashSet;
1617
use std::sync::Arc;
1718

1819
#[global_allocator]
1920
static GLOBAL: MiMalloc = MiMalloc;
2021

21-
use anyhow::{Context, Result};
22+
use anyhow::{bail, Result};
2223
use async_trait::async_trait;
2324

2425
/// Planner-test runner state: two DataFusion session contexts plus a handle
/// to the optd query planner installed in the first one.
#[derive(Default)]
2526
pub struct DatafusionDBMS {
2627
/// Context created with `use_df_logical = false` (see `new`).
ctx: SessionContext,
2728
/// Context enabling datafusion's logical optimizer.
2829
use_df_logical_ctx: SessionContext,
30+
/// Shared optd optimizer (for tweaking config).
/// `new` always stores `Some`; a value built through `Default` holds `None`,
/// which `execute` would panic on because it unwraps this field.
31+
optd_optimizer: Option<Arc<OptdQueryPlanner>>,
2932
}
3033

3134
impl DatafusionDBMS {
3235
/// Builds the runner with two session contexts.
///
/// The first context is created with `use_df_logical = false`; the second is
/// created with `use_df_logical = true` and reuses the first context's
/// catalog list. Only the first context's `OptdQueryPlanner` is kept in
/// `optd_optimizer`; the planner returned for the second context is discarded.
///
/// NOTE(review): because the second planner is dropped, rule and
/// explore-limit changes made through `optd_optimizer` appear to reach only
/// the planner of `ctx`, not the one used by `use_df_logical_ctx` — confirm
/// this is intended.
pub async fn new() -> Result<Self> {
33-
let ctx = DatafusionDBMS::new_session_ctx(false, None).await?;
34-
let use_df_logical_ctx =
36+
let (ctx, optd_optimizer) = DatafusionDBMS::new_session_ctx(false, None).await?;
37+
let (use_df_logical_ctx, _) =
3538
DatafusionDBMS::new_session_ctx(true, Some(ctx.state().catalog_list().clone())).await?;
3639
Ok(Self {
3740
ctx,
3841
use_df_logical_ctx,
42+
optd_optimizer: Some(optd_optimizer),
3943
})
4044
}
4145

4246
/// Creates a new session context. If the `use_df_logical` flag is set, datafusion's logical optimizer will be used.
4347
async fn new_session_ctx(
4448
use_df_logical: bool,
4549
catalog: Option<Arc<dyn CatalogList>>,
46-
) -> Result<SessionContext> {
50+
) -> Result<(SessionContext, Arc<OptdQueryPlanner>)> {
4751
let mut session_config = SessionConfig::from_env()?.with_information_schema(true);
4852
if !use_df_logical {
4953
session_config.options_mut().optimizer.max_passes = 0;
5054
}
5155

5256
let rn_config = RuntimeConfig::new();
5357
let runtime_env = RuntimeEnv::new(rn_config.clone())?;
58+
let optd_optimizer;
5459

5560
let ctx = {
5661
let mut state = if let Some(catalog) = catalog {
@@ -73,20 +78,63 @@ impl DatafusionDBMS {
7378
}
7479
state = state.with_physical_optimizer_rules(vec![]);
7580
// use optd-bridge query planner
76-
state = state.with_query_planner(Arc::new(OptdQueryPlanner::new(optimizer)));
81+
optd_optimizer = Arc::new(OptdQueryPlanner::new(optimizer));
82+
state = state.with_query_planner(optd_optimizer.clone());
7783
SessionContext::new_with_state(state)
7884
};
7985
ctx.refresh_catalogs().await?;
80-
Ok(ctx)
86+
Ok((ctx, optd_optimizer))
8187
}
8288

83-
pub async fn execute(&self, sql: &str, use_df_logical: bool) -> Result<Vec<Vec<String>>> {
89+
pub(crate) async fn execute(&self, sql: &str, flags: &TestFlags) -> Result<Vec<Vec<String>>> {
90+
{
91+
let mut guard = self
92+
.optd_optimizer
93+
.as_ref()
94+
.unwrap()
95+
.optimizer
96+
.lock()
97+
.unwrap();
98+
let optimizer = guard.as_mut().unwrap().optd_optimizer_mut();
99+
if flags.disable_explore_limit {
100+
optimizer.disable_explore_limit();
101+
} else {
102+
optimizer.enable_explore_limit();
103+
}
104+
let rules = optimizer.rules();
105+
if flags.enable_logical_rules.is_empty() {
106+
for r in 0..rules.len() {
107+
optimizer.enable_rule(r);
108+
}
109+
} else {
110+
for (rule_id, rule) in rules.as_ref().iter().enumerate() {
111+
if rule.rule.is_impl_rule() {
112+
optimizer.enable_rule(rule_id);
113+
} else {
114+
optimizer.disable_rule(rule_id);
115+
}
116+
}
117+
let mut rules_to_enable = flags
118+
.enable_logical_rules
119+
.iter()
120+
.map(|x| x.as_str())
121+
.collect::<HashSet<_>>();
122+
for (rule_id, rule) in rules.as_ref().iter().enumerate() {
123+
if rules_to_enable.remove(rule.rule.name()) {
124+
optimizer.enable_rule(rule_id);
125+
}
126+
}
127+
if !rules_to_enable.is_empty() {
128+
bail!("Unknown logical rule: {:?}", rules_to_enable);
129+
}
130+
}
131+
}
84132
let sql = unescape_input(sql)?;
85133
let dialect = Box::new(GenericDialect);
86134
let statements = DFParser::parse_sql_with_dialect(&sql, dialect.as_ref())?;
87135
let mut result = Vec::new();
88136
for statement in statements {
89-
let df = if use_df_logical {
137+
let df = if flags.enable_df_logical {
90138
let plan = self
91139
.use_df_logical_ctx
92140
.state()
@@ -95,6 +143,7 @@ impl DatafusionDBMS {
95143
self.use_df_logical_ctx.execute_logical_plan(plan).await?
96144
} else {
97145
let plan = self.ctx.state().statement_to_plan(statement).await?;
146+
98147
self.ctx.execute_logical_plan(plan).await?
99148
};
100149

@@ -123,10 +172,12 @@ impl DatafusionDBMS {
123172
}
124173

125174
/// Executes the `execute` task.
126-
async fn task_execute(&mut self, r: &mut String, sql: &str, flags: &[String]) -> Result<()> {
175+
async fn task_execute(&mut self, r: &mut String, sql: &str, flags: &TestFlags) -> Result<()> {
127176
use std::fmt::Write;
128-
let use_df_logical = flags.contains(&"use_df_logical".to_string());
129-
let result = self.execute(sql, use_df_logical).await?;
177+
if flags.verbose {
178+
bail!("Verbose flag is not supported for execute task");
179+
}
180+
let result = self.execute(sql, flags).await?;
130181
writeln!(r, "{}", result.into_iter().map(|x| x.join(" ")).join("\n"))?;
131182
writeln!(r)?;
132183
Ok(())
@@ -138,19 +189,18 @@ impl DatafusionDBMS {
138189
r: &mut String,
139190
sql: &str,
140191
task: &str,
141-
flags: &[String],
192+
flags: &TestFlags,
142193
) -> Result<()> {
143194
use std::fmt::Write;
144195

145-
let use_df_logical = flags.contains(&"use_df_logical".to_string());
146-
let verbose = flags.contains(&"verbose".to_string());
196+
let verbose = flags.verbose;
147197
let explain_sql = if verbose {
148198
format!("explain verbose {}", &sql)
149199
} else {
150200
format!("explain {}", &sql)
151201
};
152-
let result = self.execute(&explain_sql, use_df_logical).await?;
153-
let subtask_start_pos = task.find(':').unwrap() + 1;
202+
let result = self.execute(&explain_sql, flags).await?;
203+
let subtask_start_pos = task.rfind(':').unwrap() + 1;
154204
for subtask in task[subtask_start_pos..].split(',') {
155205
let subtask = subtask.trim();
156206
if subtask == "logical_datafusion" {
@@ -163,7 +213,7 @@ impl DatafusionDBMS {
163213
.map(|x| &x[1])
164214
.unwrap()
165215
)?;
166-
} else if subtask == "logical_optd_heuristic" {
216+
} else if subtask == "logical_optd_heuristic" || subtask == "optimized_logical_optd" {
167217
writeln!(
168218
r,
169219
"{}",
@@ -225,6 +275,8 @@ impl DatafusionDBMS {
225275
.map(|x| &x[1])
226276
.unwrap()
227277
)?;
278+
} else {
279+
bail!("Unknown subtask: {}", subtask);
228280
}
229281
}
230282

@@ -235,10 +287,8 @@ impl DatafusionDBMS {
235287
#[async_trait]
236288
impl sqlplannertest::PlannerTestRunner for DatafusionDBMS {
237289
async fn run(&mut self, test_case: &sqlplannertest::ParsedTestCase) -> Result<String> {
238-
for before in &test_case.before_sql {
239-
self.execute(before, true)
240-
.await
241-
.context("before execution error")?;
290+
if !test_case.before_sql.is_empty() {
291+
panic!("before is not supported in optd-sqlplannertest, always specify the task type to run");
242292
}
243293

244294
let mut result = String::new();
@@ -259,18 +309,42 @@ lazy_static! {
259309
static ref FLAGS_REGEX: Regex = Regex::new(r"\[(.*)\]").unwrap();
260310
}
261311

312+
/// Options parsed by `extract_flags` from the bracketed flag list of a task.
#[derive(Default, Debug)]
313+
struct TestFlags {
314+
/// Set by the `verbose` flag. The explain task then issues `explain verbose`;
/// the execute task rejects it with an error.
verbose: bool,
315+
/// Set by the `use_df_logical` flag. Statements are planned and run on the
/// context that has datafusion's logical optimizer enabled.
enable_df_logical: bool,
316+
/// Rule names listed after `logical_rules+...`. When empty, `execute` enables
/// every rule; otherwise it enables implementation rules plus the named rules
/// and fails on names that match no rule.
enable_logical_rules: Vec<String>,
317+
/// Set by the `disable_explore_limit` flag. `execute` calls
/// `disable_explore_limit()` on the optimizer when set, `enable_explore_limit()` otherwise.
disable_explore_limit: bool,
318+
}
319+
262320
/// Extract the flags from a task. The flags are specified in square brackets.
263321
/// For example, the flags for the task `explain[use_df_logical, verbose]` are `["use_df_logical", "verbose"]`.
264-
fn extract_flags(task: &str) -> Result<Vec<String>> {
322+
fn extract_flags(task: &str) -> Result<TestFlags> {
265323
if let Some(captures) = FLAGS_REGEX.captures(task) {
266-
Ok(captures
324+
let flags = captures
267325
.get(1)
268326
.unwrap()
269327
.as_str()
270328
.split(',')
271329
.map(|x| x.trim().to_string())
272-
.collect())
330+
.collect_vec();
331+
let mut options = TestFlags::default();
332+
for flag in flags {
333+
if flag == "verbose" {
334+
options.verbose = true;
335+
} else if flag == "use_df_logical" {
336+
options.enable_df_logical = true;
337+
} else if flag.starts_with("logical_rules") {
338+
options.enable_logical_rules =
339+
flag.split('+').skip(1).map(|x| x.to_string()).collect();
340+
} else if flag == "disable_explore_limit" {
341+
options.disable_explore_limit = true;
342+
} else {
343+
bail!("Unknown flag: {}", flag);
344+
}
345+
}
346+
Ok(options)
273347
} else {
274-
Ok(vec![])
348+
Ok(TestFlags::default())
275349
}
276350
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
-- (no id or description)
2+
create table t1(t1v1 int, t1v2 int);
3+
create table t2(t2v1 int, t2v3 int);
4+
insert into t1 values (0, 0), (1, 1), (2, 2);
5+
insert into t2 values (0, 200), (1, 201), (2, 202);
6+
7+
/*
8+
3
9+
3
10+
*/
11+
12+
-- Test whether we can transpose filter and projection
13+
SELECT t1.t1v1, t1.t1v2, t2.t2v3
14+
FROM t1, t2
15+
WHERE t1.t1v1 = t2.t2v1;
16+
17+
/*
18+
LogicalProjection { exprs: [ #0, #1, #3 ] }
19+
└── LogicalFilter
20+
├── cond:Eq
21+
│ ├── #0
22+
│ └── #2
23+
└── LogicalJoin { join_type: Cross, cond: true }
24+
├── LogicalScan { table: t1 }
25+
└── LogicalScan { table: t2 }
26+
PhysicalProjection { exprs: [ #0, #1, #3 ] }
27+
└── PhysicalFilter
28+
├── cond:Eq
29+
│ ├── #0
30+
│ └── #2
31+
└── PhysicalNestedLoopJoin { join_type: Cross, cond: true }
32+
├── PhysicalScan { table: t1 }
33+
└── PhysicalScan { table: t2 }
34+
*/
35+

0 commit comments

Comments
 (0)