Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 7012f8c

Browse files
authored
test: add tpc-h q2 (#80)
This pull request fixes the context used for testing and uses a single catalog for both the DF logical and optd logical optimizers. * With the DF logical optimizer enabled, it takes a really long time for optd to search the plan space, and it always hits the budget. We need to properly implement pruning and later remove the partial explore budget from optd. * The plan may be unstable across different platforms, as access to HashMap is not deterministic. Signed-off-by: Alex Chi <[email protected]>
1 parent 755db92 commit 7012f8c

File tree

6 files changed

+331
-15
lines changed

6 files changed

+331
-15
lines changed

optd-adaptive-demo/src/bin/optd-adaptive-three-join.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ async fn main() -> Result<()> {
2727
let runtime_env = RuntimeEnv::new(rn_config.clone())?;
2828
let mut state =
2929
SessionState::new_with_config_rt(session_config.clone(), Arc::new(runtime_env));
30-
let mut optimizer: DatafusionOptimizer = DatafusionOptimizer::new_physical(Arc::new(
31-
DatafusionCatalog::new(state.catalog_list()),
32-
));
30+
let mut optimizer: DatafusionOptimizer = DatafusionOptimizer::new_physical_adaptive(
31+
Arc::new(DatafusionCatalog::new(state.catalog_list())),
32+
);
3333
optimizer.optd_optimizer_mut().prop.partial_explore_iter = None;
3434
optimizer.optd_optimizer_mut().prop.partial_explore_space = None;
3535
state = state.with_query_planner(Arc::new(OptdQueryPlanner::new(optimizer)));

optd-adaptive-demo/src/bin/optd-adaptive-tpch-q8.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ async fn main() -> Result<()> {
2828
let mut ctx = {
2929
let mut state =
3030
SessionState::new_with_config_rt(session_config.clone(), Arc::new(runtime_env));
31-
let optimizer = DatafusionOptimizer::new_physical(Arc::new(DatafusionCatalog::new(
32-
state.catalog_list(),
33-
)));
31+
let optimizer = DatafusionOptimizer::new_physical_adaptive(Arc::new(
32+
DatafusionCatalog::new(state.catalog_list()),
33+
));
3434
// clean up optimizer rules so that we can plug in our own optimizer
3535
state = state.with_optimizer_rules(vec![]);
3636
state = state.with_physical_optimizer_rules(vec![]);

optd-datafusion-repr/src/lib.rs

+32-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@ use std::sync::Arc;
44

55
use anyhow::Result;
66
use cost::{AdaptiveCostModel, RuntimeAdaptionStorage};
7-
use optd_core::cascades::{CascadesOptimizer, GroupId, OptimizerProperties};
7+
use optd_core::{
8+
cascades::{CascadesOptimizer, GroupId, OptimizerProperties},
9+
rules::Rule,
10+
};
811
use plan_nodes::{OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode};
912
use properties::{
1013
column_ref::ColumnRefPropertyBuilder,
@@ -44,8 +47,7 @@ impl DatafusionOptimizer {
4447
&mut self.optimizer
4548
}
4649

47-
/// Create an optimizer with default settings: adaptive + partial explore.
48-
pub fn new_physical(catalog: Arc<dyn Catalog>) -> Self {
50+
pub fn default_rules() -> Vec<Arc<dyn Rule<OptRelNodeTyp, CascadesOptimizer<OptRelNodeTyp>>>> {
4951
let mut rules = PhysicalConversionRule::all_conversions();
5052
rules.push(Arc::new(HashJoinRule::new()));
5153
rules.push(Arc::new(JoinCommuteRule::new()));
@@ -56,7 +58,34 @@ impl DatafusionOptimizer {
5658
rules.push(Arc::new(EliminateLimitRule::new()));
5759
rules.push(Arc::new(EliminateDuplicatedSortExprRule::new()));
5860
rules.push(Arc::new(EliminateDuplicatedAggExprRule::new()));
61+
rules
62+
}
63+
64+
/// Create an optimizer for testing purposes: adaptive disabled + partial explore (otherwise it's too slow).
65+
pub fn new_physical(catalog: Arc<dyn Catalog>) -> Self {
66+
let rules = Self::default_rules();
67+
let cost_model = AdaptiveCostModel::new(50);
68+
Self {
69+
runtime_statistics: cost_model.get_runtime_map(),
70+
optimizer: CascadesOptimizer::new_with_prop(
71+
rules,
72+
Box::new(cost_model),
73+
vec![
74+
Box::new(SchemaPropertyBuilder::new(catalog.clone())),
75+
Box::new(ColumnRefPropertyBuilder::new(catalog)),
76+
],
77+
OptimizerProperties {
78+
partial_explore_iter: Some(1 << 20),
79+
partial_explore_space: Some(1 << 10),
80+
},
81+
),
82+
enable_adaptive: false,
83+
}
84+
}
5985

86+
/// Create an optimizer with default settings: adaptive + partial explore.
87+
pub fn new_physical_adaptive(catalog: Arc<dyn Catalog>) -> Self {
88+
let rules = Self::default_rules();
6089
let cost_model = AdaptiveCostModel::new(50);
6190
Self {
6291
runtime_statistics: cost_model.get_runtime_map(),

optd-sqlplannertest/src/lib.rs

+18-6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use datafusion::arrow::util::display::{ArrayFormatter, FormatOptions};
2+
use datafusion::catalog::CatalogList;
23
use datafusion::execution::context::{SessionConfig, SessionState};
34
use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
45
use datafusion::prelude::SessionContext;
@@ -26,16 +27,20 @@ pub struct DatafusionDb {
2627

2728
impl DatafusionDb {
2829
pub async fn new() -> Result<Self> {
29-
let ctx = DatafusionDb::new_session_ctx(false).await?;
30-
let with_logical_ctx = DatafusionDb::new_session_ctx(true).await?;
30+
let ctx = DatafusionDb::new_session_ctx(false, None).await?;
31+
let with_logical_ctx =
32+
DatafusionDb::new_session_ctx(true, Some(ctx.state().catalog_list().clone())).await?;
3133
Ok(Self {
3234
ctx,
3335
with_logical_ctx,
3436
})
3537
}
3638

3739
/// Creates a new session context. If the `with_logical` flag is set, datafusion's logical optimizer will be used.
38-
async fn new_session_ctx(with_logical: bool) -> Result<SessionContext> {
40+
async fn new_session_ctx(
41+
with_logical: bool,
42+
catalog: Option<Arc<dyn CatalogList>>,
43+
) -> Result<SessionContext> {
3944
let mut session_config = SessionConfig::from_env()?.with_information_schema(true);
4045
if !with_logical {
4146
session_config.options_mut().optimizer.max_passes = 0;
@@ -45,16 +50,23 @@ impl DatafusionDb {
4550
let runtime_env = RuntimeEnv::new(rn_config.clone())?;
4651

4752
let ctx = {
48-
let mut state =
49-
SessionState::new_with_config_rt(session_config.clone(), Arc::new(runtime_env));
53+
let mut state = if let Some(catalog) = catalog {
54+
SessionState::new_with_config_rt_and_catalog_list(
55+
session_config.clone(),
56+
Arc::new(runtime_env),
57+
catalog,
58+
)
59+
} else {
60+
SessionState::new_with_config_rt(session_config.clone(), Arc::new(runtime_env))
61+
};
5062
let optimizer = DatafusionOptimizer::new_physical(Arc::new(DatafusionCatalog::new(
5163
state.catalog_list(),
5264
)));
5365
if !with_logical {
5466
// clean up optimizer rules so that we can plug in our own optimizer
5567
state = state.with_optimizer_rules(vec![]);
56-
state = state.with_physical_optimizer_rules(vec![]);
5768
}
69+
state = state.with_physical_optimizer_rules(vec![]);
5870
// use optd-bridge query planner
5971
state = state.with_query_planner(Arc::new(OptdQueryPlanner::new(optimizer)));
6072
SessionContext::new_with_state(state)

optd-sqlplannertest/tests/tpch.planner.sql

+227
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,233 @@ CREATE TABLE LINEITEM (
8888
8989
*/
9090

91+
-- TPC-H Q2
92+
select
93+
s_acctbal,
94+
s_name,
95+
n_name,
96+
p_partkey,
97+
p_mfgr,
98+
s_address,
99+
s_phone,
100+
s_comment
101+
from
102+
part,
103+
supplier,
104+
partsupp,
105+
nation,
106+
region
107+
where
108+
p_partkey = ps_partkey
109+
and s_suppkey = ps_suppkey
110+
and p_size = 4
111+
and p_type like '%TIN'
112+
and s_nationkey = n_nationkey
113+
and n_regionkey = r_regionkey
114+
and r_name = 'AFRICA'
115+
and ps_supplycost = (
116+
select
117+
min(ps_supplycost)
118+
from
119+
partsupp,
120+
supplier,
121+
nation,
122+
region
123+
where
124+
p_partkey = ps_partkey
125+
and s_suppkey = ps_suppkey
126+
and s_nationkey = n_nationkey
127+
and n_regionkey = r_regionkey
128+
and r_name = 'AFRICA'
129+
)
130+
order by
131+
s_acctbal desc,
132+
n_name,
133+
s_name,
134+
p_partkey
135+
limit 100;
136+
137+
/*
138+
LogicalLimit { skip: 0, fetch: 100 }
139+
└── LogicalSort
140+
├── exprs:
141+
│ ┌── SortOrder { order: Desc }
142+
│ │ └── #0
143+
│ ├── SortOrder { order: Asc }
144+
│ │ └── #2
145+
│ ├── SortOrder { order: Asc }
146+
│ │ └── #1
147+
│ └── SortOrder { order: Asc }
148+
│ └── #3
149+
└── LogicalProjection { exprs: [ #5, #2, #8, #0, #1, #3, #4, #6 ] }
150+
└── LogicalJoin
151+
├── join_type: Inner
152+
├── cond:And
153+
│ ├── Eq
154+
│ │ ├── #0
155+
│ │ └── #10
156+
│ └── Eq
157+
│ ├── #7
158+
│ └── #9
159+
├── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8 ] }
160+
│ └── LogicalJoin
161+
│ ├── join_type: Inner
162+
│ ├── cond:Eq
163+
│ │ ├── #9
164+
│ │ └── #10
165+
│ ├── LogicalProjection { exprs: [ #0, #1, #2, #3, #5, #6, #7, #8, #10, #11 ] }
166+
│ │ └── LogicalJoin
167+
│ │ ├── join_type: Inner
168+
│ │ ├── cond:Eq
169+
│ │ │ ├── #4
170+
│ │ │ └── #9
171+
│ │ ├── LogicalProjection { exprs: [ #0, #1, #5, #6, #7, #8, #9, #10, #3 ] }
172+
│ │ │ └── LogicalJoin
173+
│ │ │ ├── join_type: Inner
174+
│ │ │ ├── cond:Eq
175+
│ │ │ │ ├── #2
176+
│ │ │ │ └── #4
177+
│ │ │ ├── LogicalProjection { exprs: [ #0, #1, #3, #4 ] }
178+
│ │ │ │ └── LogicalJoin
179+
│ │ │ │ ├── join_type: Inner
180+
│ │ │ │ ├── cond:Eq
181+
│ │ │ │ │ ├── #0
182+
│ │ │ │ │ └── #2
183+
│ │ │ │ ├── LogicalProjection { exprs: [ #0, #1 ] }
184+
│ │ │ │ │ └── LogicalFilter
185+
│ │ │ │ │ ├── cond:And
186+
│ │ │ │ │ │ ├── Eq
187+
│ │ │ │ │ │ │ ├── #3
188+
│ │ │ │ │ │ │ └── 4
189+
│ │ │ │ │ │ └── Like { expr: #2, pattern: "%TIN" }
190+
│ │ │ │ │ └── LogicalProjection { exprs: [ #0, #2, #4, #5 ] }
191+
│ │ │ │ │ └── LogicalScan { table: part }
192+
│ │ │ │ └── LogicalProjection { exprs: [ #0, #1, #3 ] }
193+
│ │ │ │ └── LogicalScan { table: partsupp }
194+
│ │ │ └── LogicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6 ] }
195+
│ │ │ └── LogicalScan { table: supplier }
196+
│ │ └── LogicalProjection { exprs: [ #0, #1, #2 ] }
197+
│ │ └── LogicalScan { table: nation }
198+
│ └── LogicalProjection { exprs: [ #0 ] }
199+
│ └── LogicalFilter
200+
│ ├── cond:Eq
201+
│ │ ├── #1
202+
│ │ └── "AFRICA"
203+
│ └── LogicalProjection { exprs: [ #0, #1 ] }
204+
│ └── LogicalScan { table: region }
205+
└── LogicalProjection { exprs: [ #1, #0 ] }
206+
└── LogicalAgg
207+
├── exprs:Agg(Min)
208+
│ └── [ #1 ]
209+
├── groups: [ #0 ]
210+
└── LogicalProjection { exprs: [ #0, #1 ] }
211+
└── LogicalJoin
212+
├── join_type: Inner
213+
├── cond:Eq
214+
│ ├── #2
215+
│ └── #3
216+
├── LogicalProjection { exprs: [ #0, #1, #4 ] }
217+
│ └── LogicalJoin
218+
│ ├── join_type: Inner
219+
│ ├── cond:Eq
220+
│ │ ├── #2
221+
│ │ └── #3
222+
│ ├── LogicalProjection { exprs: [ #0, #2, #4 ] }
223+
│ │ └── LogicalJoin
224+
│ │ ├── join_type: Inner
225+
│ │ ├── cond:Eq
226+
│ │ │ ├── #1
227+
│ │ │ └── #3
228+
│ │ ├── LogicalProjection { exprs: [ #0, #1, #3 ] }
229+
│ │ │ └── LogicalScan { table: partsupp }
230+
│ │ └── LogicalProjection { exprs: [ #0, #3 ] }
231+
│ │ └── LogicalScan { table: supplier }
232+
│ └── LogicalProjection { exprs: [ #0, #2 ] }
233+
│ └── LogicalScan { table: nation }
234+
└── LogicalProjection { exprs: [ #0 ] }
235+
└── LogicalFilter
236+
├── cond:Eq
237+
│ ├── #1
238+
│ └── "AFRICA"
239+
└── LogicalProjection { exprs: [ #0, #1 ] }
240+
└── LogicalScan { table: region }
241+
PhysicalLimit { skip: 0, fetch: 100 }
242+
└── PhysicalSort
243+
├── exprs:
244+
│ ┌── SortOrder { order: Desc }
245+
│ │ └── #0
246+
│ ├── SortOrder { order: Asc }
247+
│ │ └── #2
248+
│ ├── SortOrder { order: Asc }
249+
│ │ └── #1
250+
│ └── SortOrder { order: Asc }
251+
│ └── #3
252+
└── PhysicalProjection { exprs: [ #5, #2, #8, #0, #1, #3, #4, #6 ] }
253+
└── PhysicalNestedLoopJoin
254+
├── join_type: Inner
255+
├── cond:And
256+
│ ├── Eq
257+
│ │ ├── #0
258+
│ │ └── #10
259+
│ └── Eq
260+
│ ├── #7
261+
│ └── #9
262+
├── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6, #7, #8 ] }
263+
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #9 ], right_keys: [ #0 ] }
264+
│ ├── PhysicalProjection { exprs: [ #0, #1, #2, #3, #5, #6, #7, #8, #10, #11 ] }
265+
│ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #4 ], right_keys: [ #0 ] }
266+
│ │ ├── PhysicalProjection { exprs: [ #0, #1, #5, #6, #7, #8, #9, #10, #3 ] }
267+
│ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] }
268+
│ │ │ ├── PhysicalProjection { exprs: [ #0, #1, #3, #4 ] }
269+
│ │ │ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
270+
│ │ │ │ ├── PhysicalProjection { exprs: [ #0, #1 ] }
271+
│ │ │ │ │ └── PhysicalFilter
272+
│ │ │ │ │ ├── cond:And
273+
│ │ │ │ │ │ ├── Eq
274+
│ │ │ │ │ │ │ ├── #3
275+
│ │ │ │ │ │ │ └── 4
276+
│ │ │ │ │ │ └── Like { expr: #2, pattern: "%TIN" }
277+
│ │ │ │ │ └── PhysicalProjection { exprs: [ #0, #2, #4, #5 ] }
278+
│ │ │ │ │ └── PhysicalScan { table: part }
279+
│ │ │ │ └── PhysicalProjection { exprs: [ #0, #1, #3 ] }
280+
│ │ │ │ └── PhysicalScan { table: partsupp }
281+
│ │ │ └── PhysicalProjection { exprs: [ #0, #1, #2, #3, #4, #5, #6 ] }
282+
│ │ │ └── PhysicalScan { table: supplier }
283+
│ │ └── PhysicalProjection { exprs: [ #0, #1, #2 ] }
284+
│ │ └── PhysicalScan { table: nation }
285+
│ └── PhysicalProjection { exprs: [ #0 ] }
286+
│ └── PhysicalFilter
287+
│ ├── cond:Eq
288+
│ │ ├── #1
289+
│ │ └── "AFRICA"
290+
│ └── PhysicalProjection { exprs: [ #0, #1 ] }
291+
│ └── PhysicalScan { table: region }
292+
└── PhysicalProjection { exprs: [ #1, #0 ] }
293+
└── PhysicalAgg
294+
├── aggrs:Agg(Min)
295+
│ └── [ #1 ]
296+
├── groups: [ #0 ]
297+
└── PhysicalProjection { exprs: [ #0, #1 ] }
298+
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] }
299+
├── PhysicalProjection { exprs: [ #0, #1, #4 ] }
300+
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #2 ], right_keys: [ #0 ] }
301+
│ ├── PhysicalProjection { exprs: [ #0, #2, #4 ] }
302+
│ │ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #1 ], right_keys: [ #0 ] }
303+
│ │ ├── PhysicalProjection { exprs: [ #0, #1, #3 ] }
304+
│ │ │ └── PhysicalScan { table: partsupp }
305+
│ │ └── PhysicalProjection { exprs: [ #0, #3 ] }
306+
│ │ └── PhysicalScan { table: supplier }
307+
│ └── PhysicalProjection { exprs: [ #0, #2 ] }
308+
│ └── PhysicalScan { table: nation }
309+
└── PhysicalProjection { exprs: [ #0 ] }
310+
└── PhysicalFilter
311+
├── cond:Eq
312+
│ ├── #1
313+
│ └── "AFRICA"
314+
└── PhysicalProjection { exprs: [ #0, #1 ] }
315+
└── PhysicalScan { table: region }
316+
*/
317+
91318
-- TPC-H Q5
92319
SELECT
93320
n_name AS nation,

0 commit comments

Comments
 (0)