@@ -10,9 +10,10 @@ use std::pin::Pin;
1010use std:: sync:: Arc ;
1111
1212use anyhow:: Result ;
13+ use itertools:: Itertools ;
1314use tracing:: trace;
1415
15- use super :: memo:: { ArcMemoPlanNode , GroupInfo , Memo } ;
16+ use super :: memo:: { ArcMemoPlanNode , GroupInfo , Memo , WinnerInfo } ;
1617use super :: NaiveMemo ;
1718use crate :: cascades:: memo:: Winner ;
1819use crate :: cascades:: tasks2:: { TaskContext , TaskDesc } ;
@@ -38,16 +39,92 @@ pub struct OptimizerContext {
3839
3940#[ derive( Default , Clone , Debug ) ]
4041pub struct OptimizerProperties {
42+ /// Panic the optimizer if the budget is reached, used in planner tests.
4143 pub panic_on_budget : bool ,
4244 /// If the number of rules applied exceeds this number, we stop applying logical rules.
4345 pub partial_explore_iter : Option < usize > ,
4446 /// Plan space can be expanded by this number of times before we stop applying logical rules.
4547 pub partial_explore_space : Option < usize > ,
4648 /// Disable pruning during optimization.
4749 pub disable_pruning : bool ,
50+ /// Enable tracing during optimization.
51+ pub enable_tracing : bool ,
4852}
4953
50- #[ derive( Debug , Default ) ]
54+ #[ derive( Clone ) ]
55+ pub enum OptimizerTrace {
56+ /// A winner decision is made
57+ DecideWinner {
58+ /// The stage and step number when a trace is recorded
59+ stage : usize ,
60+ step : usize ,
61+ /// The group ID when a trace is recorded
62+ group_id : GroupId ,
63+ /// The proposed winner
64+ proposed_winner_info : WinnerInfo ,
65+ /// The winner of the children
66+ children_winner : Vec < ExprId > ,
67+ } ,
68+ /// The group is created by applying a rule
69+ ApplyRule {
70+ /// The step number when a trace is recorded
71+ stage : usize ,
72+ step : usize ,
73+ /// The group ID when a trace is recorded
74+ group_id : GroupId ,
75+ /// The expression being applied
76+ applied_expr_id : ExprId ,
77+ /// The expression being produced
78+ produced_expr_id : ExprId ,
79+ /// The rule ID
80+ rule_id : usize ,
81+ } ,
82+ }
83+
84+ impl OptimizerTrace {
85+ pub fn stage_step ( & self ) -> ( usize , usize ) {
86+ match self {
87+ OptimizerTrace :: DecideWinner { stage, step, .. } => ( * stage, * step) ,
88+ OptimizerTrace :: ApplyRule { stage, step, .. } => ( * stage, * step) ,
89+ }
90+ }
91+ }
92+
93+ impl std:: fmt:: Display for OptimizerTrace {
94+ fn fmt ( & self , f : & mut std:: fmt:: Formatter < ' _ > ) -> std:: fmt:: Result {
95+ match self {
96+ OptimizerTrace :: DecideWinner {
97+ stage,
98+ step,
99+ group_id,
100+ proposed_winner_info,
101+ children_winner,
102+ } => {
103+ write ! (
104+ f,
105+ "step={}/{} decide_winner group_id={} proposed_winner_expr={} children_winner_exprs=[{}] total_weighted_cost={}" ,
106+ stage, step, group_id, proposed_winner_info. expr_id, children_winner. iter( ) . join( "," ) , proposed_winner_info. total_weighted_cost
107+ )
108+ }
109+ OptimizerTrace :: ApplyRule {
110+ stage,
111+ step,
112+ group_id,
113+ applied_expr_id,
114+ produced_expr_id,
115+ rule_id,
116+ } => {
117+ write ! (
118+ f,
119+ "step={}/{} apply_rule group_id={} applied_expr_id={} produced_expr_id={} rule_id={}" ,
120+ stage, step, group_id, applied_expr_id, produced_expr_id, rule_id
121+ )
122+ }
123+ }
124+ }
125+ }
126+
127+ #[ derive( Default ) ]
51128pub struct CascadesStats {
52129 pub rule_match_count : HashMap < usize , usize > ,
53130 pub rule_total_bindings : HashMap < usize , usize > ,
@@ -56,6 +133,7 @@ pub struct CascadesStats {
56133 pub optimize_expr_count : usize ,
57134 pub apply_rule_count : usize ,
58135 pub optimize_input_count : usize ,
136+ pub trace : HashMap < GroupId , Vec < OptimizerTrace > > ,
59137}
60138
61139pub struct CascadesOptimizer < T : NodeType , M : Memo < T > = NaiveMemo < T > > {
@@ -70,6 +148,7 @@ pub struct CascadesOptimizer<T: NodeType, M: Memo<T> = NaiveMemo<T>> {
70148 logical_property_builders : Arc < [ Box < dyn LogicalPropertyBuilderAny < T > > ] > ,
71149 pub ctx : OptimizerContext ,
72150 pub prop : OptimizerProperties ,
151+ stage : usize ,
73152}
74153
75154/// `RelNode` only contains the representation of the plan nodes. Sometimes, we need more context,
@@ -137,6 +216,7 @@ impl<T: NodeType> CascadesOptimizer<T, NaiveMemo<T>> {
137216 prop,
138217 stats : CascadesStats :: default ( ) ,
139218 disabled_rules : HashSet :: new ( ) ,
219+ stage : 0 ,
140220 }
141221 }
142222
@@ -163,14 +243,6 @@ impl<T: NodeType> CascadesOptimizer<T, NaiveMemo<T>> {
163243}
164244
165245impl < T : NodeType , M : Memo < T > > CascadesOptimizer < T , M > {
166- pub fn panic_on_explore_limit ( & mut self , enabled : bool ) {
167- self . prop . panic_on_budget = enabled;
168- }
169-
170- pub fn disable_pruning ( & mut self , enabled : bool ) {
171- self . prop . disable_pruning = enabled;
172- }
173-
174246 pub fn cost ( & self ) -> Arc < dyn CostModel < T , M > > {
175247 self . cost . clone ( )
176248 }
@@ -217,7 +289,7 @@ impl<T: NodeType, M: Memo<T>> CascadesOptimizer<T, M> {
217289 self . disabled_rules . contains ( & rule_id)
218290 }
219291
220- pub fn dump ( & self ) {
292+ pub fn dump ( & self , mut f : impl std :: fmt :: Write ) -> std :: fmt :: Result {
221293 for group_id in self . memo . get_all_group_ids ( ) {
222294 let winner_str = match & self . memo . get_group_info ( group_id) . winner {
223295 Winner :: Impossible => "winner=<impossible>" . to_string ( ) ,
@@ -234,28 +306,41 @@ impl<T: NodeType, M: Memo<T>> CascadesOptimizer<T, M> {
234306 )
235307 }
236308 } ;
237- println ! ( "group_id={} {}" , group_id, winner_str) ;
309+ writeln ! ( f , "group_id={} {}" , group_id, winner_str) ? ;
238310 let group = self . memo . get_group ( group_id) ;
239311 for ( id, property) in self . logical_property_builders . iter ( ) . enumerate ( ) {
240- println ! (
312+ writeln ! (
313+ f,
241314 " {}={}" ,
242315 property. property_name( ) ,
243316 group. properties[ id] . as_ref( )
244- )
317+ ) ? ;
245318 }
246319 let mut all_predicates = BTreeSet :: new ( ) ;
247320 for expr_id in self . memo . get_all_exprs_in_group ( group_id) {
248321 let memo_node = self . memo . get_expr_memoed ( expr_id) ;
249322 for pred in & memo_node. predicates {
250323 all_predicates. insert ( * pred) ;
251324 }
252- println ! ( " expr_id={} | {}" , expr_id, memo_node) ;
325+ writeln ! ( f , " expr_id={} | {}" , expr_id, memo_node) ? ;
253326 }
254327 for pred in all_predicates {
255- println ! ( " {}={}" , pred, self . memo. get_pred( pred) ) ;
328+ writeln ! ( f, " {}={}" , pred, self . memo. get_pred( pred) ) ?;
329+ }
330+ let mut traces = Vec :: new ( ) ;
331+ for ( that_group_id, trace) in & self . stats . trace {
332+ if self . memo . reduce_group ( * that_group_id) == group_id {
333+ traces. extend ( trace. iter ( ) ) ;
334+ }
335+ }
336+ traces. sort_by_key ( |x| x. stage_step ( ) ) ;
337+ for t in traces {
338+ writeln ! ( f, " {}" , t) ?;
256339 }
257340 }
341+ Ok ( ( ) )
258342 }
343+
259344 /// Optimize a `RelNode`.
260345 pub fn step_optimize_rel ( & mut self , root_rel : ArcPlanNode < T > ) -> Result < GroupId > {
261346 trace ! ( event = "step_optimize_rel" , rel = %root_rel) ;
@@ -287,15 +372,18 @@ impl<T: NodeType, M: Memo<T>> CascadesOptimizer<T, M> {
287372 }
288373 } ) ;
289374 if res. is_err ( ) && cfg ! ( debug_assertions) {
290- self . dump ( ) ;
375+ let mut buf = String :: new ( ) ;
376+ self . dump ( & mut buf) . unwrap ( ) ;
377+ eprintln ! ( "{}" , buf) ;
291378 }
292379 res
293380 }
294381
295382 pub fn fire_optimize_tasks ( & mut self , group_id : GroupId ) -> Result < ( ) > {
296383 use pollster:: FutureExt as _;
297384 trace ! ( event = "fire_optimize_tasks" , root_group_id = %group_id) ;
298- let mut task = TaskContext :: new ( self ) ;
385+ self . stage += 1 ;
386+ let mut task = TaskContext :: new ( self , self . stage ) ;
299387 // 32MB stack for the optimization process, TODO: reduce memory footprint
300388 stacker:: grow ( 32 * 1024 * 1024 , || {
301389 let fut: Pin < Box < dyn Future < Output = ( ) > > > = Box :: pin ( task. fire_optimize ( group_id) ) ;
0 commit comments