@@ -24,7 +24,7 @@ struct BaseEdgeOpSpec {
2424 pub edge_anno : Option < EdgeAnnoSearchSpec > ,
2525 pub is_reflexive : bool ,
2626 pub op_str : Option < String > ,
27- pub check_cost_for_inverse_operator : bool ,
27+ pub inverse_operator_needs_cost_check : bool ,
2828}
2929
3030struct BaseEdgeOp {
@@ -61,24 +61,30 @@ fn calculate_max_node_estimate(
6161 gs : & Vec < Arc < dyn GraphStorage > > ,
6262 inverse : bool ,
6363) -> Result < usize > {
64- let all_part_of_components = spec
64+ let all_components_are_partof = spec
6565 . components
6666 . iter ( )
6767 . all ( |c| c. get_type ( ) == AnnotationComponentType :: PartOf ) ;
68- let max_nodes_estimate = if all_part_of_components && gs. len ( ) == 1 {
69- // PartOf components have a very skewed distribution of root nodes
70- // vs. the actual possible targets, thus do not use all nodes as
71- // population but only the non-roots.
68+ let max_nodes_estimate = if all_components_are_partof && gs. len ( ) == 1 {
69+ // PartOf components have a very skewed distribution of root nodes vs.
70+ // the actual possible targets, thus do not use all nodes as population
71+ // but only the non-roots. We can only use this formula for the actual
72+ // @* operator, but not the inverted one.
7273 if !inverse && let Some ( stats) = gs[ 0 ] . get_statistics ( ) {
7374 stats. nodes - stats. root_nodes
7475 } else {
75- // Fallback to guessing by using the node type
76- db. get_node_annos ( ) . guess_max_count (
77- Some ( & NODE_TYPE_KEY . ns ) ,
78- & NODE_TYPE_KEY . name ,
79- "corpus" ,
80- "datasource" ,
81- ) ?
76+ // Fallback to guessing how many nodes have the node type "corpus"
77+ // or "datasource" and thus could be reachable as RHS in a worst case
78+ // scenario. Since a node can't be part of itself, subtract 1 for
79+ // the node on the LHS.
80+ db. get_node_annos ( )
81+ . guess_max_count (
82+ Some ( & NODE_TYPE_KEY . ns ) ,
83+ & NODE_TYPE_KEY . name ,
84+ "corpus" ,
85+ "datasource" ,
86+ ) ?
87+ . saturating_sub ( 1 )
8288 }
8389 } else {
8490 db. get_node_annos ( ) . guess_max_count (
@@ -303,7 +309,7 @@ impl BinaryOperatorBase for BaseEdgeOp {
303309 // don't provide an inverse operator, because the plans would not
304310 // account for the different costs
305311 for g in & self . gs {
306- if self . spec . check_cost_for_inverse_operator && !g. inverse_has_same_cost ( ) {
312+ if self . spec . inverse_operator_needs_cost_check && !g. inverse_has_same_cost ( ) {
307313 return Ok ( None ) ;
308314 }
309315 if let Some ( stat) = g. get_statistics ( ) {
@@ -640,7 +646,7 @@ impl BinaryOperatorSpec for DominanceSpec {
640646 dist : self . dist . clone ( ) ,
641647 edge_anno : self . edge_anno . clone ( ) ,
642648 is_reflexive : true ,
643- check_cost_for_inverse_operator : true ,
649+ inverse_operator_needs_cost_check : true ,
644650 } ;
645651 base. create_operator ( db, cost_estimate)
646652 }
@@ -692,7 +698,7 @@ impl BinaryOperatorSpec for PointingSpec {
692698 edge_anno : self . edge_anno . clone ( ) ,
693699 is_reflexive : true ,
694700 op_str : Some ( op_str) ,
695- check_cost_for_inverse_operator : true ,
701+ inverse_operator_needs_cost_check : true ,
696702 } ;
697703 base. create_operator ( db, cost_estimate)
698704 }
@@ -737,7 +743,7 @@ impl BinaryOperatorSpec for PartOfSubCorpusSpec {
737743 ANNIS_NS . into( ) ,
738744 "" . into( ) ,
739745 ) ] ;
740- let check_cost_for_inverse_operator = if let Some ( ( _, rhs) ) = cost_estimate {
746+ let inverse_operator_needs_cost_check = if let Some ( ( _, rhs) ) = cost_estimate {
741747 // Only ignore different cost and risk a nested loop join if the RHS
742748 // has an estimated output size of 1 and thus a nested loop is not
743749 // as costly.
@@ -751,7 +757,7 @@ impl BinaryOperatorSpec for PartOfSubCorpusSpec {
751757 dist : self . dist . clone ( ) ,
752758 edge_anno : None ,
753759 is_reflexive : false ,
754- check_cost_for_inverse_operator ,
760+ inverse_operator_needs_cost_check ,
755761 } ;
756762
757763 base. create_operator ( db, cost_estimate)
0 commit comments