Skip to content

Commit d68b615

Browse files
committed
Fix the test: a node can't be part of itself so we can subtract 1 from the estimated worst case scenario
1 parent f6a9e73 commit d68b615

File tree

2 files changed

+27
-18
lines changed

2 files changed

+27
-18
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
- `execute_query_on_graph` now skips nodes that are not part of the output
1111
(optional negated nodes) and makes sure the resulting iterator only produces
1212
unique results.
13+
- Queries with `@` could have extremly slow execution plans when the query
14+
planner introduces an inverted `@` operator and miscalculated the cost
15+
compared to the non-inverted version.
1316

1417
## [4.0.0] - 2025-08-20
1518

graphannis/src/annis/db/aql/operators/edge_op.rs

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ struct BaseEdgeOpSpec {
2424
pub edge_anno: Option<EdgeAnnoSearchSpec>,
2525
pub is_reflexive: bool,
2626
pub op_str: Option<String>,
27-
pub check_cost_for_inverse_operator: bool,
27+
pub inverse_operator_needs_cost_check: bool,
2828
}
2929

3030
struct BaseEdgeOp {
@@ -61,24 +61,30 @@ fn calculate_max_node_estimate(
6161
gs: &Vec<Arc<dyn GraphStorage>>,
6262
inverse: bool,
6363
) -> Result<usize> {
64-
let all_part_of_components = spec
64+
let all_components_are_partof = spec
6565
.components
6666
.iter()
6767
.all(|c| c.get_type() == AnnotationComponentType::PartOf);
68-
let max_nodes_estimate = if all_part_of_components && gs.len() == 1 {
69-
// PartOf components have a very skewed distribution of root nodes
70-
// vs. the actual possible targets, thus do not use all nodes as
71-
// population but only the non-roots.
68+
let max_nodes_estimate = if all_components_are_partof && gs.len() == 1 {
69+
// PartOf components have a very skewed distribution of root nodes vs.
70+
// the actual possible targets, thus do not use all nodes as population
71+
// but only the non-roots. We can only use this formula for the actual
72+
// @* operator, but not the inverted one.
7273
if !inverse && let Some(stats) = gs[0].get_statistics() {
7374
stats.nodes - stats.root_nodes
7475
} else {
75-
// Fallback to guessing by using the node type
76-
db.get_node_annos().guess_max_count(
77-
Some(&NODE_TYPE_KEY.ns),
78-
&NODE_TYPE_KEY.name,
79-
"corpus",
80-
"datasource",
81-
)?
76+
// Fallback to guessing how many nodes have the node type "corpus"
77+
// or "datasource" and thus could be reachable as RHS in a worst case
78+
// scenario. Since a node can't be part of itself, subtract 1 for
79+
// the node on the LHS.
80+
db.get_node_annos()
81+
.guess_max_count(
82+
Some(&NODE_TYPE_KEY.ns),
83+
&NODE_TYPE_KEY.name,
84+
"corpus",
85+
"datasource",
86+
)?
87+
.saturating_sub(1)
8288
}
8389
} else {
8490
db.get_node_annos().guess_max_count(
@@ -303,7 +309,7 @@ impl BinaryOperatorBase for BaseEdgeOp {
303309
// don't provide an inverse operator, because the plans would not
304310
// account for the different costs
305311
for g in &self.gs {
306-
if self.spec.check_cost_for_inverse_operator && !g.inverse_has_same_cost() {
312+
if self.spec.inverse_operator_needs_cost_check && !g.inverse_has_same_cost() {
307313
return Ok(None);
308314
}
309315
if let Some(stat) = g.get_statistics() {
@@ -640,7 +646,7 @@ impl BinaryOperatorSpec for DominanceSpec {
640646
dist: self.dist.clone(),
641647
edge_anno: self.edge_anno.clone(),
642648
is_reflexive: true,
643-
check_cost_for_inverse_operator: true,
649+
inverse_operator_needs_cost_check: true,
644650
};
645651
base.create_operator(db, cost_estimate)
646652
}
@@ -692,7 +698,7 @@ impl BinaryOperatorSpec for PointingSpec {
692698
edge_anno: self.edge_anno.clone(),
693699
is_reflexive: true,
694700
op_str: Some(op_str),
695-
check_cost_for_inverse_operator: true,
701+
inverse_operator_needs_cost_check: true,
696702
};
697703
base.create_operator(db, cost_estimate)
698704
}
@@ -737,7 +743,7 @@ impl BinaryOperatorSpec for PartOfSubCorpusSpec {
737743
ANNIS_NS.into(),
738744
"".into(),
739745
)];
740-
let check_cost_for_inverse_operator = if let Some((_, rhs)) = cost_estimate {
746+
let inverse_operator_needs_cost_check = if let Some((_, rhs)) = cost_estimate {
741747
// Only ignore different cost and risk a nested loop join if the RHS
742748
// has an estimated output size of 1 and thus a nested loop is not
743749
// as costly.
@@ -751,7 +757,7 @@ impl BinaryOperatorSpec for PartOfSubCorpusSpec {
751757
dist: self.dist.clone(),
752758
edge_anno: None,
753759
is_reflexive: false,
754-
check_cost_for_inverse_operator,
760+
inverse_operator_needs_cost_check,
755761
};
756762

757763
base.create_operator(db, cost_estimate)

0 commit comments

Comments
 (0)