Skip to content

Commit

Permalink
Merge pull request #306 from korpling/feature/transitive-coverage
Browse files Browse the repository at this point in the history
Add support for coverage edges between spans and segmentation nodes
  • Loading branch information
thomaskrause authored Aug 20, 2024
2 parents 781dd89 + bc93aad commit 2ff9a4f
Show file tree
Hide file tree
Showing 10 changed files with 552 additions and 64 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/release_capi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ on:
release:
types: [published]
workflow_run:
workflows:
workflows:
- Release
types:
types:
- completed
pull_request:
types: [labeled]
Expand All @@ -20,6 +20,7 @@ jobs:
uses: pozetroninc/[email protected]
with:
repository: ${{ github.repository }}
token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v2
- uses: actions-rs/[email protected]
with:
Expand Down Expand Up @@ -47,6 +48,7 @@ jobs:
uses: pozetroninc/[email protected]
with:
repository: ${{ github.repository }}
token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v2
- uses: actions-rs/[email protected]
with:
Expand Down Expand Up @@ -74,6 +76,7 @@ jobs:
uses: pozetroninc/[email protected]
with:
repository: ${{ github.repository }}
token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v2
- uses: actions-rs/[email protected]
with:
Expand Down
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Added support for coverage edges between span nodes and segmentation nodes when
calculating the AQL model index.

### Fixed

- Do not use recursion to calculate the indirect coverage edges in the model
index, since this could fail for deeply nested structures.

## [3.3.3] - 2024-07-12

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion cli/src/bin/annis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ impl AnnisRunner {
let readline = rl.readline(&prompt);
match readline {
Ok(line) => {
rl.add_history_entry(&line.clone());
rl.add_history_entry(line.clone());
if !self.exec(&line) {
break;
}
Expand Down
109 changes: 58 additions & 51 deletions graphannis/src/annis/db/aql/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,61 +111,50 @@ pub struct AQLGlobalStatistics {
fn calculate_inherited_coverage_edges(
graph: &mut AnnotationGraph,
n: NodeID,
all_cov_components: &[AnnotationComponent],
all_dom_gs: &[Arc<dyn GraphStorage>],
other_cov_gs: &[Arc<dyn GraphStorage>],
all_text_coverage_components: &[AnnotationComponent],
inherited_cov_component: &AnnotationComponent,
) -> std::result::Result<FxHashSet<NodeID>, ComponentTypeError> {
let mut directly_covered_token = FxHashSet::default();

for c in all_cov_components.iter() {
if let Some(gs) = graph.get_graphstorage_as_ref(c) {
let out: Result<Vec<u64>, graphannis_core::errors::GraphAnnisCoreError> =
gs.get_outgoing_edges(n).collect();
directly_covered_token.extend(out?);
}
}

if directly_covered_token.is_empty() {
let has_token_anno = graph
.get_node_annos()
.get_value_for_item(&n, &TOKEN_KEY)?
.is_some();
if has_token_anno {
// Even though a token technically does not cover itself, we need to abort the recursion
// with the base case
directly_covered_token.insert(n);
// Iterate over all nodes that are somehow covered (by coverage or
// dominance edges) starting from the given node.
let all_text_cov_components_gs: Vec<_> = all_text_coverage_components
.iter()
.filter_map(|c| graph.get_graphstorage_as_ref(c))
.map(|gs| gs.as_edgecontainer())
.collect();

let all_text_cov_components_combined = UnionEdgeContainer::new(all_text_cov_components_gs);

let mut covered_token = FxHashSet::default();
{
let tok_helper = TokenHelper::new(graph)?;
for step in CycleSafeDFS::new(&all_text_cov_components_combined, n, 1, usize::MAX) {
let step = step?;
if tok_helper.is_token(step.node)? {
covered_token.insert(step.node);
}
}
}
};

let mut indirectly_covered_token = FxHashSet::default();
// recursively get the covered token from all children connected by a dominance relation
for dom_gs in all_dom_gs {
for out in dom_gs.get_outgoing_edges(n) {
let out = out?;
indirectly_covered_token.extend(calculate_inherited_coverage_edges(
graph,
out,
all_cov_components,
all_dom_gs,
)?);
// Connect all non-token nodes to the covered token nodes if no such direct coverage already exists
let mut direct_coverage_targets = FxHashSet::default();
for gs in other_cov_gs.iter() {
for target in gs.get_outgoing_edges(n) {
direct_coverage_targets.insert(target?);
}
}
let inherited_gs_cov = graph.get_or_create_writable(inherited_cov_component)?;

if let Ok(gs_cov) = graph.get_or_create_writable(&AnnotationComponent::new(
AnnotationComponentType::Coverage,
ANNIS_NS.into(),
"inherited-coverage".into(),
)) {
// Ignore all already directly covered token when creating the inherited coverage edges
for t in indirectly_covered_token.difference(&directly_covered_token) {
gs_cov.add_edge(Edge {
for target in &covered_token {
if n != *target && !direct_coverage_targets.contains(target) {
inherited_gs_cov.add_edge(Edge {
source: n,
target: *t,
target: *target,
})?;
}
}

directly_covered_token.extend(indirectly_covered_token);
Ok(directly_covered_token)
Ok(covered_token)
}

pub struct AQLUpdateGraphIndex {
Expand Down Expand Up @@ -274,19 +263,37 @@ impl AQLUpdateGraphIndex {
) -> std::result::Result<(), ComponentTypeError> {
self.clear_left_right_token(graph)?;

let all_cov_components =
graph.get_all_components(Some(AnnotationComponentType::Coverage), None);
let all_dom_gs: Vec<Arc<dyn GraphStorage>> = graph
.get_all_components(Some(AnnotationComponentType::Dominance), Some(""))
let inherited_cov_component = AnnotationComponent::new(
AnnotationComponentType::Coverage,
ANNIS_NS.into(),
"inherited-coverage".into(),
);
let all_cov_components: Vec<_> = graph
.get_all_components(Some(AnnotationComponentType::Coverage), None)
.into_iter()
.filter_map(|c| graph.get_graphstorage(&c))
.filter(|c| c != &inherited_cov_component)
.collect();

let all_cov_gs: Vec<_> = all_cov_components
.iter()
.filter_map(|c| graph.get_graphstorage(c))
.collect();

let all_dom_components =
graph.get_all_components(Some(AnnotationComponentType::Dominance), None);
let all_text_coverage_components: Vec<AnnotationComponent> =
[all_cov_components, all_dom_components].concat();

// go over each node and calculate the left-most and right-most token
for invalid in self.invalid_nodes.iter()? {
let (n, _) = invalid?;
let covered_token =
calculate_inherited_coverage_edges(graph, n, &all_cov_components, &all_dom_gs)?;
let covered_token = calculate_inherited_coverage_edges(
graph,
n,
&all_cov_gs,
&all_text_coverage_components,
&inherited_cov_component,
)?;
self.calculate_token_alignment(
graph,
n,
Expand Down
Loading

0 comments on commit 2ff9a4f

Please sign in to comment.