From 2c360bfb1f8d2ebf1252379777be6254d83c505d Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Thu, 18 Apr 2024 16:13:12 +0200 Subject: [PATCH 01/96] Added PathSearch class --- src/engine/PathSearch.cpp | 14 ++++++++++++ src/engine/PathSearch.h | 45 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 src/engine/PathSearch.cpp create mode 100644 src/engine/PathSearch.h diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp new file mode 100644 index 0000000000..a547cc4c93 --- /dev/null +++ b/src/engine/PathSearch.cpp @@ -0,0 +1,14 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#include "PathSearch.h" + +#include + +// _____________________________________________________________________________ +PathSearch::PathSearch(QueryExecutionContext* qec, + std::shared_ptr child) + : Operation(qec), subtree_(child) { + AD_CORRECTNESS_CHECK(qec != nullptr); +} diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h new file mode 100644 index 0000000000..beeb49bb7a --- /dev/null +++ b/src/engine/PathSearch.h @@ -0,0 +1,45 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#include +#include +#include + +#include "engine/QueryExecutionContext.h" + +struct Edge {}; + +struct Path { + std::vector edges; +}; + +class PathSearch : public Operation { + std::shared_ptr subtree_; + boost::adjacency_list<> graph_; + + public: + PathSearch(QueryExecutionContext* qec, + std::shared_ptr child); + + std::vector getChildren() override; + + string getCacheKeyImpl() const override; + string getDescriptor() const override; + size_t getResultWidth() const override; + void setTextLimit(size_t limit) override; + + size_t getCostEstimate() override; + + uint64_t getSizeEstimateBeforeLimit() override; + float getMultiplicity(size_t col) override; + bool knownEmptyResult() override; + + vector resultSortedOn() const override; + + ResultTable computeResult() override; + VariableToColumnMap computeVariableToColumnMap() const override; + + private: + std::vector findPaths(); +}; From b620e53ccaf763e1ae602a071c1fab3b8e67a2eb Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 19 Apr 2024 10:00:23 +0200 Subject: [PATCH 02/96] Added test class for PathSearch --- test/PathSearchTest.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 test/PathSearchTest.cpp diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp new file mode 100644 index 0000000000..ecbb8350b9 --- /dev/null +++ b/test/PathSearchTest.cpp @@ -0,0 +1,25 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#include + +#include "engine/PathSearch.h" +#include "util/IdTestHelpers.h" +#include "util/IndexTestHelpers.h" + +using ad_utility::testing::getQec; +namespace { +auto V = ad_utility::testing::VocabId; +using Vars = std::vector>; + +} // namespace + +TEST(PathSearchTest, constructor) { + auto qec = getQec(); + PathSearch p = PathSearch(qec, nullptr); +} + +TEST(PathSearchTest, findPaths) {} + +TEST(PathSearchTest, buildGraph) {} From c8f4c28a160961208ea0f27f72007263093ef0a0 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 19 Apr 2024 10:00:39 +0200 Subject: [PATCH 03/96] Added new sources to CMakeLists --- src/engine/CMakeLists.txt | 2 +- test/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 403e90c342..97ab80ae91 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -12,5 +12,5 @@ add_library(engine Values.cpp Bind.cpp Minus.cpp RuntimeInformation.cpp CheckUsePatternTrick.cpp VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp - idTable/CompressedExternalIdTable.h) + idTable/CompressedExternalIdTable.h PathSearch.cpp) qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d8d77d8d1b..ab7eff1300 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -151,6 +151,8 @@ addLinkAndDiscoverTest(IdTableTest util) addLinkAndDiscoverTest(TransitivePathTest engine) +addLinkAndDiscoverTest(PathSearchTest engine) + addLinkAndDiscoverTest(BatchedPipelineTest) addLinkAndDiscoverTest(TupleHelpersTest) From 23d5eb5ef304b6aa4eb2f531764c5d1d6803fb1b Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 19 Apr 2024 10:01:10 +0200 Subject: [PATCH 04/96] Added boilerplate code for 
override --- src/engine/PathSearch.cpp | 75 +++++++++++++++++++++++++++++++++++++-- src/engine/PathSearch.h | 18 +++++++--- 2 files changed, 87 insertions(+), 6 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index a547cc4c93..960aa7b213 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -5,10 +5,81 @@ #include "PathSearch.h" #include +#include + +#include "util/Exception.h" // _____________________________________________________________________________ PathSearch::PathSearch(QueryExecutionContext* qec, - std::shared_ptr child) - : Operation(qec), subtree_(child) { + std::shared_ptr subtree) + : Operation(qec), subtree_(subtree), graph_() { AD_CORRECTNESS_CHECK(qec != nullptr); } + +// _____________________________________________________________________________ +std::vector PathSearch::getChildren() { + std::vector res; + res.push_back(subtree_.get()); + return res; +}; + +// _____________________________________________________________________________ +std::string PathSearch::getCacheKeyImpl() const { + std::ostringstream os; + AD_CORRECTNESS_CHECK(subtree_); + os << "Subtree:\n" << subtree_->getCacheKey() << '\n'; + return std::move(os).str(); +}; + +// _____________________________________________________________________________ +string PathSearch::getDescriptor() const { + std::ostringstream os; + os << "PathSearch"; + return std::move(os).str(); +}; + +// _____________________________________________________________________________ +size_t PathSearch::getResultWidth() const { return resultWidth_; }; + +// _____________________________________________________________________________ +void PathSearch::setTextLimit(size_t limit) { + for (auto child : getChildren()) { + child->setTextLimit(limit); + } +}; + +// _____________________________________________________________________________ +size_t PathSearch::getCostEstimate() { + // TODO: Figure out a smart way to estimate cost + return 1000; +}; + +// 
_____________________________________________________________________________ +uint64_t PathSearch::getSizeEstimateBeforeLimit() { + // TODO: Figure out a smart way to estimate size + return 1000; +}; + +// _____________________________________________________________________________ +float PathSearch::getMultiplicity(size_t col) { + (void)col; + return 1; +}; + +// _____________________________________________________________________________ +bool PathSearch::knownEmptyResult() { return subtree_->knownEmptyResult(); }; + +// _____________________________________________________________________________ +vector PathSearch::resultSortedOn() const { return {}; }; + +// _____________________________________________________________________________ +ResultTable PathSearch::computeResult() { + shared_ptr subRes = subtree_->getResult(); + IdTable idTable{allocator()}; + return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; +}; + +// _____________________________________________________________________________ +VariableToColumnMap PathSearch::computeVariableToColumnMap() const { + return variableColumns_; +}; diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index beeb49bb7a..ef23893134 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -3,10 +3,16 @@ // Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) #include +#include #include +#include #include -#include "engine/QueryExecutionContext.h" +#include "engine/Operation.h" +#include "engine/QueryExecutionTree.h" +#include "global/Id.h" + +typedef boost::adjacency_list Graph; struct Edge {}; @@ -16,11 +22,14 @@ struct Path { class PathSearch : public Operation { std::shared_ptr subtree_; - boost::adjacency_list<> graph_; + size_t resultWidth_; + VariableToColumnMap variableColumns_; + + Graph graph_; public: PathSearch(QueryExecutionContext* qec, - std::shared_ptr child); + std::shared_ptr subtree); std::vector getChildren() override; @@ -41,5 +50,6 @@ class 
PathSearch : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; private: - std::vector findPaths(); + void buildGraph(std::span startNodes, std::span endNodes); + std::vector findPaths() const; }; From f59ad6b8b455adb79441b739eb5d1431f1c96600 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 23 Apr 2024 14:24:13 +0200 Subject: [PATCH 05/96] First draft of path search --- src/engine/PathSearch.cpp | 73 ++++++++++++++++++++++++++++++- src/engine/PathSearch.h | 90 ++++++++++++++++++++++++++++++++++++--- test/PathSearchTest.cpp | 18 ++++++-- 3 files changed, 171 insertions(+), 10 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 960aa7b213..618affdd4e 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -4,15 +4,21 @@ #include "PathSearch.h" +#include #include #include +#include "engine/CallFixedSize.h" #include "util/Exception.h" // _____________________________________________________________________________ PathSearch::PathSearch(QueryExecutionContext* qec, - std::shared_ptr subtree) - : Operation(qec), subtree_(subtree), graph_() { + std::shared_ptr subtree, + PathSearchConfiguration config) + : Operation(qec), + subtree_(std::move(subtree)), + graph_(), + config_(std::move(config)) { AD_CORRECTNESS_CHECK(qec != nullptr); } @@ -76,6 +82,16 @@ vector PathSearch::resultSortedOn() const { return {}; }; ResultTable PathSearch::computeResult() { shared_ptr subRes = subtree_->getResult(); IdTable idTable{allocator()}; + + const IdTable& dynSub = subRes->idTable(); + buildGraph(dynSub.getColumn(config_.startColumn_), + dynSub.getColumn(config_.endColumn_)); + + auto paths = findPaths(); + + CALL_FIXED_SIZE(std::array{dynSub.numColumns()}, &PathSearch::normalizePaths, + this, idTable, paths); + return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; }; @@ -83,3 +99,56 @@ ResultTable PathSearch::computeResult() { VariableToColumnMap 
PathSearch::computeVariableToColumnMap() const { return variableColumns_; }; + +// _____________________________________________________________________________ +void PathSearch::buildGraph(std::span startNodes, + std::span endNodes) { + AD_CORRECTNESS_CHECK(startNodes.size() == endNodes.size()); + for (size_t i = 0; i < startNodes.size(); i++) { + boost::add_edge(startNodes[i].getBits(), endNodes[i].getBits(), graph_); + } +} + +// _____________________________________________________________________________ +std::vector PathSearch::findPaths() const { + switch (config_.algorithm_) { + case ALL_PATHS: + return allPaths(); + default: + AD_FAIL(); + } +} + +// _____________________________________________________________________________ +std::vector PathSearch::allPaths() const { + std::vector paths; + Path path; + AllPathsVisitor vis(config_.source_.getBits(), path, paths); + boost::depth_first_search(graph_, boost::visitor(vis)); + return paths; +} + +// _____________________________________________________________________________ +template +void PathSearch::normalizePaths(IdTable& tableDyn, + std::vector paths) const { + IdTableStatic table = std::move(tableDyn).toStatic(); + + size_t rowIndex = 0; + for (size_t pathIndex = 0; pathIndex < paths.size(); pathIndex++) { + auto path = paths[pathIndex]; + for (size_t edgeIndex = 0; edgeIndex < path.size(); edgeIndex++) { + auto edge = path.edges_[edgeIndex]; + auto [start, end] = edge.toIds(); + table.emplace_back(); + table(rowIndex, config_.startColumn_) = start; + table(rowIndex, config_.endColumn_) = end; + table(rowIndex, config_.pathIndexColumn_) = Id::makeFromInt(pathIndex); + table(rowIndex, config_.edgeIndexColumn_) = Id::makeFromInt(edgeIndex); + + rowIndex++; + } + } + + tableDyn = std::move(table).toDynamic(); +} diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index ef23893134..a090a4340e 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -3,8 +3,11 @@ // Author: Johannes 
Herrmann (johannes.r.herrmann(at)gmail.com) #include +#include #include +#include #include +#include #include #include @@ -12,12 +15,84 @@ #include "engine/QueryExecutionTree.h" #include "global/Id.h" -typedef boost::adjacency_list Graph; +struct Edge { + uint64_t start_; + uint64_t end_; -struct Edge {}; + std::pair toIds() const { + return {Id::fromBits(start_), Id::fromBits(end_)}; + } +}; struct Path { - std::vector edges; + std::vector edges_; + + bool empty() const { return edges_.empty(); } + + size_t size() const { return edges_.size(); } + + std::optional firstNode() const { + return !empty() ? std::optional{edges_.front().start_} + : std::nullopt; + } + + std::optional lastNode() const { + return !empty() ? std::optional{edges_.back().end_} + : std::nullopt; + } +}; + +typedef boost::adjacency_list + Graph; +typedef boost::graph_traits::vertex_descriptor VertexDescriptor; +typedef boost::graph_traits::edge_descriptor EdgeDescriptor; + +class AllPathsVisitor : public boost::default_dfs_visitor { + VertexDescriptor target_; + VertexDescriptor lastVertex_; + Path& currentPath_; + std::vector& allPaths_; + + public: + AllPathsVisitor(VertexDescriptor target, Path& path, std::vector& paths) + : target_(target), currentPath_(path), allPaths_(paths) {} + + void discover_vertex(VertexDescriptor vertex, const Graph& graph) { + (void)graph; + lastVertex_ = vertex; + } + + void tree_edge(EdgeDescriptor edgeDesc, const Graph& graph) { + const Edge& edge = boost::get(boost::edge_bundle, graph)[edgeDesc]; + currentPath_.edges_.push_back(edge); + } + + void finish_vertex(VertexDescriptor vertex, const Graph& graph) { + (void)graph; + if (vertex == target_) { + allPaths_.push_back(currentPath_); + } + if (!currentPath_.empty()) { + if (currentPath_.lastNode() == vertex) { + currentPath_.edges_.pop_back(); + } + } + } +}; + +enum PathSearchAlgorithm { + ALL_PATHS, +}; + +struct PathSearchConfiguration { + PathSearchAlgorithm algorithm_; + Id source_; + Id destination_; + 
size_t startColumn_; + size_t endColumn_; + size_t pathIndexColumn_; + size_t edgeIndexColumn_; }; class PathSearch : public Operation { @@ -26,10 +101,12 @@ class PathSearch : public Operation { VariableToColumnMap variableColumns_; Graph graph_; + PathSearchConfiguration config_; public: PathSearch(QueryExecutionContext* qec, - std::shared_ptr subtree); + std::shared_ptr subtree, + PathSearchConfiguration config); std::vector getChildren() override; @@ -50,6 +127,9 @@ class PathSearch : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; private: - void buildGraph(std::span startNodes, std::span endNodes); + void buildGraph(std::span startNodes, std::span endNodes); std::vector findPaths() const; + std::vector allPaths() const; + template + void normalizePaths(IdTable& tableDyn, std::vector paths) const; }; diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index ecbb8350b9..e510422089 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -5,6 +5,8 @@ #include #include "engine/PathSearch.h" +#include "gmock/gmock.h" +#include "util/IdTableHelpers.h" #include "util/IdTestHelpers.h" #include "util/IndexTestHelpers.h" @@ -17,9 +19,19 @@ using Vars = std::vector>; TEST(PathSearchTest, constructor) { auto qec = getQec(); - PathSearch p = PathSearch(qec, nullptr); + PathSearchConfiguration config{ALL_PATHS, V(0), V(1), 0, 1, 2, 3}; + PathSearch p = PathSearch(qec, nullptr, config); } -TEST(PathSearchTest, findPaths) {} +TEST(PathSearchTest, singlePath) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({}); -TEST(PathSearchTest, buildGraph) {} + auto qec = getQec(); + PathSearchConfiguration config{ALL_PATHS, V(0), V(4), 0, 1, 2, 3}; + PathSearch p = PathSearch(qec, nullptr, config); + + auto resultTable = p.computeResult(); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} From 
fcff8ba2510ed8f93024df4d951638f6dc66b54c Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 26 Apr 2024 10:39:11 +0200 Subject: [PATCH 06/96] Implemented Path Search using boost --- src/engine/PathSearch.cpp | 42 +++++++++++++++++++++++++++++++++------ src/engine/PathSearch.h | 36 ++++++++++++++++++++++++++------- test/PathSearchTest.cpp | 15 ++++++++++++-- 3 files changed, 78 insertions(+), 15 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 618affdd4e..c606954f91 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -18,7 +18,9 @@ PathSearch::PathSearch(QueryExecutionContext* qec, : Operation(qec), subtree_(std::move(subtree)), graph_(), - config_(std::move(config)) { + config_(std::move(config)), + indexToId_(), + idToIndex_(allocator()) { AD_CORRECTNESS_CHECK(qec != nullptr); } @@ -82,6 +84,7 @@ vector PathSearch::resultSortedOn() const { return {}; }; ResultTable PathSearch::computeResult() { shared_ptr subRes = subtree_->getResult(); IdTable idTable{allocator()}; + idTable.setNumColumns(getResultWidth()); const IdTable& dynSub = subRes->idTable(); buildGraph(dynSub.getColumn(config_.startColumn_), @@ -89,7 +92,7 @@ ResultTable PathSearch::computeResult() { auto paths = findPaths(); - CALL_FIXED_SIZE(std::array{dynSub.numColumns()}, &PathSearch::normalizePaths, + CALL_FIXED_SIZE(std::array{getResultWidth()}, &PathSearch::normalizePaths, this, idTable, paths); return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; @@ -100,12 +103,36 @@ VariableToColumnMap PathSearch::computeVariableToColumnMap() const { return variableColumns_; }; +// _____________________________________________________________________________ +void PathSearch::buildMapping(std::span startNodes, + std::span endNodes) { + auto addNode = [this](const Id node) { + if (idToIndex_.find(node) == idToIndex_.end()) { + idToIndex_[node] = indexToId_.size(); + indexToId_.push_back(node); + } + }; + for (size_t i = 
0; i < startNodes.size(); i++) { + addNode(startNodes[i]); + addNode(endNodes[i]); + } +} + // _____________________________________________________________________________ void PathSearch::buildGraph(std::span startNodes, std::span endNodes) { AD_CORRECTNESS_CHECK(startNodes.size() == endNodes.size()); + buildMapping(startNodes, endNodes); + + while (boost::num_vertices(graph_) < indexToId_.size()) { + boost::add_vertex(graph_); + } + for (size_t i = 0; i < startNodes.size(); i++) { - boost::add_edge(startNodes[i].getBits(), endNodes[i].getBits(), graph_); + auto startIndex = idToIndex_[startNodes[i]]; + auto endIndex = idToIndex_[endNodes[i]]; + Edge edge{startNodes[i].getBits(), endNodes[i].getBits()}; + boost::add_edge(startIndex, endIndex, edge, graph_); } } @@ -123,15 +150,18 @@ std::vector PathSearch::findPaths() const { std::vector PathSearch::allPaths() const { std::vector paths; Path path; - AllPathsVisitor vis(config_.source_.getBits(), path, paths); - boost::depth_first_search(graph_, boost::visitor(vis)); + auto startIndex = idToIndex_.at(config_.source_); + auto targetIndex = idToIndex_.at(config_.target_); + AllPathsVisitor vis(targetIndex, path, paths, indexToId_); + boost::depth_first_search(graph_, + boost::visitor(vis).root_vertex(startIndex)); return paths; } // _____________________________________________________________________________ template void PathSearch::normalizePaths(IdTable& tableDyn, - std::vector paths) const { + std::vector& paths) const { IdTableStatic table = std::move(tableDyn).toStatic(); size_t rowIndex = 0; diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index a090a4340e..210cd0948c 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -9,12 +9,23 @@ #include #include #include +#include #include #include "engine/Operation.h" #include "engine/QueryExecutionTree.h" #include "global/Id.h" +// We deliberately use the `std::` variants of a hash set and hash map because +// `absl`s types are not 
exception safe. +struct HashId { + auto operator()(Id id) const { return std::hash{}(id.getBits()); } +}; + +using Map = std::unordered_map< + Id, size_t, HashId, std::equal_to, + ad_utility::AllocatorWithLimit>>; + struct Edge { uint64_t start_; uint64_t end_; @@ -54,9 +65,15 @@ class AllPathsVisitor : public boost::default_dfs_visitor { Path& currentPath_; std::vector& allPaths_; + const std::vector& indexToId_; + public: - AllPathsVisitor(VertexDescriptor target, Path& path, std::vector& paths) - : target_(target), currentPath_(path), allPaths_(paths) {} + AllPathsVisitor(VertexDescriptor target, Path& path, std::vector& paths, + const std::vector& indexToId) + : target_(target), + currentPath_(path), + allPaths_(paths), + indexToId_(indexToId) {} void discover_vertex(VertexDescriptor vertex, const Graph& graph) { (void)graph; @@ -64,7 +81,7 @@ class AllPathsVisitor : public boost::default_dfs_visitor { } void tree_edge(EdgeDescriptor edgeDesc, const Graph& graph) { - const Edge& edge = boost::get(boost::edge_bundle, graph)[edgeDesc]; + const Edge& edge = graph[edgeDesc]; currentPath_.edges_.push_back(edge); } @@ -74,7 +91,7 @@ class AllPathsVisitor : public boost::default_dfs_visitor { allPaths_.push_back(currentPath_); } if (!currentPath_.empty()) { - if (currentPath_.lastNode() == vertex) { + if (Id::fromBits(currentPath_.lastNode().value()) == indexToId_[vertex]) { currentPath_.edges_.pop_back(); } } @@ -88,7 +105,7 @@ enum PathSearchAlgorithm { struct PathSearchConfiguration { PathSearchAlgorithm algorithm_; Id source_; - Id destination_; + Id target_; size_t startColumn_; size_t endColumn_; size_t pathIndexColumn_; @@ -97,12 +114,15 @@ struct PathSearchConfiguration { class PathSearch : public Operation { std::shared_ptr subtree_; - size_t resultWidth_; + size_t resultWidth_ = 4; VariableToColumnMap variableColumns_; Graph graph_; PathSearchConfiguration config_; + std::vector indexToId_; + Map idToIndex_; + public: PathSearch(QueryExecutionContext* qec, 
std::shared_ptr subtree, @@ -128,8 +148,10 @@ class PathSearch : public Operation { private: void buildGraph(std::span startNodes, std::span endNodes); + void buildMapping(std::span startNodes, + std::span endNodes); std::vector findPaths() const; std::vector allPaths() const; template - void normalizePaths(IdTable& tableDyn, std::vector paths) const; + void normalizePaths(IdTable& tableDyn, std::vector& paths) const; }; diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index e510422089..50cbca9cf1 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -5,6 +5,8 @@ #include #include "engine/PathSearch.h" +#include "engine/QueryExecutionTree.h" +#include "engine/ValuesForTesting.h" #include "gmock/gmock.h" #include "util/IdTableHelpers.h" #include "util/IdTestHelpers.h" @@ -13,6 +15,7 @@ using ad_utility::testing::getQec; namespace { auto V = ad_utility::testing::VocabId; +auto I = ad_utility::testing::IntId; using Vars = std::vector>; } // namespace @@ -25,11 +28,19 @@ TEST(PathSearchTest, constructor) { TEST(PathSearchTest, singlePath) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); - auto expected = makeIdTableFromVector({}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(3), I(0), I(2)}, + {V(3), V(4), I(0), I(3)}, + }); auto qec = getQec(); + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), vars); PathSearchConfiguration config{ALL_PATHS, V(0), V(4), 0, 1, 2, 3}; - PathSearch p = PathSearch(qec, nullptr, config); + PathSearch p = PathSearch(qec, std::move(subtree), config); auto resultTable = p.computeResult(); ASSERT_THAT(resultTable.idTable(), From 92826a9aa6d5f555a98e44ea21cf85e138c1ecf5 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 26 Apr 2024 18:07:46 +0200 Subject: [PATCH 07/96] Simplified visitor, added cycle test --- src/engine/PathSearch.h | 6 ------ 
test/PathSearchTest.cpp | 33 ++++++++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 210cd0948c..a7a0cf52ca 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -61,7 +61,6 @@ typedef boost::graph_traits::edge_descriptor EdgeDescriptor; class AllPathsVisitor : public boost::default_dfs_visitor { VertexDescriptor target_; - VertexDescriptor lastVertex_; Path& currentPath_; std::vector& allPaths_; @@ -75,11 +74,6 @@ class AllPathsVisitor : public boost::default_dfs_visitor { allPaths_(paths), indexToId_(indexToId) {} - void discover_vertex(VertexDescriptor vertex, const Graph& graph) { - (void)graph; - lastVertex_ = vertex; - } - void tree_edge(EdgeDescriptor edgeDesc, const Graph& graph) { const Edge& edge = graph[edgeDesc]; currentPath_.edges_.push_back(edge); diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 50cbca9cf1..e03501c4d5 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -6,6 +6,7 @@ #include "engine/PathSearch.h" #include "engine/QueryExecutionTree.h" +#include "engine/ResultTable.h" #include "engine/ValuesForTesting.h" #include "gmock/gmock.h" #include "util/IdTableHelpers.h" @@ -20,6 +21,16 @@ using Vars = std::vector>; } // namespace +ResultTable performPathSearch(PathSearchConfiguration config, IdTable input, + Vars vars) { + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(input), vars); + PathSearch p = PathSearch(qec, std::move(subtree), config); + + return p.computeResult(); +} + TEST(PathSearchTest, constructor) { auto qec = getQec(); PathSearchConfiguration config{ALL_PATHS, V(0), V(1), 0, 1, 2, 3}; @@ -35,14 +46,26 @@ TEST(PathSearchTest, singlePath) { {V(3), V(4), I(0), I(3)}, }); - auto qec = getQec(); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - auto subtree = ad_utility::makeExecutionTree( - qec, std::move(sub), vars); PathSearchConfiguration 
config{ALL_PATHS, V(0), V(4), 0, 1, 2, 3}; - PathSearch p = PathSearch(qec, std::move(subtree), config); - auto resultTable = p.computeResult(); + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, cycle) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(0), I(0), I(2)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{ALL_PATHS, V(0), V(0), 0, 1, 2, 3}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } From 982708e5f6fe929bfbe1e6770bfff8806dc16e55 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sun, 28 Apr 2024 10:53:36 +0200 Subject: [PATCH 08/96] Added test, fixed cycles --- src/engine/PathSearch.cpp | 3 +-- src/engine/PathSearch.h | 20 ++++++++++++------ test/PathSearchTest.cpp | 44 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index c606954f91..72208a22d8 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -151,8 +151,7 @@ std::vector PathSearch::allPaths() const { std::vector paths; Path path; auto startIndex = idToIndex_.at(config_.source_); - auto targetIndex = idToIndex_.at(config_.target_); - AllPathsVisitor vis(targetIndex, path, paths, indexToId_); + AllPathsVisitor vis(config_.target_, path, paths, indexToId_); boost::depth_first_search(graph_, boost::visitor(vis).root_vertex(startIndex)); return paths; diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index a7a0cf52ca..b6240125ae 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -42,6 +42,8 @@ struct Path { size_t size() const { 
return edges_.size(); } + void push_back(Edge edge) { edges_.push_back(edge); } + std::optional firstNode() const { return !empty() ? std::optional{edges_.front().start_} : std::nullopt; @@ -60,20 +62,29 @@ typedef boost::graph_traits::vertex_descriptor VertexDescriptor; typedef boost::graph_traits::edge_descriptor EdgeDescriptor; class AllPathsVisitor : public boost::default_dfs_visitor { - VertexDescriptor target_; + uint64_t target_; Path& currentPath_; std::vector& allPaths_; const std::vector& indexToId_; public: - AllPathsVisitor(VertexDescriptor target, Path& path, std::vector& paths, + AllPathsVisitor(Id target, Path& path, std::vector& paths, const std::vector& indexToId) - : target_(target), + : target_(target.getBits()), currentPath_(path), allPaths_(paths), indexToId_(indexToId) {} + void examine_edge(EdgeDescriptor edgeDesc, const Graph& graph) { + const Edge& edge = graph[edgeDesc]; + if (edge.end_ == target_) { + auto pathCopy = currentPath_; + pathCopy.push_back(edge); + allPaths_.push_back(pathCopy); + } + } + void tree_edge(EdgeDescriptor edgeDesc, const Graph& graph) { const Edge& edge = graph[edgeDesc]; currentPath_.edges_.push_back(edge); @@ -81,9 +92,6 @@ class AllPathsVisitor : public boost::default_dfs_visitor { void finish_vertex(VertexDescriptor vertex, const Graph& graph) { (void)graph; - if (vertex == target_) { - allPaths_.push_back(currentPath_); - } if (!currentPath_.empty()) { if (Id::fromBits(currentPath_.lastNode().value()) == indexToId_[vertex]) { currentPath_.edges_.pop_back(); diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index e03501c4d5..152ec3f4f6 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -54,6 +54,23 @@ TEST(PathSearchTest, singlePath) { ::testing::UnorderedElementsAreArray(expected)); } +TEST(PathSearchTest, twoPaths) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 2}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), 
I(1)}, + {V(0), V(3), I(1), I(0)}, + {V(3), V(2), I(1), I(1)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{ALL_PATHS, V(0), V(2), 0, 1, 2, 3}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + TEST(PathSearchTest, cycle) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}}); auto expected = makeIdTableFromVector({ @@ -69,3 +86,30 @@ TEST(PathSearchTest, cycle) { ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } + +/** + * Graph: + * + * 2<---1--->3 + * \ ^ / + * \ | / + * > 0 < + */ +TEST(PathSearchTest, twoCycle) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}, {1, 3}, {3, 0}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(0), I(0), I(2)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(3), I(1), I(1)}, + {V(3), V(0), I(1), I(2)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{ALL_PATHS, V(0), V(0), 0, 1, 2, 3}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} From 3c9345e078f47c683ef6028801f47393b9d966da Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sun, 28 Apr 2024 12:02:02 +0200 Subject: [PATCH 09/96] Added pathfinding for multiple targets --- src/engine/PathSearch.cpp | 8 +++- src/engine/PathSearch.h | 12 +++--- test/PathSearchTest.cpp | 83 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 90 insertions(+), 13 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 72208a22d8..e8d48ff70b 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -151,7 +151,13 @@ std::vector PathSearch::allPaths() const { std::vector paths; Path path; auto startIndex = 
idToIndex_.at(config_.source_); - AllPathsVisitor vis(config_.target_, path, paths, indexToId_); + + std::unordered_set targets; + for (auto target : config_.targets_) { + targets.insert(target.getBits()); + } + + AllPathsVisitor vis(targets, path, paths, indexToId_); boost::depth_first_search(graph_, boost::visitor(vis).root_vertex(startIndex)); return paths; diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index b6240125ae..4975aca94b 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -62,23 +62,23 @@ typedef boost::graph_traits::vertex_descriptor VertexDescriptor; typedef boost::graph_traits::edge_descriptor EdgeDescriptor; class AllPathsVisitor : public boost::default_dfs_visitor { - uint64_t target_; + std::unordered_set targets_; Path& currentPath_; std::vector& allPaths_; const std::vector& indexToId_; public: - AllPathsVisitor(Id target, Path& path, std::vector& paths, - const std::vector& indexToId) - : target_(target.getBits()), + AllPathsVisitor(std::unordered_set targets, Path& path, + std::vector& paths, const std::vector& indexToId) + : targets_(std::move(targets)), currentPath_(path), allPaths_(paths), indexToId_(indexToId) {} void examine_edge(EdgeDescriptor edgeDesc, const Graph& graph) { const Edge& edge = graph[edgeDesc]; - if (edge.end_ == target_) { + if (targets_.empty() || targets_.find(edge.end_) != targets_.end()) { auto pathCopy = currentPath_; pathCopy.push_back(edge); allPaths_.push_back(pathCopy); @@ -107,7 +107,7 @@ enum PathSearchAlgorithm { struct PathSearchConfiguration { PathSearchAlgorithm algorithm_; Id source_; - Id target_; + std::vector targets_; size_t startColumn_; size_t endColumn_; size_t pathIndexColumn_; diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 152ec3f4f6..2ef37e82ec 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -33,7 +33,7 @@ ResultTable performPathSearch(PathSearchConfiguration config, IdTable input, TEST(PathSearchTest, constructor) { 
auto qec = getQec(); - PathSearchConfiguration config{ALL_PATHS, V(0), V(1), 0, 1, 2, 3}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(1)}, 0, 1, 2, 3}; PathSearch p = PathSearch(qec, nullptr, config); } @@ -47,14 +47,22 @@ TEST(PathSearchTest, singlePath) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), V(4), 0, 1, 2, 3}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, 0, 1, 2, 3}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } -TEST(PathSearchTest, twoPaths) { +/** + * Graph: + * 0 + * / \ + * 1 < > 3 + * \ / + * > 2 < + */ +TEST(PathSearchTest, twoPathsOneTarget) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 2}}); auto expected = makeIdTableFromVector({ {V(0), V(1), I(0), I(0)}, @@ -64,13 +72,46 @@ TEST(PathSearchTest, twoPaths) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), V(2), 0, 1, 2, 3}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(2)}, 0, 1, 2, 3}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * 0 + * / \ + * 1 < > 3 + * / \ + * 2 < > 4 + */ +TEST(PathSearchTest, twoPathsTwoTargets) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(0), V(3), I(1), I(0)}, + {V(3), V(4), I(1), I(1)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(2), V(4)}, 0, 1, 2, 3}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } +/** + * Graph: + * + * 2<---1 + * \ ^ + * \ | + * > 0 + */ 
TEST(PathSearchTest, cycle) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}}); auto expected = makeIdTableFromVector({ @@ -80,7 +121,7 @@ TEST(PathSearchTest, cycle) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), V(0), 0, 1, 2, 3}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(0)}, 0, 1, 2, 3}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -107,7 +148,37 @@ TEST(PathSearchTest, twoCycle) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), V(0), 0, 1, 2, 3}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(0)}, 0, 1, 2, 3}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * + * 0 + * / \ + * 1 2 + * \ / \ + * 3 4 + */ +TEST(PathSearchTest, allPaths) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 3}, {0, 2}, {2, 3}, {2, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(3), I(1), I(1)}, + {V(0), V(2), I(2), I(0)}, + {V(0), V(2), I(3), I(0)}, + {V(2), V(3), I(3), I(1)}, + {V(0), V(2), I(4), I(0)}, + {V(2), V(4), I(4), I(1)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {}, 0, 1, 2, 3}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), From 5def5ebe8079a95ada19d6f77eda6026d702ca55 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 29 Apr 2024 14:29:57 +0200 Subject: [PATCH 10/96] Added edge properties --- src/engine/PathSearch.cpp | 33 +++++++++++++++++---- src/engine/PathSearch.h | 9 ++++-- test/PathSearchTest.cpp | 62 ++++++++++++++++++++++++++++++++++----- 3 files changed, 88 insertions(+), 16 deletions(-) diff --git a/src/engine/PathSearch.cpp 
b/src/engine/PathSearch.cpp index e8d48ff70b..a322372d57 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -22,6 +22,7 @@ PathSearch::PathSearch(QueryExecutionContext* qec, indexToId_(), idToIndex_(allocator()) { AD_CORRECTNESS_CHECK(qec != nullptr); + resultWidth_ = 4 + config_.edgePropertyIndices_.size(); } // _____________________________________________________________________________ @@ -87,12 +88,18 @@ ResultTable PathSearch::computeResult() { idTable.setNumColumns(getResultWidth()); const IdTable& dynSub = subRes->idTable(); + + std::vector> edgePropertyLists; + for (auto edgePropertyIndex : config_.edgePropertyIndices_) { + edgePropertyLists.push_back(dynSub.getColumn(edgePropertyIndex)); + } + buildGraph(dynSub.getColumn(config_.startColumn_), - dynSub.getColumn(config_.endColumn_)); + dynSub.getColumn(config_.endColumn_), edgePropertyLists); auto paths = findPaths(); - CALL_FIXED_SIZE(std::array{getResultWidth()}, &PathSearch::normalizePaths, + CALL_FIXED_SIZE(std::array{getResultWidth()}, &PathSearch::pathsToResultTable, this, idTable, paths); return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; @@ -120,7 +127,8 @@ void PathSearch::buildMapping(std::span startNodes, // _____________________________________________________________________________ void PathSearch::buildGraph(std::span startNodes, - std::span endNodes) { + std::span endNodes, + std::span> edgePropertyLists) { AD_CORRECTNESS_CHECK(startNodes.size() == endNodes.size()); buildMapping(startNodes, endNodes); @@ -131,7 +139,13 @@ void PathSearch::buildGraph(std::span startNodes, for (size_t i = 0; i < startNodes.size(); i++) { auto startIndex = idToIndex_[startNodes[i]]; auto endIndex = idToIndex_[endNodes[i]]; - Edge edge{startNodes[i].getBits(), endNodes[i].getBits()}; + + std::vector edgeProperties; + for (size_t j = 0; j < edgePropertyLists.size(); j++) { + edgeProperties.push_back(edgePropertyLists[j][i]); + } + + Edge 
edge{startNodes[i].getBits(), endNodes[i].getBits(), edgeProperties}; boost::add_edge(startIndex, endIndex, edge, graph_); } } @@ -165,8 +179,8 @@ std::vector PathSearch::allPaths() const { // _____________________________________________________________________________ template -void PathSearch::normalizePaths(IdTable& tableDyn, - std::vector& paths) const { +void PathSearch::pathsToResultTable(IdTable& tableDyn, + std::vector& paths) const { IdTableStatic table = std::move(tableDyn).toStatic(); size_t rowIndex = 0; @@ -181,6 +195,13 @@ void PathSearch::normalizePaths(IdTable& tableDyn, table(rowIndex, config_.pathIndexColumn_) = Id::makeFromInt(pathIndex); table(rowIndex, config_.edgeIndexColumn_) = Id::makeFromInt(edgeIndex); + for (size_t edgePropertyIndex = 0; + edgePropertyIndex < edge.edgeProperties_.size(); + edgePropertyIndex++) { + table(rowIndex, 4 + edgePropertyIndex) = + edge.edgeProperties_[edgePropertyIndex]; + } + rowIndex++; } } diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 4975aca94b..f62e97f7e3 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -29,6 +29,7 @@ using Map = std::unordered_map< struct Edge { uint64_t start_; uint64_t end_; + std::vector edgeProperties_; std::pair toIds() const { return {Id::fromBits(start_), Id::fromBits(end_)}; @@ -112,11 +113,12 @@ struct PathSearchConfiguration { size_t endColumn_; size_t pathIndexColumn_; size_t edgeIndexColumn_; + std::vector edgePropertyIndices_; }; class PathSearch : public Operation { std::shared_ptr subtree_; - size_t resultWidth_ = 4; + size_t resultWidth_; VariableToColumnMap variableColumns_; Graph graph_; @@ -149,11 +151,12 @@ class PathSearch : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; private: - void buildGraph(std::span startNodes, std::span endNodes); + void buildGraph(std::span startNodes, std::span endNodes, + std::span> edgePropertyLists); void buildMapping(std::span startNodes, std::span 
endNodes); std::vector findPaths() const; std::vector allPaths() const; template - void normalizePaths(IdTable& tableDyn, std::vector& paths) const; + void pathsToResultTable(IdTable& tableDyn, std::vector& paths) const; }; diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 2ef37e82ec..c937b71954 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -33,10 +33,14 @@ ResultTable performPathSearch(PathSearchConfiguration config, IdTable input, TEST(PathSearchTest, constructor) { auto qec = getQec(); - PathSearchConfiguration config{ALL_PATHS, V(0), {V(1)}, 0, 1, 2, 3}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(1)}, 0, 1, 2, 3, {}}; PathSearch p = PathSearch(qec, nullptr, config); } +/** + * Graph: + * 0 -> 1 -> 2 -> 3 -> 4 + */ TEST(PathSearchTest, singlePath) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); auto expected = makeIdTableFromVector({ @@ -47,7 +51,25 @@ TEST(PathSearchTest, singlePath) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, 0, 1, 2, 3}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, 0, 1, 2, 3, {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, singlePathWithProperties) { + auto sub = + makeIdTableFromVector({{0, 1, 10}, {1, 2, 20}, {2, 3, 30}, {3, 4, 40}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(10)}, + {V(1), V(2), I(0), I(1), V(20)}, + {V(2), V(3), I(0), I(2), V(30)}, + {V(3), V(4), I(0), I(3), V(40)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, 0, 1, 2, 3, {2}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -72,7 +94,7 @@ TEST(PathSearchTest, twoPathsOneTarget) { }); 
Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(2)}, 0, 1, 2, 3}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(2)}, 0, 1, 2, 3, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -97,7 +119,7 @@ TEST(PathSearchTest, twoPathsTwoTargets) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(2), V(4)}, 0, 1, 2, 3}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(2), V(4)}, 0, 1, 2, 3, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -121,7 +143,7 @@ TEST(PathSearchTest, cycle) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(0)}, 0, 1, 2, 3}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(0)}, 0, 1, 2, 3, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -148,7 +170,7 @@ TEST(PathSearchTest, twoCycle) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(0)}, 0, 1, 2, 3}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(0)}, 0, 1, 2, 3, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -178,7 +200,33 @@ TEST(PathSearchTest, allPaths) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {}, 0, 1, 2, 3}; + PathSearchConfiguration config{ALL_PATHS, V(0), {}, 0, 1, 2, 3, {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, allPathsWithPropertiesSwitched) { + auto sub = makeIdTableFromVector({{0, 1, 10, 11}, + {1, 3, 20, 21}, + {0, 2, 30, 31}, + {2, 3, 40, 41}, + {2, 4, 50, 51}}); + auto 
expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(11), V(10)}, + {V(0), V(1), I(1), I(0), V(11), V(10)}, + {V(1), V(3), I(1), I(1), V(21), V(20)}, + {V(0), V(2), I(2), I(0), V(31), V(30)}, + {V(0), V(2), I(3), I(0), V(31), V(30)}, + {V(2), V(3), I(3), I(1), V(41), V(40)}, + {V(0), V(2), I(4), I(0), V(31), V(30)}, + {V(2), V(4), I(4), I(1), V(51), V(50)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty1"}, + Variable{"?edgeProperty2"}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {}, 0, 1, 2, 3, {3, 2}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), From fce274e5aa84b528c7e7fc46e8df8c21019bcb8c Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 30 Apr 2024 12:45:28 +0200 Subject: [PATCH 11/96] Added shortest path search --- src/engine/PathSearch.cpp | 32 ++++++++++++++ src/engine/PathSearch.h | 56 +++++++++++++++++++++++- test/PathSearchTest.cpp | 90 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+), 2 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index a322372d57..0901e1e720 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -5,6 +5,7 @@ #include "PathSearch.h" #include +#include #include #include @@ -155,6 +156,8 @@ std::vector PathSearch::findPaths() const { switch (config_.algorithm_) { case ALL_PATHS: return allPaths(); + case SHORTEST_PATHS: + return shortestPaths(); default: AD_FAIL(); } @@ -177,6 +180,35 @@ std::vector PathSearch::allPaths() const { return paths; } +// _____________________________________________________________________________ +std::vector PathSearch::shortestPaths() const { + std::vector paths; + Path path; + auto startIndex = idToIndex_.at(config_.source_); + + std::unordered_set targets; + for (auto target : config_.targets_) { + targets.insert(target.getBits()); + } + std::vector predecessors(indexToId_.size()); + std::vector 
distances(indexToId_.size(), + std::numeric_limits::max()); + + DijkstraAllPathsVisitor vis(startIndex, targets, path, paths, predecessors, + distances); + + auto weight_map = get(&Edge::weight_, graph_); + + boost::dijkstra_shortest_paths( + graph_, startIndex, + boost::visitor(vis) + .weight_map(weight_map) + .predecessor_map(predecessors.data()) + .distance_map(distances.data()) + .distance_compare(std::less_equal())); + return paths; +} + // _____________________________________________________________________________ template void PathSearch::pathsToResultTable(IdTable& tableDyn, diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index f62e97f7e3..57d7792d49 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -2,8 +2,10 @@ // Chair of Algorithms and Data Structures. // Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) +#include #include #include +#include #include #include #include @@ -30,6 +32,7 @@ struct Edge { uint64_t start_; uint64_t end_; std::vector edgeProperties_; + double weight_ = 1; std::pair toIds() const { return {Id::fromBits(start_), Id::fromBits(end_)}; @@ -45,6 +48,8 @@ struct Path { void push_back(Edge edge) { edges_.push_back(edge); } + void reverse() { std::reverse(edges_.begin(), edges_.end()); } + std::optional firstNode() const { return !empty() ? 
std::optional{edges_.front().start_} : std::nullopt; @@ -101,10 +106,56 @@ class AllPathsVisitor : public boost::default_dfs_visitor { } }; -enum PathSearchAlgorithm { - ALL_PATHS, +class DijkstraAllPathsVisitor : public boost::default_dijkstra_visitor { + VertexDescriptor source_; + std::unordered_set targets_; + Path& currentPath_; + std::vector& allPaths_; + std::vector& predecessors_; + std::vector& distances_; + + public: + DijkstraAllPathsVisitor(VertexDescriptor source, + std::unordered_set targets, Path& path, + std::vector& paths, + std::vector& predecessors, + std::vector& distances) + : source_(source), + targets_(std::move(targets)), + currentPath_(path), + allPaths_(paths), + predecessors_(predecessors), + distances_(distances) {} + + const std::vector& getPredecessors() const { + return predecessors_; + } + const std::vector& getDistances() const { return distances_; } + + void edge_relaxed(EdgeDescriptor edgeDesc, const Graph& graph) { + const Edge& edge = graph[edgeDesc]; + if (targets_.empty() || targets_.find(edge.end_) != targets_.end()) { + rebuild_path(target(edgeDesc, graph), graph); + } + } + + void rebuild_path(VertexDescriptor vertex, const Graph& graph) { + currentPath_.edges_.clear(); + for (VertexDescriptor v = vertex; v != source_; v = predecessors_[v]) { + EdgeDescriptor e; + bool exists; + boost::tie(e, exists) = edge(predecessors_[v], v, graph); + if (exists) { + currentPath_.push_back(graph[e]); + } + } + currentPath_.reverse(); + allPaths_.push_back(currentPath_); + } }; +enum PathSearchAlgorithm { ALL_PATHS, SHORTEST_PATHS }; + struct PathSearchConfiguration { PathSearchAlgorithm algorithm_; Id source_; @@ -157,6 +208,7 @@ class PathSearch : public Operation { std::span endNodes); std::vector findPaths() const; std::vector allPaths() const; + std::vector shortestPaths() const; template void pathsToResultTable(IdTable& tableDyn, std::vector& paths) const; }; diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 
c937b71954..883008cf15 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -76,6 +76,41 @@ TEST(PathSearchTest, singlePathWithProperties) { ::testing::UnorderedElementsAreArray(expected)); } +TEST(PathSearchTest, singlePathWithDijkstra) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(3), I(0), I(2)}, + {V(3), V(4), I(0), I(3)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, 0, 1, 2, 3, {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, singlePathWithDijkstraAndProperties) { + auto sub = + makeIdTableFromVector({{0, 1, 10}, {1, 2, 20}, {2, 3, 30}, {3, 4, 40}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(10)}, + {V(1), V(2), I(0), I(1), V(20)}, + {V(2), V(3), I(0), I(2), V(30)}, + {V(3), V(4), I(0), I(3), V(40)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; + PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, 0, 1, 2, 3, {2}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + /** * Graph: * 0 @@ -232,3 +267,58 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } + +/** + * Graph: + * + * 0 + * / \ + * 1 2 + * | | + * | 3 + * \ / + * 4 + */ +TEST(PathSearchTest, singleShortestPath) { + auto sub = makeIdTableFromVector({{0, 1}, {0, 2}, {1, 4}, {2, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(4), I(0), I(1)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + 
PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, 0, 1, 2, 3, {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * + * 0 + * /|\ + * 1 2 4 + * | | | + * | 3 | + * \|/ + * 5 + */ +TEST(PathSearchTest, twoShortestPaths) { + auto sub = makeIdTableFromVector( + {{0, 1}, {0, 2}, {0, 4}, {1, 5}, {2, 3}, {3, 5}, {4, 5}}); + auto expected = makeIdTableFromVector({ + {V(0), V(4), I(0), I(0)}, + {V(4), V(5), I(0), I(1)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(5), I(1), I(1)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(5)}, 0, 1, 2, 3, {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} From e3715429af7ffa6fe0dc7d59be84160555616036 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 30 Apr 2024 13:55:44 +0200 Subject: [PATCH 12/96] Fixed setTextLimit error after merge --- src/engine/PathSearch.cpp | 7 ------- src/engine/PathSearch.h | 1 - 2 files changed, 8 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 0901e1e720..90c9fe389c 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -51,13 +51,6 @@ string PathSearch::getDescriptor() const { // _____________________________________________________________________________ size_t PathSearch::getResultWidth() const { return resultWidth_; }; -// _____________________________________________________________________________ -void PathSearch::setTextLimit(size_t limit) { - for (auto child : getChildren()) { - child->setTextLimit(limit); - } -}; - // _____________________________________________________________________________ size_t PathSearch::getCostEstimate() { // TODO: Figure out a smart way to estimate cost diff --git 
a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 57d7792d49..ac75a4619d 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -188,7 +188,6 @@ class PathSearch : public Operation { string getCacheKeyImpl() const override; string getDescriptor() const override; size_t getResultWidth() const override; - void setTextLimit(size_t limit) override; size_t getCostEstimate() override; From 178fd144d955cfdeb1df220b92b790b289a741e4 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 19 Jun 2024 19:40:23 +0200 Subject: [PATCH 13/96] Added PathSearch parsing --- src/engine/CheckUsePatternTrick.cpp | 3 +- src/engine/PathSearch.cpp | 43 +++-- src/engine/PathSearch.h | 47 +++-- src/engine/QueryPlanner.cpp | 41 +++++ src/engine/QueryPlanner.h | 1 + src/parser/GraphPatternOperation.cpp | 48 ++++++ src/parser/GraphPatternOperation.h | 20 ++- .../sparqlParser/SparqlQleverVisitor.cpp | 65 ++++--- src/parser/sparqlParser/SparqlQleverVisitor.h | 2 +- test/PathSearchTest.cpp | 90 ++++++++-- test/QueryPlannerTest.cpp | 161 ++++++++++++++++++ test/QueryPlannerTestHelpers.h | 18 ++ 12 files changed, 475 insertions(+), 64 deletions(-) diff --git a/src/engine/CheckUsePatternTrick.cpp b/src/engine/CheckUsePatternTrick.cpp index 5a0fde8cfb..ed297d5d60 100644 --- a/src/engine/CheckUsePatternTrick.cpp +++ b/src/engine/CheckUsePatternTrick.cpp @@ -69,7 +69,8 @@ bool isVariableContainedInGraphPatternOperation( } else if constexpr (std::is_same_v) { return ad_utility::contains(arg.visibleVariables_, variable); } else { - static_assert(std::is_same_v); + static_assert(std::is_same_v || + std::is_same_v); // The `TransPath` is set up later in the query planning, when this // function should not be called anymore. 
AD_FAIL(); diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 90c9fe389c..4116a92250 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -10,6 +10,8 @@ #include #include "engine/CallFixedSize.h" +#include "engine/VariableToColumnMap.h" +#include "parser/GraphPatternOperation.h" #include "util/Exception.h" // _____________________________________________________________________________ @@ -23,7 +25,16 @@ PathSearch::PathSearch(QueryExecutionContext* qec, indexToId_(), idToIndex_(allocator()) { AD_CORRECTNESS_CHECK(qec != nullptr); - resultWidth_ = 4 + config_.edgePropertyIndices_.size(); + resultWidth_ = 4 + config_.edgeProperties_.size(); + variableColumns_[config_.start_] = makeAlwaysDefinedColumn(0); + variableColumns_[config_.end_] = makeAlwaysDefinedColumn(1); + variableColumns_[config_.pathColumn_] = makeAlwaysDefinedColumn(2); + variableColumns_[config_.edgeColumn_] = makeAlwaysDefinedColumn(3); + + for (size_t edgePropertyIndex = 0; edgePropertyIndex < config_.edgeProperties_.size(); edgePropertyIndex++) { + auto edgeProperty = config_.edgeProperties_[edgePropertyIndex]; + variableColumns_[edgeProperty] = makeAlwaysDefinedColumn(4 + edgePropertyIndex); + } } // _____________________________________________________________________________ @@ -83,18 +94,21 @@ ResultTable PathSearch::computeResult() { const IdTable& dynSub = subRes->idTable(); - std::vector> edgePropertyLists; - for (auto edgePropertyIndex : config_.edgePropertyIndices_) { - edgePropertyLists.push_back(dynSub.getColumn(edgePropertyIndex)); - } + if (dynSub.size() > 0) { + std::vector> edgePropertyLists; + for (auto edgeProperty : config_.edgeProperties_) { + auto edgePropertyIndex = subtree_->getVariableColumn(edgeProperty); + edgePropertyLists.push_back(dynSub.getColumn(edgePropertyIndex)); + } - buildGraph(dynSub.getColumn(config_.startColumn_), - dynSub.getColumn(config_.endColumn_), edgePropertyLists); + 
buildGraph(dynSub.getColumn(getStartIndex()), + dynSub.getColumn(getEndIndex()), edgePropertyLists); - auto paths = findPaths(); + auto paths = findPaths(); - CALL_FIXED_SIZE(std::array{getResultWidth()}, &PathSearch::pathsToResultTable, - this, idTable, paths); + CALL_FIXED_SIZE(std::array{getResultWidth()}, &PathSearch::pathsToResultTable, + this, idTable, paths); + } return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; }; @@ -160,6 +174,7 @@ std::vector PathSearch::findPaths() const { std::vector PathSearch::allPaths() const { std::vector paths; Path path; + auto startIndex = idToIndex_.at(config_.source_); std::unordered_set targets; @@ -215,10 +230,10 @@ void PathSearch::pathsToResultTable(IdTable& tableDyn, auto edge = path.edges_[edgeIndex]; auto [start, end] = edge.toIds(); table.emplace_back(); - table(rowIndex, config_.startColumn_) = start; - table(rowIndex, config_.endColumn_) = end; - table(rowIndex, config_.pathIndexColumn_) = Id::makeFromInt(pathIndex); - table(rowIndex, config_.edgeIndexColumn_) = Id::makeFromInt(edgeIndex); + table(rowIndex, getStartIndex()) = start; + table(rowIndex, getEndIndex()) = end; + table(rowIndex, getPathIndex()) = Id::makeFromInt(pathIndex); + table(rowIndex, getEdgeIndex()) = Id::makeFromInt(edgeIndex); for (size_t edgePropertyIndex = 0; edgePropertyIndex < edge.edgeProperties_.size(); diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index ac75a4619d..d0312f3734 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -2,6 +2,8 @@ // Chair of Algorithms and Data Structures. 
// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) +#pragma once + #include #include #include @@ -15,17 +17,18 @@ #include #include "engine/Operation.h" -#include "engine/QueryExecutionTree.h" +#include "engine/VariableToColumnMap.h" #include "global/Id.h" +#include "index/Vocabulary.h" // We deliberately use the `std::` variants of a hash set and hash map because // `absl`s types are not exception safe. -struct HashId { +struct IdHash { auto operator()(Id id) const { return std::hash{}(id.getBits()); } }; -using Map = std::unordered_map< - Id, size_t, HashId, std::equal_to, +using IdToNodeMap = std::unordered_map< + Id, size_t, IdHash, std::equal_to, ad_utility::AllocatorWithLimit>>; struct Edge { @@ -59,6 +62,10 @@ struct Path { return !empty() ? std::optional{edges_.back().end_} : std::nullopt; } + + bool ends_with(uint64_t node) const { + return (!empty() && node == lastNode().value()); + } }; typedef boost::adjacency_list targets_; - size_t startColumn_; - size_t endColumn_; - size_t pathIndexColumn_; - size_t edgeIndexColumn_; - std::vector edgePropertyIndices_; + Variable start_; + Variable end_; + Variable pathColumn_; + Variable edgeColumn_; + std::vector edgeProperties_; }; class PathSearch : public Operation { @@ -174,9 +179,11 @@ class PathSearch : public Operation { Graph graph_; PathSearchConfiguration config_; + Id source_; + std::vector targets_; std::vector indexToId_; - Map idToIndex_; + IdToNodeMap idToIndex_; public: PathSearch(QueryExecutionContext* qec, @@ -184,6 +191,16 @@ class PathSearch : public Operation { PathSearchConfiguration config); std::vector getChildren() override; + const Id& getSource() const { return source_; } + const std::vector& getTargets() const { return targets_; } + + const PathSearchConfiguration& getConfig() const { return config_; } + + ColumnIndex getStartIndex() const { return variableColumns_.at(config_.start_).columnIndex_; } + ColumnIndex getEndIndex() const { return 
variableColumns_.at(config_.end_).columnIndex_; } + ColumnIndex getPathIndex() const { return variableColumns_.at(config_.pathColumn_).columnIndex_; } + ColumnIndex getEdgeIndex() const { return variableColumns_.at(config_.edgeColumn_).columnIndex_; } + string getCacheKeyImpl() const override; string getDescriptor() const override; diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index c743338a29..f42e98eb69 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -24,6 +24,7 @@ #include "engine/NeutralElementOperation.h" #include "engine/OptionalJoin.h" #include "engine/OrderBy.h" +#include "engine/PathSearch.h" #include "engine/Service.h" #include "engine/Sort.h" #include "engine/TextIndexScanForEntity.h" @@ -34,6 +35,7 @@ #include "engine/Values.h" #include "parser/Alias.h" #include "parser/SparqlParserHelpers.h" +#include "util/Exception.h" namespace p = parsedQuery; namespace { @@ -1961,6 +1963,8 @@ void QueryPlanner::GraphPatternPlanner::graphPatternOperationVisitor(Arg& arg) { c.type = SubtreePlan::MINUS; } visitGroupOptionalOrMinus(std::move(candidates)); + } else if constexpr (std::is_same_v) { + visitPathSearch(arg); } else { static_assert(std::is_same_v); visitBasicGraphPattern(arg); @@ -2068,6 +2072,43 @@ void QueryPlanner::GraphPatternPlanner::visitTransitivePath( visitGroupOptionalOrMinus(std::move(candidatesOut)); } +// _______________________________________________________________ +void QueryPlanner::GraphPatternPlanner::visitPathSearch( + parsedQuery::PathQuery& pathQuery) { + auto candidatesIn = planner_.optimize(&pathQuery.childGraphPattern_); + std::vector candidatesOut; + auto tripleComponentToId = [this](TripleComponent& comp) -> Id { + auto opt = comp.toValueId(planner_._qec->getIndex().getVocab()); + if (opt.has_value()) { + return opt.value(); + } else { + AD_THROW("No vocabulary entry for " + comp.toString()); + } + }; + auto source = tripleComponentToId(pathQuery.source_); + std::vector targets; 
+ for (auto comp: pathQuery.targets_) { + targets.push_back(tripleComponentToId(comp)); + } + auto config = PathSearchConfiguration{ + pathQuery.algorithm_, + std::move(source), + std::move(targets), + std::move(pathQuery.start_.value()), + std::move(pathQuery.end_.value()), + std::move(pathQuery.pathColumn_.value()), + std::move(pathQuery.edgeColumn_.value()), + std::move(pathQuery.edgeProperties_) + }; + + for (auto& sub : candidatesIn) { + auto pathSearch = std::make_shared(PathSearch(qec_, std::move(sub._qet), config)); + auto plan = makeSubtreePlan(std::move(pathSearch)); + candidatesOut.push_back(std::move(plan)); + } + visitGroupOptionalOrMinus(std::move(candidatesOut)); +} + // _______________________________________________________________ void QueryPlanner::GraphPatternPlanner::visitUnion(parsedQuery::Union& arg) { // TODO here we could keep all the candidates, and create a diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 980741fa8b..1547bdfe81 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -494,6 +494,7 @@ class QueryPlanner { void visitBasicGraphPattern(const parsedQuery::BasicGraphPattern& pattern); void visitBind(const parsedQuery::Bind& bind); void visitTransitivePath(parsedQuery::TransPath& transitivePath); + void visitPathSearch(parsedQuery::PathQuery& config); void visitUnion(parsedQuery::Union& un); void visitSubquery(parsedQuery::Subquery& subquery); diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index bddd21c5e9..de0173e1e7 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -67,6 +67,54 @@ void BasicGraphPattern::appendTriples(BasicGraphPattern other) { ad_utility::appendVector(_triples, std::move(other._triples)); } +// ____________________________________________________________________________ +void PathQuery::addParameter(SparqlTriple& triple) { + auto simpleTriple = triple.getSimple(); + TripleComponent 
predicate = simpleTriple.p_; + TripleComponent object = simpleTriple.o_; + AD_CORRECTNESS_CHECK(predicate.isIri()); + if (predicate.getIri().toStringRepresentation().ends_with("source>")) { + AD_CORRECTNESS_CHECK(object.isIri()); + source_ = std::move(object); + } else if (predicate.getIri().toStringRepresentation().ends_with("target>")) { + AD_CORRECTNESS_CHECK(object.isIri()); + targets_.push_back(std::move(object)); + } else if (predicate.getIri().toStringRepresentation().ends_with("start>")) { + AD_CORRECTNESS_CHECK(object.isVariable()); + start_ = object.getVariable(); + } else if (predicate.getIri().toStringRepresentation().ends_with("end>")) { + AD_CORRECTNESS_CHECK(object.isVariable()); + end_ = object.getVariable(); + } else if (predicate.getIri().toStringRepresentation().ends_with("pathColumn>")) { + AD_CORRECTNESS_CHECK(object.isVariable()); + pathColumn_ = object.getVariable(); + } else if (predicate.getIri().toStringRepresentation().ends_with("edgeColumn>")) { + AD_CORRECTNESS_CHECK(object.isVariable()); + edgeColumn_ = object.getVariable(); + } else if (predicate.getIri().toStringRepresentation().ends_with("edgeProperty>")) { + AD_CORRECTNESS_CHECK(object.isVariable()); + edgeProperties_.push_back(object.getVariable()); + } else if (predicate.getIri().toStringRepresentation().ends_with("algorithm>")) { + AD_CORRECTNESS_CHECK(object.isIri()); + if (object.getIri().toStringRepresentation().ends_with("allPaths>")) { + algorithm_ = PathSearchAlgorithm::ALL_PATHS; + } else if (object.getIri().toStringRepresentation().ends_with("shortestPaths>")) { + algorithm_ = PathSearchAlgorithm::SHORTEST_PATHS; + } else { + AD_THROW("Unsupported algorithm in PathSearch"); + } + } else { + AD_THROW("Unsupported argument in PathSearch"); + } +} + +// ____________________________________________________________________________ +void PathQuery::fromBasicPattern(const BasicGraphPattern& pattern) { + for (SparqlTriple triple: pattern._triples) { + addParameter(triple); + } 
+} + // ____________________________________________________________________________ cppcoro::generator Bind::containedVariables() const { for (const auto* ptr : _expression.containedVariables()) { diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 07741d14af..87b79e7354 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -8,6 +8,7 @@ #include #include +#include "engine/PathSearch.h" #include "engine/sparqlExpressions/SparqlExpressionPimpl.h" #include "parser/GraphPattern.h" #include "parser/TripleComponent.h" @@ -136,6 +137,22 @@ struct TransPath { GraphPattern _childGraphPattern; }; +struct PathQuery { + TripleComponent source_; + std::vector targets_; + std::optional start_; + std::optional end_; + std::optional pathColumn_; + std::optional edgeColumn_; + std::vector edgeProperties_; + PathSearchAlgorithm algorithm_; + + GraphPattern childGraphPattern_; + + void addParameter(SparqlTriple& triple); + void fromBasicPattern(const BasicGraphPattern& pattern); +}; + // A SPARQL Bind construct. struct Bind { sparqlExpression::SparqlExpressionPimpl _expression; @@ -152,7 +169,8 @@ struct Bind { // class actually becomes `using GraphPatternOperation = std::variant<...>` using GraphPatternOperationVariant = std::variant; + Values, Service, PathQuery, Minus, + GroupGraphPattern>; struct GraphPatternOperation : public GraphPatternOperationVariant, public VisitMixin { diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 073a725fdf..e44fff2699 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -459,7 +459,7 @@ GraphPatternOperation Visitor::visit(Parser::OptionalGraphPatternContext* ctx) { } // Parsing for the `serviceGraphPattern` rule. 
-parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { +GraphPatternOperation Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { // If SILENT is specified, report that we do not support it yet. // // TODO: Support it, it's not hard. The semantics of SILENT is that if no @@ -484,6 +484,29 @@ parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { } AD_CONTRACT_CHECK(std::holds_alternative(varOrIri)); Iri serviceIri = std::get(varOrIri); + if (serviceIri.toSparql() == + "") { + + auto parsePathQuery = [](parsedQuery::PathQuery& pathQuery, const parsedQuery::GraphPatternOperation& op){ + if (std::holds_alternative(op)) { + pathQuery.fromBasicPattern(std::get(op)); + } else if (std::holds_alternative(op)) { + auto pattern = std::get(op); + pathQuery.childGraphPattern_ = std::move(pattern._child); + } else { + AD_THROW("Unsupported argument in PathSearch"); + } + }; + + parsedQuery::GraphPattern graphPattern = visit(ctx->groupGraphPattern()); + parsedQuery::PathQuery pathQuery; + for (auto op: graphPattern._graphPatterns) { + parsePathQuery(pathQuery, op); + } + + return pathQuery; + }; + // Parse the body of the SERVICE query. Add the visible variables from the // SERVICE clause to the visible variables so far, but also remember them // separately (with duplicates removed) because we need them in `Service.cpp` @@ -499,9 +522,9 @@ parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { visibleVariablesServiceQuery.begin(), visibleVariablesServiceQuery.end()); // Create suitable `parsedQuery::Service` object and return it. 
- return {std::move(visibleVariablesServiceQuery), std::move(serviceIri), - prologueString_, - getOriginalInputForContext(ctx->groupGraphPattern())}; + return parsedQuery::Service{ + std::move(visibleVariablesServiceQuery), std::move(serviceIri), + prologueString_, getOriginalInputForContext(ctx->groupGraphPattern())}; } // ____________________________________________________________________________ @@ -1724,24 +1747,28 @@ ExpressionPtr Visitor::visit([[maybe_unused]] Parser::BuiltInCallContext* ctx) { using namespace sparqlExpression; // Create the expression using the matching factory function from // `NaryExpression.h`. - auto createUnary = [&argList](Function function) - requires std::is_invocable_r_v { + auto createUnary = + [&argList](Function function) + requires std::is_invocable_r_v + { AD_CORRECTNESS_CHECK(argList.size() == 1, argList.size()); return function(std::move(argList[0])); }; - auto createBinary = [&argList](Function function) - requires std::is_invocable_r_v { - AD_CORRECTNESS_CHECK(argList.size() == 2); - return function(std::move(argList[0]), std::move(argList[1])); - }; - auto createTernary = [&argList](Function function) - requires std::is_invocable_r_v { - AD_CORRECTNESS_CHECK(argList.size() == 3); - return function(std::move(argList[0]), std::move(argList[1]), - std::move(argList[2])); - }; + auto createBinary = + [&argList](Function function) + requires std::is_invocable_r_v { + AD_CORRECTNESS_CHECK(argList.size() == 2); + return function(std::move(argList[0]), std::move(argList[1])); + }; + auto createTernary = + [&argList](Function function) + requires std::is_invocable_r_v { + AD_CORRECTNESS_CHECK(argList.size() == 3); + return function(std::move(argList[0]), std::move(argList[1]), + std::move(argList[2])); + }; if (functionName == "str") { return createUnary(&makeStrExpression); } else if (functionName == "strlen") { diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h index 
0b00af4aeb..73766bfbd8 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.h +++ b/src/parser/sparqlParser/SparqlQleverVisitor.h @@ -221,7 +221,7 @@ class SparqlQleverVisitor { [[noreturn]] static parsedQuery::GraphPatternOperation visit( const Parser::GraphGraphPatternContext* ctx); - [[nodiscard]] parsedQuery::Service visit( + [[nodiscard]] GraphPatternOperation visit( Parser::ServiceGraphPatternContext* ctx); [[nodiscard]] parsedQuery::GraphPatternOperation visit( diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 883008cf15..f6912100bc 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -17,6 +17,7 @@ using ad_utility::testing::getQec; namespace { auto V = ad_utility::testing::VocabId; auto I = ad_utility::testing::IntId; +using Var = Variable; using Vars = std::vector>; } // namespace @@ -33,10 +34,24 @@ ResultTable performPathSearch(PathSearchConfiguration config, IdTable input, TEST(PathSearchTest, constructor) { auto qec = getQec(); - PathSearchConfiguration config{ALL_PATHS, V(0), {V(1)}, 0, 1, 2, 3, {}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(1)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; PathSearch p = PathSearch(qec, nullptr, config); } +TEST(PathSearchTest, emptyGraph) { + auto sub = makeIdTableFromVector({}); + sub.setNumColumns(2); + auto expected = makeIdTableFromVector({}); + expected.setNumColumns(4); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + /** * Graph: * 0 -> 1 -> 2 -> 3 -> 4 @@ -51,7 +66,7 @@ TEST(PathSearchTest, singlePath) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, 0, 1, 2, 3, {}}; + 
PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -69,7 +84,7 @@ TEST(PathSearchTest, singlePathWithProperties) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, 0, 1, 2, 3, {2}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {Var{"?edgeProperty"}}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -86,7 +101,7 @@ TEST(PathSearchTest, singlePathWithDijkstra) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, 0, 1, 2, 3, {}}; + PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -104,7 +119,7 @@ TEST(PathSearchTest, singlePathWithDijkstraAndProperties) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; - PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, 0, 1, 2, 3, {2}}; + PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {Var{"?edgeProperty"}}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -129,7 +144,7 @@ TEST(PathSearchTest, twoPathsOneTarget) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(2)}, 0, 1, 2, 3, {}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(2)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = 
performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -154,7 +169,7 @@ TEST(PathSearchTest, twoPathsTwoTargets) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(2), V(4)}, 0, 1, 2, 3, {}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(2), V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -178,7 +193,7 @@ TEST(PathSearchTest, cycle) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(0)}, 0, 1, 2, 3, {}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(0)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -205,7 +220,7 @@ TEST(PathSearchTest, twoCycle) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(0)}, 0, 1, 2, 3, {}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(0)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -235,7 +250,7 @@ TEST(PathSearchTest, allPaths) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {}, 0, 1, 2, 3, {}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -261,7 +276,7 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty1"}, Variable{"?edgeProperty2"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {}, 0, 
1, 2, 3, {3, 2}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {Var{"?edgeProperty2"}, Var{"?edgeProperty1"}}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -287,7 +302,7 @@ TEST(PathSearchTest, singleShortestPath) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, 0, 1, 2, 3, {}}; + PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -316,9 +331,58 @@ TEST(PathSearchTest, twoShortestPaths) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(5)}, 0, 1, 2, 3, {}}; + PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(5)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + + +/** + * Graph: + * 0 -> 1 -> 2 -> 3 -> 4 + * ^ + * / + * 5 + */ +TEST(PathSearchTest, singlePathWithIrrelevantNode) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {5, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(3), I(0), I(2)}, + {V(3), V(4), I(0), I(3)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, shortestPathWithIrrelevantNode) { + auto sub 
= makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {5, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(3), I(0), I(2)}, + {V(3), V(4), I(0), I(3)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } + + + + + + + diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 6b950301d2..bc1e627ce9 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -6,6 +6,7 @@ #include "QueryPlannerTestHelpers.h" #include "engine/QueryPlanner.h" +#include "parser/GraphPatternOperation.h" #include "parser/SparqlParser.h" #include "util/TripleComponentTestHelpers.h" @@ -771,6 +772,166 @@ TEST(QueryPlanner, TransitivePathBindRight) { ad_utility::testing::getQec("

.

")); } +TEST(QueryPlanner, PathSearchSingleTarget) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + PathSearchConfiguration config{ + ALL_PATHS, + getId(""), + {getId("")}, + Variable("?start"), + Variable("?end"), + Variable("?pathColumn"), + Variable("?edgeColumn"), + {} + }; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch( + config, + scan("?start", "

", "?end")), + qec); +} + +TEST(QueryPlanner, PathSearchMultipleTargets) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + PathSearchConfiguration config{ + ALL_PATHS, + getId(""), + {getId(""), getId("")}, + Variable("?start"), + Variable("?end"), + Variable("?pathColumn"), + Variable("?edgeColumn"), + {} + }; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch( + config, + scan("?start", "

", "?end")), + qec); +} + +TEST(QueryPlanner, PathSearchWithEdgeProperties) { + auto scan = h::IndexScanFromStrings; + auto join = h::Join; + auto qec = ad_utility::testing::getQec(" . . . "); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + PathSearchConfiguration config{ + ALL_PATHS, + getId(""), + {getId("")}, + Variable("?start"), + Variable("?end"), + Variable("?pathColumn"), + Variable("?edgeColumn"), + {Variable("?middle")} + }; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:edgeProperty ?middle;" + "{SELECT * WHERE {" + "?start ?middle." + "?middle ?end." + "}}}}", + h::PathSearch( + config, + join(scan("?start", "", "?middle"), + scan("?middle", "", "?end"))), + qec); +} + +TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { + auto scan = h::IndexScanFromStrings; + auto join = h::Join; + auto qec = ad_utility::testing::getQec( + " ." + " ." + " ." + " ." + " ." 
+ " " + ); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + PathSearchConfiguration config{ + ALL_PATHS, + getId(""), + {getId(""), getId("")}, + Variable("?start"), + Variable("?end"), + Variable("?pathColumn"), + Variable("?edgeColumn"), + {Variable("?middle"), Variable("?middleAttribute")} + }; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:edgeProperty ?middle;" + "pathSearch:edgeProperty ?middleAttribute;" + "{SELECT * WHERE {" + "?start ?middle." + "?middle ?middleAttribute." + "?middle ?end." + "}}}}", + h::PathSearch( + config, + join(scan("?start", "", "?middle"), + join(scan("?middle", "", "?middleAttribute"), + scan("?middle", "", "?end")))), + qec); +} + // __________________________________________________________________________ TEST(QueryPlanner, BindAtBeginningOfQuery) { h::expect( diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index e73ab6b891..7ab9a0d1cc 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -13,6 +13,7 @@ #include "engine/MultiColumnJoin.h" #include "engine/NeutralElementOperation.h" #include "engine/OrderBy.h" +#include "engine/PathSearch.h" #include "engine/QueryExecutionTree.h" #include "engine/QueryPlanner.h" #include "engine/Sort.h" @@ -256,6 +257,23 @@ inline auto TransitivePath = TransitivePathSideMatcher(right)))); }; +inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { + return AllOf( + AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)) + ); +}; + +// Match a PathSearch operation +inline auto PathSearch = + [](PathSearchConfiguration config, const std::same_as auto&... 
childMatchers) { + return RootOperation<::PathSearch>( + AllOf(Property("getChildren", &Operation::getChildren, + ElementsAre(Pointee(childMatchers)...)), + AD_PROPERTY(PathSearch, getConfig, PathSearchConfigMatcher(config)) + ) + ); + }; + // Match a sort operation. Currently, this is only required by the binary search // version of the transitive path operation. This matcher checks only the // children of the sort operation. From f228eb622916e5dfc882627da84d86c990293bae Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 19 Jun 2024 20:36:25 +0200 Subject: [PATCH 14/96] Moved visitors to new file --- src/engine/PathSearch.h | 136 +----------------------------- src/engine/PathSearchVisitors.h | 144 ++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 135 deletions(-) create mode 100644 src/engine/PathSearchVisitors.h diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index d0312f3734..31346c3c31 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -4,20 +4,14 @@ #pragma once -#include -#include -#include -#include -#include -#include #include -#include #include #include #include #include "engine/Operation.h" #include "engine/VariableToColumnMap.h" +#include "engine/PathSearchVisitors.h" #include "global/Id.h" #include "index/Vocabulary.h" @@ -31,134 +25,6 @@ using IdToNodeMap = std::unordered_map< Id, size_t, IdHash, std::equal_to, ad_utility::AllocatorWithLimit>>; -struct Edge { - uint64_t start_; - uint64_t end_; - std::vector edgeProperties_; - double weight_ = 1; - - std::pair toIds() const { - return {Id::fromBits(start_), Id::fromBits(end_)}; - } -}; - -struct Path { - std::vector edges_; - - bool empty() const { return edges_.empty(); } - - size_t size() const { return edges_.size(); } - - void push_back(Edge edge) { edges_.push_back(edge); } - - void reverse() { std::reverse(edges_.begin(), edges_.end()); } - - std::optional firstNode() const { - return !empty() ? 
std::optional{edges_.front().start_} - : std::nullopt; - } - - std::optional lastNode() const { - return !empty() ? std::optional{edges_.back().end_} - : std::nullopt; - } - - bool ends_with(uint64_t node) const { - return (!empty() && node == lastNode().value()); - } -}; - -typedef boost::adjacency_list - Graph; -typedef boost::graph_traits::vertex_descriptor VertexDescriptor; -typedef boost::graph_traits::edge_descriptor EdgeDescriptor; - -class AllPathsVisitor : public boost::default_dfs_visitor { - std::unordered_set targets_; - Path& currentPath_; - std::vector& allPaths_; - - const std::vector& indexToId_; - - public: - AllPathsVisitor(std::unordered_set targets, Path& path, - std::vector& paths, const std::vector& indexToId) - : targets_(std::move(targets)), - currentPath_(path), - allPaths_(paths), - indexToId_(indexToId) {} - - void examine_edge(EdgeDescriptor edgeDesc, const Graph& graph) { - const Edge& edge = graph[edgeDesc]; - if (targets_.empty() || (currentPath_.ends_with(edge.start_) && targets_.find(edge.end_) != targets_.end())) { - auto pathCopy = currentPath_; - pathCopy.push_back(edge); - allPaths_.push_back(pathCopy); - } - } - - void tree_edge(EdgeDescriptor edgeDesc, const Graph& graph) { - const Edge& edge = graph[edgeDesc]; - currentPath_.edges_.push_back(edge); - } - - void finish_vertex(VertexDescriptor vertex, const Graph& graph) { - (void)graph; - if (!currentPath_.empty() && Id::fromBits(currentPath_.lastNode().value()) == indexToId_[vertex]) { - currentPath_.edges_.pop_back(); - } - } -}; - -class DijkstraAllPathsVisitor : public boost::default_dijkstra_visitor { - VertexDescriptor source_; - std::unordered_set targets_; - Path& currentPath_; - std::vector& allPaths_; - std::vector& predecessors_; - std::vector& distances_; - - public: - DijkstraAllPathsVisitor(VertexDescriptor source, - std::unordered_set targets, Path& path, - std::vector& paths, - std::vector& predecessors, - std::vector& distances) - : source_(source), - 
targets_(std::move(targets)), - currentPath_(path), - allPaths_(paths), - predecessors_(predecessors), - distances_(distances) {} - - const std::vector& getPredecessors() const { - return predecessors_; - } - const std::vector& getDistances() const { return distances_; } - - void edge_relaxed(EdgeDescriptor edgeDesc, const Graph& graph) { - const Edge& edge = graph[edgeDesc]; - if (targets_.empty() || targets_.find(edge.end_) != targets_.end()) { - rebuild_path(target(edgeDesc, graph), graph); - } - } - - void rebuild_path(VertexDescriptor vertex, const Graph& graph) { - currentPath_.edges_.clear(); - for (VertexDescriptor v = vertex; v != source_; v = predecessors_[v]) { - EdgeDescriptor e; - bool exists; - boost::tie(e, exists) = edge(predecessors_[v], v, graph); - if (exists) { - currentPath_.push_back(graph[e]); - } - } - currentPath_.reverse(); - allPaths_.push_back(currentPath_); - } -}; - enum PathSearchAlgorithm { ALL_PATHS, SHORTEST_PATHS }; struct PathSearchConfiguration { diff --git a/src/engine/PathSearchVisitors.h b/src/engine/PathSearchVisitors.h new file mode 100644 index 0000000000..183db37fe1 --- /dev/null +++ b/src/engine/PathSearchVisitors.h @@ -0,0 +1,144 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + + +struct Edge { + uint64_t start_; + uint64_t end_; + std::vector edgeProperties_; + double weight_ = 1; + + std::pair toIds() const { + return {Id::fromBits(start_), Id::fromBits(end_)}; + } +}; + +struct Path { + std::vector edges_; + + bool empty() const { return edges_.empty(); } + + size_t size() const { return edges_.size(); } + + void push_back(Edge edge) { edges_.push_back(edge); } + + void reverse() { std::reverse(edges_.begin(), edges_.end()); } + + std::optional firstNode() const { + return !empty() ? 
std::optional{edges_.front().start_} + : std::nullopt; + } + + std::optional lastNode() const { + return !empty() ? std::optional{edges_.back().end_} + : std::nullopt; + } + + bool ends_with(uint64_t node) const { + return (!empty() && node == lastNode().value()); + } +}; + + +typedef boost::adjacency_list + Graph; +typedef boost::graph_traits::vertex_descriptor VertexDescriptor; +typedef boost::graph_traits::edge_descriptor EdgeDescriptor; + +class AllPathsVisitor : public boost::default_dfs_visitor { + std::unordered_set targets_; + Path& currentPath_; + std::vector& allPaths_; + + const std::vector& indexToId_; + + public: + AllPathsVisitor(std::unordered_set targets, Path& path, + std::vector& paths, const std::vector& indexToId) + : targets_(std::move(targets)), + currentPath_(path), + allPaths_(paths), + indexToId_(indexToId) {} + + void examine_edge(EdgeDescriptor edgeDesc, const Graph& graph) { + const Edge& edge = graph[edgeDesc]; + if (targets_.empty() || (currentPath_.ends_with(edge.start_) && targets_.find(edge.end_) != targets_.end())) { + auto pathCopy = currentPath_; + pathCopy.push_back(edge); + allPaths_.push_back(pathCopy); + } + } + + void tree_edge(EdgeDescriptor edgeDesc, const Graph& graph) { + const Edge& edge = graph[edgeDesc]; + currentPath_.edges_.push_back(edge); + } + + void finish_vertex(VertexDescriptor vertex, const Graph& graph) { + (void)graph; + if (!currentPath_.empty() && Id::fromBits(currentPath_.lastNode().value()) == indexToId_[vertex]) { + currentPath_.edges_.pop_back(); + } + } +}; + +class DijkstraAllPathsVisitor : public boost::default_dijkstra_visitor { + VertexDescriptor source_; + std::unordered_set targets_; + Path& currentPath_; + std::vector& allPaths_; + std::vector& predecessors_; + std::vector& distances_; + + public: + DijkstraAllPathsVisitor(VertexDescriptor source, + std::unordered_set targets, Path& path, + std::vector& paths, + std::vector& predecessors, + std::vector& distances) + : source_(source), + 
targets_(std::move(targets)), + currentPath_(path), + allPaths_(paths), + predecessors_(predecessors), + distances_(distances) {} + + const std::vector& getPredecessors() const { + return predecessors_; + } + const std::vector& getDistances() const { return distances_; } + + void edge_relaxed(EdgeDescriptor edgeDesc, const Graph& graph) { + const Edge& edge = graph[edgeDesc]; + if (targets_.empty() || targets_.find(edge.end_) != targets_.end()) { + rebuild_path(target(edgeDesc, graph), graph); + } + } + + void rebuild_path(VertexDescriptor vertex, const Graph& graph) { + currentPath_.edges_.clear(); + for (VertexDescriptor v = vertex; v != source_; v = predecessors_[v]) { + EdgeDescriptor e; + bool exists; + boost::tie(e, exists) = edge(predecessors_[v], v, graph); + if (exists) { + currentPath_.push_back(graph[e]); + } + } + currentPath_.reverse(); + allPaths_.push_back(currentPath_); + } +}; From 3966a69fd1edfcda51578cb2ee6293009a1a2c53 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sun, 23 Jun 2024 16:33:52 +0200 Subject: [PATCH 15/96] Fixed a bug where the wrong sub columns were read --- src/engine/PathSearch.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 4116a92250..3135fa00ed 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -101,8 +101,11 @@ ResultTable PathSearch::computeResult() { edgePropertyLists.push_back(dynSub.getColumn(edgePropertyIndex)); } - buildGraph(dynSub.getColumn(getStartIndex()), - dynSub.getColumn(getEndIndex()), edgePropertyLists); + + auto subStartColumn = subtree_->getVariableColumn(config_.start_); + auto subEndColumn = subtree_->getVariableColumn(config_.end_); + buildGraph(dynSub.getColumn(subStartColumn), + dynSub.getColumn(subEndColumn), edgePropertyLists); auto paths = findPaths(); From 1aa8350658440660885c88f56b861bdb0378f6fd Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sun, 23 Jun 2024 16:57:29 +0200 
Subject: [PATCH 16/96] Fixed QueryPlanner PathSearch tests --- test/QueryPlannerTest.cpp | 16 ++++++++-------- test/QueryPlannerTestHelpers.h | 9 ++++++++- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index bc1e627ce9..d374994a35 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -783,8 +783,8 @@ TEST(QueryPlanner, PathSearchSingleTarget) { {getId("")}, Variable("?start"), Variable("?end"), - Variable("?pathColumn"), - Variable("?edgeColumn"), + Variable("?path"), + Variable("?edge"), {} }; h::expect( @@ -818,8 +818,8 @@ TEST(QueryPlanner, PathSearchMultipleTargets) { {getId(""), getId("")}, Variable("?start"), Variable("?end"), - Variable("?pathColumn"), - Variable("?edgeColumn"), + Variable("?path"), + Variable("?edge"), {} }; h::expect( @@ -855,8 +855,8 @@ TEST(QueryPlanner, PathSearchWithEdgeProperties) { {getId("")}, Variable("?start"), Variable("?end"), - Variable("?pathColumn"), - Variable("?edgeColumn"), + Variable("?path"), + Variable("?edge"), {Variable("?middle")} }; h::expect( @@ -901,8 +901,8 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { {getId(""), getId("")}, Variable("?start"), Variable("?end"), - Variable("?pathColumn"), - Variable("?edgeColumn"), + Variable("?path"), + Variable("?edge"), {Variable("?middle"), Variable("?middleAttribute")} }; h::expect( diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index 7ab9a0d1cc..d380978a2a 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -259,7 +259,14 @@ inline auto TransitivePath = inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { return AllOf( - AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)) + AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)), + AD_FIELD(PathSearchConfiguration, source_, Eq(config.source_)), + AD_FIELD(PathSearchConfiguration, start_, 
Eq(config.start_)), + AD_FIELD(PathSearchConfiguration, targets_, UnorderedElementsAreArray(config.targets_)), + AD_FIELD(PathSearchConfiguration, end_, Eq(config.end_)), + AD_FIELD(PathSearchConfiguration, pathColumn_, Eq(config.pathColumn_)), + AD_FIELD(PathSearchConfiguration, edgeColumn_, Eq(config.edgeColumn_)), + AD_FIELD(PathSearchConfiguration, edgeProperties_, UnorderedElementsAreArray(config.edgeProperties_)) ); }; From 536e5fea0d4e137c875c5f657f875e08e0c4d499 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sun, 23 Jun 2024 23:27:35 +0200 Subject: [PATCH 17/96] Added documentation to PathSearch and visitors --- src/engine/PathSearch.h | 64 +++++++++++++++-- src/engine/PathSearchVisitors.h | 124 +++++++++++++++++++++++++++++++- 2 files changed, 179 insertions(+), 9 deletions(-) diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 31346c3c31..c8876accaf 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -27,23 +27,39 @@ using IdToNodeMap = std::unordered_map< enum PathSearchAlgorithm { ALL_PATHS, SHORTEST_PATHS }; +/** + * @brief Struct to hold configuration parameters for the path search. + */ struct PathSearchConfiguration { + // The path search algorithm to use. PathSearchAlgorithm algorithm_; + // The source node ID. Id source_; - std::vector targets_; - Variable start_; - Variable end_; - Variable pathColumn_; - Variable edgeColumn_; + // A list of target node IDs. + std::vector targets_; + // Variable representing the start column in the result. + Variable start_; + // Variable representing the end column in the result. + Variable end_; + // Variable representing the path column in the result. + Variable pathColumn_; + // Variable representing the edge column in the result. + Variable edgeColumn_; + // Variables representing edge property columns. std::vector edgeProperties_; }; +/** + * @brief Class to perform various path search algorithms on a graph. 
+ */ class PathSearch : public Operation { std::shared_ptr subtree_; size_t resultWidth_; VariableToColumnMap variableColumns_; + // The graph on which the path search is performed. Graph graph_; + // Configuration for the path search. PathSearchConfiguration config_; Id source_; std::vector targets_; @@ -57,8 +73,8 @@ class PathSearch : public Operation { PathSearchConfiguration config); std::vector getChildren() override; - const Id& getSource() const { return source_; } - const std::vector& getTargets() const { return targets_; } + const Id& getSource() const { return config_.source_; } + const std::vector& getTargets() const { return config_.targets_; } const PathSearchConfiguration& getConfig() const { return config_; } @@ -84,13 +100,47 @@ class PathSearch : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; private: + /** + * @brief Builds the graph from the given nodes and edge properties. + * @param startNodes A span of start nodes. + * @param endNodes A span of end nodes. + * @param edgePropertyLists A span of edge property lists. + */ void buildGraph(std::span startNodes, std::span endNodes, std::span> edgePropertyLists); + + /** + * @brief Builds the mapping from node IDs to indices. + * @param startNodes A span of start nodes. + * @param endNodes A span of end nodes. + */ void buildMapping(std::span startNodes, std::span endNodes); + + /** + * @brief Finds paths based on the configured algorithm. + * @return A vector of paths. + */ std::vector findPaths() const; + + /** + * @brief Finds all paths in the graph. + * @return A vector of all paths. + */ std::vector allPaths() const; + + /** + * @brief Finds the shortest paths in the graph. + * @return A vector of the shortest paths. + */ std::vector shortestPaths() const; + + /** + * @brief Converts paths to a result table with a specified width. + * @tparam WIDTH The width of the result table. + * @param tableDyn The dynamic table to store the results. 
+ * @param paths The vector of paths to convert. + */ template void pathsToResultTable(IdTable& tableDyn, std::vector& paths) const; }; diff --git a/src/engine/PathSearchVisitors.h b/src/engine/PathSearchVisitors.h index 183db37fe1..b3ec1a9c46 100644 --- a/src/engine/PathSearchVisitors.h +++ b/src/engine/PathSearchVisitors.h @@ -13,59 +13,122 @@ #include #include - +/** + * @brief Represents an edge in the graph. + */ struct Edge { + // The starting node ID. uint64_t start_; + + // The ending node ID. uint64_t end_; + + // Properties associated with the edge. std::vector edgeProperties_; + + // The weight of the edge. double weight_ = 1; + /** + * @brief Converts the edge to a pair of IDs. + * @return A pair of IDs representing the start and end of the edge. + */ std::pair toIds() const { return {Id::fromBits(start_), Id::fromBits(end_)}; } }; +/** + * @brief Represents a path consisting of multiple edges. + */ struct Path { + // The edges that make up the path. std::vector edges_; + /** + * @brief Checks if the path is empty. + * @return True if the path is empty, false otherwise. + */ bool empty() const { return edges_.empty(); } + /** + * @brief Returns the number of edges in the path. + * @return The number of edges in the path. + */ size_t size() const { return edges_.size(); } + /** + * @brief Adds an edge to the end of the path. + * @param edge The edge to add. + */ void push_back(Edge edge) { edges_.push_back(edge); } + /** + * @brief Reverses the order of the edges in the path. + */ void reverse() { std::reverse(edges_.begin(), edges_.end()); } + /** + * @brief Returns the ID of the first node in the path, if it exists. + * @return The ID of the first node, or std::nullopt if the path is empty. + */ std::optional firstNode() const { return !empty() ? std::optional{edges_.front().start_} : std::nullopt; } + /** + * @brief Returns the ID of the last node in the path, if it exists. + * @return The ID of the last node, or std::nullopt if the path is empty. 
+ */ std::optional lastNode() const { return !empty() ? std::optional{edges_.back().end_} : std::nullopt; } + /** + * @brief Checks if the path ends with the given node ID. + * @param node The node ID to check. + * @return True if the path ends with the given node ID, false otherwise. + */ bool ends_with(uint64_t node) const { return (!empty() && node == lastNode().value()); } }; - +/** + * @brief Boost graph types and descriptors. + */ typedef boost::adjacency_list Graph; typedef boost::graph_traits::vertex_descriptor VertexDescriptor; typedef boost::graph_traits::edge_descriptor EdgeDescriptor; +/** + * @brief Visitor for performing a depth-first search to find all paths. + */ class AllPathsVisitor : public boost::default_dfs_visitor { + // Set of target node IDs. std::unordered_set targets_; + + // Reference to the current path being explored. Path& currentPath_; + + // Reference to the collection of all found paths. std::vector& allPaths_; + // Mapping from indices to IDs. const std::vector& indexToId_; public: + /** + * @brief Constructor for AllPathsVisitor. + * @param targets Set of target node IDs. + * @param path Reference to the current path being explored. + * @param paths Reference to the collection of all found paths. + * @param indexToId Mapping from indices to IDs. + */ AllPathsVisitor(std::unordered_set targets, Path& path, std::vector& paths, const std::vector& indexToId) : targets_(std::move(targets)), @@ -73,6 +136,11 @@ class AllPathsVisitor : public boost::default_dfs_visitor { allPaths_(paths), indexToId_(indexToId) {} + /** + * @brief Examines an edge during the depth-first search. + * @param edgeDesc The descriptor of the edge being examined. + * @param graph The graph being searched. 
+ */ void examine_edge(EdgeDescriptor edgeDesc, const Graph& graph) { const Edge& edge = graph[edgeDesc]; if (targets_.empty() || (currentPath_.ends_with(edge.start_) && targets_.find(edge.end_) != targets_.end())) { @@ -82,11 +150,21 @@ class AllPathsVisitor : public boost::default_dfs_visitor { } } + /** + * @brief Processes a tree edge during the depth-first search. + * @param edgeDesc The descriptor of the edge being processed. + * @param graph The graph being searched. + */ void tree_edge(EdgeDescriptor edgeDesc, const Graph& graph) { const Edge& edge = graph[edgeDesc]; currentPath_.edges_.push_back(edge); } + /** + * @brief Called when a vertex has been finished during the depth-first search. + * @param vertex The descriptor of the vertex being finished. + * @param graph The graph being searched. + */ void finish_vertex(VertexDescriptor vertex, const Graph& graph) { (void)graph; if (!currentPath_.empty() && Id::fromBits(currentPath_.lastNode().value()) == indexToId_[vertex]) { @@ -95,15 +173,38 @@ class AllPathsVisitor : public boost::default_dfs_visitor { } }; +/** + * @brief Visitor for performing Dijkstra's algorithm to find all shortest paths. + */ class DijkstraAllPathsVisitor : public boost::default_dijkstra_visitor { + // The source vertex descriptor. VertexDescriptor source_; + + // Set of target node IDs. std::unordered_set targets_; + + // Reference to the current path being explored. Path& currentPath_; + + // Reference to the collection of all found paths. std::vector& allPaths_; + + // Reference to the vector of predecessors. std::vector& predecessors_; + + // Reference to the vector of distances. std::vector& distances_; public: + /** + * @brief Constructor for DijkstraAllPathsVisitor. + * @param source The source vertex descriptor. + * @param targets Set of target node IDs. + * @param path Reference to the current path being explored. + * @param paths Reference to the collection of all found paths. 
+ * @param predecessors Reference to the vector of predecessors. + * @param distances Reference to the vector of distances. + */ DijkstraAllPathsVisitor(VertexDescriptor source, std::unordered_set targets, Path& path, std::vector& paths, @@ -116,11 +217,25 @@ class DijkstraAllPathsVisitor : public boost::default_dijkstra_visitor { predecessors_(predecessors), distances_(distances) {} + /** + * @brief Returns the vector of predecessors. + * @return The vector of predecessors. + */ const std::vector& getPredecessors() const { return predecessors_; } + + /** + * @brief Returns the vector of distances. + * @return The vector of distances. + */ const std::vector& getDistances() const { return distances_; } + /** + * @brief Called when an edge is relaxed during Dijkstra's algorithm. + * @param edgeDesc The descriptor of the edge being relaxed. + * @param graph The graph being searched. + */ void edge_relaxed(EdgeDescriptor edgeDesc, const Graph& graph) { const Edge& edge = graph[edgeDesc]; if (targets_.empty() || targets_.find(edge.end_) != targets_.end()) { @@ -128,6 +243,11 @@ class DijkstraAllPathsVisitor : public boost::default_dijkstra_visitor { } } + /** + * @brief Rebuilds the path from the source to the given vertex. + * @param vertex The descriptor of the vertex. + * @param graph The graph being searched. 
+ */ void rebuild_path(VertexDescriptor vertex, const Graph& graph) { currentPath_.edges_.clear(); for (VertexDescriptor v = vertex; v != source_; v = predecessors_[v]) { From 2451fd76ef0247dc92e158a4a81197362e045575 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 24 Jun 2024 10:30:28 +0200 Subject: [PATCH 18/96] Rename ResultTable to Result in PathSearch --- src/engine/PathSearch.cpp | 4 ++-- src/engine/PathSearch.h | 2 +- test/PathSearchTest.cpp | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 3135fa00ed..fba074fe1d 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -87,8 +87,8 @@ bool PathSearch::knownEmptyResult() { return subtree_->knownEmptyResult(); }; vector PathSearch::resultSortedOn() const { return {}; }; // _____________________________________________________________________________ -ResultTable PathSearch::computeResult() { - shared_ptr subRes = subtree_->getResult(); +Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { + std::shared_ptr subRes = subtree_->getResult(); IdTable idTable{allocator()}; idTable.setNumColumns(getResultWidth()); diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index c8876accaf..f7ad8c4b17 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -96,7 +96,7 @@ class PathSearch : public Operation { vector resultSortedOn() const override; - ResultTable computeResult() override; + Result computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; private: diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index f6912100bc..8d04c001a9 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -6,7 +6,7 @@ #include "engine/PathSearch.h" #include "engine/QueryExecutionTree.h" -#include "engine/ResultTable.h" +#include "engine/Result.h" #include "engine/ValuesForTesting.h" 
#include "gmock/gmock.h" #include "util/IdTableHelpers.h" @@ -22,14 +22,14 @@ using Vars = std::vector>; } // namespace -ResultTable performPathSearch(PathSearchConfiguration config, IdTable input, +Result performPathSearch(PathSearchConfiguration config, IdTable input, Vars vars) { auto qec = getQec(); auto subtree = ad_utility::makeExecutionTree( qec, std::move(input), vars); PathSearch p = PathSearch(qec, std::move(subtree), config); - return p.computeResult(); + return p.computeResult(false); } TEST(PathSearchTest, constructor) { From d700df2106d89971c0909aecc833a75bf45d3167 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 24 Jun 2024 11:17:27 +0200 Subject: [PATCH 19/96] Format fix --- src/engine/CMakeLists.txt | 34 +- src/engine/GroupByHashMapOptimization.h | 5 +- src/engine/PathSearch.cpp | 16 +- src/engine/PathSearch.h | 29 +- src/engine/PathSearchVisitors.h | 15 +- src/engine/QueryPlanner.cpp | 24 +- src/engine/idTable/IdTableRow.h | 6 +- src/engine/raw_thoughts.txt | 2 - src/engine/sparqlExpressions/CMakeLists.txt | 31 +- .../ConditionalExpressions.cpp | 68 +- .../SparqlExpressionGenerators.h | 75 +- .../sparqlExpressions/StringExpressions.cpp | 5 +- src/parser/CMakeLists.txt | 45 +- src/parser/GraphPatternOperation.cpp | 17 +- src/parser/GraphPatternOperation.h | 3 +- src/parser/sparqlParser/CMakeLists.txt | 26 +- .../sparqlParser/SparqlQleverVisitor.cpp | 9 +- src/parser/sparqlParser/SparqlQleverVisitor.h | 3 +- src/util/CMakeLists.txt | 13 +- src/util/ComparisonWithNan.h | 42 +- test/CMakeLists.txt | 810 ++++++++++++------ test/ConfigManagerTest.cpp | 49 +- test/PathSearchTest.cpp | 103 ++- test/QueryPlannerTest.cpp | 224 +++-- test/QueryPlannerTestHelpers.h | 35 +- 25 files changed, 1001 insertions(+), 688 deletions(-) diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 8c628add78..166db6fd00 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -1,17 +1,19 @@ add_subdirectory(sparqlExpressions) 
-add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp) -qlever_target_link_libraries(SortPerformanceEstimator) -add_library(engine - Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp - IndexScan.cpp Join.cpp Sort.cpp - Distinct.cpp OrderBy.cpp Filter.cpp - Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp - OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan.cpp - Union.cpp MultiColumnJoin.cpp TransitivePathBase.cpp - TransitivePathHashMap.cpp TransitivePathBinSearch.cpp Service.cpp - Values.cpp Bind.cpp Minus.cpp RuntimeInformation.cpp CheckUsePatternTrick.cpp - VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp - CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp - TextLimit.cpp - idTable/CompressedExternalIdTable.h PathSearch.cpp) -qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams) + add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp) + qlever_target_link_libraries(SortPerformanceEstimator) add_library( + engine Engine.cpp QueryExecutionTree.cpp Operation.cpp Result + .cpp LocalVocab.cpp IndexScan.cpp Join.cpp Sort.cpp Distinct + .cpp OrderBy.cpp Filter.cpp Server.cpp QueryPlanner + .cpp QueryPlanningCostFactors.cpp OptionalJoin + .cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan + .cpp Union.cpp MultiColumnJoin.cpp TransitivePathBase + .cpp TransitivePathHashMap.cpp TransitivePathBinSearch + .cpp Service.cpp Values.cpp Bind.cpp Minus + .cpp RuntimeInformation.cpp CheckUsePatternTrick + .cpp VariableToColumnMap.cpp ExportQueryExecutionTrees + .cpp CartesianProductJoin.cpp TextIndexScanForWord + .cpp TextIndexScanForEntity.cpp TextLimit.cpp idTable / + CompressedExternalIdTable.h PathSearch.cpp) + qlever_target_link_libraries( + engine util index parser sparqlExpressions http + SortPerformanceEstimator Boost::iostreams) diff --git a/src/engine/GroupByHashMapOptimization.h 
b/src/engine/GroupByHashMapOptimization.h index d0be51bf36..8dc52b01ce 100644 --- a/src/engine/GroupByHashMapOptimization.h +++ b/src/engine/GroupByHashMapOptimization.h @@ -15,9 +15,8 @@ static constexpr auto valueAdder = []() { auto numericValueAdder = [](T value, double& sum, [[maybe_unused]] const bool& error) - requires std::is_arithmetic_v { - sum += static_cast(value); - }; + requires std::is_arithmetic_v + { sum += static_cast(value); }; auto nonNumericValueAdder = [](sparqlExpression::detail::NotNumeric, [[maybe_unused]] const double& sum, bool& error) { error = true; }; diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index fba074fe1d..86862a8003 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -31,9 +31,12 @@ PathSearch::PathSearch(QueryExecutionContext* qec, variableColumns_[config_.pathColumn_] = makeAlwaysDefinedColumn(2); variableColumns_[config_.edgeColumn_] = makeAlwaysDefinedColumn(3); - for (size_t edgePropertyIndex = 0; edgePropertyIndex < config_.edgeProperties_.size(); edgePropertyIndex++) { + for (size_t edgePropertyIndex = 0; + edgePropertyIndex < config_.edgeProperties_.size(); + edgePropertyIndex++) { auto edgeProperty = config_.edgeProperties_[edgePropertyIndex]; - variableColumns_[edgeProperty] = makeAlwaysDefinedColumn(4 + edgePropertyIndex); + variableColumns_[edgeProperty] = + makeAlwaysDefinedColumn(4 + edgePropertyIndex); } } @@ -101,16 +104,15 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { edgePropertyLists.push_back(dynSub.getColumn(edgePropertyIndex)); } - auto subStartColumn = subtree_->getVariableColumn(config_.start_); auto subEndColumn = subtree_->getVariableColumn(config_.end_); - buildGraph(dynSub.getColumn(subStartColumn), - dynSub.getColumn(subEndColumn), edgePropertyLists); + buildGraph(dynSub.getColumn(subStartColumn), dynSub.getColumn(subEndColumn), + edgePropertyLists); auto paths = findPaths(); - CALL_FIXED_SIZE(std::array{getResultWidth()}, 
&PathSearch::pathsToResultTable, - this, idTable, paths); + CALL_FIXED_SIZE(std::array{getResultWidth()}, + &PathSearch::pathsToResultTable, this, idTable, paths); } return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index f7ad8c4b17..ccb652fbbf 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -10,8 +10,8 @@ #include #include "engine/Operation.h" -#include "engine/VariableToColumnMap.h" #include "engine/PathSearchVisitors.h" +#include "engine/VariableToColumnMap.h" #include "global/Id.h" #include "index/Vocabulary.h" @@ -36,15 +36,15 @@ struct PathSearchConfiguration { // The source node ID. Id source_; // A list of target node IDs. - std::vector targets_; + std::vector targets_; // Variable representing the start column in the result. - Variable start_; + Variable start_; // Variable representing the end column in the result. - Variable end_; + Variable end_; // Variable representing the path column in the result. - Variable pathColumn_; + Variable pathColumn_; // Variable representing the edge column in the result. - Variable edgeColumn_; + Variable edgeColumn_; // Variables representing edge property columns. 
std::vector edgeProperties_; }; @@ -78,11 +78,18 @@ class PathSearch : public Operation { const PathSearchConfiguration& getConfig() const { return config_; } - ColumnIndex getStartIndex() const { return variableColumns_.at(config_.start_).columnIndex_; } - ColumnIndex getEndIndex() const { return variableColumns_.at(config_.end_).columnIndex_; } - ColumnIndex getPathIndex() const { return variableColumns_.at(config_.pathColumn_).columnIndex_; } - ColumnIndex getEdgeIndex() const { return variableColumns_.at(config_.edgeColumn_).columnIndex_; } - + ColumnIndex getStartIndex() const { + return variableColumns_.at(config_.start_).columnIndex_; + } + ColumnIndex getEndIndex() const { + return variableColumns_.at(config_.end_).columnIndex_; + } + ColumnIndex getPathIndex() const { + return variableColumns_.at(config_.pathColumn_).columnIndex_; + } + ColumnIndex getEdgeIndex() const { + return variableColumns_.at(config_.edgeColumn_).columnIndex_; + } string getCacheKeyImpl() const override; string getDescriptor() const override; diff --git a/src/engine/PathSearchVisitors.h b/src/engine/PathSearchVisitors.h index b3ec1a9c46..d1557e2393 100644 --- a/src/engine/PathSearchVisitors.h +++ b/src/engine/PathSearchVisitors.h @@ -4,14 +4,13 @@ #pragma once -#include #include - #include #include #include #include #include +#include /** * @brief Represents an edge in the graph. 
@@ -143,7 +142,8 @@ class AllPathsVisitor : public boost::default_dfs_visitor { */ void examine_edge(EdgeDescriptor edgeDesc, const Graph& graph) { const Edge& edge = graph[edgeDesc]; - if (targets_.empty() || (currentPath_.ends_with(edge.start_) && targets_.find(edge.end_) != targets_.end())) { + if (targets_.empty() || (currentPath_.ends_with(edge.start_) && + targets_.find(edge.end_) != targets_.end())) { auto pathCopy = currentPath_; pathCopy.push_back(edge); allPaths_.push_back(pathCopy); @@ -161,20 +161,23 @@ class AllPathsVisitor : public boost::default_dfs_visitor { } /** - * @brief Called when a vertex has been finished during the depth-first search. + * @brief Called when a vertex has been finished during the depth-first + * search. * @param vertex The descriptor of the vertex being finished. * @param graph The graph being searched. */ void finish_vertex(VertexDescriptor vertex, const Graph& graph) { (void)graph; - if (!currentPath_.empty() && Id::fromBits(currentPath_.lastNode().value()) == indexToId_[vertex]) { + if (!currentPath_.empty() && + Id::fromBits(currentPath_.lastNode().value()) == indexToId_[vertex]) { currentPath_.edges_.pop_back(); } } }; /** - * @brief Visitor for performing Dijkstra's algorithm to find all shortest paths. + * @brief Visitor for performing Dijkstra's algorithm to find all shortest + * paths. */ class DijkstraAllPathsVisitor : public boost::default_dijkstra_visitor { // The source vertex descriptor. 
diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index c13348cb2a..54089a5d51 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -2124,22 +2124,22 @@ void QueryPlanner::GraphPatternPlanner::visitPathSearch( }; auto source = tripleComponentToId(pathQuery.source_); std::vector targets; - for (auto comp: pathQuery.targets_) { + for (auto comp : pathQuery.targets_) { targets.push_back(tripleComponentToId(comp)); } - auto config = PathSearchConfiguration{ - pathQuery.algorithm_, - std::move(source), - std::move(targets), - std::move(pathQuery.start_.value()), - std::move(pathQuery.end_.value()), - std::move(pathQuery.pathColumn_.value()), - std::move(pathQuery.edgeColumn_.value()), - std::move(pathQuery.edgeProperties_) - }; + auto config = + PathSearchConfiguration{pathQuery.algorithm_, + std::move(source), + std::move(targets), + std::move(pathQuery.start_.value()), + std::move(pathQuery.end_.value()), + std::move(pathQuery.pathColumn_.value()), + std::move(pathQuery.edgeColumn_.value()), + std::move(pathQuery.edgeProperties_)}; for (auto& sub : candidatesIn) { - auto pathSearch = std::make_shared(PathSearch(qec_, std::move(sub._qet), config)); + auto pathSearch = std::make_shared( + PathSearch(qec_, std::move(sub._qet), config)); auto plan = makeSubtreePlan(std::move(pathSearch)); candidatesOut.push_back(std::move(plan)); } diff --git a/src/engine/idTable/IdTableRow.h b/src/engine/idTable/IdTableRow.h index 21294df659..b9b0c3a7fe 100644 --- a/src/engine/idTable/IdTableRow.h +++ b/src/engine/idTable/IdTableRow.h @@ -299,9 +299,9 @@ class RowReferenceImpl { } // Assignment from a `const` RowReference to a `mutable` RowReference - This& operator=(const RowReferenceWithRestrictedAccess< - Table, ad_utility::IsConst::True>& other) && - requires(!isConst) { + This& operator=( + const RowReferenceWithRestrictedAccess< + Table, ad_utility::IsConst::True>& other) && requires(!isConst) { return assignmentImpl(*this, other); } 
diff --git a/src/engine/raw_thoughts.txt b/src/engine/raw_thoughts.txt index df5ebe05bb..a617bf7616 100644 --- a/src/engine/raw_thoughts.txt +++ b/src/engine/raw_thoughts.txt @@ -146,5 +146,3 @@ I don't see a problem why it couldn't also be a parameter per context variable. HARDER THAN BROCCOLI: cross product with contexts cross product (not full) with co-occurring n-tuples of entities (?x ?y ?z - see graphs above). - - diff --git a/src/engine/sparqlExpressions/CMakeLists.txt b/src/engine/sparqlExpressions/CMakeLists.txt index c8c998f1a1..9dd47c45e1 100644 --- a/src/engine/sparqlExpressions/CMakeLists.txt +++ b/src/engine/sparqlExpressions/CMakeLists.txt @@ -1,22 +1,11 @@ -add_library(sparqlExpressions - SparqlExpressionValueGetters.cpp - NaryExpression.cpp - SetOfIntervals.cpp - SparqlExpressionPimpl.cpp - SampleExpression.cpp - RelationalExpressions.cpp - AggregateExpression.cpp - RegexExpression.cpp - LangExpression.cpp - NumericUnaryExpressions.cpp - NumericBinaryExpressions.cpp - DateExpressions.cpp - StringExpressions.cpp - IsSomethingExpressions.cpp - ConditionalExpressions.cpp - SparqlExpressionTypes.cpp - SparqlExpression.cpp - ConvertToNumericExpression.cpp - RdfTermExpressions.cpp) +add_library( + sparqlExpressions SparqlExpressionValueGetters.cpp NaryExpression + .cpp SetOfIntervals.cpp SparqlExpressionPimpl.cpp SampleExpression + .cpp RelationalExpressions.cpp AggregateExpression.cpp RegexExpression + .cpp LangExpression.cpp NumericUnaryExpressions + .cpp NumericBinaryExpressions.cpp DateExpressions.cpp StringExpressions + .cpp IsSomethingExpressions.cpp ConditionalExpressions + .cpp SparqlExpressionTypes.cpp SparqlExpression + .cpp ConvertToNumericExpression.cpp RdfTermExpressions.cpp) -qlever_target_link_libraries(sparqlExpressions util index Boost::url) + qlever_target_link_libraries(sparqlExpressions util index Boost::url) diff --git a/src/engine/sparqlExpressions/ConditionalExpressions.cpp b/src/engine/sparqlExpressions/ConditionalExpressions.cpp 
index 29aec5a7b2..2f0454934b 100644 --- a/src/engine/sparqlExpressions/ConditionalExpressions.cpp +++ b/src/engine/sparqlExpressions/ConditionalExpressions.cpp @@ -66,35 +66,36 @@ class CoalesceExpression : public VariadicExpression { auto visitConstantExpressionResult = [&nextUnboundIndices, &unboundIndices, &isUnbound, &result, - ctx ](T && childResult) + ctx](T&& childResult) requires isConstantResult { - IdOrLiteralOrIri constantResult{AD_FWD(childResult)}; - if (isUnbound(constantResult)) { - nextUnboundIndices = std::move(unboundIndices); - return; - } - ad_utility::chunkedForLoop( - 0, unboundIndices.size(), - [&unboundIndices, &result, &constantResult](size_t idx) { - // GCC 12 & 13 report this as potential uninitialized - // use of a variable when compiling with -O3, which seems to - // be a false positive, so we suppress the warning here. See - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109561 for - // more information. - DISABLE_UNINITIALIZED_WARNINGS - result[unboundIndices[idx]] = constantResult; - }, - [ctx]() { ctx->cancellationHandle_->throwIfCancelled(); }); - }; + IdOrLiteralOrIri constantResult{AD_FWD(childResult)}; + if (isUnbound(constantResult)) { + nextUnboundIndices = std::move(unboundIndices); + return; + } + ad_utility::chunkedForLoop( + 0, unboundIndices.size(), + [&unboundIndices, &result, &constantResult](size_t idx) { + // GCC 12 & 13 report this as potential uninitialized + // use of a variable when compiling with -O3, which seems to + // be a false positive, so we suppress the warning here. See + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109561 for + // more information. + DISABLE_UNINITIALIZED_WARNINGS + result[unboundIndices[idx]] = constantResult; + }, + [ctx]() { ctx->cancellationHandle_->throwIfCancelled(); }); + }; ENABLE_UNINITIALIZED_WARNINGS // For a single child result, write the result at the indices where the // result so far is unbound, and the child result is bound. 
While doing so, // set up the `nextUnboundIndices` vector for the next step. auto visitVectorExpressionResult = - [&result, &unboundIndices, &nextUnboundIndices, &ctx, & - isUnbound ](T && childResult) - requires std::is_rvalue_reference_v { + [&result, &unboundIndices, &nextUnboundIndices, &ctx, + &isUnbound](T&& childResult) + requires std::is_rvalue_reference_v + { static_assert(!isConstantResult); auto gen = detail::makeGenerator(AD_FWD(childResult), ctx->size(), ctx); // Iterator to the next index where the result so far is unbound. @@ -126,18 +127,17 @@ class CoalesceExpression : public VariadicExpression { [ctx]() { ctx->cancellationHandle_->throwIfCancelled(); }); }; auto visitExpressionResult = - [ - &visitConstantExpressionResult, &visitVectorExpressionResult - ](T && childResult) - requires std::is_rvalue_reference_v { - // If the previous expression result is a constant, we can skip the - // loop. - if constexpr (isConstantResult) { - visitConstantExpressionResult(AD_FWD(childResult)); - } else { - visitVectorExpressionResult(AD_FWD(childResult)); - } - }; + [&visitConstantExpressionResult, + &visitVectorExpressionResult]( + T&& childResult) requires std::is_rvalue_reference_v { + // If the previous expression result is a constant, we can skip the + // loop. + if constexpr (isConstantResult) { + visitConstantExpressionResult(AD_FWD(childResult)); + } else { + visitVectorExpressionResult(AD_FWD(childResult)); + } + }; // Evaluate the children one by one, stopping as soon as all result are // bound. diff --git a/src/engine/sparqlExpressions/SparqlExpressionGenerators.h b/src/engine/sparqlExpressions/SparqlExpressionGenerators.h index 6197cf0e32..d56dff1c67 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionGenerators.h +++ b/src/engine/sparqlExpressions/SparqlExpressionGenerators.h @@ -15,9 +15,9 @@ namespace sparqlExpression::detail { /// Convert a variable to a vector of all the Ids it is bound to in the /// `context`. 
-inline std::span getIdsFromVariable(const ::Variable& variable, - const EvaluationContext* context, - size_t beginIndex, size_t endIndex) { +inline std::span getIdsFromVariable( + const ::Variable& variable, const EvaluationContext* context, + size_t beginIndex, size_t endIndex) { const auto& inputTable = context->_inputTable; const auto& varToColMap = context->_variableToColumnMap; @@ -28,15 +28,18 @@ inline std::span getIdsFromVariable(const ::Variable& variable, std::span completeColumn = inputTable.getColumn(columnIndex); - AD_CONTRACT_CHECK(beginIndex <= endIndex && endIndex <= completeColumn.size()); - return {completeColumn.begin() + beginIndex, completeColumn.begin() + endIndex}; + AD_CONTRACT_CHECK(beginIndex <= endIndex && + endIndex <= completeColumn.size()); + return {completeColumn.begin() + beginIndex, + completeColumn.begin() + endIndex}; } // Overload that reads the `beginIndex` and the `endIndex` directly from the // `context -inline std::span getIdsFromVariable(const ::Variable& variable, - const EvaluationContext* context) { - return getIdsFromVariable(variable, context, context->_beginIndex, context->_endIndex); +inline std::span getIdsFromVariable( + const ::Variable& variable, const EvaluationContext* context) { + return getIdsFromVariable(variable, context, context->_beginIndex, + context->_endIndex); } /// Generators that yield `numItems` items for the various @@ -45,8 +48,9 @@ inline std::span getIdsFromVariable(const ::Variable& variable, /// `SparqlExpressionValueGetters` with an already bound `EvaluationContext`. 
template requires isConstantResult && std::invocable -cppcoro::generator>> resultGenerator( - T constant, size_t numItems, Transformation transformation = {}) { +cppcoro::generator>> +resultGenerator(T constant, size_t numItems, + Transformation transformation = {}) { auto transformed = transformation(constant); for (size_t i = 0; i < numItems; ++i) { co_yield transformed; @@ -55,9 +59,10 @@ cppcoro::generator>> template requires std::ranges::input_range -auto resultGenerator(T vector, size_t numItems, Transformation transformation = {}) - -> cppcoro::generator>>> { +auto resultGenerator(T vector, size_t numItems, + Transformation transformation = {}) + -> cppcoro::generator>>> { AD_CONTRACT_CHECK(numItems == vector.size()); for (auto& element : vector) { auto cpy = transformation(std::move(element)); @@ -66,7 +71,8 @@ auto resultGenerator(T vector, size_t numItems, Transformation transformation = } template -inline cppcoro::generator>> +inline cppcoro::generator< + const std::decay_t>> resultGenerator(ad_utility::SetOfIntervals set, size_t targetSize, Transformation transformation = {}) { size_t i = 0; @@ -90,36 +96,43 @@ resultGenerator(ad_utility::SetOfIntervals set, size_t targetSize, /// Return a generator that yields `numItems` many items for the various /// `SingleExpressionResult` template -auto makeGenerator(Input&& input, size_t numItems, const EvaluationContext* context, +auto makeGenerator(Input&& input, size_t numItems, + const EvaluationContext* context, Transformation transformation = {}) { if constexpr (ad_utility::isSimilar<::Variable, Input>) { std::span inputWithVariableResolved{ getIdsFromVariable(std::forward(input), context)}; return resultGenerator(inputWithVariableResolved, numItems, transformation); } else { - return resultGenerator(std::forward(input), numItems, transformation); + return resultGenerator(std::forward(input), numItems, + transformation); } } /// Generate `numItems` many values from the `input` and apply the /// `valueGetter` 
to each of the values. -inline auto valueGetterGenerator = []( - size_t numElements, EvaluationContext* context, - Input&& input, ValueGetter&& valueGetter) { - auto transformation = [ context, valueGetter ](I && i) - requires std::invocable { - context->cancellationHandle_->throwIfCancelled(); - return valueGetter(AD_FWD(i), context); - }; - return makeGenerator(std::forward(input), numElements, context, transformation); -}; +inline auto valueGetterGenerator = + []( + size_t numElements, EvaluationContext* context, Input&& input, + ValueGetter&& valueGetter) { + auto transformation = + [context, valueGetter](I&& i) + requires std::invocable { + context->cancellationHandle_->throwIfCancelled(); + return valueGetter(AD_FWD(i), context); + }; + return makeGenerator(std::forward(input), numElements, context, + transformation); + }; /// Do the following `numItems` times: Obtain the next elements e_1, ..., e_n /// from the `generators` and yield `function(e_1, ..., e_n)`, also as a /// generator. inline auto applyFunction = []( - Function&& function, size_t numItems, Generators... generators) - -> cppcoro::generator> { + Function&& function, size_t numItems, + Generators... generators) + -> cppcoro::generator< + std::invoke_result_t> { // A tuple holding one iterator to each of the generators. std::tuple iterators{generators.begin()...}; @@ -150,7 +163,8 @@ auto applyOperation(size_t numElements, Operation&&, EvaluationContext* context, // Function that takes all the generators as a parameter pack and computes the // generator for the operation result; - auto getResultFromGenerators = std::bind_front(applyFunction, Function{}, numElements); + auto getResultFromGenerators = + std::bind_front(applyFunction, Function{}, numElements); /// The `ValueGetters` are stored in a `std::tuple`, so we have to extract /// them via `std::apply`. 
First set up a lambda that performs the actual @@ -159,7 +173,8 @@ auto applyOperation(size_t numElements, Operation&&, EvaluationContext* context, // Both `operands` and `valueGetters` are parameter packs of equal size, // so there will be one call to `getValue` for each pair of // (`operands`, `valueGetter`) - return getResultFromGenerators(getValue(std::forward(operands), valueGetters)...); + return getResultFromGenerators( + getValue(std::forward(operands), valueGetters)...); }; return std::apply(getResultFromValueGetters, ValueGetters{}); diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index d8828fa748..ef9ea13b08 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -316,8 +316,9 @@ class ConcatExpression : public detail::VariadicExpression { // were constants (see above). std::variant result{std::string{""}}; auto visitSingleExpressionResult = - [&ctx, &result ](T && s) - requires std::is_rvalue_reference_v { + [&ctx, &result](T&& s) + requires std::is_rvalue_reference_v + { if constexpr (isConstantResult) { std::string strFromConstant = StringValueGetter{}(s, ctx).value_or(""); if (std::holds_alternative(result)) { diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt index 29ca836c71..d9f031160b 100644 --- a/src/parser/CMakeLists.txt +++ b/src/parser/CMakeLists.txt @@ -1,29 +1,22 @@ add_library(rdfEscaping RdfEscaping.h RdfEscaping.cpp) -qlever_target_link_libraries(rdfEscaping) + qlever_target_link_libraries(rdfEscaping) -add_subdirectory(sparqlParser) -add_subdirectory(data) - -add_library(parser - sparqlParser/SparqlQleverVisitor.cpp - SparqlParser.cpp - ParsedQuery.cpp - TurtleParser.cpp - Tokenizer.cpp - ContextFileParser.cpp - TurtleTokenId.h - ParallelBuffer.cpp - SparqlParserHelpers.cpp - TripleComponent.cpp - GraphPatternOperation.cpp - PropertyPath.cpp - data/SparqlFilter.cpp - SelectClause.cpp - 
GraphPatternOperation.cpp - # The `Variable.cpp` from the subdirectory is linked here because otherwise we get linking errors. - GraphPattern.cpp data/Variable.cpp - Iri.cpp - Literal.cpp - LiteralOrIri.cpp) -qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2::re2 util engine) + add_subdirectory(sparqlParser) add_subdirectory(data) + add_library( + parser sparqlParser / + SparqlQleverVisitor.cpp SparqlParser.cpp ParsedQuery + .cpp TurtleParser.cpp Tokenizer.cpp ContextFileParser + .cpp TurtleTokenId.h ParallelBuffer.cpp SparqlParserHelpers + .cpp TripleComponent.cpp GraphPatternOperation + .cpp PropertyPath.cpp data / + SparqlFilter.cpp SelectClause.cpp GraphPatternOperation + .cpp +#The `Variable \ + .cpp` from the subdirectory is linked here because otherwise we get \ + linking errors. + GraphPattern.cpp data / + Variable.cpp Iri.cpp Literal.cpp LiteralOrIri.cpp) + qlever_target_link_libraries( + parser sparqlParser parserData sparqlExpressions rdfEscaping + re2::re2 util engine) diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index de0173e1e7..b51b3cec51 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -85,20 +85,25 @@ void PathQuery::addParameter(SparqlTriple& triple) { } else if (predicate.getIri().toStringRepresentation().ends_with("end>")) { AD_CORRECTNESS_CHECK(object.isVariable()); end_ = object.getVariable(); - } else if (predicate.getIri().toStringRepresentation().ends_with("pathColumn>")) { + } else if (predicate.getIri().toStringRepresentation().ends_with( + "pathColumn>")) { AD_CORRECTNESS_CHECK(object.isVariable()); pathColumn_ = object.getVariable(); - } else if (predicate.getIri().toStringRepresentation().ends_with("edgeColumn>")) { + } else if (predicate.getIri().toStringRepresentation().ends_with( + "edgeColumn>")) { AD_CORRECTNESS_CHECK(object.isVariable()); edgeColumn_ = object.getVariable(); - } else if 
(predicate.getIri().toStringRepresentation().ends_with("edgeProperty>")) { + } else if (predicate.getIri().toStringRepresentation().ends_with( + "edgeProperty>")) { AD_CORRECTNESS_CHECK(object.isVariable()); edgeProperties_.push_back(object.getVariable()); - } else if (predicate.getIri().toStringRepresentation().ends_with("algorithm>")) { + } else if (predicate.getIri().toStringRepresentation().ends_with( + "algorithm>")) { AD_CORRECTNESS_CHECK(object.isIri()); if (object.getIri().toStringRepresentation().ends_with("allPaths>")) { algorithm_ = PathSearchAlgorithm::ALL_PATHS; - } else if (object.getIri().toStringRepresentation().ends_with("shortestPaths>")) { + } else if (object.getIri().toStringRepresentation().ends_with( + "shortestPaths>")) { algorithm_ = PathSearchAlgorithm::SHORTEST_PATHS; } else { AD_THROW("Unsupported algorithm in PathSearch"); @@ -110,7 +115,7 @@ void PathQuery::addParameter(SparqlTriple& triple) { // ____________________________________________________________________________ void PathQuery::fromBasicPattern(const BasicGraphPattern& pattern) { - for (SparqlTriple triple: pattern._triples) { + for (SparqlTriple triple : pattern._triples) { addParameter(triple); } } diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 87b79e7354..f49e4fe8ae 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -169,8 +169,7 @@ struct Bind { // class actually becomes `using GraphPatternOperation = std::variant<...>` using GraphPatternOperationVariant = std::variant; + Values, Service, PathQuery, Minus, GroupGraphPattern>; struct GraphPatternOperation : public GraphPatternOperationVariant, public VisitMixin { diff --git a/src/parser/sparqlParser/CMakeLists.txt b/src/parser/sparqlParser/CMakeLists.txt index 3ba1f47429..fd1fd85ba6 100644 --- a/src/parser/sparqlParser/CMakeLists.txt +++ b/src/parser/sparqlParser/CMakeLists.txt @@ -1,10 +1,16 @@ -add_library(sparqlParser 
SparqlQleverVisitor.h - generated/SparqlAutomaticBaseListener.h generated/SparqlAutomaticBaseListener.cpp - generated/SparqlAutomaticLexer.h generated/SparqlAutomaticLexer.cpp - generated/SparqlAutomaticListener.h generated/SparqlAutomaticListener.cpp - generated/SparqlAutomaticParser.h generated/SparqlAutomaticParser.cpp - generated/SparqlAutomaticVisitor.h generated/SparqlAutomaticVisitor.cpp) -qlever_target_link_libraries(sparqlParser antlr4_static sparqlExpressions rdfEscaping util) -# Silence warnings in files that are auto-generated by ANTLR. -# TODO Submit a pull request to ANTLR to fix those warnings. -target_compile_options(sparqlParser PRIVATE -Wno-logical-op-parentheses -Wno-parentheses) +add_library(sparqlParser SparqlQleverVisitor.h generated / + SparqlAutomaticBaseListener.h generated / + SparqlAutomaticBaseListener.cpp generated / + SparqlAutomaticLexer.h generated / + SparqlAutomaticLexer.cpp generated / + SparqlAutomaticListener.h generated / + SparqlAutomaticListener.cpp generated / + SparqlAutomaticParser.h generated / + SparqlAutomaticParser.cpp generated / + SparqlAutomaticVisitor.h generated / SparqlAutomaticVisitor.cpp) + qlever_target_link_libraries( + sparqlParser antlr4_static sparqlExpressions rdfEscaping util) +#Silence warnings in files that are auto - generated by ANTLR. +#TODO < joka921> Submit a pull request to ANTLR to fix those warnings. 
+ target_compile_options(sparqlParser PRIVATE - Wno - logical - op - + parentheses - Wno - parentheses) diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index d5c0f640db..c378b69781 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -725,10 +725,11 @@ GraphPatternOperation Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { Iri serviceIri = std::get(varOrIri); if (serviceIri.toSparql() == "") { - - auto parsePathQuery = [](parsedQuery::PathQuery& pathQuery, const parsedQuery::GraphPatternOperation& op){ + auto parsePathQuery = [](parsedQuery::PathQuery& pathQuery, + const parsedQuery::GraphPatternOperation& op) { if (std::holds_alternative(op)) { - pathQuery.fromBasicPattern(std::get(op)); + pathQuery.fromBasicPattern( + std::get(op)); } else if (std::holds_alternative(op)) { auto pattern = std::get(op); pathQuery.childGraphPattern_ = std::move(pattern._child); @@ -739,7 +740,7 @@ GraphPatternOperation Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { parsedQuery::GraphPattern graphPattern = visit(ctx->groupGraphPattern()); parsedQuery::PathQuery pathQuery; - for (auto op: graphPattern._graphPatterns) { + for (auto op : graphPattern._graphPatterns) { parsePathQuery(pathQuery, op); } diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h index 195ca98975..5654d1923f 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.h +++ b/src/parser/sparqlParser/SparqlQleverVisitor.h @@ -245,7 +245,8 @@ class SparqlQleverVisitor { [[noreturn]] static parsedQuery::GraphPatternOperation visit( const Parser::GraphGraphPatternContext* ctx); - parsedQuery::GraphPatternOperation visit(Parser::ServiceGraphPatternContext* ctx); + parsedQuery::GraphPatternOperation visit( + Parser::ServiceGraphPatternContext* ctx); parsedQuery::GraphPatternOperation visit(Parser::BindContext* ctx); diff --git 
a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index e11014b318..ae38fa02aa 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -1,5 +1,8 @@ -add_subdirectory(ConfigManager) -add_subdirectory(MemorySize) -add_subdirectory(http) -add_library(util GeoSparqlHelpers.cpp antlr/ANTLRErrorHandling.cpp ParseException.cpp Conversions.cpp Date.cpp antlr/GenerateAntlrExceptionMetadata.cpp CancellationHandle.cpp StringUtils.cpp) -qlever_target_link_libraries(util re2::re2) +add_subdirectory(ConfigManager) add_subdirectory(MemorySize) + add_subdirectory(http) + add_library(util GeoSparqlHelpers.cpp antlr / + ANTLRErrorHandling.cpp ParseException.cpp Conversions + .cpp Date.cpp antlr / + GenerateAntlrExceptionMetadata.cpp CancellationHandle + .cpp StringUtils.cpp) + qlever_target_link_libraries(util re2::re2) diff --git a/src/util/ComparisonWithNan.h b/src/util/ComparisonWithNan.h index 748ba429a5..208b3307e4 100644 --- a/src/util/ComparisonWithNan.h +++ b/src/util/ComparisonWithNan.h @@ -26,27 +26,27 @@ namespace ad_utility { template inline auto makeComparatorForNans(Comparator comparator) { return [comparator](const A& a, const B& b) - requires std::is_invocable_r_v { - auto isNan = [](const T& t) { - if constexpr (std::is_floating_point_v) { - return std::isnan(t); - } else { - (void)t; - return false; - } - }; + requires std::is_invocable_r_v { + auto isNan = [](const T& t) { + if constexpr (std::is_floating_point_v) { + return std::isnan(t); + } else { + (void)t; + return false; + } + }; - bool aIsNan = isNan(a); - bool bIsNan = isNan(b); - if (aIsNan && bIsNan) { - return comparator(0.0, 0.0); - } else if (aIsNan) { - return comparator(1.0, 0.0); - } else if (bIsNan) { - return comparator(0.0, 1.0); - } else { - return comparator(a, b); - } - }; + bool aIsNan = isNan(a); + bool bIsNan = isNan(b); + if (aIsNan && bIsNan) { + return comparator(0.0, 0.0); + } else if (aIsNan) { + return comparator(1.0, 0.0); + } else if (bIsNan) { + return 
comparator(0.0, 1.0); + } else { + return comparator(a, b); + } + }; } } // namespace ad_utility diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6e74705935..e2c110ad2b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,397 +1,653 @@ include(GoogleTest) -# Needed for creating the `testUil`-library. -add_subdirectory(util) - -# Link binary ${basename} against `gmock_main`, the threading library, the -# general test utilities and all libraries that are specified as additional -# arguments. -function (linkTest basename) - qlever_target_link_libraries(${basename} ${ARGN} GTest::gtest GTest::gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) -endfunction() - -# Add the executable ${basename} that is compiled from the source file -# "${basename}".cpp -function (addTest basename) - add_executable(${basename} "${basename}.cpp") -endfunction() - -# Usage: `linkAndDiscoverTest(basename, [additionalLibraries...]` -# Link the executable `basename` against `gmock_main`,threading library, -# and all `additionLibraries` which are passed as arguments. -# Then run `gtest_discover_tests` to add the tests cases from the executable. -# Typically you should use `addAndLinkTest` (below) but this function can be used, -# if a test binary requires multiple sources -function(linkAndDiscoverTest basename) - linkTest(${basename} ${ARGN}) - gtest_discover_tests(${basename} ${basename} DISCOVERY_TIMEOUT 600) -endfunction() - -# Usage: `linkAndDiscoverTestSerial(basename, [additionalLibraries...]` -# Similar to `linkAndDiscoverTestSerial` but also requires that the test is run serially -# (without any of the other test cases running in parallel). This can be -# required e.g. if several tests cases write to the same file. 
-function(linkAndDiscoverTestSerial basename) - linkTest(${basename} ${ARGN}) - gtest_discover_tests(${basename} ${basename} PROPERTIES RUN_SERIAL - TRUE) -endfunction() - -if (SINGLE_TEST_BINARY) - message(STATUS "All tests are linked into a single executable `QLeverAllUnitTestsMain`") - add_executable(QLeverAllUnitTestsMain) - qlever_target_link_libraries(QLeverAllUnitTestsMain gtest gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) - gtest_discover_tests(QLeverAllUnitTestsMain QLeverAllUnitTestsMain PROPERTIES RUN_SERIAL - TRUE) -else() - message(STATUS "The tests are split over multiple binaries") - -endif() -# Usage: `addAndLinkTest(basename, [additionalLibraries...]` -# Add a GTest/GMock test case that is called `basename` and compiled from a file called -# `basename.cpp`. All tests are linked against `gmock_main` and the threading library. -# additional libraries against which the test case has to be linked can be specified as -# additional arguments after the `basename` -function(addLinkAndDiscoverTest basename) - if (SINGLE_TEST_BINARY) - target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) - qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) - else() - addTest(${basename}) - linkAndDiscoverTest(${basename} ${ARGN}) - endif() +#Needed for creating the `testUil`- library. + add_subdirectory(util) + +#Link binary ${basename } against `gmock_main`, the threading library, the +#general test utilities and all libraries that are specified as additional +#arguments. 
+ function(linkTest basename) qlever_target_link_libraries(${basename} ${ + ARGN} GTest::gtest GTest::gmock_main testUtil ${ + CMAKE_THREAD_LIBS_INIT}) endfunction() + +#Add the executable ${basename } that is compiled from the source file +#"${basename}".cpp + function(addTest basename) add_executable($ { + basename + } "${basename}.cpp") endfunction() + +#Usage : `linkAndDiscoverTest(basename, [additionalLibraries...]` +#Link the executable `basename` against `gmock_main`, threading library, +#and all `additionLibraries` which are passed as arguments. +#Then run `gtest_discover_tests` to add the tests cases from the executable. +#Typically you should use `addAndLinkTest` ( \ + below)but this function can be used, +#if a test binary requires multiple sources + function(linkAndDiscoverTest basename) linkTest(${basename} ${ + ARGN}) gtest_discover_tests(${basename} ${ + basename} DISCOVERY_TIMEOUT 600) endfunction() + +#Usage : `linkAndDiscoverTestSerial(basename, [additionalLibraries...]` +#Similar to `linkAndDiscoverTestSerial` but also requires that the test is run \ + serially +#(without any of the other test cases running in parallel).This can be +#required e.g.if several tests cases write to the same file. 
+ function(linkAndDiscoverTestSerial basename) linkTest(${ + basename} ${ARGN}) gtest_discover_tests(${basename} ${ + basename} PROPERTIES RUN_SERIAL TRUE) endfunction() + + if (SINGLE_TEST_BINARY) message( + STATUS + "All tests are linked into a single executable " + "`QLeverAllUnitTestsMain`") add_executable(QLeverAllUnitTestsMain) + qlever_target_link_libraries( + QLeverAllUnitTestsMain gtest gmock_main testUtil + ${CMAKE_THREAD_LIBS_INIT}) + gtest_discover_tests( + QLeverAllUnitTestsMain + QLeverAllUnitTestsMain PROPERTIES + RUN_SERIAL + TRUE) else() message(STATUS + "The " + "tests " + "are " + "split " + "over " + "multiple " + "binaries") + + endif() +#Usage : `addAndLinkTest(basename, [additionalLibraries...]` +#Add a GTest / \ + GMock test case that is called `basename` and compiled from a file called +# `basename.cpp` \ + .All tests are linked against `gmock_main` and the threading library. +#additional libraries against which the test case has to be linked can be \ + specified as +#additional arguments after the `basename` + function(addLinkAndDiscoverTest basename) if (SINGLE_TEST_BINARY) target_sources( + QLeverAllUnitTestsMain PUBLIC ${ + basename} + .cpp) qlever_target_link_libraries(QLeverAllUnitTestsMain ${ + ARGN}) else() addTest(${ + basename}) linkAndDiscoverTest(${ + basename} ${ARGN}) endif() + + endfunction() + +#Usage : `addAndLinkTestSerial(basename, [additionalLibraries...]` +#Similar to `addAndLinkTest` but also requires that the test is run serially +#(without any of the other test cases running in parallel).This can be +#required e.g.if several tests cases write to the same file. 
+ function( + addLinkAndDiscoverTestSerial + basename) if (SINGLE_TEST_BINARY) + target_sources( + QLeverAllUnitTestsMain + PUBLIC ${basename} + .cpp) + qlever_target_link_libraries( + QLeverAllUnitTestsMain + ${ARGN}) else() addTest(${ + basename}) linkAndDiscoverTestSerial(${ + basename} ${ + ARGN}) endif() endfunction() + +#Only compile and link the test, but do not run it. +#Usage : Same as for the two functions above. + function( + addAndLinkTest + basename) + addTest(${ + basename}) + linkTest(${ + basename} ${ + ARGN}) + endfunction() + + add_subdirectory( + engine) + add_subdirectory( + parser) + add_subdirectory( + index) + + addLinkAndDiscoverTest( + ValueIdComparatorsTest + util) + + addLinkAndDiscoverTest( + SparqlParserTest parser engine + sparqlExpressions) + + addLinkAndDiscoverTest( + StringUtilsTest util) + + addLinkAndDiscoverTest( + CryptographicHashUtilsTest + util) + + addLinkAndDiscoverTest( + CacheTest) + + addLinkAndDiscoverTest( + ConcurrentCacheTest) + +#This test also seems to use the same filenames and should be fixed. + addLinkAndDiscoverTestSerial( + FileTest) + + addLinkAndDiscoverTest( + Simple8bTest) + + addLinkAndDiscoverTest( + ContextFileParserTest + parser) + + addLinkAndDiscoverTest( + IndexMetaDataTest + index) + +#Stxxl currently always uses a file./ -stxxl.disk for all indices, which +#makes it impossible to run the test cases for the Index class in parallel. 
+#TODO < qup42, joka921> fix this + addLinkAndDiscoverTestSerial( + IndexTest + index) + + addLinkAndDiscoverTest( + EngineTest + engine) + + addLinkAndDiscoverTest( + JoinTest + engine) + + addLinkAndDiscoverTest(TextLimitOperationTest engine) + + addLinkAndDiscoverTest( + QueryPlannerTest + engine) + + addLinkAndDiscoverTest( + HashMapTest) + + addLinkAndDiscoverTest( + HashSetTest) + + addLinkAndDiscoverTestSerial( + GroupByTest engine) + + addLinkAndDiscoverTest( + VocabularyGeneratorTest + index) + + addLinkAndDiscoverTest( + HasPredicateScanTest + engine) + + addLinkAndDiscoverTest( + MmapVectorTest) + +#BufferedVectorTest also uses conflicting filenames. + addLinkAndDiscoverTestSerial( + BufferedVectorTest) + + addLinkAndDiscoverTest(UnionTest + engine) + + if (SINGLE_TEST_BINARY) + target_sources( + QLeverAllUnitTestsMain + PUBLIC + TokenTest + .cpp TokenTestCtreHelper + .cpp) qlever_target_link_libraries(QLeverAllUnitTestsMain parser + re2 util) else() + add_executable(TokenTest TokenTest + .cpp + TokenTestCtreHelper + .cpp) + linkAndDiscoverTest(TokenTest parser re2 util) + endif() -endfunction() + addLinkAndDiscoverTestSerial( + TurtleParserTest + parser re2) -# Usage: `addAndLinkTestSerial(basename, [additionalLibraries...]` -# Similar to `addAndLinkTest` but also requires that the test is run serially -# (without any of the other test cases running in parallel). This can be -# required e.g. if several tests cases write to the same file. -function(addLinkAndDiscoverTestSerial basename) - if (SINGLE_TEST_BINARY) - target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) - qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) - else() - addTest(${basename}) - linkAndDiscoverTestSerial(${basename} ${ARGN}) - endif() -endfunction() + addLinkAndDiscoverTest(MultiColumnJoinTest + engine) -# Only compile and link the test, but do not run it. -# Usage: Same as for the two functions above. 
-function(addAndLinkTest basename) - addTest(${basename}) - linkTest(${basename} ${ARGN}) -endfunction() + addLinkAndDiscoverTest( + IdTableTest util) -add_subdirectory(engine) -add_subdirectory(parser) -add_subdirectory(index) + addLinkAndDiscoverTest( + TransitivePathTest + engine) -addLinkAndDiscoverTest(ValueIdComparatorsTest util) + addLinkAndDiscoverTest( + PathSearchTest + engine) -addLinkAndDiscoverTest(SparqlParserTest parser engine sparqlExpressions) + addLinkAndDiscoverTest( + BatchedPipelineTest) -addLinkAndDiscoverTest(StringUtilsTest util) + addLinkAndDiscoverTest( + TupleHelpersTest) -addLinkAndDiscoverTest(CryptographicHashUtilsTest util) + addLinkAndDiscoverTest( + StringSortComparatorTest) + + addLinkAndDiscoverTest( + PriorityQueueTest) + + addLinkAndDiscoverTest( + SynchronizedTest) -addLinkAndDiscoverTest(CacheTest) + addLinkAndDiscoverTest( + AllocatorWithLimitTest) -addLinkAndDiscoverTest(ConcurrentCacheTest) + addLinkAndDiscoverTest( + MinusTest + engine) -# This test also seems to use the same filenames and should be fixed. -addLinkAndDiscoverTestSerial(FileTest) - -addLinkAndDiscoverTest(Simple8bTest) - -addLinkAndDiscoverTest(ContextFileParserTest parser) - -addLinkAndDiscoverTest(IndexMetaDataTest index) - -# Stxxl currently always uses a file ./-stxxl.disk for all indices, which -# makes it impossible to run the test cases for the Index class in parallel. 
-# TODO fix this -addLinkAndDiscoverTestSerial(IndexTest index) - -addLinkAndDiscoverTest(EngineTest engine) - -addLinkAndDiscoverTest(JoinTest engine) - -addLinkAndDiscoverTest(TextLimitOperationTest engine) - -addLinkAndDiscoverTest(QueryPlannerTest engine) - -addLinkAndDiscoverTest(HashMapTest) - -addLinkAndDiscoverTest(HashSetTest) - -addLinkAndDiscoverTestSerial(GroupByTest engine) - -addLinkAndDiscoverTest(VocabularyGeneratorTest index) - -addLinkAndDiscoverTest(HasPredicateScanTest engine) - -addLinkAndDiscoverTest(MmapVectorTest) - -# BufferedVectorTest also uses conflicting filenames. -addLinkAndDiscoverTestSerial(BufferedVectorTest) - -addLinkAndDiscoverTest(UnionTest engine) - -if (SINGLE_TEST_BINARY) - target_sources(QLeverAllUnitTestsMain PUBLIC TokenTest.cpp TokenTestCtreHelper.cpp) - qlever_target_link_libraries(QLeverAllUnitTestsMain parser re2 util) -else() - add_executable(TokenTest TokenTest.cpp TokenTestCtreHelper.cpp) - linkAndDiscoverTest(TokenTest parser re2 util) -endif() - -addLinkAndDiscoverTestSerial(TurtleParserTest parser re2) - -addLinkAndDiscoverTest(MultiColumnJoinTest engine) - -addLinkAndDiscoverTest(IdTableTest util) - -addLinkAndDiscoverTest(TransitivePathTest engine) - -addLinkAndDiscoverTest(PathSearchTest engine) - -addLinkAndDiscoverTest(BatchedPipelineTest) - -addLinkAndDiscoverTest(TupleHelpersTest) - -addLinkAndDiscoverTest(StringSortComparatorTest) - -addLinkAndDiscoverTest(PriorityQueueTest) - -addLinkAndDiscoverTest(SynchronizedTest) - -addLinkAndDiscoverTest(AllocatorWithLimitTest) - -addLinkAndDiscoverTest(MinusTest engine) - -# this test runs for quite some time and might have spurious failures! -# Therefore it is compiled, but not run. If you want to run it, -# change the following two lines. -addAndLinkTest(SortPerformanceEstimatorTest SortPerformanceEstimator) +#this test runs for quite some time and might have spurious failures ! 
+#Therefore it is compiled, but not run.If you want to run it, +#change the following two lines. + addAndLinkTest( + SortPerformanceEstimatorTest + SortPerformanceEstimator) #addLinkAndDiscoverTest(SortPerformanceEstimatorTest SortPerformanceEstimator) -addLinkAndDiscoverTestSerial(SparqlAntlrParserTest parser sparqlExpressions engine) + addLinkAndDiscoverTestSerial( + SparqlAntlrParserTest + parser + sparqlExpressions + engine) -# The SerializerTest uses temporary files. The tests fail when multiple test -# cases are run in parallel. This should be fixed by using distinct filenames -# for each test case. -# TODO fix this -addLinkAndDiscoverTestSerial(SerializerTest) +#The SerializerTest uses temporary files.The tests fail when multiple test +#cases are run in parallel.This should be fixed by using distinct filenames +#for each test case. +#TODO < qup42, joka921> fix this + addLinkAndDiscoverTestSerial( + SerializerTest) -addLinkAndDiscoverTest(ParametersTest) + addLinkAndDiscoverTest( + ParametersTest) -addLinkAndDiscoverTest(ZstdCompressionTest zstd ${cmake_thread_libs_init}) + addLinkAndDiscoverTest(ZstdCompressionTest zstd ${ + cmake_thread_libs_init}) -addLinkAndDiscoverTest(TaskQueueTest) + addLinkAndDiscoverTest( + TaskQueueTest) -addLinkAndDiscoverTest(SetOfIntervalsTest sparqlExpressions) + addLinkAndDiscoverTest( + SetOfIntervalsTest + sparqlExpressions) -addLinkAndDiscoverTest(TypeTraitsTest) + addLinkAndDiscoverTest( + TypeTraitsTest) -addLinkAndDiscoverTestSerial(SparqlExpressionTest sparqlExpressions index engine) + addLinkAndDiscoverTestSerial( + SparqlExpressionTest + sparqlExpressions + index engine) -addLinkAndDiscoverTest(StreamableBodyTest http) + addLinkAndDiscoverTest( + StreamableBodyTest + http) -addLinkAndDiscoverTest(StreamableGeneratorTest) + addLinkAndDiscoverTest( + StreamableGeneratorTest) -addLinkAndDiscoverTest(AcceptHeaderTest mediaTypes httpParser) + addLinkAndDiscoverTest( + AcceptHeaderTest + mediaTypes + httpParser) 
-addLinkAndDiscoverTest(RdfEscapingTest parser) + addLinkAndDiscoverTest( + RdfEscapingTest + parser) -addLinkAndDiscoverTest(CompactStringVectorTest) + addLinkAndDiscoverTest( + CompactStringVectorTest) -addLinkAndDiscoverTest(SparqlDataTypesTest engine) + addLinkAndDiscoverTest( + SparqlDataTypesTest + engine) -addLinkAndDiscoverTest(ContentEncodingHelperTest http) + addLinkAndDiscoverTest( + ContentEncodingHelperTest + http) -addLinkAndDiscoverTest(VocabularyInMemoryTest vocabulary) + addLinkAndDiscoverTest( + VocabularyInMemoryTest + vocabulary) -addLinkAndDiscoverTest(CompressedVocabularyTest vocabulary) + addLinkAndDiscoverTest( + CompressedVocabularyTest + vocabulary) -addLinkAndDiscoverTest(UnicodeVocabularyTest vocabulary) + addLinkAndDiscoverTest( + UnicodeVocabularyTest + vocabulary) -addLinkAndDiscoverTest(CombinedVocabularyTest vocabulary) + addLinkAndDiscoverTest(CombinedVocabularyTest vocabulary) -addLinkAndDiscoverTest(PrefixCompressorTest) + addLinkAndDiscoverTest( + PrefixCompressorTest) -addLinkAndDiscoverTest(MilestoneIdTest) + addLinkAndDiscoverTest( + MilestoneIdTest) -addLinkAndDiscoverTest(VocabularyOnDiskTest index) + addLinkAndDiscoverTest( + VocabularyOnDiskTest + index) -addLinkAndDiscoverTest(VocabularyTest index) + addLinkAndDiscoverTest( + VocabularyTest + index) -addLinkAndDiscoverTest(IteratorTest) + addLinkAndDiscoverTest( + IteratorTest) -# Stxxl currently always uses a file ./-stxxl.disk for all indices, which -# makes it impossible to run the test cases for the Index class in parallel. -# TODO fix this -addLinkAndDiscoverTestSerial(BackgroundStxxlSorterTest ${STXXL_LIBRARIES}) +#Stxxl currently always uses a file./ -stxxl.disk for all indices, which +#makes it impossible to run the test cases for the Index class in parallel. 
+#TODO < qup42, joka921> fix this + addLinkAndDiscoverTestSerial(BackgroundStxxlSorterTest ${STXXL_LIBRARIES}) -addLinkAndDiscoverTest(ViewsTest) + addLinkAndDiscoverTest( + ViewsTest) -addLinkAndDiscoverTest(ForwardTest) + addLinkAndDiscoverTest( + ForwardTest) -addLinkAndDiscoverTest(CompressorStreamTest engine) + addLinkAndDiscoverTest(CompressorStreamTest + engine) -addLinkAndDiscoverTest(AsyncStreamTest) + addLinkAndDiscoverTest( + AsyncStreamTest) -addLinkAndDiscoverTest(TriplesViewTest util OpenSSL::SSL OpenSSL::Crypto) + addLinkAndDiscoverTest( + TriplesViewTest + util + OpenSSL::SSL OpenSSL::Crypto) -addLinkAndDiscoverTest(BitUtilsTest) + addLinkAndDiscoverTest( + BitUtilsTest) -addLinkAndDiscoverTest(NBitIntegerTest) + addLinkAndDiscoverTest( + NBitIntegerTest) -addLinkAndDiscoverTest(GeoSparqlHelpersTest util) + addLinkAndDiscoverTest( + GeoSparqlHelpersTest + util) -addLinkAndDiscoverTest(HttpUtilsTest util http) + addLinkAndDiscoverTest( + HttpUtilsTest + util + http) -addLinkAndDiscoverTest(DateTest util parser) + addLinkAndDiscoverTest( + DateTest + util + parser) -addLinkAndDiscoverTest(TripleComponentTest parser) + addLinkAndDiscoverTest( + TripleComponentTest + parser) -addLinkAndDiscoverTest(ValueIdTest util) + addLinkAndDiscoverTest( + ValueIdTest + util) -addLinkAndDiscoverTest(LambdaHelpersTest) + addLinkAndDiscoverTest( + LambdaHelpersTest) -addLinkAndDiscoverTest(ParseExceptionTest parser engine) + addLinkAndDiscoverTest( + ParseExceptionTest + parser + engine) -addLinkAndDiscoverTest(TransparentFunctorsTest) + addLinkAndDiscoverTest( + TransparentFunctorsTest) -addLinkAndDiscoverTest(SelectClauseTest parser engine) + addLinkAndDiscoverTest( + SelectClauseTest + parser + engine) -addLinkAndDiscoverTestSerial(RelationalExpressionTest parser sparqlExpressions index engine) + addLinkAndDiscoverTestSerial(RelationalExpressionTest parser sparqlExpressions index engine) -addLinkAndDiscoverTest(CheckUsePatternTrickTest parser engine) + 
addLinkAndDiscoverTest( + CheckUsePatternTrickTest + parser + engine) -addLinkAndDiscoverTestSerial(RegexExpressionTest parser sparqlExpressions index engine parser) + addLinkAndDiscoverTestSerial( + RegexExpressionTest + parser + sparqlExpressions + index + engine + parser) -addLinkAndDiscoverTestSerial(LocalVocabTest engine) + addLinkAndDiscoverTestSerial( + LocalVocabTest + engine) -addLinkAndDiscoverTestSerial(ValuesTest engine) + addLinkAndDiscoverTestSerial( + ValuesTest + engine) -addLinkAndDiscoverTestSerial(ServiceTest engine) + addLinkAndDiscoverTestSerial( + ServiceTest + engine) -addLinkAndDiscoverTest(HttpTest Boost::iostreams http) + addLinkAndDiscoverTest( + HttpTest + Boost:: + iostreams + http) -addLinkAndDiscoverTest(CallFixedSizeTest) + addLinkAndDiscoverTest( + CallFixedSizeTest) -addLinkAndDiscoverTest(ConstexprUtilsTest) + addLinkAndDiscoverTest( + ConstexprUtilsTest) -addLinkAndDiscoverTest(ResetWhenMovedTest) + addLinkAndDiscoverTest( + ResetWhenMovedTest) -addLinkAndDiscoverTest(TimerTest) + addLinkAndDiscoverTest( + TimerTest) -addLinkAndDiscoverTest(AlgorithmTest) + addLinkAndDiscoverTest( + AlgorithmTest) -addLinkAndDiscoverTestSerial(CompressedRelationsTest index) + addLinkAndDiscoverTestSerial( + CompressedRelationsTest index) -addLinkAndDiscoverTest(ExceptionTest) + addLinkAndDiscoverTest(ExceptionTest) -addLinkAndDiscoverTestSerial(RandomExpressionTest index) + addLinkAndDiscoverTestSerial( + RandomExpressionTest + index) -addLinkAndDiscoverTestSerial(NowDatetimeExpressionTest index) + addLinkAndDiscoverTestSerial( + NowDatetimeExpressionTest + index) -addLinkAndDiscoverTestSerial(SortTest engine) + addLinkAndDiscoverTestSerial( + SortTest + engine) -addLinkAndDiscoverTestSerial(OrderByTest engine) + addLinkAndDiscoverTestSerial( + OrderByTest + engine) -addLinkAndDiscoverTestSerial(ValuesForTestingTest index) + addLinkAndDiscoverTestSerial( + ValuesForTestingTest + index) -addLinkAndDiscoverTestSerial(ExportQueryExecutionTreeTest 
index engine parser) + addLinkAndDiscoverTestSerial( + ExportQueryExecutionTreeTest + index + engine + parser) -addLinkAndDiscoverTestSerial(AggregateExpressionTest parser sparqlExpressions index engine) + addLinkAndDiscoverTestSerial( + AggregateExpressionTest parser sparqlExpressions index engine) -addLinkAndDiscoverTest(OnDestructionDontThrowDuringStackUnwindingTest) + addLinkAndDiscoverTest( + OnDestructionDontThrowDuringStackUnwindingTest) -addLinkAndDiscoverTest(ExceptionHandlingTest) + addLinkAndDiscoverTest( + ExceptionHandlingTest) -addLinkAndDiscoverTest(SparqlExpressionTypesTest sparqlExpressions util) + addLinkAndDiscoverTest( + SparqlExpressionTypesTest + sparqlExpressions + util) -addLinkAndDiscoverTest(LimitOffsetClauseTest) + addLinkAndDiscoverTest( + LimitOffsetClauseTest) -addLinkAndDiscoverTest(OperationTest engine) + addLinkAndDiscoverTest( + OperationTest + engine) -addLinkAndDiscoverTest(RuntimeInformationTest engine index) + addLinkAndDiscoverTest(RuntimeInformationTest engine index) -addLinkAndDiscoverTest(VariableToColumnMapTest parser) + addLinkAndDiscoverTest( + VariableToColumnMapTest parser) -addLinkAndDiscoverTest(CopyableUniquePtrTest) + addLinkAndDiscoverTest(CopyableUniquePtrTest) -addLinkAndDiscoverTest(JsonCustomConverterForThirdPartyTest) + addLinkAndDiscoverTest( + JsonCustomConverterForThirdPartyTest) -addLinkAndDiscoverTest(ConfigManagerTest configManager) + addLinkAndDiscoverTest( + ConfigManagerTest + configManager) -addLinkAndDiscoverTest(ConfigOptionTest configManager) + addLinkAndDiscoverTest( + ConfigOptionTest + configManager) -addLinkAndDiscoverTest(ValidatorTest configManager) + addLinkAndDiscoverTest( + ValidatorTest + configManager) -addLinkAndDiscoverTest(ConfigOptionProxyTest configManager) + addLinkAndDiscoverTest( + ConfigOptionProxyTest + configManager) -addLinkAndDiscoverTest(ConfigUtilTest configManager) + addLinkAndDiscoverTest( + ConfigUtilTest + configManager) -addLinkAndDiscoverTest(RandomTest) + 
addLinkAndDiscoverTest( + RandomTest) -addLinkAndDiscoverTest(BenchmarkMeasurementContainerTest benchmark testUtil) + addLinkAndDiscoverTest( + BenchmarkMeasurementContainerTest + benchmark + testUtil) -addLinkAndDiscoverTest(ResultTableColumnOperationsTest benchmark testUtil) + addLinkAndDiscoverTest( + ResultTableColumnOperationsTest + benchmark + testUtil) -addLinkAndDiscoverTest(FindUndefRangesTest engine) + addLinkAndDiscoverTest( + FindUndefRangesTest + engine) -addLinkAndDiscoverTest(AddCombinedRowToTableTest engine util) + addLinkAndDiscoverTest( + AddCombinedRowToTableTest + engine + util) -addLinkAndDiscoverTest(CtreHelpersTest) + addLinkAndDiscoverTest( + CtreHelpersTest) -addLinkAndDiscoverTest(ComparisonWithNanTest) + addLinkAndDiscoverTest( + ComparisonWithNanTest) -addLinkAndDiscoverTest(ThreadSafeQueueTest) + addLinkAndDiscoverTest( + ThreadSafeQueueTest) -addLinkAndDiscoverTest(IdTableHelpersTest) + addLinkAndDiscoverTest( + IdTableHelpersTest) -addLinkAndDiscoverTest(GeneratorTest) + addLinkAndDiscoverTest( + GeneratorTest) -addLinkAndDiscoverTest(MemorySizeTest memorySize) + addLinkAndDiscoverTest( + MemorySizeTest + memorySize) -addLinkAndDiscoverTest(JsonUtilTest) + addLinkAndDiscoverTest( + JsonUtilTest) -addLinkAndDiscoverTest(JoinAlgorithmsTest) + addLinkAndDiscoverTest( + JoinAlgorithmsTest) -addLinkAndDiscoverTest(AsioHelpersTest) + addLinkAndDiscoverTest( + AsioHelpersTest) -addLinkAndDiscoverTest(UniqueCleanupTest) + addLinkAndDiscoverTest( + UniqueCleanupTest) -addLinkAndDiscoverTest(WebSocketSessionTest http) + addLinkAndDiscoverTest( + WebSocketSessionTest + http) -addLinkAndDiscoverTest(QueryIdTest) + addLinkAndDiscoverTest( + QueryIdTest) -addLinkAndDiscoverTest(QueryHubTest http) + addLinkAndDiscoverTest( + QueryHubTest + http) -addLinkAndDiscoverTest(QueryToSocketDistributorTest http) + addLinkAndDiscoverTest( + QueryToSocketDistributorTest + http) -addLinkAndDiscoverTest(UpdateFetcherTest http) + addLinkAndDiscoverTest( + 
UpdateFetcherTest + http) -addLinkAndDiscoverTest(MessageSenderTest http) + addLinkAndDiscoverTest( + MessageSenderTest + http) -addLinkAndDiscoverTest(CancellationHandleTest util) + addLinkAndDiscoverTest( + CancellationHandleTest + util) -addLinkAndDiscoverTest(ProgressBarTest util) + addLinkAndDiscoverTest(ProgressBarTest + util) -addLinkAndDiscoverTest(CachingMemoryResourceTest) + addLinkAndDiscoverTest( + CachingMemoryResourceTest) -addLinkAndDiscoverTest(ParallelMultiwayMergeTest) + addLinkAndDiscoverTest( + ParallelMultiwayMergeTest) -addLinkAndDiscoverTest(ParseableDurationTest) + addLinkAndDiscoverTest( + ParseableDurationTest) -addLinkAndDiscoverTest(ConstantsTest) + addLinkAndDiscoverTest( + ConstantsTest) -addLinkAndDiscoverTest(JThreadTest) + addLinkAndDiscoverTest( + JThreadTest) -addLinkAndDiscoverTest(ChunkedForLoopTest) + addLinkAndDiscoverTest( + ChunkedForLoopTest) -addLinkAndDiscoverTest(FsstCompressorTest fsst) + addLinkAndDiscoverTest( + FsstCompressorTest + fsst) diff --git a/test/ConfigManagerTest.cpp b/test/ConfigManagerTest.cpp index 0d90283201..6e03727ac7 100644 --- a/test/ConfigManagerTest.cpp +++ b/test/ConfigManagerTest.cpp @@ -1205,10 +1205,11 @@ void doValidatorTest( here. */ auto addValidatorToConfigManager = - [&adjustVariantArgument, &addValidatorFunction ]( - size_t variant, ConfigManager & m, + [&adjustVariantArgument, &addValidatorFunction]( + size_t variant, ConfigManager& m, ConstConfigOptionProxy... 
validatorArguments) - requires(sizeof...(Ts) == sizeof...(validatorArguments)) { + requires(sizeof...(Ts) == sizeof...(validatorArguments)) + { // Add the new validator addValidatorFunction( adjustVariantArgument.template operator()(variant), @@ -1235,11 +1236,12 @@ void doValidatorTest( */ auto testGeneratedValidatorsOfConfigManager = [&adjustVariantArgument]( - size_t variantStart, size_t variantEnd, ConfigManager & m, + size_t variantStart, size_t variantEnd, ConfigManager& m, const nlohmann::json& defaultValues, const std::same_as< nlohmann::json::json_pointer> auto&... configOptionPaths) - requires(sizeof...(Ts) == sizeof...(configOptionPaths)) { + requires(sizeof...(Ts) == sizeof...(configOptionPaths)) + { // Using the invariant of our function generator, to create valid // and none valid values for all added validators. for (size_t validatorNumber = variantStart; validatorNumber < variantEnd; @@ -1292,25 +1294,25 @@ void doValidatorTest( here. */ auto doTestNoValidatorInSubManager = - [&addValidatorToConfigManager, & - testGeneratedValidatorsOfConfigManager ]( - ConfigManager & m, const nlohmann::json& defaultValues, + [&addValidatorToConfigManager, + &testGeneratedValidatorsOfConfigManager]( + ConfigManager& m, const nlohmann::json& defaultValues, const std::pair>&... validatorArguments) requires(sizeof...(Ts) == sizeof...(validatorArguments)) { - // How many validators are to be added? - constexpr size_t NUMBER_OF_VALIDATORS{5}; + // How many validators are to be added? + constexpr size_t NUMBER_OF_VALIDATORS{5}; - for (size_t i = 0; i < NUMBER_OF_VALIDATORS; i++) { - // Add a new validator - addValidatorToConfigManager.template operator()( - i, m, validatorArguments.second...); + for (size_t i = 0; i < NUMBER_OF_VALIDATORS; i++) { + // Add a new validator + addValidatorToConfigManager.template operator()( + i, m, validatorArguments.second...); - // Test all the added validators. 
- testGeneratedValidatorsOfConfigManager.template operator()( - 0, i + 1, m, defaultValues, validatorArguments.first...); - } - }; + // Test all the added validators. + testGeneratedValidatorsOfConfigManager.template operator()( + 0, i + 1, m, defaultValues, validatorArguments.first...); + } + }; /* @brief Do the tests for config manager with one sub manager. The sub manager @@ -1335,13 +1337,14 @@ void doValidatorTest( here. */ auto doTestAlwaysValidatorInSubManager = - [&addValidatorToConfigManager, & - testGeneratedValidatorsOfConfigManager ]( - ConfigManager & m, ConfigManager & subM, + [&addValidatorToConfigManager, + &testGeneratedValidatorsOfConfigManager]( + ConfigManager& m, ConfigManager& subM, const nlohmann::json& defaultValues, const std::pair>&... validatorArguments) - requires(sizeof...(Ts) == sizeof...(validatorArguments)) { + requires(sizeof...(Ts) == sizeof...(validatorArguments)) + { // How many validators are to be added to each of the managers? constexpr size_t NUMBER_OF_VALIDATORS{5}; diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 8d04c001a9..7dc9747842 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -23,7 +23,7 @@ using Vars = std::vector>; } // namespace Result performPathSearch(PathSearchConfiguration config, IdTable input, - Vars vars) { + Vars vars) { auto qec = getQec(); auto subtree = ad_utility::makeExecutionTree( qec, std::move(input), vars); @@ -34,7 +34,10 @@ Result performPathSearch(PathSearchConfiguration config, IdTable input, TEST(PathSearchTest, constructor) { auto qec = getQec(); - PathSearchConfiguration config{ALL_PATHS, V(0), {V(1)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + ALL_PATHS, V(0), {V(1)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; PathSearch p = PathSearch(qec, nullptr, config); } @@ -45,7 +48,10 @@ TEST(PathSearchTest, emptyGraph) { expected.setNumColumns(4); Vars vars = 
{Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + ALL_PATHS, V(0), {V(4)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -66,7 +72,10 @@ TEST(PathSearchTest, singlePath) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + ALL_PATHS, V(0), {V(4)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -84,7 +93,14 @@ TEST(PathSearchTest, singlePathWithProperties) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {Var{"?edgeProperty"}}}; + PathSearchConfiguration config{ALL_PATHS, + V(0), + {V(4)}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {Var{"?edgeProperty"}}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -101,7 +117,10 @@ TEST(PathSearchTest, singlePathWithDijkstra) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + SHORTEST_PATHS, V(0), {V(4)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -119,7 +138,14 @@ TEST(PathSearchTest, 
singlePathWithDijkstraAndProperties) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; - PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {Var{"?edgeProperty"}}}; + PathSearchConfiguration config{SHORTEST_PATHS, + V(0), + {V(4)}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {Var{"?edgeProperty"}}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -144,7 +170,10 @@ TEST(PathSearchTest, twoPathsOneTarget) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(2)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + ALL_PATHS, V(0), {V(2)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -169,7 +198,10 @@ TEST(PathSearchTest, twoPathsTwoTargets) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(2), V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ALL_PATHS, V(0), + {V(2), V(4)}, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -193,7 +225,10 @@ TEST(PathSearchTest, cycle) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(0)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + ALL_PATHS, V(0), {V(0)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); 
ASSERT_THAT(resultTable.idTable(), @@ -220,7 +255,10 @@ TEST(PathSearchTest, twoCycle) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(0)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + ALL_PATHS, V(0), {V(0)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -250,7 +288,10 @@ TEST(PathSearchTest, allPaths) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + ALL_PATHS, V(0), {}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -276,7 +317,15 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty1"}, Variable{"?edgeProperty2"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {Var{"?edgeProperty2"}, Var{"?edgeProperty1"}}}; + PathSearchConfiguration config{ + ALL_PATHS, + V(0), + {}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {Var{"?edgeProperty2"}, Var{"?edgeProperty1"}}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -302,7 +351,10 @@ TEST(PathSearchTest, singleShortestPath) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + SHORTEST_PATHS, V(0), {V(4)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + 
Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -331,14 +383,16 @@ TEST(PathSearchTest, twoShortestPaths) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(5)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + SHORTEST_PATHS, V(0), {V(5)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } - /** * Graph: * 0 -> 1 -> 2 -> 3 -> 4 @@ -356,7 +410,10 @@ TEST(PathSearchTest, singlePathWithIrrelevantNode) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + ALL_PATHS, V(0), {V(4)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -373,16 +430,12 @@ TEST(PathSearchTest, shortestPathWithIrrelevantNode) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{SHORTEST_PATHS, V(0), {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + SHORTEST_PATHS, V(0), {V(4)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } - - - - - - - diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 1b3a74691d..c8934e5776 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -777,34 +777,25 @@ TEST(QueryPlanner, 
PathSearchSingleTarget) { auto qec = ad_utility::testing::getQec("

.

"); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); - PathSearchConfiguration config{ - ALL_PATHS, - getId(""), - {getId("")}, - Variable("?start"), - Variable("?end"), - Variable("?path"), - Variable("?edge"), - {} - }; + PathSearchConfiguration config{ALL_PATHS, getId(""), + {getId("")}, Variable("?start"), + Variable("?end"), Variable("?path"), + Variable("?edge"), {}}; h::expect( - "PREFIX pathSearch: " - "SELECT ?start ?end ?path ?edge WHERE {" - "SERVICE pathSearch: {" - "_:path pathSearch:algorithm pathSearch:allPaths ;" - "pathSearch:source ;" - "pathSearch:target ;" - "pathSearch:pathColumn ?path ;" - "pathSearch:edgeColumn ?edge ;" - "pathSearch:start ?start;" - "pathSearch:end ?end;" - "{SELECT * WHERE {" - "?start

?end." - "}}}}", - h::PathSearch( - config, - scan("?start", "

", "?end")), - qec); + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, scan("?start", "

", "?end")), qec); } TEST(QueryPlanner, PathSearchMultipleTargets) { @@ -812,73 +803,61 @@ TEST(QueryPlanner, PathSearchMultipleTargets) { auto qec = ad_utility::testing::getQec("

.

"); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); - PathSearchConfiguration config{ - ALL_PATHS, - getId(""), - {getId(""), getId("")}, - Variable("?start"), - Variable("?end"), - Variable("?path"), - Variable("?edge"), - {} - }; + PathSearchConfiguration config{ALL_PATHS, + getId(""), + {getId(""), getId("")}, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; h::expect( - "PREFIX pathSearch: " - "SELECT ?start ?end ?path ?edge WHERE {" - "SERVICE pathSearch: {" - "_:path pathSearch:algorithm pathSearch:allPaths ;" - "pathSearch:source ;" - "pathSearch:target ;" - "pathSearch:target ;" - "pathSearch:pathColumn ?path ;" - "pathSearch:edgeColumn ?edge ;" - "pathSearch:start ?start;" - "pathSearch:end ?end;" - "{SELECT * WHERE {" - "?start

?end." - "}}}}", - h::PathSearch( - config, - scan("?start", "

", "?end")), - qec); + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, scan("?start", "

", "?end")), qec); } TEST(QueryPlanner, PathSearchWithEdgeProperties) { auto scan = h::IndexScanFromStrings; auto join = h::Join; - auto qec = ad_utility::testing::getQec(" . . . "); + auto qec = ad_utility::testing::getQec( + " . . . "); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); - PathSearchConfiguration config{ - ALL_PATHS, - getId(""), - {getId("")}, - Variable("?start"), - Variable("?end"), - Variable("?path"), - Variable("?edge"), - {Variable("?middle")} - }; + PathSearchConfiguration config{ALL_PATHS, getId(""), + {getId("")}, Variable("?start"), + Variable("?end"), Variable("?path"), + Variable("?edge"), {Variable("?middle")}}; h::expect( - "PREFIX pathSearch: " - "SELECT ?start ?end ?path ?edge WHERE {" - "SERVICE pathSearch: {" - "_:path pathSearch:algorithm pathSearch:allPaths ;" - "pathSearch:source ;" - "pathSearch:target ;" - "pathSearch:pathColumn ?path ;" - "pathSearch:edgeColumn ?edge ;" - "pathSearch:start ?start;" - "pathSearch:end ?end;" - "pathSearch:edgeProperty ?middle;" - "{SELECT * WHERE {" - "?start ?middle." - "?middle ?end." - "}}}}", - h::PathSearch( - config, - join(scan("?start", "", "?middle"), - scan("?middle", "", "?end"))), + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:edgeProperty ?middle;" + "{SELECT * WHERE {" + "?start ?middle." + "?middle ?end." + "}}}}", + h::PathSearch(config, join(scan("?start", "", "?middle"), + scan("?middle", "", "?end"))), qec); } @@ -886,49 +865,46 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { auto scan = h::IndexScanFromStrings; auto join = h::Join; auto qec = ad_utility::testing::getQec( - " ." - " ." - " ." - " ." - " ." - " " - ); + " ." + " ." + " ." + " ." + " ." 
+ " "); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); PathSearchConfiguration config{ - ALL_PATHS, - getId(""), - {getId(""), getId("")}, - Variable("?start"), - Variable("?end"), - Variable("?path"), - Variable("?edge"), - {Variable("?middle"), Variable("?middleAttribute")} - }; + ALL_PATHS, + getId(""), + {getId(""), getId("")}, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {Variable("?middle"), Variable("?middleAttribute")}}; h::expect( - "PREFIX pathSearch: " - "SELECT ?start ?end ?path ?edge WHERE {" - "SERVICE pathSearch: {" - "_:path pathSearch:algorithm pathSearch:allPaths ;" - "pathSearch:source ;" - "pathSearch:target ;" - "pathSearch:target ;" - "pathSearch:pathColumn ?path ;" - "pathSearch:edgeColumn ?edge ;" - "pathSearch:start ?start;" - "pathSearch:end ?end;" - "pathSearch:edgeProperty ?middle;" - "pathSearch:edgeProperty ?middleAttribute;" - "{SELECT * WHERE {" - "?start ?middle." - "?middle ?middleAttribute." - "?middle ?end." - "}}}}", - h::PathSearch( - config, - join(scan("?start", "", "?middle"), - join(scan("?middle", "", "?middleAttribute"), - scan("?middle", "", "?end")))), + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:edgeProperty ?middle;" + "pathSearch:edgeProperty ?middleAttribute;" + "{SELECT * WHERE {" + "?start ?middle." + "?middle ?middleAttribute." + "?middle ?end." 
+ "}}}}", + h::PathSearch(config, + join(scan("?start", "", "?middle"), + join(scan("?middle", "", "?middleAttribute"), + scan("?middle", "", "?end")))), qec); } diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index b3cf541eb8..22fa09f9a1 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -170,7 +170,8 @@ inline auto CountAvailablePredicates = [](size_t subjectColumnIdx, const Variable& predicateVar, const Variable& countVar, const std::same_as auto&... childMatchers) - requires(sizeof...(childMatchers) <= 1) { + requires(sizeof...(childMatchers) <= 1) +{ return RootOperation<::CountAvailablePredicates>(AllOf( AD_PROPERTY(::CountAvailablePredicates, subjectColumnIndex, Eq(subjectColumnIdx)), @@ -262,26 +263,26 @@ inline auto TransitivePath = inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { return AllOf( - AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)), - AD_FIELD(PathSearchConfiguration, source_, Eq(config.source_)), - AD_FIELD(PathSearchConfiguration, start_, Eq(config.start_)), - AD_FIELD(PathSearchConfiguration, targets_, UnorderedElementsAreArray(config.targets_)), - AD_FIELD(PathSearchConfiguration, end_, Eq(config.end_)), - AD_FIELD(PathSearchConfiguration, pathColumn_, Eq(config.pathColumn_)), - AD_FIELD(PathSearchConfiguration, edgeColumn_, Eq(config.edgeColumn_)), - AD_FIELD(PathSearchConfiguration, edgeProperties_, UnorderedElementsAreArray(config.edgeProperties_)) - ); + AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)), + AD_FIELD(PathSearchConfiguration, source_, Eq(config.source_)), + AD_FIELD(PathSearchConfiguration, start_, Eq(config.start_)), + AD_FIELD(PathSearchConfiguration, targets_, + UnorderedElementsAreArray(config.targets_)), + AD_FIELD(PathSearchConfiguration, end_, Eq(config.end_)), + AD_FIELD(PathSearchConfiguration, pathColumn_, Eq(config.pathColumn_)), + AD_FIELD(PathSearchConfiguration, edgeColumn_, 
Eq(config.edgeColumn_)), + AD_FIELD(PathSearchConfiguration, edgeProperties_, + UnorderedElementsAreArray(config.edgeProperties_))); }; // Match a PathSearch operation inline auto PathSearch = - [](PathSearchConfiguration config, const std::same_as auto&... childMatchers) { - return RootOperation<::PathSearch>( - AllOf(Property("getChildren", &Operation::getChildren, - ElementsAre(Pointee(childMatchers)...)), - AD_PROPERTY(PathSearch, getConfig, PathSearchConfigMatcher(config)) - ) - ); + [](PathSearchConfiguration config, + const std::same_as auto&... childMatchers) { + return RootOperation<::PathSearch>(AllOf( + Property("getChildren", &Operation::getChildren, + ElementsAre(Pointee(childMatchers)...)), + AD_PROPERTY(PathSearch, getConfig, PathSearchConfigMatcher(config)))); }; // Match a sort operation. Currently, this is only required by the binary search From 19027e245e0a424555145bcd0a3708a730fe0a7e Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 24 Jun 2024 11:52:16 +0200 Subject: [PATCH 20/96] Fix the format fix This reverts commit d700df2106d89971c0909aecc833a75bf45d3167. 
--- src/engine/CMakeLists.txt | 34 +- src/engine/GroupByHashMapOptimization.h | 5 +- src/engine/idTable/IdTableRow.h | 6 +- src/engine/raw_thoughts.txt | 2 + src/engine/sparqlExpressions/CMakeLists.txt | 31 +- .../ConditionalExpressions.cpp | 68 +- .../SparqlExpressionGenerators.h | 75 +- .../sparqlExpressions/StringExpressions.cpp | 5 +- src/parser/CMakeLists.txt | 45 +- src/parser/sparqlParser/CMakeLists.txt | 26 +- .../sparqlParser/SparqlQleverVisitor.cpp | 34 +- src/util/CMakeLists.txt | 13 +- src/util/ComparisonWithNan.h | 42 +- test/CMakeLists.txt | 810 ++++++------------ test/ConfigManagerTest.cpp | 49 +- test/QueryPlannerTestHelpers.h | 3 +- 16 files changed, 489 insertions(+), 759 deletions(-) diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 166db6fd00..8c628add78 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -1,19 +1,17 @@ add_subdirectory(sparqlExpressions) - add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp) - qlever_target_link_libraries(SortPerformanceEstimator) add_library( - engine Engine.cpp QueryExecutionTree.cpp Operation.cpp Result - .cpp LocalVocab.cpp IndexScan.cpp Join.cpp Sort.cpp Distinct - .cpp OrderBy.cpp Filter.cpp Server.cpp QueryPlanner - .cpp QueryPlanningCostFactors.cpp OptionalJoin - .cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan - .cpp Union.cpp MultiColumnJoin.cpp TransitivePathBase - .cpp TransitivePathHashMap.cpp TransitivePathBinSearch - .cpp Service.cpp Values.cpp Bind.cpp Minus - .cpp RuntimeInformation.cpp CheckUsePatternTrick - .cpp VariableToColumnMap.cpp ExportQueryExecutionTrees - .cpp CartesianProductJoin.cpp TextIndexScanForWord - .cpp TextIndexScanForEntity.cpp TextLimit.cpp idTable / - CompressedExternalIdTable.h PathSearch.cpp) - qlever_target_link_libraries( - engine util index parser sparqlExpressions http - SortPerformanceEstimator Boost::iostreams) +add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp) 
+qlever_target_link_libraries(SortPerformanceEstimator) +add_library(engine + Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp + IndexScan.cpp Join.cpp Sort.cpp + Distinct.cpp OrderBy.cpp Filter.cpp + Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp + OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan.cpp + Union.cpp MultiColumnJoin.cpp TransitivePathBase.cpp + TransitivePathHashMap.cpp TransitivePathBinSearch.cpp Service.cpp + Values.cpp Bind.cpp Minus.cpp RuntimeInformation.cpp CheckUsePatternTrick.cpp + VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp + CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp + TextLimit.cpp + idTable/CompressedExternalIdTable.h PathSearch.cpp) +qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams) diff --git a/src/engine/GroupByHashMapOptimization.h b/src/engine/GroupByHashMapOptimization.h index 8dc52b01ce..d0be51bf36 100644 --- a/src/engine/GroupByHashMapOptimization.h +++ b/src/engine/GroupByHashMapOptimization.h @@ -15,8 +15,9 @@ static constexpr auto valueAdder = []() { auto numericValueAdder = [](T value, double& sum, [[maybe_unused]] const bool& error) - requires std::is_arithmetic_v - { sum += static_cast(value); }; + requires std::is_arithmetic_v { + sum += static_cast(value); + }; auto nonNumericValueAdder = [](sparqlExpression::detail::NotNumeric, [[maybe_unused]] const double& sum, bool& error) { error = true; }; diff --git a/src/engine/idTable/IdTableRow.h b/src/engine/idTable/IdTableRow.h index b9b0c3a7fe..21294df659 100644 --- a/src/engine/idTable/IdTableRow.h +++ b/src/engine/idTable/IdTableRow.h @@ -299,9 +299,9 @@ class RowReferenceImpl { } // Assignment from a `const` RowReference to a `mutable` RowReference - This& operator=( - const RowReferenceWithRestrictedAccess< - Table, ad_utility::IsConst::True>& other) && requires(!isConst) { + This& operator=(const 
RowReferenceWithRestrictedAccess< + Table, ad_utility::IsConst::True>& other) && + requires(!isConst) { return assignmentImpl(*this, other); } diff --git a/src/engine/raw_thoughts.txt b/src/engine/raw_thoughts.txt index a617bf7616..df5ebe05bb 100644 --- a/src/engine/raw_thoughts.txt +++ b/src/engine/raw_thoughts.txt @@ -146,3 +146,5 @@ I don't see a problem why it couldn't also be a parameter per context variable. HARDER THAN BROCCOLI: cross product with contexts cross product (not full) with co-occurring n-tuples of entities (?x ?y ?z - see graphs above). + + diff --git a/src/engine/sparqlExpressions/CMakeLists.txt b/src/engine/sparqlExpressions/CMakeLists.txt index 9dd47c45e1..c8c998f1a1 100644 --- a/src/engine/sparqlExpressions/CMakeLists.txt +++ b/src/engine/sparqlExpressions/CMakeLists.txt @@ -1,11 +1,22 @@ -add_library( - sparqlExpressions SparqlExpressionValueGetters.cpp NaryExpression - .cpp SetOfIntervals.cpp SparqlExpressionPimpl.cpp SampleExpression - .cpp RelationalExpressions.cpp AggregateExpression.cpp RegexExpression - .cpp LangExpression.cpp NumericUnaryExpressions - .cpp NumericBinaryExpressions.cpp DateExpressions.cpp StringExpressions - .cpp IsSomethingExpressions.cpp ConditionalExpressions - .cpp SparqlExpressionTypes.cpp SparqlExpression - .cpp ConvertToNumericExpression.cpp RdfTermExpressions.cpp) +add_library(sparqlExpressions + SparqlExpressionValueGetters.cpp + NaryExpression.cpp + SetOfIntervals.cpp + SparqlExpressionPimpl.cpp + SampleExpression.cpp + RelationalExpressions.cpp + AggregateExpression.cpp + RegexExpression.cpp + LangExpression.cpp + NumericUnaryExpressions.cpp + NumericBinaryExpressions.cpp + DateExpressions.cpp + StringExpressions.cpp + IsSomethingExpressions.cpp + ConditionalExpressions.cpp + SparqlExpressionTypes.cpp + SparqlExpression.cpp + ConvertToNumericExpression.cpp + RdfTermExpressions.cpp) - qlever_target_link_libraries(sparqlExpressions util index Boost::url) +qlever_target_link_libraries(sparqlExpressions util 
index Boost::url) diff --git a/src/engine/sparqlExpressions/ConditionalExpressions.cpp b/src/engine/sparqlExpressions/ConditionalExpressions.cpp index 2f0454934b..29aec5a7b2 100644 --- a/src/engine/sparqlExpressions/ConditionalExpressions.cpp +++ b/src/engine/sparqlExpressions/ConditionalExpressions.cpp @@ -66,36 +66,35 @@ class CoalesceExpression : public VariadicExpression { auto visitConstantExpressionResult = [&nextUnboundIndices, &unboundIndices, &isUnbound, &result, - ctx](T&& childResult) + ctx ](T && childResult) requires isConstantResult { - IdOrLiteralOrIri constantResult{AD_FWD(childResult)}; - if (isUnbound(constantResult)) { - nextUnboundIndices = std::move(unboundIndices); - return; - } - ad_utility::chunkedForLoop( - 0, unboundIndices.size(), - [&unboundIndices, &result, &constantResult](size_t idx) { - // GCC 12 & 13 report this as potential uninitialized - // use of a variable when compiling with -O3, which seems to - // be a false positive, so we suppress the warning here. See - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109561 for - // more information. - DISABLE_UNINITIALIZED_WARNINGS - result[unboundIndices[idx]] = constantResult; - }, - [ctx]() { ctx->cancellationHandle_->throwIfCancelled(); }); - }; + IdOrLiteralOrIri constantResult{AD_FWD(childResult)}; + if (isUnbound(constantResult)) { + nextUnboundIndices = std::move(unboundIndices); + return; + } + ad_utility::chunkedForLoop( + 0, unboundIndices.size(), + [&unboundIndices, &result, &constantResult](size_t idx) { + // GCC 12 & 13 report this as potential uninitialized + // use of a variable when compiling with -O3, which seems to + // be a false positive, so we suppress the warning here. See + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109561 for + // more information. 
+ DISABLE_UNINITIALIZED_WARNINGS + result[unboundIndices[idx]] = constantResult; + }, + [ctx]() { ctx->cancellationHandle_->throwIfCancelled(); }); + }; ENABLE_UNINITIALIZED_WARNINGS // For a single child result, write the result at the indices where the // result so far is unbound, and the child result is bound. While doing so, // set up the `nextUnboundIndices` vector for the next step. auto visitVectorExpressionResult = - [&result, &unboundIndices, &nextUnboundIndices, &ctx, - &isUnbound](T&& childResult) - requires std::is_rvalue_reference_v - { + [&result, &unboundIndices, &nextUnboundIndices, &ctx, & + isUnbound ](T && childResult) + requires std::is_rvalue_reference_v { static_assert(!isConstantResult); auto gen = detail::makeGenerator(AD_FWD(childResult), ctx->size(), ctx); // Iterator to the next index where the result so far is unbound. @@ -127,17 +126,18 @@ class CoalesceExpression : public VariadicExpression { [ctx]() { ctx->cancellationHandle_->throwIfCancelled(); }); }; auto visitExpressionResult = - [&visitConstantExpressionResult, - &visitVectorExpressionResult]( - T&& childResult) requires std::is_rvalue_reference_v { - // If the previous expression result is a constant, we can skip the - // loop. - if constexpr (isConstantResult) { - visitConstantExpressionResult(AD_FWD(childResult)); - } else { - visitVectorExpressionResult(AD_FWD(childResult)); - } - }; + [ + &visitConstantExpressionResult, &visitVectorExpressionResult + ](T && childResult) + requires std::is_rvalue_reference_v { + // If the previous expression result is a constant, we can skip the + // loop. + if constexpr (isConstantResult) { + visitConstantExpressionResult(AD_FWD(childResult)); + } else { + visitVectorExpressionResult(AD_FWD(childResult)); + } + }; // Evaluate the children one by one, stopping as soon as all result are // bound. 
diff --git a/src/engine/sparqlExpressions/SparqlExpressionGenerators.h b/src/engine/sparqlExpressions/SparqlExpressionGenerators.h index d56dff1c67..6197cf0e32 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionGenerators.h +++ b/src/engine/sparqlExpressions/SparqlExpressionGenerators.h @@ -15,9 +15,9 @@ namespace sparqlExpression::detail { /// Convert a variable to a vector of all the Ids it is bound to in the /// `context`. -inline std::span getIdsFromVariable( - const ::Variable& variable, const EvaluationContext* context, - size_t beginIndex, size_t endIndex) { +inline std::span getIdsFromVariable(const ::Variable& variable, + const EvaluationContext* context, + size_t beginIndex, size_t endIndex) { const auto& inputTable = context->_inputTable; const auto& varToColMap = context->_variableToColumnMap; @@ -28,18 +28,15 @@ inline std::span getIdsFromVariable( std::span completeColumn = inputTable.getColumn(columnIndex); - AD_CONTRACT_CHECK(beginIndex <= endIndex && - endIndex <= completeColumn.size()); - return {completeColumn.begin() + beginIndex, - completeColumn.begin() + endIndex}; + AD_CONTRACT_CHECK(beginIndex <= endIndex && endIndex <= completeColumn.size()); + return {completeColumn.begin() + beginIndex, completeColumn.begin() + endIndex}; } // Overload that reads the `beginIndex` and the `endIndex` directly from the // `context -inline std::span getIdsFromVariable( - const ::Variable& variable, const EvaluationContext* context) { - return getIdsFromVariable(variable, context, context->_beginIndex, - context->_endIndex); +inline std::span getIdsFromVariable(const ::Variable& variable, + const EvaluationContext* context) { + return getIdsFromVariable(variable, context, context->_beginIndex, context->_endIndex); } /// Generators that yield `numItems` items for the various @@ -48,9 +45,8 @@ inline std::span getIdsFromVariable( /// `SparqlExpressionValueGetters` with an already bound `EvaluationContext`. 
template requires isConstantResult && std::invocable -cppcoro::generator>> -resultGenerator(T constant, size_t numItems, - Transformation transformation = {}) { +cppcoro::generator>> resultGenerator( + T constant, size_t numItems, Transformation transformation = {}) { auto transformed = transformation(constant); for (size_t i = 0; i < numItems; ++i) { co_yield transformed; @@ -59,10 +55,9 @@ resultGenerator(T constant, size_t numItems, template requires std::ranges::input_range -auto resultGenerator(T vector, size_t numItems, - Transformation transformation = {}) - -> cppcoro::generator>>> { +auto resultGenerator(T vector, size_t numItems, Transformation transformation = {}) + -> cppcoro::generator>>> { AD_CONTRACT_CHECK(numItems == vector.size()); for (auto& element : vector) { auto cpy = transformation(std::move(element)); @@ -71,8 +66,7 @@ auto resultGenerator(T vector, size_t numItems, } template -inline cppcoro::generator< - const std::decay_t>> +inline cppcoro::generator>> resultGenerator(ad_utility::SetOfIntervals set, size_t targetSize, Transformation transformation = {}) { size_t i = 0; @@ -96,43 +90,36 @@ resultGenerator(ad_utility::SetOfIntervals set, size_t targetSize, /// Return a generator that yields `numItems` many items for the various /// `SingleExpressionResult` template -auto makeGenerator(Input&& input, size_t numItems, - const EvaluationContext* context, +auto makeGenerator(Input&& input, size_t numItems, const EvaluationContext* context, Transformation transformation = {}) { if constexpr (ad_utility::isSimilar<::Variable, Input>) { std::span inputWithVariableResolved{ getIdsFromVariable(std::forward(input), context)}; return resultGenerator(inputWithVariableResolved, numItems, transformation); } else { - return resultGenerator(std::forward(input), numItems, - transformation); + return resultGenerator(std::forward(input), numItems, transformation); } } /// Generate `numItems` many values from the `input` and apply the /// `valueGetter` to each 
of the values. -inline auto valueGetterGenerator = - []( - size_t numElements, EvaluationContext* context, Input&& input, - ValueGetter&& valueGetter) { - auto transformation = - [context, valueGetter](I&& i) - requires std::invocable { - context->cancellationHandle_->throwIfCancelled(); - return valueGetter(AD_FWD(i), context); - }; - return makeGenerator(std::forward(input), numElements, context, - transformation); - }; +inline auto valueGetterGenerator = []( + size_t numElements, EvaluationContext* context, + Input&& input, ValueGetter&& valueGetter) { + auto transformation = [ context, valueGetter ](I && i) + requires std::invocable { + context->cancellationHandle_->throwIfCancelled(); + return valueGetter(AD_FWD(i), context); + }; + return makeGenerator(std::forward(input), numElements, context, transformation); +}; /// Do the following `numItems` times: Obtain the next elements e_1, ..., e_n /// from the `generators` and yield `function(e_1, ..., e_n)`, also as a /// generator. inline auto applyFunction = []( - Function&& function, size_t numItems, - Generators... generators) - -> cppcoro::generator< - std::invoke_result_t> { + Function&& function, size_t numItems, Generators... generators) + -> cppcoro::generator> { // A tuple holding one iterator to each of the generators. std::tuple iterators{generators.begin()...}; @@ -163,8 +150,7 @@ auto applyOperation(size_t numElements, Operation&&, EvaluationContext* context, // Function that takes all the generators as a parameter pack and computes the // generator for the operation result; - auto getResultFromGenerators = - std::bind_front(applyFunction, Function{}, numElements); + auto getResultFromGenerators = std::bind_front(applyFunction, Function{}, numElements); /// The `ValueGetters` are stored in a `std::tuple`, so we have to extract /// them via `std::apply`. 
First set up a lambda that performs the actual @@ -173,8 +159,7 @@ auto applyOperation(size_t numElements, Operation&&, EvaluationContext* context, // Both `operands` and `valueGetters` are parameter packs of equal size, // so there will be one call to `getValue` for each pair of // (`operands`, `valueGetter`) - return getResultFromGenerators( - getValue(std::forward(operands), valueGetters)...); + return getResultFromGenerators(getValue(std::forward(operands), valueGetters)...); }; return std::apply(getResultFromValueGetters, ValueGetters{}); diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index ef9ea13b08..d8828fa748 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -316,9 +316,8 @@ class ConcatExpression : public detail::VariadicExpression { // were constants (see above). std::variant result{std::string{""}}; auto visitSingleExpressionResult = - [&ctx, &result](T&& s) - requires std::is_rvalue_reference_v - { + [&ctx, &result ](T && s) + requires std::is_rvalue_reference_v { if constexpr (isConstantResult) { std::string strFromConstant = StringValueGetter{}(s, ctx).value_or(""); if (std::holds_alternative(result)) { diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt index d9f031160b..29ca836c71 100644 --- a/src/parser/CMakeLists.txt +++ b/src/parser/CMakeLists.txt @@ -1,22 +1,29 @@ add_library(rdfEscaping RdfEscaping.h RdfEscaping.cpp) - qlever_target_link_libraries(rdfEscaping) +qlever_target_link_libraries(rdfEscaping) - add_subdirectory(sparqlParser) add_subdirectory(data) +add_subdirectory(sparqlParser) +add_subdirectory(data) + +add_library(parser + sparqlParser/SparqlQleverVisitor.cpp + SparqlParser.cpp + ParsedQuery.cpp + TurtleParser.cpp + Tokenizer.cpp + ContextFileParser.cpp + TurtleTokenId.h + ParallelBuffer.cpp + SparqlParserHelpers.cpp + TripleComponent.cpp + GraphPatternOperation.cpp + 
PropertyPath.cpp + data/SparqlFilter.cpp + SelectClause.cpp + GraphPatternOperation.cpp + # The `Variable.cpp` from the subdirectory is linked here because otherwise we get linking errors. + GraphPattern.cpp data/Variable.cpp + Iri.cpp + Literal.cpp + LiteralOrIri.cpp) +qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2::re2 util engine) - add_library( - parser sparqlParser / - SparqlQleverVisitor.cpp SparqlParser.cpp ParsedQuery - .cpp TurtleParser.cpp Tokenizer.cpp ContextFileParser - .cpp TurtleTokenId.h ParallelBuffer.cpp SparqlParserHelpers - .cpp TripleComponent.cpp GraphPatternOperation - .cpp PropertyPath.cpp data / - SparqlFilter.cpp SelectClause.cpp GraphPatternOperation - .cpp -#The `Variable \ - .cpp` from the subdirectory is linked here because otherwise we get \ - linking errors. - GraphPattern.cpp data / - Variable.cpp Iri.cpp Literal.cpp LiteralOrIri.cpp) - qlever_target_link_libraries( - parser sparqlParser parserData sparqlExpressions rdfEscaping - re2::re2 util engine) diff --git a/src/parser/sparqlParser/CMakeLists.txt b/src/parser/sparqlParser/CMakeLists.txt index fd1fd85ba6..3ba1f47429 100644 --- a/src/parser/sparqlParser/CMakeLists.txt +++ b/src/parser/sparqlParser/CMakeLists.txt @@ -1,16 +1,10 @@ -add_library(sparqlParser SparqlQleverVisitor.h generated / - SparqlAutomaticBaseListener.h generated / - SparqlAutomaticBaseListener.cpp generated / - SparqlAutomaticLexer.h generated / - SparqlAutomaticLexer.cpp generated / - SparqlAutomaticListener.h generated / - SparqlAutomaticListener.cpp generated / - SparqlAutomaticParser.h generated / - SparqlAutomaticParser.cpp generated / - SparqlAutomaticVisitor.h generated / SparqlAutomaticVisitor.cpp) - qlever_target_link_libraries( - sparqlParser antlr4_static sparqlExpressions rdfEscaping util) -#Silence warnings in files that are auto - generated by ANTLR. -#TODO < joka921> Submit a pull request to ANTLR to fix those warnings. 
- target_compile_options(sparqlParser PRIVATE - Wno - logical - op - - parentheses - Wno - parentheses) +add_library(sparqlParser SparqlQleverVisitor.h + generated/SparqlAutomaticBaseListener.h generated/SparqlAutomaticBaseListener.cpp + generated/SparqlAutomaticLexer.h generated/SparqlAutomaticLexer.cpp + generated/SparqlAutomaticListener.h generated/SparqlAutomaticListener.cpp + generated/SparqlAutomaticParser.h generated/SparqlAutomaticParser.cpp + generated/SparqlAutomaticVisitor.h generated/SparqlAutomaticVisitor.cpp) +qlever_target_link_libraries(sparqlParser antlr4_static sparqlExpressions rdfEscaping util) +# Silence warnings in files that are auto-generated by ANTLR. +# TODO Submit a pull request to ANTLR to fix those warnings. +target_compile_options(sparqlParser PRIVATE -Wno-logical-op-parentheses -Wno-parentheses) diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index c378b69781..da5320a7a3 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -1987,28 +1987,24 @@ ExpressionPtr Visitor::visit([[maybe_unused]] Parser::BuiltInCallContext* ctx) { using namespace sparqlExpression; // Create the expression using the matching factory function from // `NaryExpression.h`. 
- auto createUnary = - [&argList](Function function) - requires std::is_invocable_r_v - { + auto createUnary = [&argList](Function function) + requires std::is_invocable_r_v { AD_CORRECTNESS_CHECK(argList.size() == 1, argList.size()); return function(std::move(argList[0])); }; - auto createBinary = - [&argList](Function function) - requires std::is_invocable_r_v { - AD_CORRECTNESS_CHECK(argList.size() == 2); - return function(std::move(argList[0]), std::move(argList[1])); - }; - auto createTernary = - [&argList](Function function) - requires std::is_invocable_r_v { - AD_CORRECTNESS_CHECK(argList.size() == 3); - return function(std::move(argList[0]), std::move(argList[1]), - std::move(argList[2])); - }; + auto createBinary = [&argList](Function function) + requires std::is_invocable_r_v { + AD_CORRECTNESS_CHECK(argList.size() == 2); + return function(std::move(argList[0]), std::move(argList[1])); + }; + auto createTernary = [&argList](Function function) + requires std::is_invocable_r_v { + AD_CORRECTNESS_CHECK(argList.size() == 3); + return function(std::move(argList[0]), std::move(argList[1]), + std::move(argList[2])); + }; if (functionName == "str") { return createUnary(&makeStrExpression); } else if (functionName == "strlang") { diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index ae38fa02aa..e11014b318 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -1,8 +1,5 @@ -add_subdirectory(ConfigManager) add_subdirectory(MemorySize) - add_subdirectory(http) - add_library(util GeoSparqlHelpers.cpp antlr / - ANTLRErrorHandling.cpp ParseException.cpp Conversions - .cpp Date.cpp antlr / - GenerateAntlrExceptionMetadata.cpp CancellationHandle - .cpp StringUtils.cpp) - qlever_target_link_libraries(util re2::re2) +add_subdirectory(ConfigManager) +add_subdirectory(MemorySize) +add_subdirectory(http) +add_library(util GeoSparqlHelpers.cpp antlr/ANTLRErrorHandling.cpp ParseException.cpp Conversions.cpp Date.cpp 
antlr/GenerateAntlrExceptionMetadata.cpp CancellationHandle.cpp StringUtils.cpp) +qlever_target_link_libraries(util re2::re2) diff --git a/src/util/ComparisonWithNan.h b/src/util/ComparisonWithNan.h index 208b3307e4..748ba429a5 100644 --- a/src/util/ComparisonWithNan.h +++ b/src/util/ComparisonWithNan.h @@ -26,27 +26,27 @@ namespace ad_utility { template inline auto makeComparatorForNans(Comparator comparator) { return [comparator](const A& a, const B& b) - requires std::is_invocable_r_v { - auto isNan = [](const T& t) { - if constexpr (std::is_floating_point_v) { - return std::isnan(t); - } else { - (void)t; - return false; - } - }; + requires std::is_invocable_r_v { + auto isNan = [](const T& t) { + if constexpr (std::is_floating_point_v) { + return std::isnan(t); + } else { + (void)t; + return false; + } + }; - bool aIsNan = isNan(a); - bool bIsNan = isNan(b); - if (aIsNan && bIsNan) { - return comparator(0.0, 0.0); - } else if (aIsNan) { - return comparator(1.0, 0.0); - } else if (bIsNan) { - return comparator(0.0, 1.0); - } else { - return comparator(a, b); - } - }; + bool aIsNan = isNan(a); + bool bIsNan = isNan(b); + if (aIsNan && bIsNan) { + return comparator(0.0, 0.0); + } else if (aIsNan) { + return comparator(1.0, 0.0); + } else if (bIsNan) { + return comparator(0.0, 1.0); + } else { + return comparator(a, b); + } + }; } } // namespace ad_utility diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e2c110ad2b..6e74705935 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,653 +1,397 @@ include(GoogleTest) -#Needed for creating the `testUil`- library. - add_subdirectory(util) - -#Link binary ${basename } against `gmock_main`, the threading library, the -#general test utilities and all libraries that are specified as additional -#arguments. 
- function(linkTest basename) qlever_target_link_libraries(${basename} ${ - ARGN} GTest::gtest GTest::gmock_main testUtil ${ - CMAKE_THREAD_LIBS_INIT}) endfunction() - -#Add the executable ${basename } that is compiled from the source file -#"${basename}".cpp - function(addTest basename) add_executable($ { - basename - } "${basename}.cpp") endfunction() - -#Usage : `linkAndDiscoverTest(basename, [additionalLibraries...]` -#Link the executable `basename` against `gmock_main`, threading library, -#and all `additionLibraries` which are passed as arguments. -#Then run `gtest_discover_tests` to add the tests cases from the executable. -#Typically you should use `addAndLinkTest` ( \ - below)but this function can be used, -#if a test binary requires multiple sources - function(linkAndDiscoverTest basename) linkTest(${basename} ${ - ARGN}) gtest_discover_tests(${basename} ${ - basename} DISCOVERY_TIMEOUT 600) endfunction() - -#Usage : `linkAndDiscoverTestSerial(basename, [additionalLibraries...]` -#Similar to `linkAndDiscoverTestSerial` but also requires that the test is run \ - serially -#(without any of the other test cases running in parallel).This can be -#required e.g.if several tests cases write to the same file. 
- function(linkAndDiscoverTestSerial basename) linkTest(${ - basename} ${ARGN}) gtest_discover_tests(${basename} ${ - basename} PROPERTIES RUN_SERIAL TRUE) endfunction() - - if (SINGLE_TEST_BINARY) message( - STATUS - "All tests are linked into a single executable " - "`QLeverAllUnitTestsMain`") add_executable(QLeverAllUnitTestsMain) - qlever_target_link_libraries( - QLeverAllUnitTestsMain gtest gmock_main testUtil - ${CMAKE_THREAD_LIBS_INIT}) - gtest_discover_tests( - QLeverAllUnitTestsMain - QLeverAllUnitTestsMain PROPERTIES - RUN_SERIAL - TRUE) else() message(STATUS - "The " - "tests " - "are " - "split " - "over " - "multiple " - "binaries") - - endif() -#Usage : `addAndLinkTest(basename, [additionalLibraries...]` -#Add a GTest / \ - GMock test case that is called `basename` and compiled from a file called -# `basename.cpp` \ - .All tests are linked against `gmock_main` and the threading library. -#additional libraries against which the test case has to be linked can be \ - specified as -#additional arguments after the `basename` - function(addLinkAndDiscoverTest basename) if (SINGLE_TEST_BINARY) target_sources( - QLeverAllUnitTestsMain PUBLIC ${ - basename} - .cpp) qlever_target_link_libraries(QLeverAllUnitTestsMain ${ - ARGN}) else() addTest(${ - basename}) linkAndDiscoverTest(${ - basename} ${ARGN}) endif() - - endfunction() - -#Usage : `addAndLinkTestSerial(basename, [additionalLibraries...]` -#Similar to `addAndLinkTest` but also requires that the test is run serially -#(without any of the other test cases running in parallel).This can be -#required e.g.if several tests cases write to the same file. 
- function( - addLinkAndDiscoverTestSerial - basename) if (SINGLE_TEST_BINARY) - target_sources( - QLeverAllUnitTestsMain - PUBLIC ${basename} - .cpp) - qlever_target_link_libraries( - QLeverAllUnitTestsMain - ${ARGN}) else() addTest(${ - basename}) linkAndDiscoverTestSerial(${ - basename} ${ - ARGN}) endif() endfunction() - -#Only compile and link the test, but do not run it. -#Usage : Same as for the two functions above. - function( - addAndLinkTest - basename) - addTest(${ - basename}) - linkTest(${ - basename} ${ - ARGN}) - endfunction() - - add_subdirectory( - engine) - add_subdirectory( - parser) - add_subdirectory( - index) - - addLinkAndDiscoverTest( - ValueIdComparatorsTest - util) - - addLinkAndDiscoverTest( - SparqlParserTest parser engine - sparqlExpressions) - - addLinkAndDiscoverTest( - StringUtilsTest util) - - addLinkAndDiscoverTest( - CryptographicHashUtilsTest - util) - - addLinkAndDiscoverTest( - CacheTest) - - addLinkAndDiscoverTest( - ConcurrentCacheTest) - -#This test also seems to use the same filenames and should be fixed. - addLinkAndDiscoverTestSerial( - FileTest) - - addLinkAndDiscoverTest( - Simple8bTest) - - addLinkAndDiscoverTest( - ContextFileParserTest - parser) - - addLinkAndDiscoverTest( - IndexMetaDataTest - index) - -#Stxxl currently always uses a file./ -stxxl.disk for all indices, which -#makes it impossible to run the test cases for the Index class in parallel. 
-#TODO < qup42, joka921> fix this - addLinkAndDiscoverTestSerial( - IndexTest - index) - - addLinkAndDiscoverTest( - EngineTest - engine) - - addLinkAndDiscoverTest( - JoinTest - engine) - - addLinkAndDiscoverTest(TextLimitOperationTest engine) - - addLinkAndDiscoverTest( - QueryPlannerTest - engine) - - addLinkAndDiscoverTest( - HashMapTest) - - addLinkAndDiscoverTest( - HashSetTest) - - addLinkAndDiscoverTestSerial( - GroupByTest engine) - - addLinkAndDiscoverTest( - VocabularyGeneratorTest - index) - - addLinkAndDiscoverTest( - HasPredicateScanTest - engine) - - addLinkAndDiscoverTest( - MmapVectorTest) - -#BufferedVectorTest also uses conflicting filenames. - addLinkAndDiscoverTestSerial( - BufferedVectorTest) - - addLinkAndDiscoverTest(UnionTest - engine) - - if (SINGLE_TEST_BINARY) - target_sources( - QLeverAllUnitTestsMain - PUBLIC - TokenTest - .cpp TokenTestCtreHelper - .cpp) qlever_target_link_libraries(QLeverAllUnitTestsMain parser - re2 util) else() - add_executable(TokenTest TokenTest - .cpp - TokenTestCtreHelper - .cpp) - linkAndDiscoverTest(TokenTest parser re2 util) - endif() +# Needed for creating the `testUil`-library. +add_subdirectory(util) + +# Link binary ${basename} against `gmock_main`, the threading library, the +# general test utilities and all libraries that are specified as additional +# arguments. +function (linkTest basename) + qlever_target_link_libraries(${basename} ${ARGN} GTest::gtest GTest::gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) +endfunction() + +# Add the executable ${basename} that is compiled from the source file +# "${basename}".cpp +function (addTest basename) + add_executable(${basename} "${basename}.cpp") +endfunction() + +# Usage: `linkAndDiscoverTest(basename, [additionalLibraries...]` +# Link the executable `basename` against `gmock_main`,threading library, +# and all `additionLibraries` which are passed as arguments. +# Then run `gtest_discover_tests` to add the tests cases from the executable. 
+# Typically you should use `addAndLinkTest` (below) but this function can be used, +# if a test binary requires multiple sources +function(linkAndDiscoverTest basename) + linkTest(${basename} ${ARGN}) + gtest_discover_tests(${basename} ${basename} DISCOVERY_TIMEOUT 600) +endfunction() + +# Usage: `linkAndDiscoverTestSerial(basename, [additionalLibraries...]` +# Similar to `linkAndDiscoverTestSerial` but also requires that the test is run serially +# (without any of the other test cases running in parallel). This can be +# required e.g. if several tests cases write to the same file. +function(linkAndDiscoverTestSerial basename) + linkTest(${basename} ${ARGN}) + gtest_discover_tests(${basename} ${basename} PROPERTIES RUN_SERIAL + TRUE) +endfunction() + +if (SINGLE_TEST_BINARY) + message(STATUS "All tests are linked into a single executable `QLeverAllUnitTestsMain`") + add_executable(QLeverAllUnitTestsMain) + qlever_target_link_libraries(QLeverAllUnitTestsMain gtest gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) + gtest_discover_tests(QLeverAllUnitTestsMain QLeverAllUnitTestsMain PROPERTIES RUN_SERIAL + TRUE) +else() + message(STATUS "The tests are split over multiple binaries") + +endif() +# Usage: `addAndLinkTest(basename, [additionalLibraries...]` +# Add a GTest/GMock test case that is called `basename` and compiled from a file called +# `basename.cpp`. All tests are linked against `gmock_main` and the threading library. 
+# additional libraries against which the test case has to be linked can be specified as +# additional arguments after the `basename` +function(addLinkAndDiscoverTest basename) + if (SINGLE_TEST_BINARY) + target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) + qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) + else() + addTest(${basename}) + linkAndDiscoverTest(${basename} ${ARGN}) + endif() - addLinkAndDiscoverTestSerial( - TurtleParserTest - parser re2) +endfunction() - addLinkAndDiscoverTest(MultiColumnJoinTest - engine) +# Usage: `addAndLinkTestSerial(basename, [additionalLibraries...]` +# Similar to `addAndLinkTest` but also requires that the test is run serially +# (without any of the other test cases running in parallel). This can be +# required e.g. if several tests cases write to the same file. +function(addLinkAndDiscoverTestSerial basename) + if (SINGLE_TEST_BINARY) + target_sources(QLeverAllUnitTestsMain PUBLIC ${basename}.cpp) + qlever_target_link_libraries(QLeverAllUnitTestsMain ${ARGN}) + else() + addTest(${basename}) + linkAndDiscoverTestSerial(${basename} ${ARGN}) + endif() +endfunction() - addLinkAndDiscoverTest( - IdTableTest util) +# Only compile and link the test, but do not run it. +# Usage: Same as for the two functions above. 
+function(addAndLinkTest basename) + addTest(${basename}) + linkTest(${basename} ${ARGN}) +endfunction() - addLinkAndDiscoverTest( - TransitivePathTest - engine) +add_subdirectory(engine) +add_subdirectory(parser) +add_subdirectory(index) - addLinkAndDiscoverTest( - PathSearchTest - engine) +addLinkAndDiscoverTest(ValueIdComparatorsTest util) - addLinkAndDiscoverTest( - BatchedPipelineTest) +addLinkAndDiscoverTest(SparqlParserTest parser engine sparqlExpressions) - addLinkAndDiscoverTest( - TupleHelpersTest) +addLinkAndDiscoverTest(StringUtilsTest util) - addLinkAndDiscoverTest( - StringSortComparatorTest) - - addLinkAndDiscoverTest( - PriorityQueueTest) - - addLinkAndDiscoverTest( - SynchronizedTest) +addLinkAndDiscoverTest(CryptographicHashUtilsTest util) - addLinkAndDiscoverTest( - AllocatorWithLimitTest) +addLinkAndDiscoverTest(CacheTest) - addLinkAndDiscoverTest( - MinusTest - engine) +addLinkAndDiscoverTest(ConcurrentCacheTest) -#this test runs for quite some time and might have spurious failures ! -#Therefore it is compiled, but not run.If you want to run it, -#change the following two lines. - addAndLinkTest( - SortPerformanceEstimatorTest - SortPerformanceEstimator) +# This test also seems to use the same filenames and should be fixed. +addLinkAndDiscoverTestSerial(FileTest) + +addLinkAndDiscoverTest(Simple8bTest) + +addLinkAndDiscoverTest(ContextFileParserTest parser) + +addLinkAndDiscoverTest(IndexMetaDataTest index) + +# Stxxl currently always uses a file ./-stxxl.disk for all indices, which +# makes it impossible to run the test cases for the Index class in parallel. 
+# TODO fix this +addLinkAndDiscoverTestSerial(IndexTest index) + +addLinkAndDiscoverTest(EngineTest engine) + +addLinkAndDiscoverTest(JoinTest engine) + +addLinkAndDiscoverTest(TextLimitOperationTest engine) + +addLinkAndDiscoverTest(QueryPlannerTest engine) + +addLinkAndDiscoverTest(HashMapTest) + +addLinkAndDiscoverTest(HashSetTest) + +addLinkAndDiscoverTestSerial(GroupByTest engine) + +addLinkAndDiscoverTest(VocabularyGeneratorTest index) + +addLinkAndDiscoverTest(HasPredicateScanTest engine) + +addLinkAndDiscoverTest(MmapVectorTest) + +# BufferedVectorTest also uses conflicting filenames. +addLinkAndDiscoverTestSerial(BufferedVectorTest) + +addLinkAndDiscoverTest(UnionTest engine) + +if (SINGLE_TEST_BINARY) + target_sources(QLeverAllUnitTestsMain PUBLIC TokenTest.cpp TokenTestCtreHelper.cpp) + qlever_target_link_libraries(QLeverAllUnitTestsMain parser re2 util) +else() + add_executable(TokenTest TokenTest.cpp TokenTestCtreHelper.cpp) + linkAndDiscoverTest(TokenTest parser re2 util) +endif() + +addLinkAndDiscoverTestSerial(TurtleParserTest parser re2) + +addLinkAndDiscoverTest(MultiColumnJoinTest engine) + +addLinkAndDiscoverTest(IdTableTest util) + +addLinkAndDiscoverTest(TransitivePathTest engine) + +addLinkAndDiscoverTest(PathSearchTest engine) + +addLinkAndDiscoverTest(BatchedPipelineTest) + +addLinkAndDiscoverTest(TupleHelpersTest) + +addLinkAndDiscoverTest(StringSortComparatorTest) + +addLinkAndDiscoverTest(PriorityQueueTest) + +addLinkAndDiscoverTest(SynchronizedTest) + +addLinkAndDiscoverTest(AllocatorWithLimitTest) + +addLinkAndDiscoverTest(MinusTest engine) + +# this test runs for quite some time and might have spurious failures! +# Therefore it is compiled, but not run. If you want to run it, +# change the following two lines. 
+addAndLinkTest(SortPerformanceEstimatorTest SortPerformanceEstimator) #addLinkAndDiscoverTest(SortPerformanceEstimatorTest SortPerformanceEstimator) - addLinkAndDiscoverTestSerial( - SparqlAntlrParserTest - parser - sparqlExpressions - engine) +addLinkAndDiscoverTestSerial(SparqlAntlrParserTest parser sparqlExpressions engine) -#The SerializerTest uses temporary files.The tests fail when multiple test -#cases are run in parallel.This should be fixed by using distinct filenames -#for each test case. -#TODO < qup42, joka921> fix this - addLinkAndDiscoverTestSerial( - SerializerTest) +# The SerializerTest uses temporary files. The tests fail when multiple test +# cases are run in parallel. This should be fixed by using distinct filenames +# for each test case. +# TODO fix this +addLinkAndDiscoverTestSerial(SerializerTest) - addLinkAndDiscoverTest( - ParametersTest) +addLinkAndDiscoverTest(ParametersTest) - addLinkAndDiscoverTest(ZstdCompressionTest zstd ${ - cmake_thread_libs_init}) +addLinkAndDiscoverTest(ZstdCompressionTest zstd ${cmake_thread_libs_init}) - addLinkAndDiscoverTest( - TaskQueueTest) +addLinkAndDiscoverTest(TaskQueueTest) - addLinkAndDiscoverTest( - SetOfIntervalsTest - sparqlExpressions) +addLinkAndDiscoverTest(SetOfIntervalsTest sparqlExpressions) - addLinkAndDiscoverTest( - TypeTraitsTest) +addLinkAndDiscoverTest(TypeTraitsTest) - addLinkAndDiscoverTestSerial( - SparqlExpressionTest - sparqlExpressions - index engine) +addLinkAndDiscoverTestSerial(SparqlExpressionTest sparqlExpressions index engine) - addLinkAndDiscoverTest( - StreamableBodyTest - http) +addLinkAndDiscoverTest(StreamableBodyTest http) - addLinkAndDiscoverTest( - StreamableGeneratorTest) +addLinkAndDiscoverTest(StreamableGeneratorTest) - addLinkAndDiscoverTest( - AcceptHeaderTest - mediaTypes - httpParser) +addLinkAndDiscoverTest(AcceptHeaderTest mediaTypes httpParser) - addLinkAndDiscoverTest( - RdfEscapingTest - parser) +addLinkAndDiscoverTest(RdfEscapingTest parser) - 
addLinkAndDiscoverTest( - CompactStringVectorTest) +addLinkAndDiscoverTest(CompactStringVectorTest) - addLinkAndDiscoverTest( - SparqlDataTypesTest - engine) +addLinkAndDiscoverTest(SparqlDataTypesTest engine) - addLinkAndDiscoverTest( - ContentEncodingHelperTest - http) +addLinkAndDiscoverTest(ContentEncodingHelperTest http) - addLinkAndDiscoverTest( - VocabularyInMemoryTest - vocabulary) +addLinkAndDiscoverTest(VocabularyInMemoryTest vocabulary) - addLinkAndDiscoverTest( - CompressedVocabularyTest - vocabulary) +addLinkAndDiscoverTest(CompressedVocabularyTest vocabulary) - addLinkAndDiscoverTest( - UnicodeVocabularyTest - vocabulary) +addLinkAndDiscoverTest(UnicodeVocabularyTest vocabulary) - addLinkAndDiscoverTest(CombinedVocabularyTest vocabulary) +addLinkAndDiscoverTest(CombinedVocabularyTest vocabulary) - addLinkAndDiscoverTest( - PrefixCompressorTest) +addLinkAndDiscoverTest(PrefixCompressorTest) - addLinkAndDiscoverTest( - MilestoneIdTest) +addLinkAndDiscoverTest(MilestoneIdTest) - addLinkAndDiscoverTest( - VocabularyOnDiskTest - index) +addLinkAndDiscoverTest(VocabularyOnDiskTest index) - addLinkAndDiscoverTest( - VocabularyTest - index) +addLinkAndDiscoverTest(VocabularyTest index) - addLinkAndDiscoverTest( - IteratorTest) +addLinkAndDiscoverTest(IteratorTest) -#Stxxl currently always uses a file./ -stxxl.disk for all indices, which -#makes it impossible to run the test cases for the Index class in parallel. -#TODO < qup42, joka921> fix this - addLinkAndDiscoverTestSerial(BackgroundStxxlSorterTest ${STXXL_LIBRARIES}) +# Stxxl currently always uses a file ./-stxxl.disk for all indices, which +# makes it impossible to run the test cases for the Index class in parallel. 
+# TODO fix this +addLinkAndDiscoverTestSerial(BackgroundStxxlSorterTest ${STXXL_LIBRARIES}) - addLinkAndDiscoverTest( - ViewsTest) +addLinkAndDiscoverTest(ViewsTest) - addLinkAndDiscoverTest( - ForwardTest) +addLinkAndDiscoverTest(ForwardTest) - addLinkAndDiscoverTest(CompressorStreamTest - engine) +addLinkAndDiscoverTest(CompressorStreamTest engine) - addLinkAndDiscoverTest( - AsyncStreamTest) +addLinkAndDiscoverTest(AsyncStreamTest) - addLinkAndDiscoverTest( - TriplesViewTest - util - OpenSSL::SSL OpenSSL::Crypto) +addLinkAndDiscoverTest(TriplesViewTest util OpenSSL::SSL OpenSSL::Crypto) - addLinkAndDiscoverTest( - BitUtilsTest) +addLinkAndDiscoverTest(BitUtilsTest) - addLinkAndDiscoverTest( - NBitIntegerTest) +addLinkAndDiscoverTest(NBitIntegerTest) - addLinkAndDiscoverTest( - GeoSparqlHelpersTest - util) +addLinkAndDiscoverTest(GeoSparqlHelpersTest util) - addLinkAndDiscoverTest( - HttpUtilsTest - util - http) +addLinkAndDiscoverTest(HttpUtilsTest util http) - addLinkAndDiscoverTest( - DateTest - util - parser) +addLinkAndDiscoverTest(DateTest util parser) - addLinkAndDiscoverTest( - TripleComponentTest - parser) +addLinkAndDiscoverTest(TripleComponentTest parser) - addLinkAndDiscoverTest( - ValueIdTest - util) +addLinkAndDiscoverTest(ValueIdTest util) - addLinkAndDiscoverTest( - LambdaHelpersTest) +addLinkAndDiscoverTest(LambdaHelpersTest) - addLinkAndDiscoverTest( - ParseExceptionTest - parser - engine) +addLinkAndDiscoverTest(ParseExceptionTest parser engine) - addLinkAndDiscoverTest( - TransparentFunctorsTest) +addLinkAndDiscoverTest(TransparentFunctorsTest) - addLinkAndDiscoverTest( - SelectClauseTest - parser - engine) +addLinkAndDiscoverTest(SelectClauseTest parser engine) - addLinkAndDiscoverTestSerial(RelationalExpressionTest parser sparqlExpressions index engine) +addLinkAndDiscoverTestSerial(RelationalExpressionTest parser sparqlExpressions index engine) - addLinkAndDiscoverTest( - CheckUsePatternTrickTest - parser - engine) 
+addLinkAndDiscoverTest(CheckUsePatternTrickTest parser engine) - addLinkAndDiscoverTestSerial( - RegexExpressionTest - parser - sparqlExpressions - index - engine - parser) +addLinkAndDiscoverTestSerial(RegexExpressionTest parser sparqlExpressions index engine parser) - addLinkAndDiscoverTestSerial( - LocalVocabTest - engine) +addLinkAndDiscoverTestSerial(LocalVocabTest engine) - addLinkAndDiscoverTestSerial( - ValuesTest - engine) +addLinkAndDiscoverTestSerial(ValuesTest engine) - addLinkAndDiscoverTestSerial( - ServiceTest - engine) +addLinkAndDiscoverTestSerial(ServiceTest engine) - addLinkAndDiscoverTest( - HttpTest - Boost:: - iostreams - http) +addLinkAndDiscoverTest(HttpTest Boost::iostreams http) - addLinkAndDiscoverTest( - CallFixedSizeTest) +addLinkAndDiscoverTest(CallFixedSizeTest) - addLinkAndDiscoverTest( - ConstexprUtilsTest) +addLinkAndDiscoverTest(ConstexprUtilsTest) - addLinkAndDiscoverTest( - ResetWhenMovedTest) +addLinkAndDiscoverTest(ResetWhenMovedTest) - addLinkAndDiscoverTest( - TimerTest) +addLinkAndDiscoverTest(TimerTest) - addLinkAndDiscoverTest( - AlgorithmTest) +addLinkAndDiscoverTest(AlgorithmTest) - addLinkAndDiscoverTestSerial( - CompressedRelationsTest index) +addLinkAndDiscoverTestSerial(CompressedRelationsTest index) - addLinkAndDiscoverTest(ExceptionTest) +addLinkAndDiscoverTest(ExceptionTest) - addLinkAndDiscoverTestSerial( - RandomExpressionTest - index) +addLinkAndDiscoverTestSerial(RandomExpressionTest index) - addLinkAndDiscoverTestSerial( - NowDatetimeExpressionTest - index) +addLinkAndDiscoverTestSerial(NowDatetimeExpressionTest index) - addLinkAndDiscoverTestSerial( - SortTest - engine) +addLinkAndDiscoverTestSerial(SortTest engine) - addLinkAndDiscoverTestSerial( - OrderByTest - engine) +addLinkAndDiscoverTestSerial(OrderByTest engine) - addLinkAndDiscoverTestSerial( - ValuesForTestingTest - index) +addLinkAndDiscoverTestSerial(ValuesForTestingTest index) - addLinkAndDiscoverTestSerial( - ExportQueryExecutionTreeTest - 
index - engine - parser) +addLinkAndDiscoverTestSerial(ExportQueryExecutionTreeTest index engine parser) - addLinkAndDiscoverTestSerial( - AggregateExpressionTest parser sparqlExpressions index engine) +addLinkAndDiscoverTestSerial(AggregateExpressionTest parser sparqlExpressions index engine) - addLinkAndDiscoverTest( - OnDestructionDontThrowDuringStackUnwindingTest) +addLinkAndDiscoverTest(OnDestructionDontThrowDuringStackUnwindingTest) - addLinkAndDiscoverTest( - ExceptionHandlingTest) +addLinkAndDiscoverTest(ExceptionHandlingTest) - addLinkAndDiscoverTest( - SparqlExpressionTypesTest - sparqlExpressions - util) +addLinkAndDiscoverTest(SparqlExpressionTypesTest sparqlExpressions util) - addLinkAndDiscoverTest( - LimitOffsetClauseTest) +addLinkAndDiscoverTest(LimitOffsetClauseTest) - addLinkAndDiscoverTest( - OperationTest - engine) +addLinkAndDiscoverTest(OperationTest engine) - addLinkAndDiscoverTest(RuntimeInformationTest engine index) +addLinkAndDiscoverTest(RuntimeInformationTest engine index) - addLinkAndDiscoverTest( - VariableToColumnMapTest parser) +addLinkAndDiscoverTest(VariableToColumnMapTest parser) - addLinkAndDiscoverTest(CopyableUniquePtrTest) +addLinkAndDiscoverTest(CopyableUniquePtrTest) - addLinkAndDiscoverTest( - JsonCustomConverterForThirdPartyTest) +addLinkAndDiscoverTest(JsonCustomConverterForThirdPartyTest) - addLinkAndDiscoverTest( - ConfigManagerTest - configManager) +addLinkAndDiscoverTest(ConfigManagerTest configManager) - addLinkAndDiscoverTest( - ConfigOptionTest - configManager) +addLinkAndDiscoverTest(ConfigOptionTest configManager) - addLinkAndDiscoverTest( - ValidatorTest - configManager) +addLinkAndDiscoverTest(ValidatorTest configManager) - addLinkAndDiscoverTest( - ConfigOptionProxyTest - configManager) +addLinkAndDiscoverTest(ConfigOptionProxyTest configManager) - addLinkAndDiscoverTest( - ConfigUtilTest - configManager) +addLinkAndDiscoverTest(ConfigUtilTest configManager) - addLinkAndDiscoverTest( - RandomTest) 
+addLinkAndDiscoverTest(RandomTest) - addLinkAndDiscoverTest( - BenchmarkMeasurementContainerTest - benchmark - testUtil) +addLinkAndDiscoverTest(BenchmarkMeasurementContainerTest benchmark testUtil) - addLinkAndDiscoverTest( - ResultTableColumnOperationsTest - benchmark - testUtil) +addLinkAndDiscoverTest(ResultTableColumnOperationsTest benchmark testUtil) - addLinkAndDiscoverTest( - FindUndefRangesTest - engine) +addLinkAndDiscoverTest(FindUndefRangesTest engine) - addLinkAndDiscoverTest( - AddCombinedRowToTableTest - engine - util) +addLinkAndDiscoverTest(AddCombinedRowToTableTest engine util) - addLinkAndDiscoverTest( - CtreHelpersTest) +addLinkAndDiscoverTest(CtreHelpersTest) - addLinkAndDiscoverTest( - ComparisonWithNanTest) +addLinkAndDiscoverTest(ComparisonWithNanTest) - addLinkAndDiscoverTest( - ThreadSafeQueueTest) +addLinkAndDiscoverTest(ThreadSafeQueueTest) - addLinkAndDiscoverTest( - IdTableHelpersTest) +addLinkAndDiscoverTest(IdTableHelpersTest) - addLinkAndDiscoverTest( - GeneratorTest) +addLinkAndDiscoverTest(GeneratorTest) - addLinkAndDiscoverTest( - MemorySizeTest - memorySize) +addLinkAndDiscoverTest(MemorySizeTest memorySize) - addLinkAndDiscoverTest( - JsonUtilTest) +addLinkAndDiscoverTest(JsonUtilTest) - addLinkAndDiscoverTest( - JoinAlgorithmsTest) +addLinkAndDiscoverTest(JoinAlgorithmsTest) - addLinkAndDiscoverTest( - AsioHelpersTest) +addLinkAndDiscoverTest(AsioHelpersTest) - addLinkAndDiscoverTest( - UniqueCleanupTest) +addLinkAndDiscoverTest(UniqueCleanupTest) - addLinkAndDiscoverTest( - WebSocketSessionTest - http) +addLinkAndDiscoverTest(WebSocketSessionTest http) - addLinkAndDiscoverTest( - QueryIdTest) +addLinkAndDiscoverTest(QueryIdTest) - addLinkAndDiscoverTest( - QueryHubTest - http) +addLinkAndDiscoverTest(QueryHubTest http) - addLinkAndDiscoverTest( - QueryToSocketDistributorTest - http) +addLinkAndDiscoverTest(QueryToSocketDistributorTest http) - addLinkAndDiscoverTest( - UpdateFetcherTest - http) 
+addLinkAndDiscoverTest(UpdateFetcherTest http) - addLinkAndDiscoverTest( - MessageSenderTest - http) +addLinkAndDiscoverTest(MessageSenderTest http) - addLinkAndDiscoverTest( - CancellationHandleTest - util) +addLinkAndDiscoverTest(CancellationHandleTest util) - addLinkAndDiscoverTest(ProgressBarTest - util) +addLinkAndDiscoverTest(ProgressBarTest util) - addLinkAndDiscoverTest( - CachingMemoryResourceTest) +addLinkAndDiscoverTest(CachingMemoryResourceTest) - addLinkAndDiscoverTest( - ParallelMultiwayMergeTest) +addLinkAndDiscoverTest(ParallelMultiwayMergeTest) - addLinkAndDiscoverTest( - ParseableDurationTest) +addLinkAndDiscoverTest(ParseableDurationTest) - addLinkAndDiscoverTest( - ConstantsTest) +addLinkAndDiscoverTest(ConstantsTest) - addLinkAndDiscoverTest( - JThreadTest) +addLinkAndDiscoverTest(JThreadTest) - addLinkAndDiscoverTest( - ChunkedForLoopTest) +addLinkAndDiscoverTest(ChunkedForLoopTest) - addLinkAndDiscoverTest( - FsstCompressorTest - fsst) +addLinkAndDiscoverTest(FsstCompressorTest fsst) diff --git a/test/ConfigManagerTest.cpp b/test/ConfigManagerTest.cpp index 6e03727ac7..0d90283201 100644 --- a/test/ConfigManagerTest.cpp +++ b/test/ConfigManagerTest.cpp @@ -1205,11 +1205,10 @@ void doValidatorTest( here. */ auto addValidatorToConfigManager = - [&adjustVariantArgument, &addValidatorFunction]( - size_t variant, ConfigManager& m, + [&adjustVariantArgument, &addValidatorFunction ]( + size_t variant, ConfigManager & m, ConstConfigOptionProxy... 
validatorArguments) - requires(sizeof...(Ts) == sizeof...(validatorArguments)) - { + requires(sizeof...(Ts) == sizeof...(validatorArguments)) { // Add the new validator addValidatorFunction( adjustVariantArgument.template operator()(variant), @@ -1236,12 +1235,11 @@ void doValidatorTest( */ auto testGeneratedValidatorsOfConfigManager = [&adjustVariantArgument]( - size_t variantStart, size_t variantEnd, ConfigManager& m, + size_t variantStart, size_t variantEnd, ConfigManager & m, const nlohmann::json& defaultValues, const std::same_as< nlohmann::json::json_pointer> auto&... configOptionPaths) - requires(sizeof...(Ts) == sizeof...(configOptionPaths)) - { + requires(sizeof...(Ts) == sizeof...(configOptionPaths)) { // Using the invariant of our function generator, to create valid // and none valid values for all added validators. for (size_t validatorNumber = variantStart; validatorNumber < variantEnd; @@ -1294,25 +1292,25 @@ void doValidatorTest( here. */ auto doTestNoValidatorInSubManager = - [&addValidatorToConfigManager, - &testGeneratedValidatorsOfConfigManager]( - ConfigManager& m, const nlohmann::json& defaultValues, + [&addValidatorToConfigManager, & + testGeneratedValidatorsOfConfigManager ]( + ConfigManager & m, const nlohmann::json& defaultValues, const std::pair>&... validatorArguments) requires(sizeof...(Ts) == sizeof...(validatorArguments)) { - // How many validators are to be added? - constexpr size_t NUMBER_OF_VALIDATORS{5}; + // How many validators are to be added? + constexpr size_t NUMBER_OF_VALIDATORS{5}; - for (size_t i = 0; i < NUMBER_OF_VALIDATORS; i++) { - // Add a new validator - addValidatorToConfigManager.template operator()( - i, m, validatorArguments.second...); + for (size_t i = 0; i < NUMBER_OF_VALIDATORS; i++) { + // Add a new validator + addValidatorToConfigManager.template operator()( + i, m, validatorArguments.second...); - // Test all the added validators. 
- testGeneratedValidatorsOfConfigManager.template operator()( - 0, i + 1, m, defaultValues, validatorArguments.first...); - } - }; + // Test all the added validators. + testGeneratedValidatorsOfConfigManager.template operator()( + 0, i + 1, m, defaultValues, validatorArguments.first...); + } + }; /* @brief Do the tests for config manager with one sub manager. The sub manager @@ -1337,14 +1335,13 @@ void doValidatorTest( here. */ auto doTestAlwaysValidatorInSubManager = - [&addValidatorToConfigManager, - &testGeneratedValidatorsOfConfigManager]( - ConfigManager& m, ConfigManager& subM, + [&addValidatorToConfigManager, & + testGeneratedValidatorsOfConfigManager ]( + ConfigManager & m, ConfigManager & subM, const nlohmann::json& defaultValues, const std::pair>&... validatorArguments) - requires(sizeof...(Ts) == sizeof...(validatorArguments)) - { + requires(sizeof...(Ts) == sizeof...(validatorArguments)) { // How many validators are to be added to each of the managers? constexpr size_t NUMBER_OF_VALIDATORS{5}; diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index 22fa09f9a1..0a8e666846 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -170,8 +170,7 @@ inline auto CountAvailablePredicates = [](size_t subjectColumnIdx, const Variable& predicateVar, const Variable& countVar, const std::same_as auto&... 
childMatchers) - requires(sizeof...(childMatchers) <= 1) -{ + requires(sizeof...(childMatchers) <= 1) { return RootOperation<::CountAvailablePredicates>(AllOf( AD_PROPERTY(::CountAvailablePredicates, subjectColumnIndex, Eq(subjectColumnIdx)), From ec7bfd07ef4656b869ca3ab8fed02f91e3661091 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 24 Jun 2024 13:11:50 +0200 Subject: [PATCH 21/96] Added PathSearch e2e tests --- e2e/scientists_queries.yaml | 52 +++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/e2e/scientists_queries.yaml b/e2e/scientists_queries.yaml index a945eeb574..661a9ea9fe 100644 --- a/e2e/scientists_queries.yaml +++ b/e2e/scientists_queries.yaml @@ -1017,6 +1017,58 @@ queries: - contains_row: [""] - contains_row: ["1.87"] + - query: path_search_all_paths + type: no-text + sparql: | + PREFIX pathSearch: + SELECT * WHERE { + SERVICE pathSearch: { + pathSearch: pathSearch:algorithm pathSearch:allPaths; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path; + pathSearch:edgeColumn ?edge; + pathSearch:start ?start; + pathSearch:end ?end; + {SELECT * WHERE { + * ?start. + ?start ?end + }} + } + } + checks: + - num_rows: 10 + - num_cols: 4 + - selected: ["?path", "?edge", "?start", "?end"] + + - query: path_search_shortest_paths + type: no-text + sparql: | + PREFIX pathSearch: + SELECT * WHERE { + SERVICE pathSearch: { + pathSearch: pathSearch:algorithm pathSearch:shortestPaths; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path; + pathSearch:edgeColumn ?edge; + pathSearch:start ?start; + pathSearch:end ?end; + {SELECT * WHERE { + * ?start. 
+ ?start ?end + }} + } + } + checks: + - num_rows: 4 + - num_cols: 4 + - selected: ["?path", "?edge", "?start", "?end"] + - contains_row: [0, 0, "", ""] + - contains_row: [0, 1, "", ""] + - contains_row: [1, 0, "", ""] + - contains_row: [1, 1, "", ""] + - query : property_path_inverse type: no-text From 47daa2b582e1b3ea40baf61d3e865df4c0f2a113 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 25 Jun 2024 22:01:55 +0200 Subject: [PATCH 22/96] Reworked AllPathsVisitor --- e2e/scientists_queries.yaml | 58 +++++++++++++++--------------- src/engine/PathSearch.cpp | 49 ++++++++++++++++++++++--- src/engine/PathSearch.h | 2 ++ src/engine/PathSearchVisitors.h | 54 +++++++++++----------------- test/PathSearchTest.cpp | 63 +++++++++++++++++++++++++++++++-- 5 files changed, 156 insertions(+), 70 deletions(-) diff --git a/e2e/scientists_queries.yaml b/e2e/scientists_queries.yaml index 661a9ea9fe..9253104ef1 100644 --- a/e2e/scientists_queries.yaml +++ b/e2e/scientists_queries.yaml @@ -1025,49 +1025,47 @@ queries: SERVICE pathSearch: { pathSearch: pathSearch:algorithm pathSearch:allPaths; pathSearch:source ; - pathSearch:target ; + pathSearch:target ; pathSearch:pathColumn ?path; pathSearch:edgeColumn ?edge; pathSearch:start ?start; pathSearch:end ?end; {SELECT * WHERE { - * ?start. ?start ?end }} } } checks: - - num_rows: 10 + - num_rows: 17 - num_cols: 4 - selected: ["?path", "?edge", "?start", "?end"] - - query: path_search_shortest_paths - type: no-text - sparql: | - PREFIX pathSearch: - SELECT * WHERE { - SERVICE pathSearch: { - pathSearch: pathSearch:algorithm pathSearch:shortestPaths; - pathSearch:source ; - pathSearch:target ; - pathSearch:pathColumn ?path; - pathSearch:edgeColumn ?edge; - pathSearch:start ?start; - pathSearch:end ?end; - {SELECT * WHERE { - * ?start. 
- ?start ?end - }} - } - } - checks: - - num_rows: 4 - - num_cols: 4 - - selected: ["?path", "?edge", "?start", "?end"] - - contains_row: [0, 0, "", ""] - - contains_row: [0, 1, "", ""] - - contains_row: [1, 0, "", ""] - - contains_row: [1, 1, "", ""] + # - query: path_search_shortest_paths + # type: no-text + # sparql: | + # PREFIX pathSearch: + # SELECT * WHERE { + # SERVICE pathSearch: { + # pathSearch: pathSearch:algorithm pathSearch:shortestPaths; + # pathSearch:source ; + # pathSearch:target ; + # pathSearch:pathColumn ?path; + # pathSearch:edgeColumn ?edge; + # pathSearch:start ?start; + # pathSearch:end ?end; + # {SELECT * WHERE { + # ?start ?end + # }} + # } + # } + # checks: + # - num_rows: 4 + # - num_cols: 4 + # - selected: ["?path", "?edge", "?start", "?end"] + # - contains_row: [0, 0, "", ""] + # - contains_row: [0, 1, "", ""] + # - contains_row: [1, 0, "", ""] + # - contains_row: [1, 1, "", ""] - query : property_path_inverse diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 86862a8003..1fbad45d3d 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -10,6 +10,7 @@ #include #include "engine/CallFixedSize.h" +#include "engine/PathSearchVisitors.h" #include "engine/VariableToColumnMap.h" #include "parser/GraphPatternOperation.h" #include "util/Exception.h" @@ -182,14 +183,31 @@ std::vector PathSearch::allPaths() const { auto startIndex = idToIndex_.at(config_.source_); - std::unordered_set targets; + std::vector targets; for (auto target : config_.targets_) { - targets.insert(target.getBits()); + targets.push_back(target.getBits()); + } + + if (targets.empty()) { + for (auto id: indexToId_) { + targets.push_back(id.getBits()); + } } - AllPathsVisitor vis(targets, path, paths, indexToId_); - boost::depth_first_search(graph_, - boost::visitor(vis).root_vertex(startIndex)); + PredecessorMap predecessors; + + AllPathsVisitor vis(startIndex, predecessors); + try { + boost::depth_first_search(graph_, + 
boost::visitor(vis).root_vertex(startIndex)); + } catch (const StopSearchException&e) {} + + for (auto target: targets) { + auto pathsToTarget = reconstructPaths(target, predecessors); + for (auto path: pathsToTarget) { + paths.push_back(std::move(path)); + } + } return paths; } @@ -222,6 +240,27 @@ std::vector PathSearch::shortestPaths() const { return paths; } +// _____________________________________________________________________________ +std::vector PathSearch::reconstructPaths(uint64_t target, PredecessorMap predecessors) const { + auto edges = predecessors[target]; + std::vector paths; + + for (auto edge: edges) { + std::vector subPaths; + if (edge.start_ == config_.source_.getBits()) { + subPaths = {Path()}; + } else { + subPaths = reconstructPaths(edge.start_, predecessors); + } + + for (auto path: subPaths) { + path.push_back(edge); + paths.push_back(path); + } + } + return paths; +} + // _____________________________________________________________________________ template void PathSearch::pathsToResultTable(IdTable& tableDyn, diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index ccb652fbbf..42711ae5d0 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -142,6 +142,8 @@ class PathSearch : public Operation { */ std::vector shortestPaths() const; + std::vector reconstructPaths(uint64_t target, PredecessorMap predecessors) const; + /** * @brief Converts paths to a result table with a specified width. * @tparam WIDTH The width of the result table. 
diff --git a/src/engine/PathSearchVisitors.h b/src/engine/PathSearchVisitors.h index d1557e2393..ffc256290c 100644 --- a/src/engine/PathSearchVisitors.h +++ b/src/engine/PathSearchVisitors.h @@ -10,6 +10,7 @@ #include #include #include +#include #include /** @@ -104,21 +105,21 @@ typedef boost::adjacency_list::vertex_descriptor VertexDescriptor; typedef boost::graph_traits::edge_descriptor EdgeDescriptor; +using PredecessorMap = std::unordered_map>; + +struct StopSearchException : public std::exception { + const char* what() const noexcept override { + return "Stop DFS"; + } +}; + /** * @brief Visitor for performing a depth-first search to find all paths. */ class AllPathsVisitor : public boost::default_dfs_visitor { - // Set of target node IDs. - std::unordered_set targets_; - - // Reference to the current path being explored. - Path& currentPath_; - - // Reference to the collection of all found paths. - std::vector& allPaths_; + VertexDescriptor start_; - // Mapping from indices to IDs. - const std::vector& indexToId_; + PredecessorMap& predecessors_; public: /** @@ -128,12 +129,9 @@ class AllPathsVisitor : public boost::default_dfs_visitor { * @param paths Reference to the collection of all found paths. * @param indexToId Mapping from indices to IDs. */ - AllPathsVisitor(std::unordered_set targets, Path& path, - std::vector& paths, const std::vector& indexToId) - : targets_(std::move(targets)), - currentPath_(path), - allPaths_(paths), - indexToId_(indexToId) {} + AllPathsVisitor(VertexDescriptor start, PredecessorMap& predecessors) + : start_(start), + predecessors_(predecessors) {} /** * @brief Examines an edge during the depth-first search. 
@@ -142,22 +140,13 @@ class AllPathsVisitor : public boost::default_dfs_visitor { */ void examine_edge(EdgeDescriptor edgeDesc, const Graph& graph) { const Edge& edge = graph[edgeDesc]; - if (targets_.empty() || (currentPath_.ends_with(edge.start_) && - targets_.find(edge.end_) != targets_.end())) { - auto pathCopy = currentPath_; - pathCopy.push_back(edge); - allPaths_.push_back(pathCopy); + + if (!predecessors_.contains(edge.end_)) { + predecessors_[edge.end_] = {}; } - } - /** - * @brief Processes a tree edge during the depth-first search. - * @param edgeDesc The descriptor of the edge being processed. - * @param graph The graph being searched. - */ - void tree_edge(EdgeDescriptor edgeDesc, const Graph& graph) { - const Edge& edge = graph[edgeDesc]; - currentPath_.edges_.push_back(edge); + auto& predEdges = predecessors_[edge.end_]; + predEdges.push_back(edge); } /** @@ -168,9 +157,8 @@ class AllPathsVisitor : public boost::default_dfs_visitor { */ void finish_vertex(VertexDescriptor vertex, const Graph& graph) { (void)graph; - if (!currentPath_.empty() && - Id::fromBits(currentPath_.lastNode().value()) == indexToId_[vertex]) { - currentPath_.edges_.pop_back(); + if (vertex == start_) { + throw StopSearchException(); } } }; diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 7dc9747842..1b9e794659 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -281,8 +281,8 @@ TEST(PathSearchTest, allPaths) { {V(0), V(1), I(1), I(0)}, {V(1), V(3), I(1), I(1)}, {V(0), V(2), I(2), I(0)}, + {V(2), V(3), I(2), I(1)}, {V(0), V(2), I(3), I(0)}, - {V(2), V(3), I(3), I(1)}, {V(0), V(2), I(4), I(0)}, {V(2), V(4), I(4), I(1)}, }); @@ -309,8 +309,8 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { {V(0), V(1), I(1), I(0), V(11), V(10)}, {V(1), V(3), I(1), I(1), V(21), V(20)}, {V(0), V(2), I(2), I(0), V(31), V(30)}, + {V(2), V(3), I(2), I(1), V(41), V(40)}, {V(0), V(2), I(3), I(0), V(31), V(30)}, - {V(2), V(3), I(3), I(1), V(41), V(40)}, 
{V(0), V(2), I(4), I(0), V(31), V(30)}, {V(2), V(4), I(4), I(1), V(51), V(50)}, }); @@ -439,3 +439,62 @@ TEST(PathSearchTest, shortestPathWithIrrelevantNode) { ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } + + +/** + * Graph: + * 0 + * | + * 1 + * / \ + * 2 3 + * \ / + * 4 + * | + * 5 + */ +TEST(PathSearchTest, allPathsElongatedDiamond) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(4), I(0), I(2)}, + {V(4), V(5), I(0), I(3)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(3), I(1), I(1)}, + {V(3), V(4), I(1), I(2)}, + {V(4), V(5), I(1), I(3)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{ + ALL_PATHS, V(0), {V(5)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, shortestPathsElongatedDiamond) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(4), I(0), I(2)}, + {V(4), V(5), I(0), I(3)} + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{ + SHORTEST_PATHS, V(0), {V(5)}, + Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + + From 765f1aa80f9051249cd0608f4f4b43d553e490a7 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 25 Jun 2024 22:03:03 +0200 Subject: [PATCH 23/96] Format fix --- src/engine/PathSearch.cpp | 18 ++++++++++-------- 
src/engine/PathSearch.h | 3 ++- src/engine/PathSearchVisitors.h | 7 ++----- test/PathSearchTest.cpp | 21 +++++++++------------ 4 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 1fbad45d3d..414b227a2b 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -189,7 +189,7 @@ std::vector PathSearch::allPaths() const { } if (targets.empty()) { - for (auto id: indexToId_) { + for (auto id : indexToId_) { targets.push_back(id.getBits()); } } @@ -200,11 +200,12 @@ std::vector PathSearch::allPaths() const { try { boost::depth_first_search(graph_, boost::visitor(vis).root_vertex(startIndex)); - } catch (const StopSearchException&e) {} + } catch (const StopSearchException& e) { + } - for (auto target: targets) { + for (auto target : targets) { auto pathsToTarget = reconstructPaths(target, predecessors); - for (auto path: pathsToTarget) { + for (auto path : pathsToTarget) { paths.push_back(std::move(path)); } } @@ -241,19 +242,20 @@ std::vector PathSearch::shortestPaths() const { } // _____________________________________________________________________________ -std::vector PathSearch::reconstructPaths(uint64_t target, PredecessorMap predecessors) const { +std::vector PathSearch::reconstructPaths( + uint64_t target, PredecessorMap predecessors) const { auto edges = predecessors[target]; std::vector paths; - for (auto edge: edges) { + for (auto edge : edges) { std::vector subPaths; if (edge.start_ == config_.source_.getBits()) { subPaths = {Path()}; } else { subPaths = reconstructPaths(edge.start_, predecessors); } - - for (auto path: subPaths) { + + for (auto path : subPaths) { path.push_back(edge); paths.push_back(path); } diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 42711ae5d0..bec96d00f1 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -142,7 +142,8 @@ class PathSearch : public Operation { */ std::vector shortestPaths() const; - 
std::vector reconstructPaths(uint64_t target, PredecessorMap predecessors) const; + std::vector reconstructPaths(uint64_t target, + PredecessorMap predecessors) const; /** * @brief Converts paths to a result table with a specified width. diff --git a/src/engine/PathSearchVisitors.h b/src/engine/PathSearchVisitors.h index ffc256290c..cc474c1ce2 100644 --- a/src/engine/PathSearchVisitors.h +++ b/src/engine/PathSearchVisitors.h @@ -108,9 +108,7 @@ typedef boost::graph_traits::edge_descriptor EdgeDescriptor; using PredecessorMap = std::unordered_map>; struct StopSearchException : public std::exception { - const char* what() const noexcept override { - return "Stop DFS"; - } + const char* what() const noexcept override { return "Stop DFS"; } }; /** @@ -130,8 +128,7 @@ class AllPathsVisitor : public boost::default_dfs_visitor { * @param indexToId Mapping from indices to IDs. */ AllPathsVisitor(VertexDescriptor start, PredecessorMap& predecessors) - : start_(start), - predecessors_(predecessors) {} + : start_(start), predecessors_(predecessors) {} /** * @brief Examines an edge during the depth-first search. 
diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 1b9e794659..b8ff9b67cc 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -440,7 +440,6 @@ TEST(PathSearchTest, shortestPathWithIrrelevantNode) { ::testing::UnorderedElementsAreArray(expected)); } - /** * Graph: * 0 @@ -454,7 +453,8 @@ TEST(PathSearchTest, shortestPathWithIrrelevantNode) { * 5 */ TEST(PathSearchTest, allPathsElongatedDiamond) { - auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}}); + auto sub = + makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}}); auto expected = makeIdTableFromVector({ {V(0), V(1), I(0), I(0)}, {V(1), V(2), I(0), I(1)}, @@ -478,17 +478,16 @@ TEST(PathSearchTest, allPathsElongatedDiamond) { } TEST(PathSearchTest, shortestPathsElongatedDiamond) { - auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}}); - auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(2), V(4), I(0), I(2)}, - {V(4), V(5), I(0), I(3)} - }); + auto sub = + makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}}); + auto expected = makeIdTableFromVector({{V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(4), I(0), I(2)}, + {V(4), V(5), I(0), I(3)}}); Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, V(0), {V(5)}, + SHORTEST_PATHS, V(0), {V(5)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -496,5 +495,3 @@ TEST(PathSearchTest, shortestPathsElongatedDiamond) { ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } - - From da1eb3abb71d6cda93d72febe429183821de473b Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 26 Jun 2024 09:33:10 +0200 Subject: [PATCH 24/96] Sonar Fixes --- src/engine/PathSearch.cpp | 17 +++++++---------- src/engine/PathSearchVisitors.h | 15 +++++++-------- src/engine/QueryPlanner.cpp 
| 6 +++--- src/parser/GraphPatternOperation.cpp | 2 +- src/parser/GraphPatternOperation.h | 2 +- src/parser/sparqlParser/SparqlQleverVisitor.cpp | 4 ++-- 6 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 414b227a2b..178eba631f 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -12,7 +12,6 @@ #include "engine/CallFixedSize.h" #include "engine/PathSearchVisitors.h" #include "engine/VariableToColumnMap.h" -#include "parser/GraphPatternOperation.h" #include "util/Exception.h" // _____________________________________________________________________________ @@ -21,9 +20,7 @@ PathSearch::PathSearch(QueryExecutionContext* qec, PathSearchConfiguration config) : Operation(qec), subtree_(std::move(subtree)), - graph_(), config_(std::move(config)), - indexToId_(), idToIndex_(allocator()) { AD_CORRECTNESS_CHECK(qec != nullptr); resultWidth_ = 4 + config_.edgeProperties_.size(); @@ -98,9 +95,9 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { const IdTable& dynSub = subRes->idTable(); - if (dynSub.size() > 0) { + if (!dynSub.empty()) { std::vector> edgePropertyLists; - for (auto edgeProperty : config_.edgeProperties_) { + for (const auto& edgeProperty : config_.edgeProperties_) { auto edgePropertyIndex = subtree_->getVariableColumn(edgeProperty); edgePropertyLists.push_back(dynSub.getColumn(edgePropertyIndex)); } @@ -128,7 +125,7 @@ VariableToColumnMap PathSearch::computeVariableToColumnMap() const { void PathSearch::buildMapping(std::span startNodes, std::span endNodes) { auto addNode = [this](const Id node) { - if (idToIndex_.find(node) == idToIndex_.end()) { + if (!idToIndex_.contains(node)) { idToIndex_[node] = indexToId_.size(); indexToId_.push_back(node); } @@ -229,12 +226,12 @@ std::vector PathSearch::shortestPaths() const { DijkstraAllPathsVisitor vis(startIndex, targets, path, paths, predecessors, distances); - auto weight_map = get(&Edge::weight_, 
graph_); + auto weightMap = get(&Edge::weight_, graph_); boost::dijkstra_shortest_paths( graph_, startIndex, boost::visitor(vis) - .weight_map(weight_map) + .weight_map(weightMap) .predecessor_map(predecessors.data()) .distance_map(distances.data()) .distance_compare(std::less_equal())); @@ -244,10 +241,10 @@ std::vector PathSearch::shortestPaths() const { // _____________________________________________________________________________ std::vector PathSearch::reconstructPaths( uint64_t target, PredecessorMap predecessors) const { - auto edges = predecessors[target]; + const auto& edges = predecessors[target]; std::vector paths; - for (auto edge : edges) { + for (const auto& edge : edges) { std::vector subPaths; if (edge.start_ == config_.source_.getBits()) { subPaths = {Path()}; diff --git a/src/engine/PathSearchVisitors.h b/src/engine/PathSearchVisitors.h index cc474c1ce2..d5558a504f 100644 --- a/src/engine/PathSearchVisitors.h +++ b/src/engine/PathSearchVisitors.h @@ -61,12 +61,12 @@ struct Path { * @brief Adds an edge to the end of the path. * @param edge The edge to add. */ - void push_back(Edge edge) { edges_.push_back(edge); } + void push_back(const Edge& edge) { edges_.push_back(edge); } /** * @brief Reverses the order of the edges in the path. */ - void reverse() { std::reverse(edges_.begin(), edges_.end()); } + void reverse() { std::ranges::reverse(edges_); } /** * @brief Returns the ID of the first node in the path, if it exists. @@ -99,11 +99,10 @@ struct Path { /** * @brief Boost graph types and descriptors. 
*/ -typedef boost::adjacency_list - Graph; -typedef boost::graph_traits::vertex_descriptor VertexDescriptor; -typedef boost::graph_traits::edge_descriptor EdgeDescriptor; +using Graph = boost::adjacency_list; +using VertexDescriptor = boost::graph_traits::vertex_descriptor; +using EdgeDescriptor = boost::graph_traits::edge_descriptor; using PredecessorMap = std::unordered_map>; @@ -226,7 +225,7 @@ class DijkstraAllPathsVisitor : public boost::default_dijkstra_visitor { */ void edge_relaxed(EdgeDescriptor edgeDesc, const Graph& graph) { const Edge& edge = graph[edgeDesc]; - if (targets_.empty() || targets_.find(edge.end_) != targets_.end()) { + if (targets_.empty() || targets_.contains(edge.end_)) { rebuild_path(target(edgeDesc, graph), graph); } } diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 54089a5d51..b9aaf9e1c6 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -2114,7 +2114,7 @@ void QueryPlanner::GraphPatternPlanner::visitPathSearch( parsedQuery::PathQuery& pathQuery) { auto candidatesIn = planner_.optimize(&pathQuery.childGraphPattern_); std::vector candidatesOut; - auto tripleComponentToId = [this](TripleComponent& comp) -> Id { + auto tripleComponentToId = [this](const TripleComponent& comp) { auto opt = comp.toValueId(planner_._qec->getIndex().getVocab()); if (opt.has_value()) { return opt.value(); @@ -2138,8 +2138,8 @@ void QueryPlanner::GraphPatternPlanner::visitPathSearch( std::move(pathQuery.edgeProperties_)}; for (auto& sub : candidatesIn) { - auto pathSearch = std::make_shared( - PathSearch(qec_, std::move(sub._qet), config)); + auto pathSearch = + std::make_shared(qec_, std::move(sub._qet), config); auto plan = makeSubtreePlan(std::move(pathSearch)); candidatesOut.push_back(std::move(plan)); } diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index b51b3cec51..ee4ee061e2 100644 --- a/src/parser/GraphPatternOperation.cpp +++ 
b/src/parser/GraphPatternOperation.cpp @@ -68,7 +68,7 @@ void BasicGraphPattern::appendTriples(BasicGraphPattern other) { } // ____________________________________________________________________________ -void PathQuery::addParameter(SparqlTriple& triple) { +void PathQuery::addParameter(const SparqlTriple& triple) { auto simpleTriple = triple.getSimple(); TripleComponent predicate = simpleTriple.p_; TripleComponent object = simpleTriple.o_; diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index f49e4fe8ae..33348769e2 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -149,7 +149,7 @@ struct PathQuery { GraphPattern childGraphPattern_; - void addParameter(SparqlTriple& triple); + void addParameter(const SparqlTriple& triple); void fromBasicPattern(const BasicGraphPattern& pattern); }; diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index da5320a7a3..5512cc3a86 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -740,12 +740,12 @@ GraphPatternOperation Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { parsedQuery::GraphPattern graphPattern = visit(ctx->groupGraphPattern()); parsedQuery::PathQuery pathQuery; - for (auto op : graphPattern._graphPatterns) { + for (const auto& op : graphPattern._graphPatterns) { parsePathQuery(pathQuery, op); } return pathQuery; - }; + } // Parse the body of the SERVICE query. 
Add the visible variables from the // SERVICE clause to the visible variables so far, but also remember them From 0027d7b41c24ca739ac5788d15394faded8addc2 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sun, 30 Jun 2024 12:19:25 +0200 Subject: [PATCH 25/96] Added multisource to PathSearch --- src/engine/PathSearch.cpp | 90 +++++++++++++++------------- src/engine/PathSearch.h | 6 +- src/engine/QueryPlanner.cpp | 7 ++- src/parser/GraphPatternOperation.cpp | 2 +- src/parser/GraphPatternOperation.h | 2 +- test/PathSearchTest.cpp | 36 +++++------ test/QueryPlannerTest.cpp | 8 +-- test/QueryPlannerTestHelpers.h | 4 +- 8 files changed, 80 insertions(+), 75 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 178eba631f..481d1d00dd 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -178,32 +178,34 @@ std::vector PathSearch::allPaths() const { std::vector paths; Path path; - auto startIndex = idToIndex_.at(config_.source_); + for (auto source: config_.sources_) { + auto startIndex = idToIndex_.at(source); - std::vector targets; - for (auto target : config_.targets_) { - targets.push_back(target.getBits()); - } + std::vector targets; + for (auto target : config_.targets_) { + targets.push_back(target.getBits()); + } - if (targets.empty()) { - for (auto id : indexToId_) { - targets.push_back(id.getBits()); + if (targets.empty()) { + for (auto id : indexToId_) { + targets.push_back(id.getBits()); + } } - } - PredecessorMap predecessors; + PredecessorMap predecessors; - AllPathsVisitor vis(startIndex, predecessors); - try { - boost::depth_first_search(graph_, - boost::visitor(vis).root_vertex(startIndex)); - } catch (const StopSearchException& e) { - } + AllPathsVisitor vis(startIndex, predecessors); + try { + boost::depth_first_search(graph_, + boost::visitor(vis).root_vertex(startIndex)); + } catch (const StopSearchException& e) { + } - for (auto target : targets) { - auto pathsToTarget = reconstructPaths(target, 
predecessors); - for (auto path : pathsToTarget) { - paths.push_back(std::move(path)); + for (auto target : targets) { + auto pathsToTarget = reconstructPaths(source.getBits(), target, predecessors); + for (auto path : pathsToTarget) { + paths.push_back(std::move(path)); + } } } return paths; @@ -213,43 +215,45 @@ std::vector PathSearch::allPaths() const { std::vector PathSearch::shortestPaths() const { std::vector paths; Path path; - auto startIndex = idToIndex_.at(config_.source_); + for (auto source: config_.sources_) { + auto startIndex = idToIndex_.at(source); - std::unordered_set targets; - for (auto target : config_.targets_) { - targets.insert(target.getBits()); + std::unordered_set targets; + for (auto target : config_.targets_) { + targets.insert(target.getBits()); + } + std::vector predecessors(indexToId_.size()); + std::vector distances(indexToId_.size(), + std::numeric_limits::max()); + + DijkstraAllPathsVisitor vis(startIndex, targets, path, paths, predecessors, + distances); + + auto weightMap = get(&Edge::weight_, graph_); + + boost::dijkstra_shortest_paths( + graph_, startIndex, + boost::visitor(vis) + .weight_map(weightMap) + .predecessor_map(predecessors.data()) + .distance_map(distances.data()) + .distance_compare(std::less_equal())); } - std::vector predecessors(indexToId_.size()); - std::vector distances(indexToId_.size(), - std::numeric_limits::max()); - - DijkstraAllPathsVisitor vis(startIndex, targets, path, paths, predecessors, - distances); - - auto weightMap = get(&Edge::weight_, graph_); - - boost::dijkstra_shortest_paths( - graph_, startIndex, - boost::visitor(vis) - .weight_map(weightMap) - .predecessor_map(predecessors.data()) - .distance_map(distances.data()) - .distance_compare(std::less_equal())); return paths; } // _____________________________________________________________________________ -std::vector PathSearch::reconstructPaths( +std::vector PathSearch::reconstructPaths(uint64_t source, uint64_t target, PredecessorMap 
predecessors) const { const auto& edges = predecessors[target]; std::vector paths; for (const auto& edge : edges) { std::vector subPaths; - if (edge.start_ == config_.source_.getBits()) { + if (edge.start_ == source) { subPaths = {Path()}; } else { - subPaths = reconstructPaths(edge.start_, predecessors); + subPaths = reconstructPaths(source, edge.start_, predecessors); } for (auto path : subPaths) { diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index bec96d00f1..7ce5a695ca 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -34,7 +34,7 @@ struct PathSearchConfiguration { // The path search algorithm to use. PathSearchAlgorithm algorithm_; // The source node ID. - Id source_; + std::vector sources_; // A list of target node IDs. std::vector targets_; // Variable representing the start column in the result. @@ -73,8 +73,6 @@ class PathSearch : public Operation { PathSearchConfiguration config); std::vector getChildren() override; - const Id& getSource() const { return config_.source_; } - const std::vector& getTargets() const { return config_.targets_; } const PathSearchConfiguration& getConfig() const { return config_; } @@ -142,7 +140,7 @@ class PathSearch : public Operation { */ std::vector shortestPaths() const; - std::vector reconstructPaths(uint64_t target, + std::vector reconstructPaths(uint64_t source, uint64_t target, PredecessorMap predecessors) const; /** diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index b9aaf9e1c6..1513b0b0fc 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -2122,14 +2122,17 @@ void QueryPlanner::GraphPatternPlanner::visitPathSearch( AD_THROW("No vocabulary entry for " + comp.toString()); } }; - auto source = tripleComponentToId(pathQuery.source_); + std::vector sources; + for (auto comp: pathQuery.sources_) { + sources.push_back(tripleComponentToId(comp)); + } std::vector targets; for (auto comp : pathQuery.targets_) { 
targets.push_back(tripleComponentToId(comp)); } auto config = PathSearchConfiguration{pathQuery.algorithm_, - std::move(source), + std::move(sources), std::move(targets), std::move(pathQuery.start_.value()), std::move(pathQuery.end_.value()), diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index ee4ee061e2..f973244cbe 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -75,7 +75,7 @@ void PathQuery::addParameter(const SparqlTriple& triple) { AD_CORRECTNESS_CHECK(predicate.isIri()); if (predicate.getIri().toStringRepresentation().ends_with("source>")) { AD_CORRECTNESS_CHECK(object.isIri()); - source_ = std::move(object); + sources_.push_back(std::move(object)); } else if (predicate.getIri().toStringRepresentation().ends_with("target>")) { AD_CORRECTNESS_CHECK(object.isIri()); targets_.push_back(std::move(object)); diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 33348769e2..a140ca3372 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -138,7 +138,7 @@ struct TransPath { }; struct PathQuery { - TripleComponent source_; + std::vector sources_; std::vector targets_; std::optional start_; std::optional end_; diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index b8ff9b67cc..ab12367a4a 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -35,7 +35,7 @@ Result performPathSearch(PathSearchConfiguration config, IdTable input, TEST(PathSearchTest, constructor) { auto qec = getQec(); PathSearchConfiguration config{ - ALL_PATHS, V(0), {V(1)}, + ALL_PATHS, {V(0)}, {V(1)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; PathSearch p = PathSearch(qec, nullptr, config); @@ -49,7 +49,7 @@ TEST(PathSearchTest, emptyGraph) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, V(0), {V(4)}, + ALL_PATHS, {V(0)}, {V(4)}, 
Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -73,7 +73,7 @@ TEST(PathSearchTest, singlePath) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, V(0), {V(4)}, + ALL_PATHS, {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -94,7 +94,7 @@ TEST(PathSearchTest, singlePathWithProperties) { Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; PathSearchConfiguration config{ALL_PATHS, - V(0), + {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, @@ -118,7 +118,7 @@ TEST(PathSearchTest, singlePathWithDijkstra) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, V(0), {V(4)}, + SHORTEST_PATHS, {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -139,7 +139,7 @@ TEST(PathSearchTest, singlePathWithDijkstraAndProperties) { Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; PathSearchConfiguration config{SHORTEST_PATHS, - V(0), + {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, @@ -171,7 +171,7 @@ TEST(PathSearchTest, twoPathsOneTarget) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, V(0), {V(2)}, + ALL_PATHS, {V(0)}, {V(2)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -198,7 +198,7 @@ TEST(PathSearchTest, twoPathsTwoTargets) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, V(0), + PathSearchConfiguration config{ALL_PATHS, {V(0)}, {V(2), V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -226,7 +226,7 @@ TEST(PathSearchTest, cycle) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, V(0), {V(0)}, + ALL_PATHS, {V(0)}, {V(0)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -256,7 +256,7 @@ 
TEST(PathSearchTest, twoCycle) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, V(0), {V(0)}, + ALL_PATHS, {V(0)}, {V(0)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -289,7 +289,7 @@ TEST(PathSearchTest, allPaths) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, V(0), {}, + ALL_PATHS, {V(0)}, {}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -319,7 +319,7 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { Variable{"?edgeProperty2"}}; PathSearchConfiguration config{ ALL_PATHS, - V(0), + {V(0)}, {}, Var{"?start"}, Var{"?end"}, @@ -352,7 +352,7 @@ TEST(PathSearchTest, singleShortestPath) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, V(0), {V(4)}, + SHORTEST_PATHS, {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -384,7 +384,7 @@ TEST(PathSearchTest, twoShortestPaths) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, V(0), {V(5)}, + SHORTEST_PATHS, {V(0)}, {V(5)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -411,7 +411,7 @@ TEST(PathSearchTest, singlePathWithIrrelevantNode) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, V(0), {V(4)}, + ALL_PATHS, {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -431,7 +431,7 @@ TEST(PathSearchTest, shortestPathWithIrrelevantNode) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, V(0), {V(4)}, + SHORTEST_PATHS, {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -468,7 +468,7 @@ TEST(PathSearchTest, allPathsElongatedDiamond) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - 
ALL_PATHS, V(0), {V(5)}, + ALL_PATHS, {V(0)}, {V(5)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -487,7 +487,7 @@ TEST(PathSearchTest, shortestPathsElongatedDiamond) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, V(0), {V(5)}, + SHORTEST_PATHS, {V(0)}, {V(5)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index c8934e5776..766cc34141 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -777,7 +777,7 @@ TEST(QueryPlanner, PathSearchSingleTarget) { auto qec = ad_utility::testing::getQec("

.

"); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); - PathSearchConfiguration config{ALL_PATHS, getId(""), + PathSearchConfiguration config{ALL_PATHS, {getId("")}, {getId("")}, Variable("?start"), Variable("?end"), Variable("?path"), Variable("?edge"), {}}; @@ -804,7 +804,7 @@ TEST(QueryPlanner, PathSearchMultipleTargets) { auto getId = ad_utility::testing::makeGetId(qec->getIndex()); PathSearchConfiguration config{ALL_PATHS, - getId(""), + {getId("")}, {getId(""), getId("")}, Variable("?start"), Variable("?end"), @@ -836,7 +836,7 @@ TEST(QueryPlanner, PathSearchWithEdgeProperties) { " . . . "); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); - PathSearchConfiguration config{ALL_PATHS, getId(""), + PathSearchConfiguration config{ALL_PATHS, {getId("")}, {getId("")}, Variable("?start"), Variable("?end"), Variable("?path"), Variable("?edge"), {Variable("?middle")}}; @@ -875,7 +875,7 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { PathSearchConfiguration config{ ALL_PATHS, - getId(""), + {getId("")}, {getId(""), getId("")}, Variable("?start"), Variable("?end"), diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index 0a8e666846..9828893ef1 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -263,10 +263,10 @@ inline auto TransitivePath = inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { return AllOf( AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)), - AD_FIELD(PathSearchConfiguration, source_, Eq(config.source_)), - AD_FIELD(PathSearchConfiguration, start_, Eq(config.start_)), + AD_FIELD(PathSearchConfiguration, sources_, UnorderedElementsAreArray(config.sources_)), AD_FIELD(PathSearchConfiguration, targets_, UnorderedElementsAreArray(config.targets_)), + AD_FIELD(PathSearchConfiguration, start_, Eq(config.start_)), AD_FIELD(PathSearchConfiguration, end_, Eq(config.end_)), AD_FIELD(PathSearchConfiguration, 
pathColumn_, Eq(config.pathColumn_)), AD_FIELD(PathSearchConfiguration, edgeColumn_, Eq(config.edgeColumn_)), From fbb61a1f454e5dffc0f1ff712b42de232c1699f7 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sun, 30 Jun 2024 12:22:41 +0200 Subject: [PATCH 26/96] format fix --- src/engine/PathSearch.cpp | 11 ++++++----- src/engine/QueryPlanner.cpp | 2 +- test/PathSearchTest.cpp | 34 +++++++++++++++++----------------- test/QueryPlannerTest.cpp | 4 ++-- test/QueryPlannerTestHelpers.h | 3 ++- 5 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 481d1d00dd..8f0c1bd982 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -178,7 +178,7 @@ std::vector PathSearch::allPaths() const { std::vector paths; Path path; - for (auto source: config_.sources_) { + for (auto source : config_.sources_) { auto startIndex = idToIndex_.at(source); std::vector targets; @@ -202,7 +202,8 @@ std::vector PathSearch::allPaths() const { } for (auto target : targets) { - auto pathsToTarget = reconstructPaths(source.getBits(), target, predecessors); + auto pathsToTarget = + reconstructPaths(source.getBits(), target, predecessors); for (auto path : pathsToTarget) { paths.push_back(std::move(path)); } @@ -215,7 +216,7 @@ std::vector PathSearch::allPaths() const { std::vector PathSearch::shortestPaths() const { std::vector paths; Path path; - for (auto source: config_.sources_) { + for (auto source : config_.sources_) { auto startIndex = idToIndex_.at(source); std::unordered_set targets; @@ -243,8 +244,8 @@ std::vector PathSearch::shortestPaths() const { } // _____________________________________________________________________________ -std::vector PathSearch::reconstructPaths(uint64_t source, - uint64_t target, PredecessorMap predecessors) const { +std::vector PathSearch::reconstructPaths( + uint64_t source, uint64_t target, PredecessorMap predecessors) const { const auto& edges = predecessors[target]; 
std::vector paths; diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 1513b0b0fc..6d075d8207 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -2123,7 +2123,7 @@ void QueryPlanner::GraphPatternPlanner::visitPathSearch( } }; std::vector sources; - for (auto comp: pathQuery.sources_) { + for (auto comp : pathQuery.sources_) { sources.push_back(tripleComponentToId(comp)); } std::vector targets; diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index ab12367a4a..6dbec9cbe5 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -35,7 +35,7 @@ Result performPathSearch(PathSearchConfiguration config, IdTable input, TEST(PathSearchTest, constructor) { auto qec = getQec(); PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(1)}, + ALL_PATHS, {V(0)}, {V(1)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; PathSearch p = PathSearch(qec, nullptr, config); @@ -49,7 +49,7 @@ TEST(PathSearchTest, emptyGraph) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(4)}, + ALL_PATHS, {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -73,7 +73,7 @@ TEST(PathSearchTest, singlePath) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(4)}, + ALL_PATHS, {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -94,7 +94,7 @@ TEST(PathSearchTest, singlePathWithProperties) { Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; PathSearchConfiguration config{ALL_PATHS, - {V(0)}, + {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, @@ -118,7 +118,7 @@ TEST(PathSearchTest, singlePathWithDijkstra) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, {V(0)}, {V(4)}, + SHORTEST_PATHS, {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, 
Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -139,7 +139,7 @@ TEST(PathSearchTest, singlePathWithDijkstraAndProperties) { Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; PathSearchConfiguration config{SHORTEST_PATHS, - {V(0)}, + {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, @@ -171,7 +171,7 @@ TEST(PathSearchTest, twoPathsOneTarget) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(2)}, + ALL_PATHS, {V(0)}, {V(2)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -226,7 +226,7 @@ TEST(PathSearchTest, cycle) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(0)}, + ALL_PATHS, {V(0)}, {V(0)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -256,7 +256,7 @@ TEST(PathSearchTest, twoCycle) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(0)}, + ALL_PATHS, {V(0)}, {V(0)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -289,7 +289,7 @@ TEST(PathSearchTest, allPaths) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {}, + ALL_PATHS, {V(0)}, {}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -319,7 +319,7 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { Variable{"?edgeProperty2"}}; PathSearchConfiguration config{ ALL_PATHS, - {V(0)}, + {V(0)}, {}, Var{"?start"}, Var{"?end"}, @@ -352,7 +352,7 @@ TEST(PathSearchTest, singleShortestPath) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, {V(0)}, {V(4)}, + SHORTEST_PATHS, {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -384,7 +384,7 @@ TEST(PathSearchTest, twoShortestPaths) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - 
SHORTEST_PATHS, {V(0)}, {V(5)}, + SHORTEST_PATHS, {V(0)}, {V(5)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -411,7 +411,7 @@ TEST(PathSearchTest, singlePathWithIrrelevantNode) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(4)}, + ALL_PATHS, {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -431,7 +431,7 @@ TEST(PathSearchTest, shortestPathWithIrrelevantNode) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, {V(0)}, {V(4)}, + SHORTEST_PATHS, {V(0)}, {V(4)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -468,7 +468,7 @@ TEST(PathSearchTest, allPathsElongatedDiamond) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(5)}, + ALL_PATHS, {V(0)}, {V(5)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -487,7 +487,7 @@ TEST(PathSearchTest, shortestPathsElongatedDiamond) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, {V(0)}, {V(5)}, + SHORTEST_PATHS, {V(0)}, {V(5)}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 766cc34141..964c3c7325 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -804,7 +804,7 @@ TEST(QueryPlanner, PathSearchMultipleTargets) { auto getId = ad_utility::testing::makeGetId(qec->getIndex()); PathSearchConfiguration config{ALL_PATHS, - {getId("")}, + {getId("")}, {getId(""), getId("")}, Variable("?start"), Variable("?end"), @@ -875,7 +875,7 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { PathSearchConfiguration config{ ALL_PATHS, - {getId("")}, + {getId("")}, {getId(""), getId("")}, Variable("?start"), Variable("?end"), diff --git a/test/QueryPlannerTestHelpers.h 
b/test/QueryPlannerTestHelpers.h index 9828893ef1..4f1a9eca83 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -263,7 +263,8 @@ inline auto TransitivePath = inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { return AllOf( AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)), - AD_FIELD(PathSearchConfiguration, sources_, UnorderedElementsAreArray(config.sources_)), + AD_FIELD(PathSearchConfiguration, sources_, + UnorderedElementsAreArray(config.sources_)), AD_FIELD(PathSearchConfiguration, targets_, UnorderedElementsAreArray(config.targets_)), AD_FIELD(PathSearchConfiguration, start_, Eq(config.start_)), From 17a8b39f284ee7464d2c7d228f8fc4f7c2984f0b Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 1 Jul 2024 05:34:55 +0200 Subject: [PATCH 27/96] Added mutlisource multitarget tests --- test/PathSearchTest.cpp | 62 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 6dbec9cbe5..3780b5799f 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -495,3 +495,65 @@ TEST(PathSearchTest, shortestPathsElongatedDiamond) { ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } + +/** + * Graph: + * 0 4 + * \ / + * 2-->3 + * / \ + * 1 5 + */ +TEST(PathSearchTest, multiSourceMultiTargetallPaths) { + auto sub = makeIdTableFromVector({{0, 2}, {1, 2}, {2, 3}, {3, 4}, {3, 5}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0)}, + {V(2), V(3), I(0), I(1)}, + {V(3), V(4), I(0), I(2)}, + {V(0), V(2), I(1), I(0)}, + {V(2), V(3), I(1), I(1)}, + {V(3), V(5), I(1), I(2)}, + {V(1), V(2), I(2), I(0)}, + {V(2), V(3), I(2), I(1)}, + {V(3), V(4), I(2), I(2)}, + {V(1), V(2), I(3), I(0)}, + {V(2), V(3), I(3), I(1)}, + {V(3), V(5), I(3), I(2)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{ + ALL_PATHS, {V(0), 
V(1)}, {V(4), V(5)}, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, multiSourceMultiTargetshortestPaths) { + auto sub = makeIdTableFromVector({{0, 2}, {1, 2}, {2, 3}, {3, 4}, {3, 5}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0)}, + {V(2), V(3), I(0), I(1)}, + {V(3), V(4), I(0), I(2)}, + {V(0), V(2), I(1), I(0)}, + {V(2), V(3), I(1), I(1)}, + {V(3), V(5), I(1), I(2)}, + {V(1), V(2), I(2), I(0)}, + {V(2), V(3), I(2), I(1)}, + {V(3), V(4), I(2), I(2)}, + {V(1), V(2), I(3), I(0)}, + {V(2), V(3), I(3), I(1)}, + {V(3), V(5), I(3), I(2)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{ + SHORTEST_PATHS, {V(0), V(1)}, {V(4), V(5)}, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} From b2611c9125e5d742a7d5bf0644d1133fd9deeb71 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 2 Jul 2024 15:16:03 +0200 Subject: [PATCH 28/96] Added createJoinWithPathSearch --- src/engine/PathSearch.cpp | 105 ++++++++++++++++++++++----------- src/engine/PathSearch.h | 50 ++++++++++++++-- src/engine/QueryPlanner.cpp | 45 ++++++++++++++ src/engine/QueryPlanner.h | 4 ++ test/PathSearchTest.cpp | 84 ++++++++++++++++++-------- test/QueryPlannerTest.cpp | 21 ++++--- test/QueryPlannerTestHelpers.h | 4 +- 7 files changed, 239 insertions(+), 74 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 8f0c1bd982..c935a41039 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -5,9 +5,9 @@ #include "PathSearch.h" #include -#include #include #include +#include #include 
"engine/CallFixedSize.h" #include "engine/PathSearchVisitors.h" @@ -24,17 +24,28 @@ PathSearch::PathSearch(QueryExecutionContext* qec, idToIndex_(allocator()) { AD_CORRECTNESS_CHECK(qec != nullptr); resultWidth_ = 4 + config_.edgeProperties_.size(); - variableColumns_[config_.start_] = makeAlwaysDefinedColumn(0); - variableColumns_[config_.end_] = makeAlwaysDefinedColumn(1); - variableColumns_[config_.pathColumn_] = makeAlwaysDefinedColumn(2); - variableColumns_[config_.edgeColumn_] = makeAlwaysDefinedColumn(3); - - for (size_t edgePropertyIndex = 0; - edgePropertyIndex < config_.edgeProperties_.size(); - edgePropertyIndex++) { - auto edgeProperty = config_.edgeProperties_[edgePropertyIndex]; - variableColumns_[edgeProperty] = - makeAlwaysDefinedColumn(4 + edgePropertyIndex); + + size_t colIndex = 0; + + variableColumns_[config_.start_] = makeAlwaysDefinedColumn(colIndex++); + variableColumns_[config_.end_] = makeAlwaysDefinedColumn(colIndex++); + variableColumns_[config_.pathColumn_] = makeAlwaysDefinedColumn(colIndex++); + variableColumns_[config_.edgeColumn_] = makeAlwaysDefinedColumn(colIndex++); + + if (std::holds_alternative(config_.sources_)) { + resultWidth_++; + const auto& sourceColumn = std::get(config_.sources_); + variableColumns_[sourceColumn] = makeAlwaysDefinedColumn(colIndex++); + } + + if (std::holds_alternative(config_.targets_)) { + resultWidth_++; + const auto& targetColumn = std::get(config_.targets_); + variableColumns_[targetColumn] = makeAlwaysDefinedColumn(colIndex++); + } + + for (auto edgeProperty: config_.edgeProperties_) { + variableColumns_[edgeProperty] = makeAlwaysDefinedColumn(colIndex++); } } @@ -87,6 +98,17 @@ bool PathSearch::knownEmptyResult() { return subtree_->knownEmptyResult(); }; // _____________________________________________________________________________ vector PathSearch::resultSortedOn() const { return {}; }; + +// _____________________________________________________________________________ +void 
PathSearch::bindSourceSide(std::shared_ptr sourcesOp, size_t inputCol) { + boundSources_ = {sourcesOp, inputCol}; +} + +// _____________________________________________________________________________ +void PathSearch::bindTargetSide(std::shared_ptr targetsOp, size_t inputCol) { + boundTargets_ = {targetsOp, inputCol}; +} + // _____________________________________________________________________________ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { std::shared_ptr subRes = subtree_->getResult(); @@ -94,7 +116,6 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { idTable.setNumColumns(getResultWidth()); const IdTable& dynSub = subRes->idTable(); - if (!dynSub.empty()) { std::vector> edgePropertyLists; for (const auto& edgeProperty : config_.edgeProperties_) { @@ -107,7 +128,10 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { buildGraph(dynSub.getColumn(subStartColumn), dynSub.getColumn(subEndColumn), edgePropertyLists); - auto paths = findPaths(); + std::span sources = handleSearchSide(config_.sources_, boundSources_); + std::span targets = handleSearchSide(config_.targets_, boundTargets_); + + auto paths = findPaths(sources, targets); CALL_FIXED_SIZE(std::array{getResultWidth()}, &PathSearch::pathsToResultTable, this, idTable, paths); @@ -136,6 +160,23 @@ void PathSearch::buildMapping(std::span startNodes, } } +std::span PathSearch::handleSearchSide(const SearchSide& side, const std::optional& binding) const { + std::span ids; + bool isVariable = std::holds_alternative(side); + if (isVariable && binding.has_value()) { + ids = binding->first->getResult()->idTable().getColumn(binding->second); + } else if (isVariable || std::get>(side).empty()) { + std::vector idVec; + for (auto id: indexToId_) { + idVec.push_back(id); + } + ids = idVec; + } else { + ids = std::get>(side); + } + return ids; +} + // _____________________________________________________________________________ void 
PathSearch::buildGraph(std::span startNodes, std::span endNodes, @@ -162,34 +203,28 @@ void PathSearch::buildGraph(std::span startNodes, } // _____________________________________________________________________________ -std::vector PathSearch::findPaths() const { +std::vector PathSearch::findPaths(std::span sources, std::span targets) const { switch (config_.algorithm_) { case ALL_PATHS: - return allPaths(); + return allPaths(sources, targets); case SHORTEST_PATHS: - return shortestPaths(); + return shortestPaths(sources, targets); default: AD_FAIL(); } } // _____________________________________________________________________________ -std::vector PathSearch::allPaths() const { +std::vector PathSearch::allPaths(std::span sources, std::span targets) const { std::vector paths; Path path; - for (auto source : config_.sources_) { + for (auto source : sources) { auto startIndex = idToIndex_.at(source); - std::vector targets; - for (auto target : config_.targets_) { - targets.push_back(target.getBits()); - } - - if (targets.empty()) { - for (auto id : indexToId_) { - targets.push_back(id.getBits()); - } + std::vector targetIndices; + for (auto target : targets) { + targetIndices.push_back(target.getBits()); } PredecessorMap predecessors; @@ -201,7 +236,7 @@ std::vector PathSearch::allPaths() const { } catch (const StopSearchException& e) { } - for (auto target : targets) { + for (auto target : targetIndices) { auto pathsToTarget = reconstructPaths(source.getBits(), target, predecessors); for (auto path : pathsToTarget) { @@ -213,21 +248,21 @@ std::vector PathSearch::allPaths() const { } // _____________________________________________________________________________ -std::vector PathSearch::shortestPaths() const { +std::vector PathSearch::shortestPaths(std::span sources, std::span targets) const { std::vector paths; Path path; - for (auto source : config_.sources_) { + for (auto source : sources) { auto startIndex = idToIndex_.at(source); - std::unordered_set targets; - 
for (auto target : config_.targets_) { - targets.insert(target.getBits()); + std::unordered_set targetIndices; + for (auto target : targets) { + targetIndices.insert(target.getBits()); } std::vector predecessors(indexToId_.size()); std::vector distances(indexToId_.size(), std::numeric_limits::max()); - DijkstraAllPathsVisitor vis(startIndex, targets, path, paths, predecessors, + DijkstraAllPathsVisitor vis(startIndex, targetIndices, path, paths, predecessors, distances); auto weightMap = get(&Edge::weight_, graph_); diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 7ce5a695ca..63517e1ebd 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -5,8 +5,10 @@ #pragma once #include +#include #include #include +#include #include #include "engine/Operation.h" @@ -27,6 +29,9 @@ using IdToNodeMap = std::unordered_map< enum PathSearchAlgorithm { ALL_PATHS, SHORTEST_PATHS }; +using TreeAndCol = std::pair, size_t>; +using SearchSide = std::variant>; + /** * @brief Struct to hold configuration parameters for the path search. */ @@ -34,9 +39,9 @@ struct PathSearchConfiguration { // The path search algorithm to use. PathSearchAlgorithm algorithm_; // The source node ID. - std::vector sources_; + SearchSide sources_; // A list of target node IDs. - std::vector targets_; + SearchSide targets_; // Variable representing the start column in the result. Variable start_; // Variable representing the end column in the result. @@ -47,6 +52,9 @@ struct PathSearchConfiguration { Variable edgeColumn_; // Variables representing edge property columns. 
std::vector edgeProperties_; + + bool sourceIsVariable() const {return std::holds_alternative(sources_);} + bool targetIsVariable() const {return std::holds_alternative(targets_);} }; /** @@ -67,6 +75,9 @@ class PathSearch : public Operation { std::vector indexToId_; IdToNodeMap idToIndex_; + std::optional boundSources_; + std::optional boundTargets_; + public: PathSearch(QueryExecutionContext* qec, std::shared_ptr subtree, @@ -101,6 +112,33 @@ class PathSearch : public Operation { vector resultSortedOn() const override; + void bindSourceSide(std::shared_ptr sourcesOp, size_t inputCol); + void bindTargetSide(std::shared_ptr targetsOp, size_t inputCol); + + bool isSourceBound() const { + return boundSources_.has_value() || !config_.sourceIsVariable(); + } + + bool isTargetBound() const { + return boundTargets_.has_value() || !config_.targetIsVariable(); + } + + std::optional getSourceColumn() const { + if (!config_.sourceIsVariable()) { + return std::nullopt; + } + + return variableColumns_.at(std::get(config_.sources_)).columnIndex_; + } + + std::optional getTargetColumn() const { + if (!config_.targetIsVariable()) { + return std::nullopt; + } + + return variableColumns_.at(std::get(config_.targets_)).columnIndex_; + } + Result computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; @@ -122,23 +160,25 @@ class PathSearch : public Operation { void buildMapping(std::span startNodes, std::span endNodes); + std::span handleSearchSide(const SearchSide& side, const std::optional& binding) const; + /** * @brief Finds paths based on the configured algorithm. * @return A vector of paths. */ - std::vector findPaths() const; + std::vector findPaths(std::span sources, std::span targets) const; /** * @brief Finds all paths in the graph. * @return A vector of all paths. 
*/ - std::vector allPaths() const; + std::vector allPaths(std::span sources, std::span targets) const; /** * @brief Finds the shortest paths in the graph. * @return A vector of the shortest paths. */ - std::vector shortestPaths() const; + std::vector shortestPaths(std::span sources, std::span targets) const; std::vector reconstructPaths(uint64_t source, uint64_t target, PredecessorMap predecessors) const; diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 6d075d8207..9d0e235fb4 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -9,6 +9,7 @@ #include #include +#include #include "engine/Bind.h" #include "engine/CartesianProductJoin.h" @@ -1644,6 +1645,11 @@ std::vector QueryPlanner::createJoinCandidates( return {makeSubtreePlan(_qec, a._qet, b._qet)}; } + if (auto opt = createJoinWithPathSearch(a, b, jcs)) { + candidates.push_back(std::move(opt.value())); + return candidates; + } + // Check if one of the two Operations is a SERVICE. If so, we can try // to simplify the Service Query using the result of the other operation. if (auto opt = createJoinWithService(a, b, jcs)) { @@ -1808,6 +1814,45 @@ auto QueryPlanner::createJoinWithService( return plan; } +// _____________________________________________________________________ +auto QueryPlanner::createJoinWithPathSearch( + SubtreePlan a, SubtreePlan b, + const std::vector>& jcs) + -> std::optional { + auto aRootOp = std::dynamic_pointer_cast(a._qet->getRootOperation()); + auto bRootOp = std::dynamic_pointer_cast(b._qet->getRootOperation()); + + // Exactly one of the two Operations can be a path search. + if (static_cast(aRootOp) == static_cast(bRootOp)) { + return std::nullopt; + } + + auto pathSearch = aRootOp ? aRootOp : bRootOp; + auto sibling = bRootOp ? 
a : b; + + // Only source and target may be bound directly + if (jcs.size() > 2) { + return std::nullopt; + } + + auto sourceColumn = pathSearch->getSourceColumn(); + auto targetColumn = pathSearch->getTargetColumn(); + for (auto jc : jcs) { + const size_t thisCol = aRootOp ? jc[0] : jc[1]; + const size_t otherCol = aRootOp ? jc[1] : jc[0]; + + if (sourceColumn && sourceColumn == thisCol && !pathSearch->isSourceBound()) { + pathSearch->bindSourceSide(sibling._qet, otherCol); + } else if (targetColumn && targetColumn == thisCol && !pathSearch->isTargetBound()) { + pathSearch->bindTargetSide(sibling._qet, otherCol); + } + } + + SubtreePlan plan = makeSubtreePlan(pathSearch); + mergeSubtreePlanIds(plan, a, b); + return plan; +} + // _____________________________________________________________________ void QueryPlanner::QueryGraph::setupGraph( const std::vector& leafOperations) { diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 101621f246..64a12af184 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -333,6 +333,10 @@ class QueryPlanner { SubtreePlan a, SubtreePlan b, const std::vector>& jcs); + [[nodiscard]] static std::optional createJoinWithPathSearch( + SubtreePlan a, SubtreePlan b, + const std::vector>& jcs); + [[nodiscard]] vector getOrderByRow( const ParsedQuery& pq, const std::vector>& dpTab) const; diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 3780b5799f..b200cc91ac 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -34,8 +34,10 @@ Result performPathSearch(PathSearchConfiguration config, IdTable input, TEST(PathSearchTest, constructor) { auto qec = getQec(); + std::vector sources{V(0)}; + std::vector targets{V(1)}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(1)}, + ALL_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; PathSearch p = PathSearch(qec, nullptr, config); @@ -47,9 +49,11 @@ TEST(PathSearchTest, 
emptyGraph) { auto expected = makeIdTableFromVector({}); expected.setNumColumns(4); + std::vector sources{V(0)}; + std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(4)}, + ALL_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -71,9 +75,11 @@ TEST(PathSearchTest, singlePath) { {V(3), V(4), I(0), I(3)}, }); + std::vector sources{V(0)}; + std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(4)}, + ALL_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -92,10 +98,12 @@ TEST(PathSearchTest, singlePathWithProperties) { {V(3), V(4), I(0), I(3), V(40)}, }); + std::vector sources{V(0)}; + std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; PathSearchConfiguration config{ALL_PATHS, - {V(0)}, - {V(4)}, + sources, + targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, @@ -116,9 +124,11 @@ TEST(PathSearchTest, singlePathWithDijkstra) { {V(3), V(4), I(0), I(3)}, }); + std::vector sources{V(0)}; + std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, {V(0)}, {V(4)}, + SHORTEST_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -137,10 +147,11 @@ TEST(PathSearchTest, singlePathWithDijkstraAndProperties) { {V(3), V(4), I(0), I(3), V(40)}, }); + std::vector sources{V(0)}; + std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; PathSearchConfiguration config{SHORTEST_PATHS, - {V(0)}, - {V(4)}, + sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, @@ -169,9 +180,11 @@ TEST(PathSearchTest, twoPathsOneTarget) { {V(3), V(2), I(1), I(1)}, }); + std::vector sources{V(0)}; + std::vector 
targets{V(2)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(2)}, + ALL_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -197,9 +210,10 @@ TEST(PathSearchTest, twoPathsTwoTargets) { {V(3), V(4), I(1), I(1)}, }); + std::vector sources{V(0)}; + std::vector targets{V(2), V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ALL_PATHS, {V(0)}, - {V(2), V(4)}, Var{"?start"}, + PathSearchConfiguration config{ALL_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -224,9 +238,10 @@ TEST(PathSearchTest, cycle) { {V(2), V(0), I(0), I(2)}, }); + std::vector sources{V(0)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(0)}, + ALL_PATHS, sources, sources, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -254,9 +269,10 @@ TEST(PathSearchTest, twoCycle) { {V(3), V(0), I(1), I(2)}, }); + std::vector sources{V(0)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(0)}, + ALL_PATHS, sources, sources, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -287,9 +303,11 @@ TEST(PathSearchTest, allPaths) { {V(2), V(4), I(4), I(1)}, }); + std::vector sources{V(0)}; + std::vector targets{}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {}, + ALL_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -315,12 +333,14 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { {V(2), V(4), I(4), I(1), V(51), V(50)}, }); + std::vector sources{V(0)}; + std::vector targets{}; Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty1"}, Variable{"?edgeProperty2"}}; PathSearchConfiguration config{ ALL_PATHS, - {V(0)}, - {}, 
+ sources, + targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, @@ -350,9 +370,11 @@ TEST(PathSearchTest, singleShortestPath) { {V(1), V(4), I(0), I(1)}, }); + std::vector sources{V(0)}; + std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, {V(0)}, {V(4)}, + SHORTEST_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -382,9 +404,11 @@ TEST(PathSearchTest, twoShortestPaths) { {V(1), V(5), I(1), I(1)}, }); + std::vector sources{V(0)}; + std::vector targets{V(5)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, {V(0)}, {V(5)}, + SHORTEST_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -409,9 +433,11 @@ TEST(PathSearchTest, singlePathWithIrrelevantNode) { {V(3), V(4), I(0), I(3)}, }); + std::vector sources{V(0)}; + std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(4)}, + ALL_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -429,9 +455,11 @@ TEST(PathSearchTest, shortestPathWithIrrelevantNode) { {V(3), V(4), I(0), I(3)}, }); + std::vector sources{V(0)}; + std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, {V(0)}, {V(4)}, + SHORTEST_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -466,9 +494,11 @@ TEST(PathSearchTest, allPathsElongatedDiamond) { {V(4), V(5), I(1), I(3)}, }); + std::vector sources{V(0)}; + std::vector targets{V(5)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0)}, {V(5)}, + ALL_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -485,9 +515,11 @@ 
TEST(PathSearchTest, shortestPathsElongatedDiamond) { {V(2), V(4), I(0), I(2)}, {V(4), V(5), I(0), I(3)}}); + std::vector sources{V(0)}; + std::vector targets{V(5)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, {V(0)}, {V(5)}, + SHORTEST_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; @@ -521,9 +553,11 @@ TEST(PathSearchTest, multiSourceMultiTargetallPaths) { {V(3), V(5), I(3), I(2)}, }); + std::vector sources{V(0), V(1)}; + std::vector targets{V(4), V(5)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, {V(0), V(1)}, {V(4), V(5)}, Var{"?start"}, + ALL_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); @@ -549,8 +583,10 @@ TEST(PathSearchTest, multiSourceMultiTargetshortestPaths) { }); Vars vars = {Variable{"?start"}, Variable{"?end"}}; + std::vector sources{V(0), V(1)}; + std::vector targets{V(4), V(5)}; PathSearchConfiguration config{ - SHORTEST_PATHS, {V(0), V(1)}, {V(4), V(5)}, Var{"?start"}, + SHORTEST_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 964c3c7325..d86b2a07b5 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -777,8 +777,10 @@ TEST(QueryPlanner, PathSearchSingleTarget) { auto qec = ad_utility::testing::getQec("

.

"); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); - PathSearchConfiguration config{ALL_PATHS, {getId("")}, - {getId("")}, Variable("?start"), + std::vector sources{getId("")}; + std::vector targets{getId("")}; + PathSearchConfiguration config{ALL_PATHS, sources, + targets, Variable("?start"), Variable("?end"), Variable("?path"), Variable("?edge"), {}}; h::expect( @@ -803,9 +805,10 @@ TEST(QueryPlanner, PathSearchMultipleTargets) { auto qec = ad_utility::testing::getQec("

.

"); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + std::vector sources{getId("")}; + std::vector targets{getId(""), getId("")}; PathSearchConfiguration config{ALL_PATHS, - {getId("")}, - {getId(""), getId("")}, + sources, targets, Variable("?start"), Variable("?end"), Variable("?path"), @@ -836,8 +839,9 @@ TEST(QueryPlanner, PathSearchWithEdgeProperties) { " . . . "); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); - PathSearchConfiguration config{ALL_PATHS, {getId("")}, - {getId("")}, Variable("?start"), + std::vector sources{getId("")}; + std::vector targets{getId("")}; + PathSearchConfiguration config{ALL_PATHS, sources, targets, Variable("?start"), Variable("?end"), Variable("?path"), Variable("?edge"), {Variable("?middle")}}; h::expect( @@ -873,10 +877,11 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { " "); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + std::vector sources{getId("")}; + std::vector targets{getId(""), getId("")}; PathSearchConfiguration config{ ALL_PATHS, - {getId("")}, - {getId(""), getId("")}, + sources, targets, Variable("?start"), Variable("?end"), Variable("?path"), diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index 4f1a9eca83..e6360c2d31 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -264,9 +264,9 @@ inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { return AllOf( AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)), AD_FIELD(PathSearchConfiguration, sources_, - UnorderedElementsAreArray(config.sources_)), + VariantWith>(UnorderedElementsAreArray(std::get>(config.sources_)))), AD_FIELD(PathSearchConfiguration, targets_, - UnorderedElementsAreArray(config.targets_)), + VariantWith>(UnorderedElementsAreArray(std::get>(config.targets_)))), AD_FIELD(PathSearchConfiguration, start_, Eq(config.start_)), AD_FIELD(PathSearchConfiguration, end_, Eq(config.end_)), 
AD_FIELD(PathSearchConfiguration, pathColumn_, Eq(config.pathColumn_)), From 08da81bdb1338a327c2e83cad66ed9e232e17f9a Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 3 Jul 2024 12:45:37 +0200 Subject: [PATCH 29/96] Added tests, finished binding logic --- src/engine/PathSearch.cpp | 33 ++++--- src/engine/PathSearch.h | 32 ++++--- src/engine/QueryPlanner.cpp | 35 +++----- src/parser/GraphPatternOperation.cpp | 24 ++++- src/parser/GraphPatternOperation.h | 2 + test/PathSearchTest.cpp | 115 ++++++++++-------------- test/QueryPlannerTest.cpp | 126 ++++++++++++++++++++++----- test/QueryPlannerTestHelpers.h | 31 +++++-- 8 files changed, 255 insertions(+), 143 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index c935a41039..ae440ebaee 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -44,7 +44,7 @@ PathSearch::PathSearch(QueryExecutionContext* qec, variableColumns_[targetColumn] = makeAlwaysDefinedColumn(colIndex++); } - for (auto edgeProperty: config_.edgeProperties_) { + for (auto edgeProperty : config_.edgeProperties_) { variableColumns_[edgeProperty] = makeAlwaysDefinedColumn(colIndex++); } } @@ -98,14 +98,15 @@ bool PathSearch::knownEmptyResult() { return subtree_->knownEmptyResult(); }; // _____________________________________________________________________________ vector PathSearch::resultSortedOn() const { return {}; }; - // _____________________________________________________________________________ -void PathSearch::bindSourceSide(std::shared_ptr sourcesOp, size_t inputCol) { +void PathSearch::bindSourceSide(std::shared_ptr sourcesOp, + size_t inputCol) { boundSources_ = {sourcesOp, inputCol}; } // _____________________________________________________________________________ -void PathSearch::bindTargetSide(std::shared_ptr targetsOp, size_t inputCol) { +void PathSearch::bindTargetSide(std::shared_ptr targetsOp, + size_t inputCol) { boundTargets_ = {targetsOp, inputCol}; } @@ -128,8 +129,10 @@ 
Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { buildGraph(dynSub.getColumn(subStartColumn), dynSub.getColumn(subEndColumn), edgePropertyLists); - std::span sources = handleSearchSide(config_.sources_, boundSources_); - std::span targets = handleSearchSide(config_.targets_, boundTargets_); + std::span sources = + handleSearchSide(config_.sources_, boundSources_); + std::span targets = + handleSearchSide(config_.targets_, boundTargets_); auto paths = findPaths(sources, targets); @@ -160,14 +163,15 @@ void PathSearch::buildMapping(std::span startNodes, } } -std::span PathSearch::handleSearchSide(const SearchSide& side, const std::optional& binding) const { +std::span PathSearch::handleSearchSide( + const SearchSide& side, const std::optional& binding) const { std::span ids; bool isVariable = std::holds_alternative(side); if (isVariable && binding.has_value()) { ids = binding->first->getResult()->idTable().getColumn(binding->second); } else if (isVariable || std::get>(side).empty()) { std::vector idVec; - for (auto id: indexToId_) { + for (auto id : indexToId_) { idVec.push_back(id); } ids = idVec; @@ -203,7 +207,8 @@ void PathSearch::buildGraph(std::span startNodes, } // _____________________________________________________________________________ -std::vector PathSearch::findPaths(std::span sources, std::span targets) const { +std::vector PathSearch::findPaths(std::span sources, + std::span targets) const { switch (config_.algorithm_) { case ALL_PATHS: return allPaths(sources, targets); @@ -215,7 +220,8 @@ std::vector PathSearch::findPaths(std::span sources, std::span PathSearch::allPaths(std::span sources, std::span targets) const { +std::vector PathSearch::allPaths(std::span sources, + std::span targets) const { std::vector paths; Path path; @@ -248,7 +254,8 @@ std::vector PathSearch::allPaths(std::span sources, std::span PathSearch::shortestPaths(std::span sources, std::span targets) const { +std::vector 
PathSearch::shortestPaths(std::span sources, + std::span targets) const { std::vector paths; Path path; for (auto source : sources) { @@ -262,8 +269,8 @@ std::vector PathSearch::shortestPaths(std::span sources, std::sp std::vector distances(indexToId_.size(), std::numeric_limits::max()); - DijkstraAllPathsVisitor vis(startIndex, targetIndices, path, paths, predecessors, - distances); + DijkstraAllPathsVisitor vis(startIndex, targetIndices, path, paths, + predecessors, distances); auto weightMap = get(&Edge::weight_, graph_); diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 63517e1ebd..dbd6fbffa1 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -53,8 +53,12 @@ struct PathSearchConfiguration { // Variables representing edge property columns. std::vector edgeProperties_; - bool sourceIsVariable() const {return std::holds_alternative(sources_);} - bool targetIsVariable() const {return std::holds_alternative(targets_);} + bool sourceIsVariable() const { + return std::holds_alternative(sources_); + } + bool targetIsVariable() const { + return std::holds_alternative(targets_); + } }; /** @@ -112,8 +116,10 @@ class PathSearch : public Operation { vector resultSortedOn() const override; - void bindSourceSide(std::shared_ptr sourcesOp, size_t inputCol); - void bindTargetSide(std::shared_ptr targetsOp, size_t inputCol); + void bindSourceSide(std::shared_ptr sourcesOp, + size_t inputCol); + void bindTargetSide(std::shared_ptr targetsOp, + size_t inputCol); bool isSourceBound() const { return boundSources_.has_value() || !config_.sourceIsVariable(); @@ -128,7 +134,8 @@ class PathSearch : public Operation { return std::nullopt; } - return variableColumns_.at(std::get(config_.sources_)).columnIndex_; + return variableColumns_.at(std::get(config_.sources_)) + .columnIndex_; } std::optional getTargetColumn() const { @@ -136,7 +143,8 @@ class PathSearch : public Operation { return std::nullopt; } - return 
variableColumns_.at(std::get(config_.targets_)).columnIndex_; + return variableColumns_.at(std::get(config_.targets_)) + .columnIndex_; } Result computeResult([[maybe_unused]] bool requestLaziness) override; @@ -160,25 +168,29 @@ class PathSearch : public Operation { void buildMapping(std::span startNodes, std::span endNodes); - std::span handleSearchSide(const SearchSide& side, const std::optional& binding) const; + std::span handleSearchSide( + const SearchSide& side, const std::optional& binding) const; /** * @brief Finds paths based on the configured algorithm. * @return A vector of paths. */ - std::vector findPaths(std::span sources, std::span targets) const; + std::vector findPaths(std::span sources, + std::span targets) const; /** * @brief Finds all paths in the graph. * @return A vector of all paths. */ - std::vector allPaths(std::span sources, std::span targets) const; + std::vector allPaths(std::span sources, + std::span targets) const; /** * @brief Finds the shortest paths in the graph. * @return A vector of the shortest paths. */ - std::vector shortestPaths(std::span sources, std::span targets) const; + std::vector shortestPaths(std::span sources, + std::span targets) const; std::vector reconstructPaths(uint64_t source, uint64_t target, PredecessorMap predecessors) const; diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 9d0e235fb4..6d5eb1ff18 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1819,8 +1819,10 @@ auto QueryPlanner::createJoinWithPathSearch( SubtreePlan a, SubtreePlan b, const std::vector>& jcs) -> std::optional { - auto aRootOp = std::dynamic_pointer_cast(a._qet->getRootOperation()); - auto bRootOp = std::dynamic_pointer_cast(b._qet->getRootOperation()); + auto aRootOp = + std::dynamic_pointer_cast(a._qet->getRootOperation()); + auto bRootOp = + std::dynamic_pointer_cast(b._qet->getRootOperation()); // Exactly one of the two Operations can be a path search. 
if (static_cast(aRootOp) == static_cast(bRootOp)) { @@ -1841,13 +1843,15 @@ auto QueryPlanner::createJoinWithPathSearch( const size_t thisCol = aRootOp ? jc[0] : jc[1]; const size_t otherCol = aRootOp ? jc[1] : jc[0]; - if (sourceColumn && sourceColumn == thisCol && !pathSearch->isSourceBound()) { + if (sourceColumn && sourceColumn == thisCol && + !pathSearch->isSourceBound()) { pathSearch->bindSourceSide(sibling._qet, otherCol); - } else if (targetColumn && targetColumn == thisCol && !pathSearch->isTargetBound()) { + } else if (targetColumn && targetColumn == thisCol && + !pathSearch->isTargetBound()) { pathSearch->bindTargetSide(sibling._qet, otherCol); } } - + SubtreePlan plan = makeSubtreePlan(pathSearch); mergeSubtreePlanIds(plan, a, b); return plan; @@ -2159,22 +2163,11 @@ void QueryPlanner::GraphPatternPlanner::visitPathSearch( parsedQuery::PathQuery& pathQuery) { auto candidatesIn = planner_.optimize(&pathQuery.childGraphPattern_); std::vector candidatesOut; - auto tripleComponentToId = [this](const TripleComponent& comp) { - auto opt = comp.toValueId(planner_._qec->getIndex().getVocab()); - if (opt.has_value()) { - return opt.value(); - } else { - AD_THROW("No vocabulary entry for " + comp.toString()); - } - }; - std::vector sources; - for (auto comp : pathQuery.sources_) { - sources.push_back(tripleComponentToId(comp)); - } - std::vector targets; - for (auto comp : pathQuery.targets_) { - targets.push_back(tripleComponentToId(comp)); - } + + const auto& vocab = planner_._qec->getIndex().getVocab(); + auto sources = pathQuery.toSearchSide(pathQuery.sources_, vocab); + auto targets = pathQuery.toSearchSide(pathQuery.targets_, vocab); + auto config = PathSearchConfiguration{pathQuery.algorithm_, std::move(sources), diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index f973244cbe..626fa98e57 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -8,6 +8,7 @@ #include 
"absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "parser/ParsedQuery.h" +#include "util/Exception.h" #include "util/Forward.h" namespace parsedQuery { @@ -74,10 +75,8 @@ void PathQuery::addParameter(const SparqlTriple& triple) { TripleComponent object = simpleTriple.o_; AD_CORRECTNESS_CHECK(predicate.isIri()); if (predicate.getIri().toStringRepresentation().ends_with("source>")) { - AD_CORRECTNESS_CHECK(object.isIri()); sources_.push_back(std::move(object)); } else if (predicate.getIri().toStringRepresentation().ends_with("target>")) { - AD_CORRECTNESS_CHECK(object.isIri()); targets_.push_back(std::move(object)); } else if (predicate.getIri().toStringRepresentation().ends_with("start>")) { AD_CORRECTNESS_CHECK(object.isVariable()); @@ -113,6 +112,27 @@ void PathQuery::addParameter(const SparqlTriple& triple) { } } +std::variant> PathQuery::toSearchSide( + std::vector side, const Index::Vocab& vocab) const { + if (side.size() == 1 && side[0].isVariable()) { + return side[0].getVariable(); + } else { + std::vector sideIds; + for (auto comp : side) { + if (comp.isVariable()) { + AD_THROW("Only one variable is allowed per search side"); + } + auto opt = comp.toValueId(vocab); + if (opt.has_value()) { + sideIds.push_back(opt.value()); + } else { + AD_THROW("No vocabulary entry for " + comp.toString()); + } + } + return sideIds; + } +} + // ____________________________________________________________________________ void PathQuery::fromBasicPattern(const BasicGraphPattern& pattern) { for (SparqlTriple triple : pattern._triples) { diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index a140ca3372..b3dd9d1e2b 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -151,6 +151,8 @@ struct PathQuery { void addParameter(const SparqlTriple& triple); void fromBasicPattern(const BasicGraphPattern& pattern); + std::variant> toSearchSide( + std::vector side, const Index::Vocab& vocab) const; 
}; // A SPARQL Bind construct. diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index b200cc91ac..c4b608a4d9 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -37,9 +37,8 @@ TEST(PathSearchTest, constructor) { std::vector sources{V(0)}; std::vector targets{V(1)}; PathSearchConfiguration config{ - ALL_PATHS, sources, targets, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + ALL_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; PathSearch p = PathSearch(qec, nullptr, config); } @@ -53,9 +52,8 @@ TEST(PathSearchTest, emptyGraph) { std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, sources, targets, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + ALL_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -79,9 +77,8 @@ TEST(PathSearchTest, singlePath) { std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, sources, targets, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + ALL_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -101,14 +98,10 @@ TEST(PathSearchTest, singlePathWithProperties) { std::vector sources{V(0)}; std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; - PathSearchConfiguration config{ALL_PATHS, - sources, - targets, - Var{"?start"}, - Var{"?end"}, - Var{"?edgeIndex"}, - Var{"?pathIndex"}, - {Var{"?edgeProperty"}}}; + PathSearchConfiguration config{ALL_PATHS, sources, + targets, Var{"?start"}, + 
Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {Var{"?edgeProperty"}}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -128,9 +121,8 @@ TEST(PathSearchTest, singlePathWithDijkstra) { std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, sources, targets, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + SHORTEST_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -150,13 +142,10 @@ TEST(PathSearchTest, singlePathWithDijkstraAndProperties) { std::vector sources{V(0)}; std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; - PathSearchConfiguration config{SHORTEST_PATHS, - sources, targets, - Var{"?start"}, - Var{"?end"}, - Var{"?edgeIndex"}, - Var{"?pathIndex"}, - {Var{"?edgeProperty"}}}; + PathSearchConfiguration config{SHORTEST_PATHS, sources, + targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {Var{"?edgeProperty"}}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -184,9 +173,8 @@ TEST(PathSearchTest, twoPathsOneTarget) { std::vector targets{V(2)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, sources, targets, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + ALL_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -213,9 +201,9 @@ TEST(PathSearchTest, twoPathsTwoTargets) { std::vector sources{V(0)}; std::vector targets{V(2), V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - 
PathSearchConfiguration config{ALL_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{ + ALL_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -241,9 +229,8 @@ TEST(PathSearchTest, cycle) { std::vector sources{V(0)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, sources, sources, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + ALL_PATHS, sources, sources, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -272,9 +259,8 @@ TEST(PathSearchTest, twoCycle) { std::vector sources{V(0)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, sources, sources, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + ALL_PATHS, sources, sources, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -307,9 +293,8 @@ TEST(PathSearchTest, allPaths) { std::vector targets{}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, sources, targets, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + ALL_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -338,14 +323,10 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty1"}, Variable{"?edgeProperty2"}}; 
PathSearchConfiguration config{ - ALL_PATHS, - sources, - targets, - Var{"?start"}, - Var{"?end"}, - Var{"?edgeIndex"}, - Var{"?pathIndex"}, - {Var{"?edgeProperty2"}, Var{"?edgeProperty1"}}}; + ALL_PATHS, sources, + targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, + Var{"?pathIndex"}, {Var{"?edgeProperty2"}, Var{"?edgeProperty1"}}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -374,9 +355,8 @@ TEST(PathSearchTest, singleShortestPath) { std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, sources, targets, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + SHORTEST_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -408,9 +388,8 @@ TEST(PathSearchTest, twoShortestPaths) { std::vector targets{V(5)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, sources, targets, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + SHORTEST_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -437,9 +416,8 @@ TEST(PathSearchTest, singlePathWithIrrelevantNode) { std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, sources, targets, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + ALL_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -459,9 +437,8 @@ TEST(PathSearchTest, 
shortestPathWithIrrelevantNode) { std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, sources, targets, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + SHORTEST_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -498,9 +475,8 @@ TEST(PathSearchTest, allPathsElongatedDiamond) { std::vector targets{V(5)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, sources, targets, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + ALL_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -519,9 +495,8 @@ TEST(PathSearchTest, shortestPathsElongatedDiamond) { std::vector targets{V(5)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - SHORTEST_PATHS, sources, targets, - Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {}}; + SHORTEST_PATHS, sources, targets, Var{"?start"}, + Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -557,7 +532,7 @@ TEST(PathSearchTest, multiSourceMultiTargetallPaths) { std::vector targets{V(4), V(5)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{ - ALL_PATHS, sources, targets, Var{"?start"}, + ALL_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); @@ -586,7 +561,7 @@ TEST(PathSearchTest, multiSourceMultiTargetshortestPaths) { std::vector sources{V(0), V(1)}; 
std::vector targets{V(4), V(5)}; PathSearchConfiguration config{ - SHORTEST_PATHS, sources, targets, Var{"?start"}, + SHORTEST_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index d86b2a07b5..00b64bd78c 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -8,6 +8,7 @@ #include "engine/QueryPlanner.h" #include "parser/GraphPatternOperation.h" #include "parser/SparqlParser.h" +#include "parser/data/Variable.h" #include "util/TripleComponentTestHelpers.h" namespace h = queryPlannerTestHelpers; @@ -780,7 +781,7 @@ TEST(QueryPlanner, PathSearchSingleTarget) { std::vector sources{getId("")}; std::vector targets{getId("")}; PathSearchConfiguration config{ALL_PATHS, sources, - targets, Variable("?start"), + targets, Variable("?start"), Variable("?end"), Variable("?path"), Variable("?edge"), {}}; h::expect( @@ -797,7 +798,7 @@ TEST(QueryPlanner, PathSearchSingleTarget) { "{SELECT * WHERE {" "?start

?end." "}}}}", - h::PathSearch(config, scan("?start", "

", "?end")), qec); + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); } TEST(QueryPlanner, PathSearchMultipleTargets) { @@ -807,13 +808,10 @@ TEST(QueryPlanner, PathSearchMultipleTargets) { std::vector sources{getId("")}; std::vector targets{getId(""), getId("")}; - PathSearchConfiguration config{ALL_PATHS, - sources, targets, - Variable("?start"), - Variable("?end"), - Variable("?path"), - Variable("?edge"), - {}}; + PathSearchConfiguration config{ALL_PATHS, sources, + targets, Variable("?start"), + Variable("?end"), Variable("?path"), + Variable("?edge"), {}}; h::expect( "PREFIX pathSearch: " "SELECT ?start ?end ?path ?edge WHERE {" @@ -829,7 +827,7 @@ TEST(QueryPlanner, PathSearchMultipleTargets) { "{SELECT * WHERE {" "?start

?end." "}}}}", - h::PathSearch(config, scan("?start", "

", "?end")), qec); + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); } TEST(QueryPlanner, PathSearchWithEdgeProperties) { @@ -841,7 +839,8 @@ TEST(QueryPlanner, PathSearchWithEdgeProperties) { std::vector sources{getId("")}; std::vector targets{getId("")}; - PathSearchConfiguration config{ALL_PATHS, sources, targets, Variable("?start"), + PathSearchConfiguration config{ALL_PATHS, sources, + targets, Variable("?start"), Variable("?end"), Variable("?path"), Variable("?edge"), {Variable("?middle")}}; h::expect( @@ -860,8 +859,9 @@ TEST(QueryPlanner, PathSearchWithEdgeProperties) { "?start ?middle." "?middle ?end." "}}}}", - h::PathSearch(config, join(scan("?start", "", "?middle"), - scan("?middle", "", "?end"))), + h::PathSearch(config, true, true, + join(scan("?start", "", "?middle"), + scan("?middle", "", "?end"))), qec); } @@ -880,13 +880,10 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { std::vector sources{getId("")}; std::vector targets{getId(""), getId("")}; PathSearchConfiguration config{ - ALL_PATHS, - sources, targets, - Variable("?start"), - Variable("?end"), - Variable("?path"), - Variable("?edge"), - {Variable("?middle"), Variable("?middleAttribute")}}; + ALL_PATHS, sources, + targets, Variable("?start"), + Variable("?end"), Variable("?path"), + Variable("?edge"), {Variable("?middle"), Variable("?middleAttribute")}}; h::expect( "PREFIX pathSearch: " "SELECT ?start ?end ?path ?edge WHERE {" @@ -906,13 +903,100 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { "?middle ?middleAttribute." "?middle ?end." "}}}}", - h::PathSearch(config, + h::PathSearch(config, true, true, join(scan("?start", "", "?middle"), join(scan("?middle", "", "?middleAttribute"), scan("?middle", "", "?end")))), qec); } +TEST(QueryPlanner, PathSearchSourceBound) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + Variable sources{"?source"}; + std::vector targets{getId("")}; + PathSearchConfiguration config{ALL_PATHS, sources, + targets, Variable("?start"), + Variable("?end"), Variable("?path"), + Variable("?edge"), {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES ?source {}" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + +TEST(QueryPlanner, PathSearchTargetBound) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + Variable targets{"?target"}; + PathSearchConfiguration config{ALL_PATHS, sources, + targets, Variable("?start"), + Variable("?end"), Variable("?path"), + Variable("?edge"), {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES ?target {}" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ?target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + +TEST(QueryPlanner, PathSearchBothBound) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + Variable sources{"?source"}; + Variable targets{"?target"}; + PathSearchConfiguration config{ALL_PATHS, sources, + targets, Variable("?start"), + Variable("?end"), Variable("?path"), + Variable("?edge"), {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES (?source ?target) {( )}" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source ;" + "pathSearch:target ?target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + // __________________________________________________________________________ TEST(QueryPlanner, BindAtBeginningOfQuery) { h::expect( diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index e6360c2d31..a206d74a00 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -7,6 +7,8 @@ #include #include +#include + #include "./util/GTestHelpers.h" #include "engine/Bind.h" #include "engine/CartesianProductJoin.h" @@ -261,12 +263,27 @@ inline auto TransitivePath = }; inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { + auto sourceMatcher = + std::holds_alternative(config.sources_) + ? AD_FIELD( + PathSearchConfiguration, sources_, + VariantWith(Eq(std::get(config.sources_)))) + : AD_FIELD( + PathSearchConfiguration, sources_, + VariantWith>(UnorderedElementsAreArray( + std::get>(config.sources_)))); + auto targetMatcher = + std::holds_alternative(config.targets_) + ? AD_FIELD( + PathSearchConfiguration, targets_, + VariantWith(Eq(std::get(config.targets_)))) + : AD_FIELD( + PathSearchConfiguration, targets_, + VariantWith>(UnorderedElementsAreArray( + std::get>(config.targets_)))); return AllOf( AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)), - AD_FIELD(PathSearchConfiguration, sources_, - VariantWith>(UnorderedElementsAreArray(std::get>(config.sources_)))), - AD_FIELD(PathSearchConfiguration, targets_, - VariantWith>(UnorderedElementsAreArray(std::get>(config.targets_)))), + sourceMatcher, targetMatcher, AD_FIELD(PathSearchConfiguration, start_, Eq(config.start_)), AD_FIELD(PathSearchConfiguration, end_, Eq(config.end_)), AD_FIELD(PathSearchConfiguration, pathColumn_, Eq(config.pathColumn_)), @@ -277,12 +294,14 @@ inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { // Match a PathSearch operation inline auto PathSearch = - [](PathSearchConfiguration config, + [](PathSearchConfiguration config, bool sourceBound, bool 
targetBound, const std::same_as auto&... childMatchers) { return RootOperation<::PathSearch>(AllOf( Property("getChildren", &Operation::getChildren, ElementsAre(Pointee(childMatchers)...)), - AD_PROPERTY(PathSearch, getConfig, PathSearchConfigMatcher(config)))); + AD_PROPERTY(PathSearch, getConfig, PathSearchConfigMatcher(config)), + AD_PROPERTY(PathSearch, isSourceBound, Eq(sourceBound)), + AD_PROPERTY(PathSearch, isTargetBound, Eq(targetBound)))); }; // Match a sort operation. Currently, this is only required by the binary search From adf85bc71fd1476b972cce0cf2e7f669bf6e9f6a Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 3 Jul 2024 13:21:22 +0200 Subject: [PATCH 30/96] Added runtime info --- src/engine/PathSearch.cpp | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index ae440ebaee..dda04a6074 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -118,6 +118,9 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { const IdTable& dynSub = subRes->idTable(); if (!dynSub.empty()) { + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + std::vector> edgePropertyLists; for (const auto& edgeProperty : config_.edgeProperties_) { auto edgePropertyIndex = subtree_->getVariableColumn(edgeProperty); @@ -126,18 +129,47 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto subStartColumn = subtree_->getVariableColumn(config_.start_); auto subEndColumn = subtree_->getVariableColumn(config_.end_); + + timer.stop(); + auto prepTime = timer.msecs(); + timer.start(); + buildGraph(dynSub.getColumn(subStartColumn), dynSub.getColumn(subEndColumn), edgePropertyLists); + timer.stop(); + auto buildingTime = timer.msecs(); + timer.start(); + std::span sources = handleSearchSide(config_.sources_, boundSources_); std::span targets = handleSearchSide(config_.targets_, boundTargets_); + 
timer.stop(); + auto sideTime = timer.msecs(); + timer.start(); + auto paths = findPaths(sources, targets); + + timer.stop(); + auto searchTime = timer.msecs(); + timer.start(); + CALL_FIXED_SIZE(std::array{getResultWidth()}, &PathSearch::pathsToResultTable, this, idTable, paths); + + timer.stop(); + auto fillTime = timer.msecs(); + timer.start(); + + auto& info = runtimeInfo(); + info.addDetail("Time to read subcols", prepTime.count()); + info.addDetail("Time to build graph & mapping", buildingTime.count()); + info.addDetail("Time to prepare search sides", sideTime.count()); + info.addDetail("Time to search paths", searchTime.count()); + info.addDetail("Time to fill result table", fillTime.count()); } return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; From 48dbab14868452d9f4e1ee2ea918e08f3b9f898f Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 3 Jul 2024 13:48:09 +0200 Subject: [PATCH 31/96] Added cancellation checks --- src/engine/PathSearch.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index dda04a6074..f55a0e6abc 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -190,6 +190,7 @@ void PathSearch::buildMapping(std::span startNodes, } }; for (size_t i = 0; i < startNodes.size(); i++) { + checkCancellation(); addNode(startNodes[i]); addNode(endNodes[i]); } @@ -225,6 +226,7 @@ void PathSearch::buildGraph(std::span startNodes, } for (size_t i = 0; i < startNodes.size(); i++) { + checkCancellation(); auto startIndex = idToIndex_[startNodes[i]]; auto endIndex = idToIndex_[endNodes[i]]; From d5b513b7cef3d78da5aae900fa698e8df1697135 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Thu, 4 Jul 2024 20:22:34 +0200 Subject: [PATCH 32/96] Fixed CacheKey --- src/engine/PathSearch.cpp | 2 ++ src/engine/PathSearch.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/src/engine/PathSearch.cpp 
b/src/engine/PathSearch.cpp index f55a0e6abc..208a619b07 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -59,6 +59,8 @@ std::vector PathSearch::getChildren() { // _____________________________________________________________________________ std::string PathSearch::getCacheKeyImpl() const { std::ostringstream os; + os << config_.toString(); + AD_CORRECTNESS_CHECK(subtree_); os << "Subtree:\n" << subtree_->getCacheKey() << '\n'; return std::move(os).str(); diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index dbd6fbffa1..b196a11356 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -59,6 +59,44 @@ struct PathSearchConfiguration { bool targetIsVariable() const { return std::holds_alternative(targets_); } + + std::string searchSideToString(const SearchSide& side) const { + if (std::holds_alternative(side)) { + return std::get(side).toSparql(); + } + std::ostringstream os; + for (auto id: std::get>(side)) { + os << id << ", "; + } + return std::move(os).str(); + } + + std::string toString() const { + std::ostringstream os; + switch (algorithm_) { + case ALL_PATHS: + os << "Algorthm: All paths" << '\n'; + break; + case SHORTEST_PATHS: + os << "Algorthm: Shortest paths" << '\n'; + break; + } + + os << "Source: " << searchSideToString(sources_) << '\n'; + os << "Target: " << searchSideToString(targets_) << '\n'; + + os << "Start: " << start_.toSparql() << '\n'; + os << "End: " << end_.toSparql() << '\n'; + os << "PathColumn: " << pathColumn_.toSparql() << '\n'; + os << "EdgeColumn: " << edgeColumn_.toSparql() << '\n'; + + os << "EdgeProperties:" << '\n'; + for (auto edgeProperty: edgeProperties_) { + os << " " << edgeProperty.toSparql() << '\n'; + } + + return std::move(os).str(); + } }; /** From 49dfaa3ced8b0021fd47465828a1dc338d5b0b85 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Thu, 4 Jul 2024 20:39:36 +0200 Subject: [PATCH 33/96] Removed unneeded members --- src/engine/PathSearch.h | 2 -- 1 file 
changed, 2 deletions(-) diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index b196a11356..159d0a6630 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -111,8 +111,6 @@ class PathSearch : public Operation { Graph graph_; // Configuration for the path search. PathSearchConfiguration config_; - Id source_; - std::vector targets_; std::vector indexToId_; IdToNodeMap idToIndex_; From f6357a7d5dd83c4672186331f24512e8ad248c74 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Thu, 4 Jul 2024 20:49:18 +0200 Subject: [PATCH 34/96] Format fix --- src/engine/PathSearch.cpp | 1 - src/engine/PathSearch.h | 16 ++++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 208a619b07..31ab7b1aac 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -154,7 +154,6 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto paths = findPaths(sources, targets); - timer.stop(); auto searchTime = timer.msecs(); timer.start(); diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 159d0a6630..130c71aea1 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -63,9 +63,9 @@ struct PathSearchConfiguration { std::string searchSideToString(const SearchSide& side) const { if (std::holds_alternative(side)) { return std::get(side).toSparql(); - } + } std::ostringstream os; - for (auto id: std::get>(side)) { + for (auto id : std::get>(side)) { os << id << ", "; } return std::move(os).str(); @@ -74,11 +74,11 @@ struct PathSearchConfiguration { std::string toString() const { std::ostringstream os; switch (algorithm_) { - case ALL_PATHS: - os << "Algorthm: All paths" << '\n'; + case ALL_PATHS: + os << "Algorithm: All paths" << '\n'; break; - case SHORTEST_PATHS: - os << "Algorthm: Shortest paths" << '\n'; + case SHORTEST_PATHS: + os << "Algorithm: Shortest paths" << '\n'; break; } @@ -91,10 +91,10 @@ struct 
PathSearchConfiguration { os << "EdgeColumn: " << edgeColumn_.toSparql() << '\n'; os << "EdgeProperties:" << '\n'; - for (auto edgeProperty: edgeProperties_) { + for (auto edgeProperty : edgeProperties_) { os << " " << edgeProperty.toSparql() << '\n'; } - + return std::move(os).str(); } }; From abdc36a56700998722ad460e51421c6559ffb53f Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 5 Jul 2024 12:30:34 +0200 Subject: [PATCH 35/96] Simplified handleSearchSide --- src/engine/PathSearch.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 31ab7b1aac..1a4c57cee4 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -204,11 +204,7 @@ std::span PathSearch::handleSearchSide( if (isVariable && binding.has_value()) { ids = binding->first->getResult()->idTable().getColumn(binding->second); } else if (isVariable || std::get>(side).empty()) { - std::vector idVec; - for (auto id : indexToId_) { - idVec.push_back(id); - } - ids = idVec; + return indexToId_; } else { ids = std::get>(side); } From 1c892a251077fc4b5399f4d4f10e9807f4e2dbc9 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 8 Jul 2024 16:55:26 +0200 Subject: [PATCH 36/96] New all paths implementation --- src/engine/PathSearch.cpp | 145 +++++++++++++++++++++++++++----------- src/engine/PathSearch.h | 25 ++++++- test/PathSearchTest.cpp | 16 +++-- test/QueryPlannerTest.cpp | 10 +-- 4 files changed, 144 insertions(+), 52 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 1a4c57cee4..908436d795 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -3,17 +3,80 @@ // Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) #include "PathSearch.h" +#include +#include #include +#include +#include +#include #include #include +#include #include #include "engine/CallFixedSize.h" #include "engine/PathSearchVisitors.h" +#include 
"engine/QueryExecutionTree.h" #include "engine/VariableToColumnMap.h" #include "util/Exception.h" +// _____________________________________________________________________________ +BinSearchWrapper::BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, std::vector edgeCols) + : table_(table), startCol_(startCol), endCol_(endCol), edgeCols_(std::move(edgeCols)) { + +} + +// _____________________________________________________________________________ +std::vector BinSearchWrapper::outgoingEdes(const Id node) const { + auto startIds = table_.getColumn(startCol_); + auto range = std::ranges::equal_range(startIds, node); + auto startIndex = std::distance(startIds.begin(), range.begin()); + + std::vector edges; + for (size_t i = 0; i < range.size(); i++) { + auto row = startIndex + i; + auto edge = makeEdgeFromRow(row); + edges.push_back(edge); + } + return edges; +} + +std::vector BinSearchWrapper::findPaths(const Id& source, const std::unordered_set& targets) { + if (pathCache_.contains(source.getBits())) { return pathCache_[source.getBits()]; } + pathCache_[source.getBits()] = {}; + std::vector paths; + + auto edges = outgoingEdes(source); + for (auto edge: edges) { + if (targets.contains(edge.end_) || targets.empty()) { + Path path; + path.push_back(edge); + paths.push_back(std::move(path)); + } + auto partialPaths = findPaths(Id::fromBits(edge.end_), targets); + for (auto path: partialPaths) { + path.push_back(edge); + paths.push_back(std::move(path)); + } + } + + pathCache_[source.getBits()].insert(pathCache_[source.getBits()].end(), paths.begin(), paths.end()); + return paths; +} + +// _____________________________________________________________________________ +const Edge BinSearchWrapper::makeEdgeFromRow(size_t row) const { + Edge edge; + edge.start_ = table_(row, startCol_).getBits(); + edge.end_ = table_(row, endCol_).getBits(); + + for (auto edgeCol: edgeCols_) { + edge.edgeProperties_.push_back(table_(row, edgeCol)); + } + return edge; +} 
+ // _____________________________________________________________________________ PathSearch::PathSearch(QueryExecutionContext* qec, std::shared_ptr subtree, @@ -23,6 +86,11 @@ PathSearch::PathSearch(QueryExecutionContext* qec, config_(std::move(config)), idToIndex_(allocator()) { AD_CORRECTNESS_CHECK(qec != nullptr); + + auto startCol = subtree_->getVariableColumn(config_.start_); + auto endCol = subtree_->getVariableColumn(config_.end_); + subtree_ = QueryExecutionTree::createSortedTree(subtree_, {startCol, endCol}); + resultWidth_ = 4 + config_.edgeProperties_.size(); size_t colIndex = 0; @@ -123,21 +191,29 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); timer.start(); - std::vector> edgePropertyLists; - for (const auto& edgeProperty : config_.edgeProperties_) { - auto edgePropertyIndex = subtree_->getVariableColumn(edgeProperty); - edgePropertyLists.push_back(dynSub.getColumn(edgePropertyIndex)); + if (config_.algorithm_ == SHORTEST_PATHS) { + std::vector> edgePropertyLists; + for (const auto& edgeProperty : config_.edgeProperties_) { + auto edgePropertyIndex = subtree_->getVariableColumn(edgeProperty); + edgePropertyLists.push_back(dynSub.getColumn(edgePropertyIndex)); + } + + auto subStartColumn = subtree_->getVariableColumn(config_.start_); + auto subEndColumn = subtree_->getVariableColumn(config_.end_); + + buildGraph(dynSub.getColumn(subStartColumn), dynSub.getColumn(subEndColumn), + edgePropertyLists); + } + auto subStartColumn = subtree_->getVariableColumn(config_.start_); auto subEndColumn = subtree_->getVariableColumn(config_.end_); - - timer.stop(); - auto prepTime = timer.msecs(); - timer.start(); - - buildGraph(dynSub.getColumn(subStartColumn), dynSub.getColumn(subEndColumn), - edgePropertyLists); + std::vector edgeColumns; + for (auto edgeProp: config_.edgeProperties_) { + edgeColumns.push_back(subtree_->getVariableColumn(edgeProp)); + } + BinSearchWrapper 
binSearch{dynSub, subStartColumn, subEndColumn, std::move(edgeColumns)}; timer.stop(); auto buildingTime = timer.msecs(); @@ -152,7 +228,7 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto sideTime = timer.msecs(); timer.start(); - auto paths = findPaths(sources, targets); + auto paths = findPaths(sources, targets, binSearch); timer.stop(); auto searchTime = timer.msecs(); @@ -166,7 +242,6 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { timer.start(); auto& info = runtimeInfo(); - info.addDetail("Time to read subcols", prepTime.count()); info.addDetail("Time to build graph & mapping", buildingTime.count()); info.addDetail("Time to prepare search sides", sideTime.count()); info.addDetail("Time to search paths", searchTime.count()); @@ -203,8 +278,8 @@ std::span PathSearch::handleSearchSide( bool isVariable = std::holds_alternative(side); if (isVariable && binding.has_value()) { ids = binding->first->getResult()->idTable().getColumn(binding->second); - } else if (isVariable || std::get>(side).empty()) { - return indexToId_; + } else if (isVariable) { + return {}; } else { ids = std::get>(side); } @@ -239,10 +314,10 @@ void PathSearch::buildGraph(std::span startNodes, // _____________________________________________________________________________ std::vector PathSearch::findPaths(std::span sources, - std::span targets) const { + std::span targets, BinSearchWrapper& binSearch) const { switch (config_.algorithm_) { case ALL_PATHS: - return allPaths(sources, targets); + return allPaths(sources, targets, binSearch); case SHORTEST_PATHS: return shortestPaths(sources, targets); default: @@ -252,33 +327,23 @@ std::vector PathSearch::findPaths(std::span sources, // _____________________________________________________________________________ std::vector PathSearch::allPaths(std::span sources, - std::span targets) const { + std::span targets, + BinSearchWrapper& binSearch) const { std::vector paths; Path path; - for 
(auto source : sources) { - auto startIndex = idToIndex_.at(source); - - std::vector targetIndices; - for (auto target : targets) { - targetIndices.push_back(target.getBits()); - } - - PredecessorMap predecessors; - - AllPathsVisitor vis(startIndex, predecessors); - try { - boost::depth_first_search(graph_, - boost::visitor(vis).root_vertex(startIndex)); - } catch (const StopSearchException& e) { - } + std::unordered_set targetSet; + for (auto target: targets) { + targetSet.insert(target.getBits()); + } - for (auto target : targetIndices) { - auto pathsToTarget = - reconstructPaths(source.getBits(), target, predecessors); - for (auto path : pathsToTarget) { - paths.push_back(std::move(path)); - } + if (sources.empty()) { + sources = indexToId_; + } + for (auto source : sources) { + for (auto path: binSearch.findPaths(source, targetSet)) { + std::ranges::reverse(path.edges_); + paths.push_back(path); } } return paths; diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 130c71aea1..9752001bb6 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -99,6 +100,26 @@ struct PathSearchConfiguration { } }; +class BinSearchWrapper { + const IdTable& table_; + size_t startCol_; + size_t endCol_; + std::vector edgeCols_; + std::unordered_map> pathCache_; + + public: + BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, std::vector edgeCols); + + std::vector outgoingEdes(const Id node) const; + + std::vector findPaths(const Id& source, const std::unordered_set& targets); + + bool isTarget(const Id node) const; + +private: + const Edge makeEdgeFromRow(size_t row) const; +}; + /** * @brief Class to perform various path search algorithms on a graph. */ @@ -212,14 +233,14 @@ class PathSearch : public Operation { * @return A vector of paths. 
*/ std::vector findPaths(std::span sources, - std::span targets) const; + std::span targets, BinSearchWrapper& binSearch) const; /** * @brief Finds all paths in the graph. * @return A vector of all paths. */ std::vector allPaths(std::span sources, - std::span targets) const; + std::span targets, BinSearchWrapper& binSearch) const; /** * @brief Finds the shortest paths in the graph. diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index c4b608a4d9..6b85f672ed 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -27,19 +27,25 @@ Result performPathSearch(PathSearchConfiguration config, IdTable input, auto qec = getQec(); auto subtree = ad_utility::makeExecutionTree( qec, std::move(input), vars); - PathSearch p = PathSearch(qec, std::move(subtree), config); + PathSearch p = PathSearch(qec, std::move(subtree), std::move(config)); return p.computeResult(false); } TEST(PathSearchTest, constructor) { auto qec = getQec(); + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + auto sub = makeIdTableFromVector({}); + sub.setNumColumns(2); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), vars); + std::vector sources{V(0)}; std::vector targets{V(1)}; PathSearchConfiguration config{ ALL_PATHS, sources, targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; - PathSearch p = PathSearch(qec, nullptr, config); + PathSearch p = PathSearch(qec, std::move(subtree), config); } TEST(PathSearchTest, emptyGraph) { @@ -277,14 +283,14 @@ TEST(PathSearchTest, twoCycle) { * 3 4 */ TEST(PathSearchTest, allPaths) { - auto sub = makeIdTableFromVector({{0, 1}, {1, 3}, {0, 2}, {2, 3}, {2, 4}}); + auto sub = makeIdTableFromVector({{0, 1}, {0, 2}, {1, 3}, {2, 3}, {2, 4}}); auto expected = makeIdTableFromVector({ {V(0), V(1), I(0), I(0)}, {V(0), V(1), I(1), I(0)}, {V(1), V(3), I(1), I(1)}, {V(0), V(2), I(2), I(0)}, - {V(2), V(3), I(2), I(1)}, {V(0), V(2), I(3), I(0)}, + {V(2), V(3), I(3), I(1)}, {V(0), V(2), I(4), I(0)}, 
{V(2), V(4), I(4), I(1)}, }); @@ -312,8 +318,8 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { {V(0), V(1), I(1), I(0), V(11), V(10)}, {V(1), V(3), I(1), I(1), V(21), V(20)}, {V(0), V(2), I(2), I(0), V(31), V(30)}, - {V(2), V(3), I(2), I(1), V(41), V(40)}, {V(0), V(2), I(3), I(0), V(31), V(30)}, + {V(2), V(3), I(3), I(1), V(41), V(40)}, {V(0), V(2), I(4), I(0), V(31), V(30)}, {V(2), V(4), I(4), I(1), V(51), V(50)}, }); diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 00b64bd78c..1a2773fa40 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -860,8 +860,8 @@ TEST(QueryPlanner, PathSearchWithEdgeProperties) { "?middle ?end." "}}}}", h::PathSearch(config, true, true, - join(scan("?start", "", "?middle"), - scan("?middle", "", "?end"))), + h::Sort(join(scan("?start", "", "?middle"), + scan("?middle", "", "?end")))), qec); } @@ -904,9 +904,9 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { "?middle ?end." "}}}}", h::PathSearch(config, true, true, - join(scan("?start", "", "?middle"), - join(scan("?middle", "", "?middleAttribute"), - scan("?middle", "", "?end")))), + h::Sort(join(scan("?start", "", "?middle"), + join(scan("?middle", "", "?middleAttribute"), + scan("?middle", "", "?end"))))), qec); } From ae39abcea317e077b70ee92329240c49383555ef Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 8 Jul 2024 16:59:16 +0200 Subject: [PATCH 37/96] Format fix --- src/engine/PathSearch.cpp | 45 ++++++++++++++++++++++----------------- src/engine/PathSearch.h | 14 +++++++----- test/QueryPlannerTest.cpp | 11 +++++----- 3 files changed, 41 insertions(+), 29 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 908436d795..790d27f734 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -3,6 +3,7 @@ // Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) #include "PathSearch.h" + #include #include @@ -22,10 +23,12 @@ #include 
"util/Exception.h" // _____________________________________________________________________________ -BinSearchWrapper::BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, std::vector edgeCols) - : table_(table), startCol_(startCol), endCol_(endCol), edgeCols_(std::move(edgeCols)) { - -} +BinSearchWrapper::BinSearchWrapper(const IdTable& table, size_t startCol, + size_t endCol, std::vector edgeCols) + : table_(table), + startCol_(startCol), + endCol_(endCol), + edgeCols_(std::move(edgeCols)) {} // _____________________________________________________________________________ std::vector BinSearchWrapper::outgoingEdes(const Id node) const { @@ -42,26 +45,30 @@ std::vector BinSearchWrapper::outgoingEdes(const Id node) const { return edges; } -std::vector BinSearchWrapper::findPaths(const Id& source, const std::unordered_set& targets) { - if (pathCache_.contains(source.getBits())) { return pathCache_[source.getBits()]; } +std::vector BinSearchWrapper::findPaths( + const Id& source, const std::unordered_set& targets) { + if (pathCache_.contains(source.getBits())) { + return pathCache_[source.getBits()]; + } pathCache_[source.getBits()] = {}; std::vector paths; auto edges = outgoingEdes(source); - for (auto edge: edges) { + for (auto edge : edges) { if (targets.contains(edge.end_) || targets.empty()) { Path path; path.push_back(edge); paths.push_back(std::move(path)); } auto partialPaths = findPaths(Id::fromBits(edge.end_), targets); - for (auto path: partialPaths) { + for (auto path : partialPaths) { path.push_back(edge); paths.push_back(std::move(path)); } } - pathCache_[source.getBits()].insert(pathCache_[source.getBits()].end(), paths.begin(), paths.end()); + pathCache_[source.getBits()].insert(pathCache_[source.getBits()].end(), + paths.begin(), paths.end()); return paths; } @@ -71,7 +78,7 @@ const Edge BinSearchWrapper::makeEdgeFromRow(size_t row) const { edge.start_ = table_(row, startCol_).getBits(); edge.end_ = table_(row, endCol_).getBits(); - 
for (auto edgeCol: edgeCols_) { + for (auto edgeCol : edgeCols_) { edge.edgeProperties_.push_back(table_(row, edgeCol)); } return edge; @@ -201,19 +208,18 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto subStartColumn = subtree_->getVariableColumn(config_.start_); auto subEndColumn = subtree_->getVariableColumn(config_.end_); - buildGraph(dynSub.getColumn(subStartColumn), dynSub.getColumn(subEndColumn), - edgePropertyLists); - + buildGraph(dynSub.getColumn(subStartColumn), + dynSub.getColumn(subEndColumn), edgePropertyLists); } - auto subStartColumn = subtree_->getVariableColumn(config_.start_); auto subEndColumn = subtree_->getVariableColumn(config_.end_); std::vector edgeColumns; - for (auto edgeProp: config_.edgeProperties_) { + for (auto edgeProp : config_.edgeProperties_) { edgeColumns.push_back(subtree_->getVariableColumn(edgeProp)); } - BinSearchWrapper binSearch{dynSub, subStartColumn, subEndColumn, std::move(edgeColumns)}; + BinSearchWrapper binSearch{dynSub, subStartColumn, subEndColumn, + std::move(edgeColumns)}; timer.stop(); auto buildingTime = timer.msecs(); @@ -314,7 +320,8 @@ void PathSearch::buildGraph(std::span startNodes, // _____________________________________________________________________________ std::vector PathSearch::findPaths(std::span sources, - std::span targets, BinSearchWrapper& binSearch) const { + std::span targets, + BinSearchWrapper& binSearch) const { switch (config_.algorithm_) { case ALL_PATHS: return allPaths(sources, targets, binSearch); @@ -333,7 +340,7 @@ std::vector PathSearch::allPaths(std::span sources, Path path; std::unordered_set targetSet; - for (auto target: targets) { + for (auto target : targets) { targetSet.insert(target.getBits()); } @@ -341,7 +348,7 @@ std::vector PathSearch::allPaths(std::span sources, sources = indexToId_; } for (auto source : sources) { - for (auto path: binSearch.findPaths(source, targetSet)) { + for (auto path : binSearch.findPaths(source, targetSet)) { 
std::ranges::reverse(path.edges_); paths.push_back(path); } diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 9752001bb6..ebe2031dbd 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -108,15 +108,17 @@ class BinSearchWrapper { std::unordered_map> pathCache_; public: - BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, std::vector edgeCols); + BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, + std::vector edgeCols); std::vector outgoingEdes(const Id node) const; - std::vector findPaths(const Id& source, const std::unordered_set& targets); + std::vector findPaths(const Id& source, + const std::unordered_set& targets); bool isTarget(const Id node) const; -private: + private: const Edge makeEdgeFromRow(size_t row) const; }; @@ -233,14 +235,16 @@ class PathSearch : public Operation { * @return A vector of paths. */ std::vector findPaths(std::span sources, - std::span targets, BinSearchWrapper& binSearch) const; + std::span targets, + BinSearchWrapper& binSearch) const; /** * @brief Finds all paths in the graph. * @return A vector of all paths. */ std::vector allPaths(std::span sources, - std::span targets, BinSearchWrapper& binSearch) const; + std::span targets, + BinSearchWrapper& binSearch) const; /** * @brief Finds the shortest paths in the graph. diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 1a2773fa40..e9bbf9237c 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -861,7 +861,7 @@ TEST(QueryPlanner, PathSearchWithEdgeProperties) { "}}}}", h::PathSearch(config, true, true, h::Sort(join(scan("?start", "", "?middle"), - scan("?middle", "", "?end")))), + scan("?middle", "", "?end")))), qec); } @@ -903,10 +903,11 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { "?middle ?middleAttribute." "?middle ?end." 
"}}}}", - h::PathSearch(config, true, true, - h::Sort(join(scan("?start", "", "?middle"), - join(scan("?middle", "", "?middleAttribute"), - scan("?middle", "", "?end"))))), + h::PathSearch( + config, true, true, + h::Sort(join(scan("?start", "", "?middle"), + join(scan("?middle", "", "?middleAttribute"), + scan("?middle", "", "?end"))))), qec); } From 411ba0a9c0634781a75401414c1730e8a53b8bb6 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 8 Jul 2024 17:32:01 +0200 Subject: [PATCH 38/96] Extracted visitPathQuery method --- .../sparqlParser/SparqlQleverVisitor.cpp | 44 ++++++++++--------- src/parser/sparqlParser/SparqlQleverVisitor.h | 2 + 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 5512cc3a86..c159e15dc6 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -697,6 +697,29 @@ GraphPatternOperation Visitor::visit(Parser::OptionalGraphPatternContext* ctx) { return GraphPatternOperation{parsedQuery::Optional{std::move(pattern)}}; } +GraphPatternOperation Visitor::visitPathQuery(Parser::ServiceGraphPatternContext* ctx) { + auto parsePathQuery = [](parsedQuery::PathQuery& pathQuery, + const parsedQuery::GraphPatternOperation& op) { + if (std::holds_alternative(op)) { + pathQuery.fromBasicPattern( + std::get(op)); + } else if (std::holds_alternative(op)) { + auto pattern = std::get(op); + pathQuery.childGraphPattern_ = std::move(pattern._child); + } else { + AD_THROW("Unsupported argument in PathSearch"); + } + }; + + parsedQuery::GraphPattern graphPattern = visit(ctx->groupGraphPattern()); + parsedQuery::PathQuery pathQuery; + for (const auto& op : graphPattern._graphPatterns) { + parsePathQuery(pathQuery, op); + } + + return pathQuery; +} + // Parsing for the `serviceGraphPattern` rule. 
GraphPatternOperation Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { // If SILENT is specified, report that we do not support it yet. @@ -725,26 +748,7 @@ GraphPatternOperation Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { Iri serviceIri = std::get(varOrIri); if (serviceIri.toSparql() == "") { - auto parsePathQuery = [](parsedQuery::PathQuery& pathQuery, - const parsedQuery::GraphPatternOperation& op) { - if (std::holds_alternative(op)) { - pathQuery.fromBasicPattern( - std::get(op)); - } else if (std::holds_alternative(op)) { - auto pattern = std::get(op); - pathQuery.childGraphPattern_ = std::move(pattern._child); - } else { - AD_THROW("Unsupported argument in PathSearch"); - } - }; - - parsedQuery::GraphPattern graphPattern = visit(ctx->groupGraphPattern()); - parsedQuery::PathQuery pathQuery; - for (const auto& op : graphPattern._graphPatterns) { - parsePathQuery(pathQuery, op); - } - - return pathQuery; + return visitPathQuery(ctx); } // Parse the body of the SERVICE query. 
Add the visible variables from the diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h index 5654d1923f..5895fc0167 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.h +++ b/src/parser/sparqlParser/SparqlQleverVisitor.h @@ -248,6 +248,8 @@ class SparqlQleverVisitor { parsedQuery::GraphPatternOperation visit( Parser::ServiceGraphPatternContext* ctx); + parsedQuery::GraphPatternOperation visitPathQuery(Parser::ServiceGraphPatternContext* ctx); + parsedQuery::GraphPatternOperation visit(Parser::BindContext* ctx); parsedQuery::GraphPatternOperation visit(Parser::InlineDataContext* ctx); From ed03651ef1f6521d0b9744f6268a248b4c504fed Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 8 Jul 2024 18:15:43 +0200 Subject: [PATCH 39/96] Moved PathSearchConfig creation to PathQuery method --- src/engine/QueryPlanner.cpp | 13 +------------ src/parser/GraphPatternOperation.cpp | 15 +++++++++++++++ src/parser/GraphPatternOperation.h | 2 ++ src/parser/sparqlParser/SparqlQleverVisitor.cpp | 6 +++--- src/parser/sparqlParser/SparqlQleverVisitor.h | 3 ++- 5 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 6d5eb1ff18..87a7c52e11 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -2165,18 +2165,7 @@ void QueryPlanner::GraphPatternPlanner::visitPathSearch( std::vector candidatesOut; const auto& vocab = planner_._qec->getIndex().getVocab(); - auto sources = pathQuery.toSearchSide(pathQuery.sources_, vocab); - auto targets = pathQuery.toSearchSide(pathQuery.targets_, vocab); - - auto config = - PathSearchConfiguration{pathQuery.algorithm_, - std::move(sources), - std::move(targets), - std::move(pathQuery.start_.value()), - std::move(pathQuery.end_.value()), - std::move(pathQuery.pathColumn_.value()), - std::move(pathQuery.edgeColumn_.value()), - std::move(pathQuery.edgeProperties_)}; + auto config = 
pathQuery.toPathSearchConfiguration(vocab); for (auto& sub : candidatesIn) { auto pathSearch = diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index 626fa98e57..4790089603 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -140,6 +140,21 @@ void PathQuery::fromBasicPattern(const BasicGraphPattern& pattern) { } } +// ____________________________________________________________________________ +PathSearchConfiguration PathQuery::toPathSearchConfiguration( + const Index::Vocab& vocab) const { + auto sources = toSearchSide(sources_, vocab); + auto targets = toSearchSide(targets_, vocab); + return PathSearchConfiguration{algorithm_, + std::move(sources), + std::move(targets), + std::move(start_.value()), + std::move(end_.value()), + std::move(pathColumn_.value()), + std::move(edgeColumn_.value()), + std::move(edgeProperties_)}; +} + // ____________________________________________________________________________ cppcoro::generator Bind::containedVariables() const { for (const auto* ptr : _expression.containedVariables()) { diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index b3dd9d1e2b..50c03eaa48 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -153,6 +153,8 @@ struct PathQuery { void fromBasicPattern(const BasicGraphPattern& pattern); std::variant> toSearchSide( std::vector side, const Index::Vocab& vocab) const; + PathSearchConfiguration toPathSearchConfiguration( + const Index::Vocab& vocab) const; }; // A SPARQL Bind construct. 
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index c159e15dc6..73d337e2cf 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -697,12 +697,12 @@ GraphPatternOperation Visitor::visit(Parser::OptionalGraphPatternContext* ctx) { return GraphPatternOperation{parsedQuery::Optional{std::move(pattern)}}; } -GraphPatternOperation Visitor::visitPathQuery(Parser::ServiceGraphPatternContext* ctx) { +GraphPatternOperation Visitor::visitPathQuery( + Parser::ServiceGraphPatternContext* ctx) { auto parsePathQuery = [](parsedQuery::PathQuery& pathQuery, const parsedQuery::GraphPatternOperation& op) { if (std::holds_alternative(op)) { - pathQuery.fromBasicPattern( - std::get(op)); + pathQuery.fromBasicPattern(std::get(op)); } else if (std::holds_alternative(op)) { auto pattern = std::get(op); pathQuery.childGraphPattern_ = std::move(pattern._child); diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h index 5895fc0167..495980c952 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.h +++ b/src/parser/sparqlParser/SparqlQleverVisitor.h @@ -248,7 +248,8 @@ class SparqlQleverVisitor { parsedQuery::GraphPatternOperation visit( Parser::ServiceGraphPatternContext* ctx); - parsedQuery::GraphPatternOperation visitPathQuery(Parser::ServiceGraphPatternContext* ctx); + parsedQuery::GraphPatternOperation visitPathQuery( + Parser::ServiceGraphPatternContext* ctx); parsedQuery::GraphPatternOperation visit(Parser::BindContext* ctx); From 818e41bcc66df0ec87e15b7b9f4ef98c459d73bd Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 8 Jul 2024 18:53:49 +0200 Subject: [PATCH 40/96] Sonar Fixes --- src/engine/PathSearch.cpp | 42 +++-- src/engine/PathSearch.h | 8 +- src/engine/QueryPlanner.cpp | 2 +- src/engine/QueryPlanner.h | 2 +- src/parser/GraphPatternOperation.cpp | 2 +- test/PathSearchTest.cpp | 223 
+++++++++++++++++++-------- test/QueryPlannerTest.cpp | 84 ++++++---- 7 files changed, 241 insertions(+), 122 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 790d27f734..bfb07106d7 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -4,23 +4,10 @@ #include "PathSearch.h" -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include "engine/CallFixedSize.h" #include "engine/PathSearchVisitors.h" #include "engine/QueryExecutionTree.h" #include "engine/VariableToColumnMap.h" -#include "util/Exception.h" // _____________________________________________________________________________ BinSearchWrapper::BinSearchWrapper(const IdTable& table, size_t startCol, @@ -102,25 +89,32 @@ PathSearch::PathSearch(QueryExecutionContext* qec, size_t colIndex = 0; - variableColumns_[config_.start_] = makeAlwaysDefinedColumn(colIndex++); - variableColumns_[config_.end_] = makeAlwaysDefinedColumn(colIndex++); - variableColumns_[config_.pathColumn_] = makeAlwaysDefinedColumn(colIndex++); - variableColumns_[config_.edgeColumn_] = makeAlwaysDefinedColumn(colIndex++); + variableColumns_[config_.start_] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + variableColumns_[config_.end_] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + variableColumns_[config_.pathColumn_] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + variableColumns_[config_.edgeColumn_] = makeAlwaysDefinedColumn(colIndex); + colIndex++; if (std::holds_alternative(config_.sources_)) { resultWidth_++; const auto& sourceColumn = std::get(config_.sources_); - variableColumns_[sourceColumn] = makeAlwaysDefinedColumn(colIndex++); + variableColumns_[sourceColumn] = makeAlwaysDefinedColumn(colIndex); + colIndex++; } if (std::holds_alternative(config_.targets_)) { resultWidth_++; const auto& targetColumn = std::get(config_.targets_); - variableColumns_[targetColumn] = makeAlwaysDefinedColumn(colIndex++); + 
variableColumns_[targetColumn] = makeAlwaysDefinedColumn(colIndex); + colIndex++; } - for (auto edgeProperty : config_.edgeProperties_) { - variableColumns_[edgeProperty] = makeAlwaysDefinedColumn(colIndex++); + for (const auto& edgeProperty : config_.edgeProperties_) { + variableColumns_[edgeProperty] = makeAlwaysDefinedColumn(colIndex); + colIndex++; } } @@ -198,7 +192,7 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); timer.start(); - if (config_.algorithm_ == SHORTEST_PATHS) { + if (config_.algorithm_ == PathSearchAlgorithm::SHORTEST_PATHS) { std::vector> edgePropertyLists; for (const auto& edgeProperty : config_.edgeProperties_) { auto edgePropertyIndex = subtree_->getVariableColumn(edgeProperty); @@ -323,9 +317,9 @@ std::vector PathSearch::findPaths(std::span sources, std::span targets, BinSearchWrapper& binSearch) const { switch (config_.algorithm_) { - case ALL_PATHS: + case PathSearchAlgorithm::ALL_PATHS: return allPaths(sources, targets, binSearch); - case SHORTEST_PATHS: + case PathSearchAlgorithm::SHORTEST_PATHS: return shortestPaths(sources, targets); default: AD_FAIL(); diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index ebe2031dbd..434c6acf04 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -28,7 +28,7 @@ using IdToNodeMap = std::unordered_map< Id, size_t, IdHash, std::equal_to, ad_utility::AllocatorWithLimit>>; -enum PathSearchAlgorithm { ALL_PATHS, SHORTEST_PATHS }; +enum class PathSearchAlgorithm { ALL_PATHS, SHORTEST_PATHS }; using TreeAndCol = std::pair, size_t>; using SearchSide = std::variant>; @@ -75,10 +75,10 @@ struct PathSearchConfiguration { std::string toString() const { std::ostringstream os; switch (algorithm_) { - case ALL_PATHS: + case PathSearchAlgorithm::ALL_PATHS: os << "Algorithm: All paths" << '\n'; break; - case SHORTEST_PATHS: + case PathSearchAlgorithm::SHORTEST_PATHS: os << "Algorithm: Shortest paths" 
<< '\n'; break; } @@ -92,7 +92,7 @@ struct PathSearchConfiguration { os << "EdgeColumn: " << edgeColumn_.toSparql() << '\n'; os << "EdgeProperties:" << '\n'; - for (auto edgeProperty : edgeProperties_) { + for (const auto& edgeProperty : edgeProperties_) { os << " " << edgeProperty.toSparql() << '\n'; } diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 87a7c52e11..208ea79e55 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1816,7 +1816,7 @@ auto QueryPlanner::createJoinWithService( // _____________________________________________________________________ auto QueryPlanner::createJoinWithPathSearch( - SubtreePlan a, SubtreePlan b, + const SubtreePlan& a, const SubtreePlan& b, const std::vector>& jcs) -> std::optional { auto aRootOp = diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 64a12af184..501c51a4ac 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -334,7 +334,7 @@ class QueryPlanner { const std::vector>& jcs); [[nodiscard]] static std::optional createJoinWithPathSearch( - SubtreePlan a, SubtreePlan b, + const SubtreePlan& a, const SubtreePlan& b, const std::vector>& jcs); [[nodiscard]] vector getOrderByRow( diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index 4790089603..63513e4137 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -118,7 +118,7 @@ std::variant> PathQuery::toSearchSide( return side[0].getVariable(); } else { std::vector sideIds; - for (auto comp : side) { + for (const auto& comp : side) { if (comp.isVariable()) { AD_THROW("Only one variable is allowed per search side"); } diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 6b85f672ed..9c22b22396 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -42,9 +42,14 @@ TEST(PathSearchTest, constructor) { std::vector sources{V(0)}; std::vector targets{V(1)}; - 
PathSearchConfiguration config{ - ALL_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; PathSearch p = PathSearch(qec, std::move(subtree), config); } @@ -57,9 +62,14 @@ TEST(PathSearchTest, emptyGraph) { std::vector sources{V(0)}; std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - ALL_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -82,9 +92,14 @@ TEST(PathSearchTest, singlePath) { std::vector sources{V(0)}; std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - ALL_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -104,10 +119,14 @@ TEST(PathSearchTest, singlePathWithProperties) { std::vector sources{V(0)}; std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; - PathSearchConfiguration config{ALL_PATHS, sources, - targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {Var{"?edgeProperty"}}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + 
Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {Var{"?edgeProperty"}}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -126,9 +145,14 @@ TEST(PathSearchTest, singlePathWithDijkstra) { std::vector sources{V(0)}; std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - SHORTEST_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -148,10 +172,14 @@ TEST(PathSearchTest, singlePathWithDijkstraAndProperties) { std::vector sources{V(0)}; std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; - PathSearchConfiguration config{SHORTEST_PATHS, sources, - targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {Var{"?edgeProperty"}}}; + PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {Var{"?edgeProperty"}}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -178,9 +206,14 @@ TEST(PathSearchTest, twoPathsOneTarget) { std::vector sources{V(0)}; std::vector targets{V(2)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - ALL_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); 
ASSERT_THAT(resultTable.idTable(), @@ -207,9 +240,14 @@ TEST(PathSearchTest, twoPathsTwoTargets) { std::vector sources{V(0)}; std::vector targets{V(2), V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - ALL_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -234,9 +272,14 @@ TEST(PathSearchTest, cycle) { std::vector sources{V(0)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - ALL_PATHS, sources, sources, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + sources, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -264,9 +307,14 @@ TEST(PathSearchTest, twoCycle) { std::vector sources{V(0)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - ALL_PATHS, sources, sources, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + sources, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -298,9 +346,14 @@ TEST(PathSearchTest, allPaths) { std::vector sources{V(0)}; std::vector targets{}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - ALL_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + 
PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -329,10 +382,14 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty1"}, Variable{"?edgeProperty2"}}; PathSearchConfiguration config{ - ALL_PATHS, sources, - targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, - Var{"?pathIndex"}, {Var{"?edgeProperty2"}, Var{"?edgeProperty1"}}}; + PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {Var{"?edgeProperty2"}, Var{"?edgeProperty1"}}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -360,9 +417,14 @@ TEST(PathSearchTest, singleShortestPath) { std::vector sources{V(0)}; std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - SHORTEST_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -393,9 +455,14 @@ TEST(PathSearchTest, twoShortestPaths) { std::vector sources{V(0)}; std::vector targets{V(5)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - SHORTEST_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto 
resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -421,9 +488,14 @@ TEST(PathSearchTest, singlePathWithIrrelevantNode) { std::vector sources{V(0)}; std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - ALL_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -442,9 +514,14 @@ TEST(PathSearchTest, shortestPathWithIrrelevantNode) { std::vector sources{V(0)}; std::vector targets{V(4)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - SHORTEST_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -480,9 +557,14 @@ TEST(PathSearchTest, allPathsElongatedDiamond) { std::vector sources{V(0)}; std::vector targets{V(5)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - ALL_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -500,9 +582,14 @@ TEST(PathSearchTest, shortestPathsElongatedDiamond) { std::vector sources{V(0)}; std::vector 
targets{V(5)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - SHORTEST_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -537,9 +624,14 @@ TEST(PathSearchTest, multiSourceMultiTargetallPaths) { std::vector sources{V(0), V(1)}; std::vector targets{V(4), V(5)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{ - ALL_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), @@ -566,9 +658,14 @@ TEST(PathSearchTest, multiSourceMultiTargetshortestPaths) { Vars vars = {Variable{"?start"}, Variable{"?end"}}; std::vector sources{V(0), V(1)}; std::vector targets{V(4), V(5)}; - PathSearchConfiguration config{ - SHORTEST_PATHS, sources, targets, Var{"?start"}, - Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, {}}; + PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index e9bbf9237c..8fa2c1602f 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -780,10 +780,14 @@ TEST(QueryPlanner, PathSearchSingleTarget) { std::vector sources{getId("")}; std::vector 
targets{getId("")}; - PathSearchConfiguration config{ALL_PATHS, sources, - targets, Variable("?start"), - Variable("?end"), Variable("?path"), - Variable("?edge"), {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; h::expect( "PREFIX pathSearch: " "SELECT ?start ?end ?path ?edge WHERE {" @@ -808,10 +812,14 @@ TEST(QueryPlanner, PathSearchMultipleTargets) { std::vector sources{getId("")}; std::vector targets{getId(""), getId("")}; - PathSearchConfiguration config{ALL_PATHS, sources, - targets, Variable("?start"), - Variable("?end"), Variable("?path"), - Variable("?edge"), {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; h::expect( "PREFIX pathSearch: " "SELECT ?start ?end ?path ?edge WHERE {" @@ -839,10 +847,14 @@ TEST(QueryPlanner, PathSearchWithEdgeProperties) { std::vector sources{getId("")}; std::vector targets{getId("")}; - PathSearchConfiguration config{ALL_PATHS, sources, - targets, Variable("?start"), - Variable("?end"), Variable("?path"), - Variable("?edge"), {Variable("?middle")}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {Variable("?middle")}}; h::expect( "PREFIX pathSearch: " "SELECT ?start ?end ?path ?edge WHERE {" @@ -880,10 +892,14 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { std::vector sources{getId("")}; std::vector targets{getId(""), getId("")}; PathSearchConfiguration config{ - ALL_PATHS, sources, - targets, Variable("?start"), - Variable("?end"), Variable("?path"), - Variable("?edge"), {Variable("?middle"), Variable("?middleAttribute")}}; + PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + 
Variable("?end"), + Variable("?path"), + Variable("?edge"), + {Variable("?middle"), Variable("?middleAttribute")}}; h::expect( "PREFIX pathSearch: " "SELECT ?start ?end ?path ?edge WHERE {" @@ -918,10 +934,14 @@ TEST(QueryPlanner, PathSearchSourceBound) { Variable sources{"?source"}; std::vector targets{getId("")}; - PathSearchConfiguration config{ALL_PATHS, sources, - targets, Variable("?start"), - Variable("?end"), Variable("?path"), - Variable("?edge"), {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; h::expect( "PREFIX pathSearch: " "SELECT ?start ?end ?path ?edge WHERE {" @@ -947,10 +967,14 @@ TEST(QueryPlanner, PathSearchTargetBound) { std::vector sources{getId("")}; Variable targets{"?target"}; - PathSearchConfiguration config{ALL_PATHS, sources, - targets, Variable("?start"), - Variable("?end"), Variable("?path"), - Variable("?edge"), {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; h::expect( "PREFIX pathSearch: " "SELECT ?start ?end ?path ?edge WHERE {" @@ -976,10 +1000,14 @@ TEST(QueryPlanner, PathSearchBothBound) { Variable sources{"?source"}; Variable targets{"?target"}; - PathSearchConfiguration config{ALL_PATHS, sources, - targets, Variable("?start"), - Variable("?end"), Variable("?path"), - Variable("?edge"), {}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; h::expect( "PREFIX pathSearch: " "SELECT ?start ?end ?path ?edge WHERE {" From acf09c890bf280d381a49457ad59da9447ef6290 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 9 Jul 2024 09:32:22 +0200 Subject: [PATCH 41/96] Sonar fixes --- src/engine/PathSearch.cpp | 6 +++--- 
src/engine/PathSearch.h | 2 +- src/parser/GraphPatternOperation.cpp | 12 ++++-------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index bfb07106d7..c8eac09d31 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -41,7 +41,7 @@ std::vector BinSearchWrapper::findPaths( std::vector paths; auto edges = outgoingEdes(source); - for (auto edge : edges) { + for (const auto& edge : edges) { if (targets.contains(edge.end_) || targets.empty()) { Path path; path.push_back(edge); @@ -60,7 +60,7 @@ std::vector BinSearchWrapper::findPaths( } // _____________________________________________________________________________ -const Edge BinSearchWrapper::makeEdgeFromRow(size_t row) const { +Edge BinSearchWrapper::makeEdgeFromRow(size_t row) const { Edge edge; edge.start_ = table_(row, startCol_).getBits(); edge.end_ = table_(row, endCol_).getBits(); @@ -209,7 +209,7 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto subStartColumn = subtree_->getVariableColumn(config_.start_); auto subEndColumn = subtree_->getVariableColumn(config_.end_); std::vector edgeColumns; - for (auto edgeProp : config_.edgeProperties_) { + for (const auto& edgeProp : config_.edgeProperties_) { edgeColumns.push_back(subtree_->getVariableColumn(edgeProp)); } BinSearchWrapper binSearch{dynSub, subStartColumn, subEndColumn, diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 434c6acf04..26f0e48c6e 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -119,7 +119,7 @@ class BinSearchWrapper { bool isTarget(const Id node) const; private: - const Edge makeEdgeFromRow(size_t row) const; + Edge makeEdgeFromRow(size_t row) const; }; /** diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index 63513e4137..6e13d66d88 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -145,14 +145,10 
@@ PathSearchConfiguration PathQuery::toPathSearchConfiguration( const Index::Vocab& vocab) const { auto sources = toSearchSide(sources_, vocab); auto targets = toSearchSide(targets_, vocab); - return PathSearchConfiguration{algorithm_, - std::move(sources), - std::move(targets), - std::move(start_.value()), - std::move(end_.value()), - std::move(pathColumn_.value()), - std::move(edgeColumn_.value()), - std::move(edgeProperties_)}; + return PathSearchConfiguration{ + algorithm_, sources, targets, + start_.value(), end_.value(), pathColumn_.value(), + edgeColumn_.value(), edgeProperties_}; } // ____________________________________________________________________________ From 385f67ad08ac2d50feaafbf290d4e64ebbd27e52 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 9 Jul 2024 10:40:04 +0200 Subject: [PATCH 42/96] Added PathSearchException --- src/parser/GraphPatternOperation.cpp | 75 +++++++++++++------ src/parser/GraphPatternOperation.h | 7 ++ .../sparqlParser/SparqlQleverVisitor.cpp | 5 +- 3 files changed, 65 insertions(+), 22 deletions(-) diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index 6e13d66d88..258a70a8fc 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -73,42 +73,75 @@ void PathQuery::addParameter(const SparqlTriple& triple) { auto simpleTriple = triple.getSimple(); TripleComponent predicate = simpleTriple.p_; TripleComponent object = simpleTriple.o_; - AD_CORRECTNESS_CHECK(predicate.isIri()); - if (predicate.getIri().toStringRepresentation().ends_with("source>")) { + + if (!predicate.isIri()) { + throw PathSearchException("Predicates must be IRIs"); + } + + std::string predString = predicate.getIri().toStringRepresentation(); + if (predString.ends_with("source>")) { sources_.push_back(std::move(object)); - } else if (predicate.getIri().toStringRepresentation().ends_with("target>")) { + } else if (predString.ends_with("target>")) { 
targets_.push_back(std::move(object)); - } else if (predicate.getIri().toStringRepresentation().ends_with("start>")) { - AD_CORRECTNESS_CHECK(object.isVariable()); + } else if (predString.ends_with("start>")) { + + if (!object.isVariable()) { + throw PathSearchException("The 'start' value has to be a variable"); + } + start_ = object.getVariable(); - } else if (predicate.getIri().toStringRepresentation().ends_with("end>")) { - AD_CORRECTNESS_CHECK(object.isVariable()); + } else if (predString.ends_with("end>")) { + + if (!object.isVariable()) { + throw PathSearchException("The 'end' value has to be a variable"); + } + end_ = object.getVariable(); - } else if (predicate.getIri().toStringRepresentation().ends_with( + } else if (predString.ends_with( "pathColumn>")) { - AD_CORRECTNESS_CHECK(object.isVariable()); + + if (!object.isVariable()) { + throw PathSearchException("The 'pathColumn' value has to be a variable"); + } + pathColumn_ = object.getVariable(); - } else if (predicate.getIri().toStringRepresentation().ends_with( + } else if (predString.ends_with( "edgeColumn>")) { - AD_CORRECTNESS_CHECK(object.isVariable()); + + if (!object.isVariable()) { + throw PathSearchException("The 'edgeColumn' value has to be a variable"); + } + edgeColumn_ = object.getVariable(); - } else if (predicate.getIri().toStringRepresentation().ends_with( + } else if (predString.ends_with( "edgeProperty>")) { - AD_CORRECTNESS_CHECK(object.isVariable()); + + if (!object.isVariable()) { + throw PathSearchException("The 'edgeProperty' values have to be variables"); + } + edgeProperties_.push_back(object.getVariable()); - } else if (predicate.getIri().toStringRepresentation().ends_with( + } else if (predString.ends_with( "algorithm>")) { - AD_CORRECTNESS_CHECK(object.isIri()); - if (object.getIri().toStringRepresentation().ends_with("allPaths>")) { + + if (!object.isIri()) { + throw PathSearchException("The 'algorithm' value has to be an Iri"); + } + auto objString = 
object.getIri().toStringRepresentation(); + + if (objString.ends_with("allPaths>")) { algorithm_ = PathSearchAlgorithm::ALL_PATHS; - } else if (object.getIri().toStringRepresentation().ends_with( + } else if (objString.ends_with( "shortestPaths>")) { algorithm_ = PathSearchAlgorithm::SHORTEST_PATHS; } else { - AD_THROW("Unsupported algorithm in PathSearch"); + throw PathSearchException("Unsupported algorithm in pathSearch: " + objString + ". Supported Algorithms: " + "allPaths, shortestPaths."); } } else { - AD_THROW("Unsupported argument in PathSearch"); + PathSearchException("Unsupported argument " + predString + " in PathSearch." + "Supported Arguments: source, target, start, end, pathColumn, edgeColumn," + "edgeProperty, algorithm."); } } @@ -120,13 +153,13 @@ std::variant> PathQuery::toSearchSide( std::vector sideIds; for (const auto& comp : side) { if (comp.isVariable()) { - AD_THROW("Only one variable is allowed per search side"); + throw PathSearchException("Only one variable is allowed per search side"); } auto opt = comp.toValueId(vocab); if (opt.has_value()) { sideIds.push_back(opt.value()); } else { - AD_THROW("No vocabulary entry for " + comp.toString()); + throw PathSearchException("No vocabulary entry for " + comp.toString()); } } return sideIds; diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 50c03eaa48..43afa4c8f3 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -137,6 +137,13 @@ struct TransPath { GraphPattern _childGraphPattern; }; +class PathSearchException : public std::exception { + std::string message_; +public: + PathSearchException(std::string message) : message_(message) {} + const char* what() const noexcept override { return message_.data(); } +}; + struct PathQuery { std::vector sources_; std::vector targets_; diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 73d337e2cf..746a700e1c 
100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -23,6 +23,7 @@ #include "engine/sparqlExpressions/RelationalExpressions.h" #include "engine/sparqlExpressions/SampleExpression.h" #include "engine/sparqlExpressions/UuidExpressions.h" +#include "parser/GraphPatternOperation.h" #include "parser/SparqlParser.h" #include "parser/TokenizerCtre.h" #include "parser/TurtleParser.h" @@ -707,7 +708,9 @@ GraphPatternOperation Visitor::visitPathQuery( auto pattern = std::get(op); pathQuery.childGraphPattern_ = std::move(pattern._child); } else { - AD_THROW("Unsupported argument in PathSearch"); + throw parsedQuery::PathSearchException("Unsupported subquery in pathSearch." + "PathQuery may only consist of triples for configuration" + "And a subquery specifiying edges."); } }; From 946bda3a5d0b3ae11f0a4522f39bdf63aaecfc09 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 9 Jul 2024 12:48:10 +0200 Subject: [PATCH 43/96] Improved error handling and path query parsing --- src/parser/GraphPatternOperation.cpp | 102 ++++++++++-------- src/parser/GraphPatternOperation.h | 3 +- .../sparqlParser/SparqlQleverVisitor.cpp | 7 +- 3 files changed, 62 insertions(+), 50 deletions(-) diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index 258a70a8fc..ec411dfc41 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -5,9 +5,12 @@ #include "parser/GraphPatternOperation.h" +#include + #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "parser/ParsedQuery.h" +#include "parser/TripleComponent.h" #include "util/Exception.h" #include "util/Forward.h" @@ -78,52 +81,44 @@ void PathQuery::addParameter(const SparqlTriple& triple) { throw PathSearchException("Predicates must be IRIs"); } + auto setVariable = + [](std::string parameter, const TripleComponent& object, + const std::optional& existingValue) -> Variable { + if 
(!object.isVariable()) { + throw PathSearchException("The value " + object.toString() + + " for parameter '" + parameter + + "' has to be a variable"); + } + + if (existingValue.has_value()) { + throw PathSearchException("The parameter '" + parameter + + "' has already been set to variable: '" + + existingValue.value().toSparql() + + "'." + "New variable: '" + + object.toString() + "'."); + } + + return object.getVariable(); + }; + std::string predString = predicate.getIri().toStringRepresentation(); if (predString.ends_with("source>")) { sources_.push_back(std::move(object)); } else if (predString.ends_with("target>")) { targets_.push_back(std::move(object)); } else if (predString.ends_with("start>")) { - - if (!object.isVariable()) { - throw PathSearchException("The 'start' value has to be a variable"); - } - - start_ = object.getVariable(); + start_ = setVariable("start", object, start_); } else if (predString.ends_with("end>")) { - - if (!object.isVariable()) { - throw PathSearchException("The 'end' value has to be a variable"); - } - - end_ = object.getVariable(); - } else if (predString.ends_with( - "pathColumn>")) { - - if (!object.isVariable()) { - throw PathSearchException("The 'pathColumn' value has to be a variable"); - } - - pathColumn_ = object.getVariable(); - } else if (predString.ends_with( - "edgeColumn>")) { - - if (!object.isVariable()) { - throw PathSearchException("The 'edgeColumn' value has to be a variable"); - } - - edgeColumn_ = object.getVariable(); - } else if (predString.ends_with( - "edgeProperty>")) { - - if (!object.isVariable()) { - throw PathSearchException("The 'edgeProperty' values have to be variables"); - } - - edgeProperties_.push_back(object.getVariable()); - } else if (predString.ends_with( - "algorithm>")) { - + end_ = setVariable("end", object, end_); + } else if (predString.ends_with("pathColumn>")) { + pathColumn_ = setVariable("pathColumn", object, pathColumn_); + } else if (predString.ends_with("edgeColumn>")) { + 
edgeColumn_ = setVariable("edgeColumn", object, edgeColumn_); + } else if (predString.ends_with("edgeProperty>")) { + edgeProperties_.push_back( + setVariable("edgeProperty", object, std::nullopt)); + } else if (predString.ends_with("algorithm>")) { if (!object.isIri()) { throw PathSearchException("The 'algorithm' value has to be an Iri"); } @@ -131,16 +126,19 @@ void PathQuery::addParameter(const SparqlTriple& triple) { if (objString.ends_with("allPaths>")) { algorithm_ = PathSearchAlgorithm::ALL_PATHS; - } else if (objString.ends_with( - "shortestPaths>")) { + } else if (objString.ends_with("shortestPaths>")) { algorithm_ = PathSearchAlgorithm::SHORTEST_PATHS; } else { - throw PathSearchException("Unsupported algorithm in pathSearch: " + objString + ". Supported Algorithms: " - "allPaths, shortestPaths."); + throw PathSearchException( + "Unsupported algorithm in pathSearch: " + objString + + ". Supported Algorithms: " + "allPaths, shortestPaths."); } } else { - PathSearchException("Unsupported argument " + predString + " in PathSearch." - "Supported Arguments: source, target, start, end, pathColumn, edgeColumn," + PathSearchException("Unsupported argument " + predString + + " in PathSearch." 
+ "Supported Arguments: source, target, start, end, " + "pathColumn, edgeColumn," "edgeProperty, algorithm."); } } @@ -153,7 +151,8 @@ std::variant> PathQuery::toSearchSide( std::vector sideIds; for (const auto& comp : side) { if (comp.isVariable()) { - throw PathSearchException("Only one variable is allowed per search side"); + throw PathSearchException( + "Only one variable is allowed per search side"); } auto opt = comp.toValueId(vocab); if (opt.has_value()) { @@ -178,6 +177,17 @@ PathSearchConfiguration PathQuery::toPathSearchConfiguration( const Index::Vocab& vocab) const { auto sources = toSearchSide(sources_, vocab); auto targets = toSearchSide(targets_, vocab); + + if (!start_.has_value()) { + throw PathSearchException("Missing parameter 'start' in path search."); + } else if (!end_.has_value()) { + throw PathSearchException("Missing parameter 'end' in path search."); + } else if (!pathColumn_.has_value()) { + throw PathSearchException("Missing parameter 'pathColumn' in path search."); + } else if (!edgeColumn_.has_value()) { + throw PathSearchException("Missing parameter 'edgeColumn' in path search."); + } + return PathSearchConfiguration{ algorithm_, sources, targets, start_.value(), end_.value(), pathColumn_.value(), diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 43afa4c8f3..62f6490d27 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -139,7 +139,8 @@ struct TransPath { class PathSearchException : public std::exception { std::string message_; -public: + + public: PathSearchException(std::string message) : message_(message) {} const char* what() const noexcept override { return message_.data(); } }; diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 746a700e1c..bdf792e1d6 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -708,9 +708,10 @@ 
GraphPatternOperation Visitor::visitPathQuery( auto pattern = std::get(op); pathQuery.childGraphPattern_ = std::move(pattern._child); } else { - throw parsedQuery::PathSearchException("Unsupported subquery in pathSearch." - "PathQuery may only consist of triples for configuration" - "And a subquery specifiying edges."); + throw parsedQuery::PathSearchException( + "Unsupported subquery in pathSearch." + "PathQuery may only consist of triples for configuration" + "And a subquery specifiying edges."); } }; From aec3e3443cae31157249b2ddac89cd4149d75342 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 9 Jul 2024 12:58:31 +0200 Subject: [PATCH 44/96] Added docstring for PathQuery --- src/parser/GraphPatternOperation.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 62f6490d27..40e5c38fd1 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -145,6 +145,15 @@ class PathSearchException : public std::exception { const char* what() const noexcept override { return message_.data(); } }; +// The PathQuery object holds intermediate information for the PathSearch. +// The PathSearchConfiguration requires concrete Ids. The vocabulary from the +// QueryPlanner is needed to translate the TripleComponents to ValueIds. +// Also, the members of the PathQuery have defaults and can be set after +// the object creation, simplifying the parsing process. If a required +// value has not been set during parsing, the method 'toPathSearchConfiguration' +// will throw an exception. +// All the error handling for the PathSearch happens in the PathQuery object. +// Thus, if a PathSearchConfiguration can be constructed, it is valid. 
struct PathQuery { std::vector sources_; std::vector targets_; From de33fddfc32738fa1b722f8228f997ee0a8e7ceb Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 9 Jul 2024 13:00:31 +0200 Subject: [PATCH 45/96] Fixed typo --- src/parser/sparqlParser/SparqlQleverVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index bdf792e1d6..72528326f6 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -711,7 +711,7 @@ GraphPatternOperation Visitor::visitPathQuery( throw parsedQuery::PathSearchException( "Unsupported subquery in pathSearch." "PathQuery may only consist of triples for configuration" - "And a subquery specifiying edges."); + "And a subquery specifying edges."); } }; From e9def110f4a26740e276929d0f9c1786ebadf037 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 9 Jul 2024 13:49:37 +0200 Subject: [PATCH 46/96] Added tests for path search exceptions --- src/parser/GraphPatternOperation.cpp | 16 +-- test/QueryPlannerTest.cpp | 151 +++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 8 deletions(-) diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index ec411dfc41..c377c43e64 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -94,9 +94,8 @@ void PathQuery::addParameter(const SparqlTriple& triple) { throw PathSearchException("The parameter '" + parameter + "' has already been set to variable: '" + existingValue.value().toSparql() + - "'." - "New variable: '" + - object.toString() + "'."); + "'. New variable: '" + object.toString() + + "'."); } return object.getVariable(); @@ -135,11 +134,12 @@ void PathQuery::addParameter(const SparqlTriple& triple) { "allPaths, shortestPaths."); } } else { - PathSearchException("Unsupported argument " + predString + - " in PathSearch." 
- "Supported Arguments: source, target, start, end, " - "pathColumn, edgeColumn," - "edgeProperty, algorithm."); + throw PathSearchException( + "Unsupported argument " + predString + + " in PathSearch. " + "Supported Arguments: source, target, start, end, " + "pathColumn, edgeColumn, " + "edgeProperty, algorithm."); } } diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 8fa2c1602f..738f310297 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -1026,6 +1026,157 @@ TEST(QueryPlanner, PathSearchBothBound) { h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); } +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchMissingStart) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(h::parseAndPlan(std::move(query), qec), + HasSubstr("Missing parameter 'start'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchMultipleStarts) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start1;" + "pathSearch:start ?start2;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("parameter 'start' has already been set " + "to variable: '?start1'. New variable: '?start2'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchStartNotVariable) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("The value for parameter 'start'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchPredicateNotIri) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path ?algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(h::parseAndPlan(std::move(query), qec), + HasSubstr("Predicates must be IRIs"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchUnsupportedArgument) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + " ?error;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("Unsupported argument in PathSearch"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchTwoVariablesForSource) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source1 ;" + "pathSearch:source ?source2 ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("Only one variable is allowed per search side"), + parsedQuery::PathSearchException); +} + // __________________________________________________________________________ TEST(QueryPlanner, BindAtBeginningOfQuery) { h::expect( From eea3625dc80d2246dd977d4f1569e06e38682ed1 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 10 Jul 2024 12:29:26 +0200 Subject: [PATCH 47/96] Improved setVariable lambda in PathQuery --- src/parser/GraphPatternOperation.cpp | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index c377c43e64..357958a8af 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -81,14 +81,20 @@ void PathQuery::addParameter(const SparqlTriple& triple) { throw PathSearchException("Predicates must be IRIs"); } - auto setVariable = - [](std::string parameter, const TripleComponent& object, - const std::optional& existingValue) -> Variable { + auto getVariable = [](std::string parameter, const TripleComponent& object){ if (!object.isVariable()) { throw PathSearchException("The value " + object.toString() + " for parameter '" + parameter + "' has to be a variable"); } + + return object.getVariable(); + }; + + auto setVariable = + [&](std::string parameter, const TripleComponent& object, + std::optional& existingValue) { + auto variable = getVariable(parameter, object); if (existingValue.has_value()) { throw PathSearchException("The parameter '" + parameter + @@ -98,7 +104,7 @@ void PathQuery::addParameter(const SparqlTriple& triple) { "'."); } - return object.getVariable(); + existingValue = object.getVariable(); }; std::string predString = predicate.getIri().toStringRepresentation(); @@ -107,16 +113,16 @@ void PathQuery::addParameter(const SparqlTriple& triple) { } else if 
(predString.ends_with("target>")) { targets_.push_back(std::move(object)); } else if (predString.ends_with("start>")) { - start_ = setVariable("start", object, start_); + setVariable("start", object, start_); } else if (predString.ends_with("end>")) { - end_ = setVariable("end", object, end_); + setVariable("end", object, end_); } else if (predString.ends_with("pathColumn>")) { - pathColumn_ = setVariable("pathColumn", object, pathColumn_); + setVariable("pathColumn", object, pathColumn_); } else if (predString.ends_with("edgeColumn>")) { - edgeColumn_ = setVariable("edgeColumn", object, edgeColumn_); + setVariable("edgeColumn", object, edgeColumn_); } else if (predString.ends_with("edgeProperty>")) { edgeProperties_.push_back( - setVariable("edgeProperty", object, std::nullopt)); + getVariable("edgeProperty", object)); } else if (predString.ends_with("algorithm>")) { if (!object.isIri()) { throw PathSearchException("The 'algorithm' value has to be an Iri"); From ae175acafb27244766468571ae69298a1eced578 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 10 Jul 2024 13:15:41 +0200 Subject: [PATCH 48/96] Removed shortestPaths and boost BGL --- src/engine/PathSearch.cpp | 134 ++------------ src/engine/PathSearch.h | 131 +++++++++----- src/engine/PathSearchVisitors.h | 251 --------------------------- src/parser/GraphPatternOperation.cpp | 4 +- test/PathSearchTest.cpp | 208 ---------------------- 5 files changed, 103 insertions(+), 625 deletions(-) delete mode 100644 src/engine/PathSearchVisitors.h diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index c8eac09d31..26f4e4aa77 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -5,7 +5,6 @@ #include "PathSearch.h" #include "engine/CallFixedSize.h" -#include "engine/PathSearchVisitors.h" #include "engine/QueryExecutionTree.h" #include "engine/VariableToColumnMap.h" @@ -59,6 +58,22 @@ std::vector BinSearchWrapper::findPaths( return paths; } +// 
_____________________________________________________________________________ +std::span BinSearchWrapper::getSources() const { + auto startIds = table_.getColumn(startCol_); + std::vector sources; + + size_t index = 0; + Id lastId; + while (index < startIds.size()) { + lastId = startIds[index]; + sources.push_back(lastId); + while (lastId == startIds[index]) { index++; } + } + + return sources; +} + // _____________________________________________________________________________ Edge BinSearchWrapper::makeEdgeFromRow(size_t row) const { Edge edge; @@ -77,8 +92,7 @@ PathSearch::PathSearch(QueryExecutionContext* qec, PathSearchConfiguration config) : Operation(qec), subtree_(std::move(subtree)), - config_(std::move(config)), - idToIndex_(allocator()) { + config_(std::move(config)) { AD_CORRECTNESS_CHECK(qec != nullptr); auto startCol = subtree_->getVariableColumn(config_.start_); @@ -192,20 +206,6 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); timer.start(); - if (config_.algorithm_ == PathSearchAlgorithm::SHORTEST_PATHS) { - std::vector> edgePropertyLists; - for (const auto& edgeProperty : config_.edgeProperties_) { - auto edgePropertyIndex = subtree_->getVariableColumn(edgeProperty); - edgePropertyLists.push_back(dynSub.getColumn(edgePropertyIndex)); - } - - auto subStartColumn = subtree_->getVariableColumn(config_.start_); - auto subEndColumn = subtree_->getVariableColumn(config_.end_); - - buildGraph(dynSub.getColumn(subStartColumn), - dynSub.getColumn(subEndColumn), edgePropertyLists); - } - auto subStartColumn = subtree_->getVariableColumn(config_.start_); auto subEndColumn = subtree_->getVariableColumn(config_.end_); std::vector edgeColumns; @@ -256,22 +256,6 @@ VariableToColumnMap PathSearch::computeVariableToColumnMap() const { return variableColumns_; }; -// _____________________________________________________________________________ -void 
PathSearch::buildMapping(std::span startNodes, - std::span endNodes) { - auto addNode = [this](const Id node) { - if (!idToIndex_.contains(node)) { - idToIndex_[node] = indexToId_.size(); - indexToId_.push_back(node); - } - }; - for (size_t i = 0; i < startNodes.size(); i++) { - checkCancellation(); - addNode(startNodes[i]); - addNode(endNodes[i]); - } -} - std::span PathSearch::handleSearchSide( const SearchSide& side, const std::optional& binding) const { std::span ids; @@ -286,32 +270,6 @@ std::span PathSearch::handleSearchSide( return ids; } -// _____________________________________________________________________________ -void PathSearch::buildGraph(std::span startNodes, - std::span endNodes, - std::span> edgePropertyLists) { - AD_CORRECTNESS_CHECK(startNodes.size() == endNodes.size()); - buildMapping(startNodes, endNodes); - - while (boost::num_vertices(graph_) < indexToId_.size()) { - boost::add_vertex(graph_); - } - - for (size_t i = 0; i < startNodes.size(); i++) { - checkCancellation(); - auto startIndex = idToIndex_[startNodes[i]]; - auto endIndex = idToIndex_[endNodes[i]]; - - std::vector edgeProperties; - for (size_t j = 0; j < edgePropertyLists.size(); j++) { - edgeProperties.push_back(edgePropertyLists[j][i]); - } - - Edge edge{startNodes[i].getBits(), endNodes[i].getBits(), edgeProperties}; - boost::add_edge(startIndex, endIndex, edge, graph_); - } -} - // _____________________________________________________________________________ std::vector PathSearch::findPaths(std::span sources, std::span targets, @@ -319,8 +277,6 @@ std::vector PathSearch::findPaths(std::span sources, switch (config_.algorithm_) { case PathSearchAlgorithm::ALL_PATHS: return allPaths(sources, targets, binSearch); - case PathSearchAlgorithm::SHORTEST_PATHS: - return shortestPaths(sources, targets); default: AD_FAIL(); } @@ -339,7 +295,7 @@ std::vector PathSearch::allPaths(std::span sources, } if (sources.empty()) { - sources = indexToId_; + sources = binSearch.getSources(); } 
for (auto source : sources) { for (auto path : binSearch.findPaths(source, targetSet)) { @@ -350,60 +306,6 @@ std::vector PathSearch::allPaths(std::span sources, return paths; } -// _____________________________________________________________________________ -std::vector PathSearch::shortestPaths(std::span sources, - std::span targets) const { - std::vector paths; - Path path; - for (auto source : sources) { - auto startIndex = idToIndex_.at(source); - - std::unordered_set targetIndices; - for (auto target : targets) { - targetIndices.insert(target.getBits()); - } - std::vector predecessors(indexToId_.size()); - std::vector distances(indexToId_.size(), - std::numeric_limits::max()); - - DijkstraAllPathsVisitor vis(startIndex, targetIndices, path, paths, - predecessors, distances); - - auto weightMap = get(&Edge::weight_, graph_); - - boost::dijkstra_shortest_paths( - graph_, startIndex, - boost::visitor(vis) - .weight_map(weightMap) - .predecessor_map(predecessors.data()) - .distance_map(distances.data()) - .distance_compare(std::less_equal())); - } - return paths; -} - -// _____________________________________________________________________________ -std::vector PathSearch::reconstructPaths( - uint64_t source, uint64_t target, PredecessorMap predecessors) const { - const auto& edges = predecessors[target]; - std::vector paths; - - for (const auto& edge : edges) { - std::vector subPaths; - if (edge.start_ == source) { - subPaths = {Path()}; - } else { - subPaths = reconstructPaths(source, edge.start_, predecessors); - } - - for (auto path : subPaths) { - path.push_back(edge); - paths.push_back(path); - } - } - return paths; -} - // _____________________________________________________________________________ template void PathSearch::pathsToResultTable(IdTable& tableDyn, diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 26f0e48c6e..264be16a06 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -13,25 +13,97 @@ #include #include 
"engine/Operation.h" -#include "engine/PathSearchVisitors.h" #include "engine/VariableToColumnMap.h" #include "global/Id.h" #include "index/Vocabulary.h" -// We deliberately use the `std::` variants of a hash set and hash map because -// `absl`s types are not exception safe. -struct IdHash { - auto operator()(Id id) const { return std::hash{}(id.getBits()); } +enum class PathSearchAlgorithm { ALL_PATHS }; + +using TreeAndCol = std::pair, size_t>; +using SearchSide = std::variant>; + +/** + * @brief Represents an edge in the graph. + */ +struct Edge { + // The starting node ID. + uint64_t start_; + + // The ending node ID. + uint64_t end_; + + // Properties associated with the edge. + std::vector edgeProperties_; + + // The weight of the edge. + double weight_ = 1; + + /** + * @brief Converts the edge to a pair of IDs. + * @return A pair of IDs representing the start and end of the edge. + */ + std::pair toIds() const { + return {Id::fromBits(start_), Id::fromBits(end_)}; + } }; -using IdToNodeMap = std::unordered_map< - Id, size_t, IdHash, std::equal_to, - ad_utility::AllocatorWithLimit>>; +/** + * @brief Represents a path consisting of multiple edges. + */ +struct Path { + // The edges that make up the path. + std::vector edges_; -enum class PathSearchAlgorithm { ALL_PATHS, SHORTEST_PATHS }; + /** + * @brief Checks if the path is empty. + * @return True if the path is empty, false otherwise. + */ + bool empty() const { return edges_.empty(); } -using TreeAndCol = std::pair, size_t>; -using SearchSide = std::variant>; + /** + * @brief Returns the number of edges in the path. + * @return The number of edges in the path. + */ + size_t size() const { return edges_.size(); } + + /** + * @brief Adds an edge to the end of the path. + * @param edge The edge to add. + */ + void push_back(const Edge& edge) { edges_.push_back(edge); } + + /** + * @brief Reverses the order of the edges in the path. 
+ */ + void reverse() { std::ranges::reverse(edges_); } + + /** + * @brief Returns the ID of the first node in the path, if it exists. + * @return The ID of the first node, or std::nullopt if the path is empty. + */ + std::optional firstNode() const { + return !empty() ? std::optional{edges_.front().start_} + : std::nullopt; + } + + /** + * @brief Returns the ID of the last node in the path, if it exists. + * @return The ID of the last node, or std::nullopt if the path is empty. + */ + std::optional lastNode() const { + return !empty() ? std::optional{edges_.back().end_} + : std::nullopt; + } + + /** + * @brief Checks if the path ends with the given node ID. + * @param node The node ID to check. + * @return True if the path ends with the given node ID, false otherwise. + */ + bool ends_with(uint64_t node) const { + return (!empty() && node == lastNode().value()); + } +}; /** * @brief Struct to hold configuration parameters for the path search. @@ -78,9 +150,6 @@ struct PathSearchConfiguration { case PathSearchAlgorithm::ALL_PATHS: os << "Algorithm: All paths" << '\n'; break; - case PathSearchAlgorithm::SHORTEST_PATHS: - os << "Algorithm: Shortest paths" << '\n'; - break; } os << "Source: " << searchSideToString(sources_) << '\n'; @@ -116,7 +185,7 @@ class BinSearchWrapper { std::vector findPaths(const Id& source, const std::unordered_set& targets); - bool isTarget(const Id node) const; + std::span getSources() const; private: Edge makeEdgeFromRow(size_t row) const; @@ -130,14 +199,9 @@ class PathSearch : public Operation { size_t resultWidth_; VariableToColumnMap variableColumns_; - // The graph on which the path search is performed. - Graph graph_; // Configuration for the path search. 
PathSearchConfiguration config_; - std::vector indexToId_; - IdToNodeMap idToIndex_; - std::optional boundSources_; std::optional boundTargets_; @@ -210,23 +274,6 @@ class PathSearch : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; private: - /** - * @brief Builds the graph from the given nodes and edge properties. - * @param startNodes A span of start nodes. - * @param endNodes A span of end nodes. - * @param edgePropertyLists A span of edge property lists. - */ - void buildGraph(std::span startNodes, std::span endNodes, - std::span> edgePropertyLists); - - /** - * @brief Builds the mapping from node IDs to indices. - * @param startNodes A span of start nodes. - * @param endNodes A span of end nodes. - */ - void buildMapping(std::span startNodes, - std::span endNodes); - std::span handleSearchSide( const SearchSide& side, const std::optional& binding) const; @@ -246,16 +293,6 @@ class PathSearch : public Operation { std::span targets, BinSearchWrapper& binSearch) const; - /** - * @brief Finds the shortest paths in the graph. - * @return A vector of the shortest paths. - */ - std::vector shortestPaths(std::span sources, - std::span targets) const; - - std::vector reconstructPaths(uint64_t source, uint64_t target, - PredecessorMap predecessors) const; - /** * @brief Converts paths to a result table with a specified width. * @tparam WIDTH The width of the result table. diff --git a/src/engine/PathSearchVisitors.h b/src/engine/PathSearchVisitors.h deleted file mode 100644 index d5558a504f..0000000000 --- a/src/engine/PathSearchVisitors.h +++ /dev/null @@ -1,251 +0,0 @@ -// Copyright 2024, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -/** - * @brief Represents an edge in the graph. - */ -struct Edge { - // The starting node ID. 
- uint64_t start_; - - // The ending node ID. - uint64_t end_; - - // Properties associated with the edge. - std::vector edgeProperties_; - - // The weight of the edge. - double weight_ = 1; - - /** - * @brief Converts the edge to a pair of IDs. - * @return A pair of IDs representing the start and end of the edge. - */ - std::pair toIds() const { - return {Id::fromBits(start_), Id::fromBits(end_)}; - } -}; - -/** - * @brief Represents a path consisting of multiple edges. - */ -struct Path { - // The edges that make up the path. - std::vector edges_; - - /** - * @brief Checks if the path is empty. - * @return True if the path is empty, false otherwise. - */ - bool empty() const { return edges_.empty(); } - - /** - * @brief Returns the number of edges in the path. - * @return The number of edges in the path. - */ - size_t size() const { return edges_.size(); } - - /** - * @brief Adds an edge to the end of the path. - * @param edge The edge to add. - */ - void push_back(const Edge& edge) { edges_.push_back(edge); } - - /** - * @brief Reverses the order of the edges in the path. - */ - void reverse() { std::ranges::reverse(edges_); } - - /** - * @brief Returns the ID of the first node in the path, if it exists. - * @return The ID of the first node, or std::nullopt if the path is empty. - */ - std::optional firstNode() const { - return !empty() ? std::optional{edges_.front().start_} - : std::nullopt; - } - - /** - * @brief Returns the ID of the last node in the path, if it exists. - * @return The ID of the last node, or std::nullopt if the path is empty. - */ - std::optional lastNode() const { - return !empty() ? std::optional{edges_.back().end_} - : std::nullopt; - } - - /** - * @brief Checks if the path ends with the given node ID. - * @param node The node ID to check. - * @return True if the path ends with the given node ID, false otherwise. 
- */ - bool ends_with(uint64_t node) const { - return (!empty() && node == lastNode().value()); - } -}; - -/** - * @brief Boost graph types and descriptors. - */ -using Graph = boost::adjacency_list; -using VertexDescriptor = boost::graph_traits::vertex_descriptor; -using EdgeDescriptor = boost::graph_traits::edge_descriptor; - -using PredecessorMap = std::unordered_map>; - -struct StopSearchException : public std::exception { - const char* what() const noexcept override { return "Stop DFS"; } -}; - -/** - * @brief Visitor for performing a depth-first search to find all paths. - */ -class AllPathsVisitor : public boost::default_dfs_visitor { - VertexDescriptor start_; - - PredecessorMap& predecessors_; - - public: - /** - * @brief Constructor for AllPathsVisitor. - * @param targets Set of target node IDs. - * @param path Reference to the current path being explored. - * @param paths Reference to the collection of all found paths. - * @param indexToId Mapping from indices to IDs. - */ - AllPathsVisitor(VertexDescriptor start, PredecessorMap& predecessors) - : start_(start), predecessors_(predecessors) {} - - /** - * @brief Examines an edge during the depth-first search. - * @param edgeDesc The descriptor of the edge being examined. - * @param graph The graph being searched. - */ - void examine_edge(EdgeDescriptor edgeDesc, const Graph& graph) { - const Edge& edge = graph[edgeDesc]; - - if (!predecessors_.contains(edge.end_)) { - predecessors_[edge.end_] = {}; - } - - auto& predEdges = predecessors_[edge.end_]; - predEdges.push_back(edge); - } - - /** - * @brief Called when a vertex has been finished during the depth-first - * search. - * @param vertex The descriptor of the vertex being finished. - * @param graph The graph being searched. 
- */ - void finish_vertex(VertexDescriptor vertex, const Graph& graph) { - (void)graph; - if (vertex == start_) { - throw StopSearchException(); - } - } -}; - -/** - * @brief Visitor for performing Dijkstra's algorithm to find all shortest - * paths. - */ -class DijkstraAllPathsVisitor : public boost::default_dijkstra_visitor { - // The source vertex descriptor. - VertexDescriptor source_; - - // Set of target node IDs. - std::unordered_set targets_; - - // Reference to the current path being explored. - Path& currentPath_; - - // Reference to the collection of all found paths. - std::vector& allPaths_; - - // Reference to the vector of predecessors. - std::vector& predecessors_; - - // Reference to the vector of distances. - std::vector& distances_; - - public: - /** - * @brief Constructor for DijkstraAllPathsVisitor. - * @param source The source vertex descriptor. - * @param targets Set of target node IDs. - * @param path Reference to the current path being explored. - * @param paths Reference to the collection of all found paths. - * @param predecessors Reference to the vector of predecessors. - * @param distances Reference to the vector of distances. - */ - DijkstraAllPathsVisitor(VertexDescriptor source, - std::unordered_set targets, Path& path, - std::vector& paths, - std::vector& predecessors, - std::vector& distances) - : source_(source), - targets_(std::move(targets)), - currentPath_(path), - allPaths_(paths), - predecessors_(predecessors), - distances_(distances) {} - - /** - * @brief Returns the vector of predecessors. - * @return The vector of predecessors. - */ - const std::vector& getPredecessors() const { - return predecessors_; - } - - /** - * @brief Returns the vector of distances. - * @return The vector of distances. - */ - const std::vector& getDistances() const { return distances_; } - - /** - * @brief Called when an edge is relaxed during Dijkstra's algorithm. - * @param edgeDesc The descriptor of the edge being relaxed. 
- * @param graph The graph being searched. - */ - void edge_relaxed(EdgeDescriptor edgeDesc, const Graph& graph) { - const Edge& edge = graph[edgeDesc]; - if (targets_.empty() || targets_.contains(edge.end_)) { - rebuild_path(target(edgeDesc, graph), graph); - } - } - - /** - * @brief Rebuilds the path from the source to the given vertex. - * @param vertex The descriptor of the vertex. - * @param graph The graph being searched. - */ - void rebuild_path(VertexDescriptor vertex, const Graph& graph) { - currentPath_.edges_.clear(); - for (VertexDescriptor v = vertex; v != source_; v = predecessors_[v]) { - EdgeDescriptor e; - bool exists; - boost::tie(e, exists) = edge(predecessors_[v], v, graph); - if (exists) { - currentPath_.push_back(graph[e]); - } - } - currentPath_.reverse(); - allPaths_.push_back(currentPath_); - } -}; diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index 357958a8af..a8b24a99e2 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -131,13 +131,11 @@ void PathQuery::addParameter(const SparqlTriple& triple) { if (objString.ends_with("allPaths>")) { algorithm_ = PathSearchAlgorithm::ALL_PATHS; - } else if (objString.ends_with("shortestPaths>")) { - algorithm_ = PathSearchAlgorithm::SHORTEST_PATHS; } else { throw PathSearchException( "Unsupported algorithm in pathSearch: " + objString + ". 
Supported Algorithms: " - "allPaths, shortestPaths."); + "allPaths."); } } else { throw PathSearchException( diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 9c22b22396..752c75a2bb 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -133,58 +133,6 @@ TEST(PathSearchTest, singlePathWithProperties) { ::testing::UnorderedElementsAreArray(expected)); } -TEST(PathSearchTest, singlePathWithDijkstra) { - auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); - auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(2), V(3), I(0), I(2)}, - {V(3), V(4), I(0), I(3)}, - }); - - std::vector sources{V(0)}; - std::vector targets{V(4)}; - Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, - sources, - targets, - Var{"?start"}, - Var{"?end"}, - Var{"?edgeIndex"}, - Var{"?pathIndex"}, - {}}; - - auto resultTable = performPathSearch(config, std::move(sub), vars); - ASSERT_THAT(resultTable.idTable(), - ::testing::UnorderedElementsAreArray(expected)); -} - -TEST(PathSearchTest, singlePathWithDijkstraAndProperties) { - auto sub = - makeIdTableFromVector({{0, 1, 10}, {1, 2, 20}, {2, 3, 30}, {3, 4, 40}}); - auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0), V(10)}, - {V(1), V(2), I(0), I(1), V(20)}, - {V(2), V(3), I(0), I(2), V(30)}, - {V(3), V(4), I(0), I(3), V(40)}, - }); - - std::vector sources{V(0)}; - std::vector targets{V(4)}; - Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; - PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, - sources, - targets, - Var{"?start"}, - Var{"?end"}, - Var{"?edgeIndex"}, - Var{"?pathIndex"}, - {Var{"?edgeProperty"}}}; - - auto resultTable = performPathSearch(config, std::move(sub), vars); - ASSERT_THAT(resultTable.idTable(), - ::testing::UnorderedElementsAreArray(expected)); -} /** * Graph: @@ -396,78 +344,6 @@ 
TEST(PathSearchTest, allPathsWithPropertiesSwitched) { ::testing::UnorderedElementsAreArray(expected)); } -/** - * Graph: - * - * 0 - * / \ - * 1 2 - * | | - * | 3 - * \ / - * 4 - */ -TEST(PathSearchTest, singleShortestPath) { - auto sub = makeIdTableFromVector({{0, 1}, {0, 2}, {1, 4}, {2, 3}, {3, 4}}); - auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0)}, - {V(1), V(4), I(0), I(1)}, - }); - - std::vector sources{V(0)}; - std::vector targets{V(4)}; - Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, - sources, - targets, - Var{"?start"}, - Var{"?end"}, - Var{"?edgeIndex"}, - Var{"?pathIndex"}, - {}}; - - auto resultTable = performPathSearch(config, std::move(sub), vars); - ASSERT_THAT(resultTable.idTable(), - ::testing::UnorderedElementsAreArray(expected)); -} - -/** - * Graph: - * - * 0 - * /|\ - * 1 2 4 - * | | | - * | 3 | - * \|/ - * 5 - */ -TEST(PathSearchTest, twoShortestPaths) { - auto sub = makeIdTableFromVector( - {{0, 1}, {0, 2}, {0, 4}, {1, 5}, {2, 3}, {3, 5}, {4, 5}}); - auto expected = makeIdTableFromVector({ - {V(0), V(4), I(0), I(0)}, - {V(4), V(5), I(0), I(1)}, - {V(0), V(1), I(1), I(0)}, - {V(1), V(5), I(1), I(1)}, - }); - - std::vector sources{V(0)}; - std::vector targets{V(5)}; - Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, - sources, - targets, - Var{"?start"}, - Var{"?end"}, - Var{"?edgeIndex"}, - Var{"?pathIndex"}, - {}}; - - auto resultTable = performPathSearch(config, std::move(sub), vars); - ASSERT_THAT(resultTable.idTable(), - ::testing::UnorderedElementsAreArray(expected)); -} /** * Graph: @@ -502,32 +378,6 @@ TEST(PathSearchTest, singlePathWithIrrelevantNode) { ::testing::UnorderedElementsAreArray(expected)); } -TEST(PathSearchTest, shortestPathWithIrrelevantNode) { - auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {5, 4}}); - auto expected = 
makeIdTableFromVector({ - {V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(2), V(3), I(0), I(2)}, - {V(3), V(4), I(0), I(3)}, - }); - - std::vector sources{V(0)}; - std::vector targets{V(4)}; - Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, - sources, - targets, - Var{"?start"}, - Var{"?end"}, - Var{"?edgeIndex"}, - Var{"?pathIndex"}, - {}}; - - auto resultTable = performPathSearch(config, std::move(sub), vars); - ASSERT_THAT(resultTable.idTable(), - ::testing::UnorderedElementsAreArray(expected)); -} - /** * Graph: * 0 @@ -571,31 +421,6 @@ TEST(PathSearchTest, allPathsElongatedDiamond) { ::testing::UnorderedElementsAreArray(expected)); } -TEST(PathSearchTest, shortestPathsElongatedDiamond) { - auto sub = - makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}}); - auto expected = makeIdTableFromVector({{V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(2), V(4), I(0), I(2)}, - {V(4), V(5), I(0), I(3)}}); - - std::vector sources{V(0)}; - std::vector targets{V(5)}; - Vars vars = {Variable{"?start"}, Variable{"?end"}}; - PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, - sources, - targets, - Var{"?start"}, - Var{"?end"}, - Var{"?edgeIndex"}, - Var{"?pathIndex"}, - {}}; - - auto resultTable = performPathSearch(config, std::move(sub), vars); - ASSERT_THAT(resultTable.idTable(), - ::testing::UnorderedElementsAreArray(expected)); -} - /** * Graph: * 0 4 @@ -638,36 +463,3 @@ TEST(PathSearchTest, multiSourceMultiTargetallPaths) { ::testing::UnorderedElementsAreArray(expected)); } -TEST(PathSearchTest, multiSourceMultiTargetshortestPaths) { - auto sub = makeIdTableFromVector({{0, 2}, {1, 2}, {2, 3}, {3, 4}, {3, 5}}); - auto expected = makeIdTableFromVector({ - {V(0), V(2), I(0), I(0)}, - {V(2), V(3), I(0), I(1)}, - {V(3), V(4), I(0), I(2)}, - {V(0), V(2), I(1), I(0)}, - {V(2), V(3), I(1), I(1)}, - {V(3), V(5), I(1), I(2)}, - {V(1), V(2), I(2), 
I(0)}, - {V(2), V(3), I(2), I(1)}, - {V(3), V(4), I(2), I(2)}, - {V(1), V(2), I(3), I(0)}, - {V(2), V(3), I(3), I(1)}, - {V(3), V(5), I(3), I(2)}, - }); - - Vars vars = {Variable{"?start"}, Variable{"?end"}}; - std::vector sources{V(0), V(1)}; - std::vector targets{V(4), V(5)}; - PathSearchConfiguration config{PathSearchAlgorithm::SHORTEST_PATHS, - sources, - targets, - Var{"?start"}, - Var{"?end"}, - Var{"?edgeIndex"}, - Var{"?pathIndex"}, - {}}; - - auto resultTable = performPathSearch(config, std::move(sub), vars); - ASSERT_THAT(resultTable.idTable(), - ::testing::UnorderedElementsAreArray(expected)); -} From 13494b8a86347e1db91ccd40c440e620d4581546 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 10 Jul 2024 18:52:20 +0200 Subject: [PATCH 49/96] Simplified Edge --- src/engine/PathSearch.cpp | 13 ++++++------ src/engine/PathSearch.h | 42 ++------------------------------------- 2 files changed, 8 insertions(+), 47 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 26f4e4aa77..87767619e3 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -41,12 +41,12 @@ std::vector BinSearchWrapper::findPaths( auto edges = outgoingEdes(source); for (const auto& edge : edges) { - if (targets.contains(edge.end_) || targets.empty()) { + if (targets.contains(edge.end_.getBits()) || targets.empty()) { Path path; path.push_back(edge); paths.push_back(std::move(path)); } - auto partialPaths = findPaths(Id::fromBits(edge.end_), targets); + auto partialPaths = findPaths(edge.end_, targets); for (auto path : partialPaths) { path.push_back(edge); paths.push_back(std::move(path)); @@ -77,8 +77,8 @@ std::span BinSearchWrapper::getSources() const { // _____________________________________________________________________________ Edge BinSearchWrapper::makeEdgeFromRow(size_t row) const { Edge edge; - edge.start_ = table_(row, startCol_).getBits(); - edge.end_ = table_(row, endCol_).getBits(); + edge.start_ = table_(row, 
startCol_); + edge.end_ = table_(row, endCol_); for (auto edgeCol : edgeCols_) { edge.edgeProperties_.push_back(table_(row, edgeCol)); @@ -317,10 +317,9 @@ void PathSearch::pathsToResultTable(IdTable& tableDyn, auto path = paths[pathIndex]; for (size_t edgeIndex = 0; edgeIndex < path.size(); edgeIndex++) { auto edge = path.edges_[edgeIndex]; - auto [start, end] = edge.toIds(); table.emplace_back(); - table(rowIndex, getStartIndex()) = start; - table(rowIndex, getEndIndex()) = end; + table(rowIndex, getStartIndex()) = edge.start_; + table(rowIndex, getEndIndex()) = edge.end_; table(rowIndex, getPathIndex()) = Id::makeFromInt(pathIndex); table(rowIndex, getEdgeIndex()) = Id::makeFromInt(edgeIndex); diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 264be16a06..8ac50c389a 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -27,24 +27,13 @@ using SearchSide = std::variant>; */ struct Edge { // The starting node ID. - uint64_t start_; + Id start_; // The ending node ID. - uint64_t end_; + Id end_; // Properties associated with the edge. std::vector edgeProperties_; - - // The weight of the edge. - double weight_ = 1; - - /** - * @brief Converts the edge to a pair of IDs. - * @return A pair of IDs representing the start and end of the edge. - */ - std::pair toIds() const { - return {Id::fromBits(start_), Id::fromBits(end_)}; - } }; /** @@ -76,33 +65,6 @@ struct Path { * @brief Reverses the order of the edges in the path. */ void reverse() { std::ranges::reverse(edges_); } - - /** - * @brief Returns the ID of the first node in the path, if it exists. - * @return The ID of the first node, or std::nullopt if the path is empty. - */ - std::optional firstNode() const { - return !empty() ? std::optional{edges_.front().start_} - : std::nullopt; - } - - /** - * @brief Returns the ID of the last node in the path, if it exists. - * @return The ID of the last node, or std::nullopt if the path is empty. 
- */ - std::optional lastNode() const { - return !empty() ? std::optional{edges_.back().end_} - : std::nullopt; - } - - /** - * @brief Checks if the path ends with the given node ID. - * @param node The node ID to check. - * @return True if the path ends with the given node ID, false otherwise. - */ - bool ends_with(uint64_t node) const { - return (!empty() && node == lastNode().value()); - } }; /** From 1c209febf9073e88ba5159dec67fc2dbe0fcf551 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Thu, 11 Jul 2024 02:30:37 +0200 Subject: [PATCH 50/96] Refactored DFS --- src/engine/PathSearch.cpp | 83 +++++++++++++++++++++++----- src/engine/PathSearch.h | 27 ++++++++- src/parser/GraphPatternOperation.cpp | 12 ++-- test/PathSearchTest.cpp | 73 ++++++++++++------------ 4 files changed, 132 insertions(+), 63 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 87767619e3..d5aaedf8a4 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -4,6 +4,8 @@ #include "PathSearch.h" +#include + #include "engine/CallFixedSize.h" #include "engine/QueryExecutionTree.h" #include "engine/VariableToColumnMap.h" @@ -68,7 +70,9 @@ std::span BinSearchWrapper::getSources() const { while (index < startIds.size()) { lastId = startIds[index]; sources.push_back(lastId); - while (lastId == startIds[index]) { index++; } + while (lastId == startIds[index]) { + index++; + } } return sources; @@ -90,9 +94,7 @@ Edge BinSearchWrapper::makeEdgeFromRow(size_t row) const { PathSearch::PathSearch(QueryExecutionContext* qec, std::shared_ptr subtree, PathSearchConfiguration config) - : Operation(qec), - subtree_(std::move(subtree)), - config_(std::move(config)) { + : Operation(qec), subtree_(std::move(subtree)), config_(std::move(config)) { AD_CORRECTNESS_CHECK(qec != nullptr); auto startCol = subtree_->getVariableColumn(config_.start_); @@ -228,7 +230,7 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto sideTime = 
timer.msecs(); timer.start(); - auto paths = findPaths(sources, targets, binSearch); + auto paths = allPaths(sources, targets, binSearch); timer.stop(); auto searchTime = timer.msecs(); @@ -271,15 +273,67 @@ std::span PathSearch::handleSearchSide( } // _____________________________________________________________________________ -std::vector PathSearch::findPaths(std::span sources, - std::span targets, - BinSearchWrapper& binSearch) const { - switch (config_.algorithm_) { - case PathSearchAlgorithm::ALL_PATHS: - return allPaths(sources, targets, binSearch); - default: - AD_FAIL(); +std::vector PathSearch::findPaths( + const Id source, const std::unordered_set& targets, + const BinSearchWrapper& binSearch) const { + std::forward_list edgeStack; + Path currentPath; + std::unordered_map> pathCache; + std::unordered_set visited; + + auto addToCache = + [](std::unordered_map>& pathCache, + const Path& path, size_t stopIndex) { + for (size_t i = 0; i < stopIndex; i++) { + const auto& edge = path.edges_[i]; + auto startIndex = edge.start_.getBits(); + pathCache.try_emplace(startIndex, std::vector()); + pathCache[startIndex].push_back(path.startingAt(i)); + } + }; + + visited.insert(source.getBits()); + for (auto edge : binSearch.outgoingEdes(source)) { + edgeStack.push_front(std::move(edge)); + } + + while (!edgeStack.empty()) { + auto edge = edgeStack.front(); + edgeStack.pop_front(); + + while (!currentPath.empty() && edge.start_ != currentPath.end()) { + currentPath.pop_back(); + } + + currentPath.push_back(edge); + + // TODO clean this up + if (pathCache.contains(edge.end_.getBits())) { + for (auto subPath : pathCache[edge.end_.getBits()]) { + if (subPath.first() == currentPath.first()) { + addToCache(pathCache, currentPath, currentPath.size()); + } else { + auto fullPath = currentPath.concat(subPath); + addToCache(pathCache, fullPath, currentPath.size()); + } + } + continue; + } + + if (targets.empty() || targets.contains(edge.end_.getBits())) { + 
addToCache(pathCache, currentPath, currentPath.size()); + } + + if (!visited.contains(edge.end_.getBits())) { + for (auto outgoingEdge : binSearch.outgoingEdes(edge.end_)) { + edgeStack.push_front(outgoingEdge); + } + } + + visited.insert(edge.end_.getBits()); } + + return pathCache[source.getBits()]; } // _____________________________________________________________________________ @@ -298,8 +352,7 @@ std::vector PathSearch::allPaths(std::span sources, sources = binSearch.getSources(); } for (auto source : sources) { - for (auto path : binSearch.findPaths(source, targetSet)) { - std::ranges::reverse(path.edges_); + for (auto path : findPaths(source, targetSet, binSearch)) { paths.push_back(path); } } diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 8ac50c389a..62cbe50091 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -61,10 +61,31 @@ struct Path { */ void push_back(const Edge& edge) { edges_.push_back(edge); } + void pop_back() { edges_.pop_back(); } + /** * @brief Reverses the order of the edges in the path. */ void reverse() { std::ranges::reverse(edges_); } + + Path concat(const Path& other) const { + Path path; + path.edges_ = edges_; + path.edges_.insert(path.edges_.end(), other.edges_.begin(), + other.edges_.end()); + return path; + } + + const Id& end() { return edges_.back().end_; } + const Id& first() { return edges_.front().start_; } + + Path startingAt(size_t index) const { + std::vector edges; + for (size_t i = index; i < edges_.size(); i++) { + edges.push_back(edges_[i]); + } + return Path{edges}; + } }; /** @@ -243,9 +264,9 @@ class PathSearch : public Operation { * @brief Finds paths based on the configured algorithm. * @return A vector of paths. 
*/ - std::vector findPaths(std::span sources, - std::span targets, - BinSearchWrapper& binSearch) const; + std::vector findPaths(const Id source, + const std::unordered_set& targets, + const BinSearchWrapper& binSearch) const; /** * @brief Finds all paths in the graph. diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index a8b24a99e2..e5ea06481d 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -81,19 +81,18 @@ void PathQuery::addParameter(const SparqlTriple& triple) { throw PathSearchException("Predicates must be IRIs"); } - auto getVariable = [](std::string parameter, const TripleComponent& object){ + auto getVariable = [](std::string parameter, const TripleComponent& object) { if (!object.isVariable()) { throw PathSearchException("The value " + object.toString() + " for parameter '" + parameter + "' has to be a variable"); } - + return object.getVariable(); }; - auto setVariable = - [&](std::string parameter, const TripleComponent& object, - std::optional& existingValue) { + auto setVariable = [&](std::string parameter, const TripleComponent& object, + std::optional& existingValue) { auto variable = getVariable(parameter, object); if (existingValue.has_value()) { @@ -121,8 +120,7 @@ void PathQuery::addParameter(const SparqlTriple& triple) { } else if (predString.ends_with("edgeColumn>")) { setVariable("edgeColumn", object, edgeColumn_); } else if (predString.ends_with("edgeProperty>")) { - edgeProperties_.push_back( - getVariable("edgeProperty", object)); + edgeProperties_.push_back(getVariable("edgeProperty", object)); } else if (predString.ends_with("algorithm>")) { if (!object.isIri()) { throw PathSearchException("The 'algorithm' value has to be an Iri"); diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 752c75a2bb..7dc7413688 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -133,7 +133,6 @@ TEST(PathSearchTest, 
singlePathWithProperties) { ::testing::UnorderedElementsAreArray(expected)); } - /** * Graph: * 0 @@ -145,10 +144,10 @@ TEST(PathSearchTest, singlePathWithProperties) { TEST(PathSearchTest, twoPathsOneTarget) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 2}}); auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(0), V(3), I(1), I(0)}, - {V(3), V(2), I(1), I(1)}, + {V(0), V(3), I(0), I(0)}, + {V(3), V(2), I(0), I(1)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(2), I(1), I(1)}, }); std::vector sources{V(0)}; @@ -179,10 +178,10 @@ TEST(PathSearchTest, twoPathsOneTarget) { TEST(PathSearchTest, twoPathsTwoTargets) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 4}}); auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(0), V(3), I(1), I(0)}, - {V(3), V(4), I(1), I(1)}, + {V(0), V(3), I(0), I(0)}, + {V(3), V(4), I(0), I(1)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(2), I(1), I(1)}, }); std::vector sources{V(0)}; @@ -246,11 +245,11 @@ TEST(PathSearchTest, twoCycle) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}, {1, 3}, {3, 0}}); auto expected = makeIdTableFromVector({ {V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(2), V(0), I(0), I(2)}, + {V(1), V(3), I(0), I(1)}, + {V(3), V(0), I(0), I(2)}, {V(0), V(1), I(1), I(0)}, - {V(1), V(3), I(1), I(1)}, - {V(3), V(0), I(1), I(2)}, + {V(1), V(2), I(1), I(1)}, + {V(2), V(0), I(1), I(2)}, }); std::vector sources{V(0)}; @@ -281,14 +280,14 @@ TEST(PathSearchTest, twoCycle) { TEST(PathSearchTest, allPaths) { auto sub = makeIdTableFromVector({{0, 1}, {0, 2}, {1, 3}, {2, 3}, {2, 4}}); auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0)}, - {V(0), V(1), I(1), I(0)}, - {V(1), V(3), I(1), I(1)}, + {V(0), V(2), I(0), I(0)}, + {V(0), V(2), I(1), I(0)}, + {V(2), V(4), I(1), I(1)}, {V(0), V(2), I(2), I(0)}, - {V(0), V(2), I(3), I(0)}, - {V(2), V(3), I(3), I(1)}, - {V(0), V(2), I(4), 
I(0)}, - {V(2), V(4), I(4), I(1)}, + {V(2), V(3), I(2), I(1)}, + {V(0), V(1), I(3), I(0)}, + {V(0), V(1), I(4), I(0)}, + {V(1), V(3), I(4), I(1)}, }); std::vector sources{V(0)}; @@ -315,14 +314,14 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { {2, 3, 40, 41}, {2, 4, 50, 51}}); auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0), V(11), V(10)}, - {V(0), V(1), I(1), I(0), V(11), V(10)}, - {V(1), V(3), I(1), I(1), V(21), V(20)}, + {V(0), V(2), I(0), I(0), V(31), V(30)}, + {V(0), V(2), I(1), I(0), V(31), V(30)}, + {V(2), V(4), I(1), I(1), V(51), V(50)}, {V(0), V(2), I(2), I(0), V(31), V(30)}, - {V(0), V(2), I(3), I(0), V(31), V(30)}, - {V(2), V(3), I(3), I(1), V(41), V(40)}, - {V(0), V(2), I(4), I(0), V(31), V(30)}, - {V(2), V(4), I(4), I(1), V(51), V(50)}, + {V(2), V(3), I(2), I(1), V(41), V(40)}, + {V(0), V(1), I(3), I(0), V(11), V(10)}, + {V(0), V(1), I(4), I(0), V(11), V(10)}, + {V(1), V(3), I(4), I(1), V(21), V(20)}, }); std::vector sources{V(0)}; @@ -344,7 +343,6 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { ::testing::UnorderedElementsAreArray(expected)); } - /** * Graph: * 0 -> 1 -> 2 -> 3 -> 4 @@ -390,17 +388,17 @@ TEST(PathSearchTest, singlePathWithIrrelevantNode) { * | * 5 */ -TEST(PathSearchTest, allPathsElongatedDiamond) { +TEST(PathSearchTest, elongatedDiamond) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}}); auto expected = makeIdTableFromVector({ {V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(2), V(4), I(0), I(2)}, + {V(1), V(3), I(0), I(1)}, + {V(3), V(4), I(0), I(2)}, {V(4), V(5), I(0), I(3)}, {V(0), V(1), I(1), I(0)}, - {V(1), V(3), I(1), I(1)}, - {V(3), V(4), I(1), I(2)}, + {V(1), V(2), I(1), I(1)}, + {V(2), V(4), I(1), I(2)}, {V(4), V(5), I(1), I(3)}, }); @@ -434,16 +432,16 @@ TEST(PathSearchTest, multiSourceMultiTargetallPaths) { auto expected = makeIdTableFromVector({ {V(0), V(2), I(0), I(0)}, {V(2), V(3), I(0), I(1)}, - {V(3), V(4), I(0), I(2)}, + {V(3), V(5), 
I(0), I(2)}, {V(0), V(2), I(1), I(0)}, {V(2), V(3), I(1), I(1)}, - {V(3), V(5), I(1), I(2)}, + {V(3), V(4), I(1), I(2)}, {V(1), V(2), I(2), I(0)}, {V(2), V(3), I(2), I(1)}, - {V(3), V(4), I(2), I(2)}, + {V(3), V(5), I(2), I(2)}, {V(1), V(2), I(3), I(0)}, {V(2), V(3), I(3), I(1)}, - {V(3), V(5), I(3), I(2)}, + {V(3), V(4), I(3), I(2)}, }); std::vector sources{V(0), V(1)}; @@ -462,4 +460,3 @@ TEST(PathSearchTest, multiSourceMultiTargetallPaths) { ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } - From f952ed41968b035016b56f7fb4c80b2aed051d4c Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 13 Aug 2024 11:55:32 +0200 Subject: [PATCH 51/96] Switched path search implementation --- src/engine/PathSearch.cpp | 3 +- test/PathSearchTest.cpp | 104 +++++++++++++++++++++++++------------- 2 files changed, 72 insertions(+), 35 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index d5aaedf8a4..ce48fff7c4 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -352,7 +352,8 @@ std::vector PathSearch::allPaths(std::span sources, sources = binSearch.getSources(); } for (auto source : sources) { - for (auto path : findPaths(source, targetSet, binSearch)) { + for (auto path : binSearch.findPaths(source, targetSet)) { + path.reverse(); paths.push_back(path); } } diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 7dc7413688..70954b1cb6 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -133,6 +133,42 @@ TEST(PathSearchTest, singlePathWithProperties) { ::testing::UnorderedElementsAreArray(expected)); } +/** + * Graph: + * 0 + * /|\ + * / | \ + * 1 | 2 + * \ | / + * \|/ + * 3 + */ +// TEST(PathSearchTest, allPathsTriangle) { +// auto sub = makeIdTableFromVector({{0, 1}, {0, 2}, {0, 3}, {1, 3}, {1, 4}, {2, 3}, {2, 5}, {6, 0}}); +// auto expected = makeIdTableFromVector({ +// {V(0), V(1), I(0), I(0)}, +// {V(0), V(1), I(1), I(0)}, +// {V(0), V(1), I(2), 
I(0)}, +// {V(1), V(2), I(2), I(1)}, +// }); +// +// std::vector sources{V(6)}; +// std::vector targets{}; +// Vars vars = {Variable{"?start"}, Variable{"?end"}}; +// PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, +// sources, +// targets, +// Var{"?start"}, +// Var{"?end"}, +// Var{"?edgeIndex"}, +// Var{"?pathIndex"}, +// {}}; +// +// auto resultTable = performPathSearch(config, std::move(sub), vars); +// ASSERT_THAT(resultTable.idTable(), +// ::testing::UnorderedElementsAreArray(expected)); +// } + /** * Graph: * 0 @@ -144,10 +180,10 @@ TEST(PathSearchTest, singlePathWithProperties) { TEST(PathSearchTest, twoPathsOneTarget) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 2}}); auto expected = makeIdTableFromVector({ - {V(0), V(3), I(0), I(0)}, - {V(3), V(2), I(0), I(1)}, - {V(0), V(1), I(1), I(0)}, - {V(1), V(2), I(1), I(1)}, + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(0), V(3), I(1), I(0)}, + {V(3), V(2), I(1), I(1)}, }); std::vector sources{V(0)}; @@ -178,10 +214,10 @@ TEST(PathSearchTest, twoPathsOneTarget) { TEST(PathSearchTest, twoPathsTwoTargets) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 4}}); auto expected = makeIdTableFromVector({ - {V(0), V(3), I(0), I(0)}, - {V(3), V(4), I(0), I(1)}, - {V(0), V(1), I(1), I(0)}, - {V(1), V(2), I(1), I(1)}, + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(0), V(3), I(1), I(0)}, + {V(3), V(4), I(1), I(1)}, }); std::vector sources{V(0)}; @@ -245,11 +281,11 @@ TEST(PathSearchTest, twoCycle) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}, {1, 3}, {3, 0}}); auto expected = makeIdTableFromVector({ {V(0), V(1), I(0), I(0)}, - {V(1), V(3), I(0), I(1)}, - {V(3), V(0), I(0), I(2)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(0), I(0), I(2)}, {V(0), V(1), I(1), I(0)}, - {V(1), V(2), I(1), I(1)}, - {V(2), V(0), I(1), I(2)}, + {V(1), V(3), I(1), I(1)}, + {V(3), V(0), I(1), I(2)}, }); std::vector sources{V(0)}; @@ -280,14 +316,14 @@ 
TEST(PathSearchTest, twoCycle) { TEST(PathSearchTest, allPaths) { auto sub = makeIdTableFromVector({{0, 1}, {0, 2}, {1, 3}, {2, 3}, {2, 4}}); auto expected = makeIdTableFromVector({ - {V(0), V(2), I(0), I(0)}, - {V(0), V(2), I(1), I(0)}, - {V(2), V(4), I(1), I(1)}, + {V(0), V(1), I(0), I(0)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(3), I(1), I(1)}, {V(0), V(2), I(2), I(0)}, - {V(2), V(3), I(2), I(1)}, - {V(0), V(1), I(3), I(0)}, - {V(0), V(1), I(4), I(0)}, - {V(1), V(3), I(4), I(1)}, + {V(0), V(2), I(3), I(0)}, + {V(2), V(3), I(3), I(1)}, + {V(0), V(2), I(4), I(0)}, + {V(2), V(4), I(4), I(1)}, }); std::vector sources{V(0)}; @@ -314,14 +350,14 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { {2, 3, 40, 41}, {2, 4, 50, 51}}); auto expected = makeIdTableFromVector({ - {V(0), V(2), I(0), I(0), V(31), V(30)}, - {V(0), V(2), I(1), I(0), V(31), V(30)}, - {V(2), V(4), I(1), I(1), V(51), V(50)}, + {V(0), V(1), I(0), I(0), V(11), V(10)}, + {V(0), V(1), I(1), I(0), V(11), V(10)}, + {V(1), V(3), I(1), I(1), V(21), V(20)}, {V(0), V(2), I(2), I(0), V(31), V(30)}, - {V(2), V(3), I(2), I(1), V(41), V(40)}, - {V(0), V(1), I(3), I(0), V(11), V(10)}, - {V(0), V(1), I(4), I(0), V(11), V(10)}, - {V(1), V(3), I(4), I(1), V(21), V(20)}, + {V(0), V(2), I(3), I(0), V(31), V(30)}, + {V(2), V(3), I(3), I(1), V(41), V(40)}, + {V(0), V(2), I(4), I(0), V(31), V(30)}, + {V(2), V(4), I(4), I(1), V(51), V(50)}, }); std::vector sources{V(0)}; @@ -393,12 +429,12 @@ TEST(PathSearchTest, elongatedDiamond) { makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}}); auto expected = makeIdTableFromVector({ {V(0), V(1), I(0), I(0)}, - {V(1), V(3), I(0), I(1)}, - {V(3), V(4), I(0), I(2)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(4), I(0), I(2)}, {V(4), V(5), I(0), I(3)}, {V(0), V(1), I(1), I(0)}, - {V(1), V(2), I(1), I(1)}, - {V(2), V(4), I(1), I(2)}, + {V(1), V(3), I(1), I(1)}, + {V(3), V(4), I(1), I(2)}, {V(4), V(5), I(1), I(3)}, }); @@ -432,16 +468,16 @@ TEST(PathSearchTest, 
multiSourceMultiTargetallPaths) { auto expected = makeIdTableFromVector({ {V(0), V(2), I(0), I(0)}, {V(2), V(3), I(0), I(1)}, - {V(3), V(5), I(0), I(2)}, + {V(3), V(4), I(0), I(2)}, {V(0), V(2), I(1), I(0)}, {V(2), V(3), I(1), I(1)}, - {V(3), V(4), I(1), I(2)}, + {V(3), V(5), I(1), I(2)}, {V(1), V(2), I(2), I(0)}, {V(2), V(3), I(2), I(1)}, - {V(3), V(5), I(2), I(2)}, + {V(3), V(4), I(2), I(2)}, {V(1), V(2), I(3), I(0)}, {V(2), V(3), I(3), I(1)}, - {V(3), V(4), I(3), I(2)}, + {V(3), V(5), I(3), I(2)}, }); std::vector sources{V(0), V(1)}; From f80694b559f5766ad18fc7ead716e24b9a120d28 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sat, 7 Sep 2024 07:49:00 +0200 Subject: [PATCH 52/96] Revert "Switched path search implementation" This reverts commit f952ed41968b035016b56f7fb4c80b2aed051d4c. --- src/engine/PathSearch.cpp | 3 +- test/PathSearchTest.cpp | 104 +++++++++++++------------------------- 2 files changed, 35 insertions(+), 72 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index ce48fff7c4..d5aaedf8a4 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -352,8 +352,7 @@ std::vector PathSearch::allPaths(std::span sources, sources = binSearch.getSources(); } for (auto source : sources) { - for (auto path : binSearch.findPaths(source, targetSet)) { - path.reverse(); + for (auto path : findPaths(source, targetSet, binSearch)) { paths.push_back(path); } } diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 70954b1cb6..7dc7413688 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -133,42 +133,6 @@ TEST(PathSearchTest, singlePathWithProperties) { ::testing::UnorderedElementsAreArray(expected)); } -/** - * Graph: - * 0 - * /|\ - * / | \ - * 1 | 2 - * \ | / - * \|/ - * 3 - */ -// TEST(PathSearchTest, allPathsTriangle) { -// auto sub = makeIdTableFromVector({{0, 1}, {0, 2}, {0, 3}, {1, 3}, {1, 4}, {2, 3}, {2, 5}, {6, 0}}); -// auto expected = makeIdTableFromVector({ -// {V(0), 
V(1), I(0), I(0)}, -// {V(0), V(1), I(1), I(0)}, -// {V(0), V(1), I(2), I(0)}, -// {V(1), V(2), I(2), I(1)}, -// }); -// -// std::vector sources{V(6)}; -// std::vector targets{}; -// Vars vars = {Variable{"?start"}, Variable{"?end"}}; -// PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, -// sources, -// targets, -// Var{"?start"}, -// Var{"?end"}, -// Var{"?edgeIndex"}, -// Var{"?pathIndex"}, -// {}}; -// -// auto resultTable = performPathSearch(config, std::move(sub), vars); -// ASSERT_THAT(resultTable.idTable(), -// ::testing::UnorderedElementsAreArray(expected)); -// } - /** * Graph: * 0 @@ -180,10 +144,10 @@ TEST(PathSearchTest, singlePathWithProperties) { TEST(PathSearchTest, twoPathsOneTarget) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 2}}); auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(0), V(3), I(1), I(0)}, - {V(3), V(2), I(1), I(1)}, + {V(0), V(3), I(0), I(0)}, + {V(3), V(2), I(0), I(1)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(2), I(1), I(1)}, }); std::vector sources{V(0)}; @@ -214,10 +178,10 @@ TEST(PathSearchTest, twoPathsOneTarget) { TEST(PathSearchTest, twoPathsTwoTargets) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 4}}); auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(0), V(3), I(1), I(0)}, - {V(3), V(4), I(1), I(1)}, + {V(0), V(3), I(0), I(0)}, + {V(3), V(4), I(0), I(1)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(2), I(1), I(1)}, }); std::vector sources{V(0)}; @@ -281,11 +245,11 @@ TEST(PathSearchTest, twoCycle) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}, {1, 3}, {3, 0}}); auto expected = makeIdTableFromVector({ {V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(2), V(0), I(0), I(2)}, + {V(1), V(3), I(0), I(1)}, + {V(3), V(0), I(0), I(2)}, {V(0), V(1), I(1), I(0)}, - {V(1), V(3), I(1), I(1)}, - {V(3), V(0), I(1), I(2)}, + {V(1), V(2), I(1), I(1)}, + {V(2), V(0), I(1), 
I(2)}, }); std::vector sources{V(0)}; @@ -316,14 +280,14 @@ TEST(PathSearchTest, twoCycle) { TEST(PathSearchTest, allPaths) { auto sub = makeIdTableFromVector({{0, 1}, {0, 2}, {1, 3}, {2, 3}, {2, 4}}); auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0)}, - {V(0), V(1), I(1), I(0)}, - {V(1), V(3), I(1), I(1)}, + {V(0), V(2), I(0), I(0)}, + {V(0), V(2), I(1), I(0)}, + {V(2), V(4), I(1), I(1)}, {V(0), V(2), I(2), I(0)}, - {V(0), V(2), I(3), I(0)}, - {V(2), V(3), I(3), I(1)}, - {V(0), V(2), I(4), I(0)}, - {V(2), V(4), I(4), I(1)}, + {V(2), V(3), I(2), I(1)}, + {V(0), V(1), I(3), I(0)}, + {V(0), V(1), I(4), I(0)}, + {V(1), V(3), I(4), I(1)}, }); std::vector sources{V(0)}; @@ -350,14 +314,14 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { {2, 3, 40, 41}, {2, 4, 50, 51}}); auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0), V(11), V(10)}, - {V(0), V(1), I(1), I(0), V(11), V(10)}, - {V(1), V(3), I(1), I(1), V(21), V(20)}, + {V(0), V(2), I(0), I(0), V(31), V(30)}, + {V(0), V(2), I(1), I(0), V(31), V(30)}, + {V(2), V(4), I(1), I(1), V(51), V(50)}, {V(0), V(2), I(2), I(0), V(31), V(30)}, - {V(0), V(2), I(3), I(0), V(31), V(30)}, - {V(2), V(3), I(3), I(1), V(41), V(40)}, - {V(0), V(2), I(4), I(0), V(31), V(30)}, - {V(2), V(4), I(4), I(1), V(51), V(50)}, + {V(2), V(3), I(2), I(1), V(41), V(40)}, + {V(0), V(1), I(3), I(0), V(11), V(10)}, + {V(0), V(1), I(4), I(0), V(11), V(10)}, + {V(1), V(3), I(4), I(1), V(21), V(20)}, }); std::vector sources{V(0)}; @@ -429,12 +393,12 @@ TEST(PathSearchTest, elongatedDiamond) { makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}}); auto expected = makeIdTableFromVector({ {V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(2), V(4), I(0), I(2)}, + {V(1), V(3), I(0), I(1)}, + {V(3), V(4), I(0), I(2)}, {V(4), V(5), I(0), I(3)}, {V(0), V(1), I(1), I(0)}, - {V(1), V(3), I(1), I(1)}, - {V(3), V(4), I(1), I(2)}, + {V(1), V(2), I(1), I(1)}, + {V(2), V(4), I(1), I(2)}, {V(4), V(5), I(1), 
I(3)}, }); @@ -468,16 +432,16 @@ TEST(PathSearchTest, multiSourceMultiTargetallPaths) { auto expected = makeIdTableFromVector({ {V(0), V(2), I(0), I(0)}, {V(2), V(3), I(0), I(1)}, - {V(3), V(4), I(0), I(2)}, + {V(3), V(5), I(0), I(2)}, {V(0), V(2), I(1), I(0)}, {V(2), V(3), I(1), I(1)}, - {V(3), V(5), I(1), I(2)}, + {V(3), V(4), I(1), I(2)}, {V(1), V(2), I(2), I(0)}, {V(2), V(3), I(2), I(1)}, - {V(3), V(4), I(2), I(2)}, + {V(3), V(5), I(2), I(2)}, {V(1), V(2), I(3), I(0)}, {V(2), V(3), I(3), I(1)}, - {V(3), V(5), I(3), I(2)}, + {V(3), V(4), I(3), I(2)}, }); std::vector sources{V(0), V(1)}; From c3fd4cda61f5fb77536ae1158cf47d8e4eb80d32 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sat, 7 Sep 2024 08:08:24 +0200 Subject: [PATCH 53/96] Fixed iterative PathSearch --- src/engine/PathSearch.cpp | 77 ++++++++------------------------------- src/engine/PathSearch.h | 4 -- test/PathSearchTest.cpp | 56 ++++++++++++++++++++++++---- 3 files changed, 63 insertions(+), 74 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index d5aaedf8a4..de422f5d9f 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -4,6 +4,7 @@ #include "PathSearch.h" +#include #include #include "engine/CallFixedSize.h" @@ -33,33 +34,6 @@ std::vector BinSearchWrapper::outgoingEdes(const Id node) const { return edges; } -std::vector BinSearchWrapper::findPaths( - const Id& source, const std::unordered_set& targets) { - if (pathCache_.contains(source.getBits())) { - return pathCache_[source.getBits()]; - } - pathCache_[source.getBits()] = {}; - std::vector paths; - - auto edges = outgoingEdes(source); - for (const auto& edge : edges) { - if (targets.contains(edge.end_.getBits()) || targets.empty()) { - Path path; - path.push_back(edge); - paths.push_back(std::move(path)); - } - auto partialPaths = findPaths(edge.end_, targets); - for (auto path : partialPaths) { - path.push_back(edge); - paths.push_back(std::move(path)); - } - } - - 
pathCache_[source.getBits()].insert(pathCache_[source.getBits()].end(), - paths.begin(), paths.end()); - return paths; -} - // _____________________________________________________________________________ std::span BinSearchWrapper::getSources() const { auto startIds = table_.getColumn(startCol_); @@ -276,64 +250,42 @@ std::span PathSearch::handleSearchSide( std::vector PathSearch::findPaths( const Id source, const std::unordered_set& targets, const BinSearchWrapper& binSearch) const { - std::forward_list edgeStack; + std::vector edgeStack; Path currentPath; std::unordered_map> pathCache; + std::vector result; std::unordered_set visited; - auto addToCache = - [](std::unordered_map>& pathCache, - const Path& path, size_t stopIndex) { - for (size_t i = 0; i < stopIndex; i++) { - const auto& edge = path.edges_[i]; - auto startIndex = edge.start_.getBits(); - pathCache.try_emplace(startIndex, std::vector()); - pathCache[startIndex].push_back(path.startingAt(i)); - } - }; - visited.insert(source.getBits()); for (auto edge : binSearch.outgoingEdes(source)) { - edgeStack.push_front(std::move(edge)); + edgeStack.push_back(std::move(edge)); } while (!edgeStack.empty()) { - auto edge = edgeStack.front(); - edgeStack.pop_front(); + auto edge = edgeStack.back(); + edgeStack.pop_back(); + + visited.insert(edge.end_.getBits()); while (!currentPath.empty() && edge.start_ != currentPath.end()) { + visited.erase(currentPath.end().getBits()); currentPath.pop_back(); } currentPath.push_back(edge); - // TODO clean this up - if (pathCache.contains(edge.end_.getBits())) { - for (auto subPath : pathCache[edge.end_.getBits()]) { - if (subPath.first() == currentPath.first()) { - addToCache(pathCache, currentPath, currentPath.size()); - } else { - auto fullPath = currentPath.concat(subPath); - addToCache(pathCache, fullPath, currentPath.size()); - } - } - continue; - } - if (targets.empty() || targets.contains(edge.end_.getBits())) { - addToCache(pathCache, currentPath, currentPath.size()); 
+ result.push_back(currentPath); } - if (!visited.contains(edge.end_.getBits())) { - for (auto outgoingEdge : binSearch.outgoingEdes(edge.end_)) { - edgeStack.push_front(outgoingEdge); + for (auto outgoingEdge : binSearch.outgoingEdes(edge.end_)) { + if (!visited.contains(outgoingEdge.end_.getBits())){ + edgeStack.push_back(outgoingEdge); } } - - visited.insert(edge.end_.getBits()); } - return pathCache[source.getBits()]; + return result; } // _____________________________________________________________________________ @@ -353,6 +305,7 @@ std::vector PathSearch::allPaths(std::span sources, } for (auto source : sources) { for (auto path : findPaths(source, targetSet, binSearch)) { + // path.reverse(); paths.push_back(path); } } diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 62cbe50091..4392f160e2 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -157,7 +157,6 @@ class BinSearchWrapper { size_t startCol_; size_t endCol_; std::vector edgeCols_; - std::unordered_map> pathCache_; public: BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, @@ -165,9 +164,6 @@ class BinSearchWrapper { std::vector outgoingEdes(const Id node) const; - std::vector findPaths(const Id& source, - const std::unordered_set& targets); - std::span getSources() const; private: diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 7dc7413688..fb9af52d69 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -213,15 +213,16 @@ TEST(PathSearchTest, cycle) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}}); auto expected = makeIdTableFromVector({ {V(0), V(1), I(0), I(0)}, - {V(1), V(2), I(0), I(1)}, - {V(2), V(0), I(0), I(2)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(2), I(1), I(1)}, }); std::vector sources{V(0)}; + std::vector targets{}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, sources, - sources, + targets, Var{"?start"}, 
Var{"?end"}, Var{"?edgeIndex"}, @@ -245,18 +246,18 @@ TEST(PathSearchTest, twoCycle) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}, {1, 3}, {3, 0}}); auto expected = makeIdTableFromVector({ {V(0), V(1), I(0), I(0)}, - {V(1), V(3), I(0), I(1)}, - {V(3), V(0), I(0), I(2)}, {V(0), V(1), I(1), I(0)}, - {V(1), V(2), I(1), I(1)}, - {V(2), V(0), I(1), I(2)}, + {V(1), V(3), I(1), I(1)}, + {V(0), V(1), I(2), I(0)}, + {V(1), V(2), I(2), I(1)}, }); std::vector sources{V(0)}; + std::vector targets{}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, sources, - sources, + targets, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, @@ -343,6 +344,45 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { ::testing::UnorderedElementsAreArray(expected)); } +/** + * Graph: + * + * 0 + * |\ + * | \ + * 1->2->3 + */ +TEST(PathSearchTest, allPathsPartial) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 2}, {2, 3}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0)}, + {V(0), V(2), I(1), I(0)}, + {V(2), V(3), I(1), I(1)}, + {V(0), V(1), I(2), I(0)}, + {V(0), V(1), I(3), I(0)}, + {V(1), V(2), I(3), I(1)}, + {V(0), V(1), I(4), I(0)}, + {V(1), V(2), I(4), I(1)}, + {V(2), V(3), I(4), I(2)}, + }); + + std::vector sources{V(0)}; + std::vector targets{}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + /** * Graph: * 0 -> 1 -> 2 -> 3 -> 4 From 18271538f7f6f2a3839f5f94d6b5fe2763d83e4a Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sat, 7 Sep 2024 08:29:38 +0200 Subject: [PATCH 54/96] Added namespace pathSearch --- src/engine/PathSearch.cpp | 2 + 
src/engine/PathSearch.h | 153 +++++++++++++++++++------------------- 2 files changed, 78 insertions(+), 77 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index de422f5d9f..7195d6e23c 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -11,6 +11,8 @@ #include "engine/QueryExecutionTree.h" #include "engine/VariableToColumnMap.h" +using namespace pathSearch; + // _____________________________________________________________________________ BinSearchWrapper::BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, std::vector edgeCols) diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 4392f160e2..c13c9d86c8 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -5,88 +5,105 @@ #pragma once #include -#include #include -#include -#include #include #include #include "engine/Operation.h" -#include "engine/VariableToColumnMap.h" #include "global/Id.h" -#include "index/Vocabulary.h" enum class PathSearchAlgorithm { ALL_PATHS }; using TreeAndCol = std::pair, size_t>; using SearchSide = std::variant>; -/** - * @brief Represents an edge in the graph. - */ -struct Edge { - // The starting node ID. - Id start_; - - // The ending node ID. - Id end_; +namespace pathSearch { + /** + * @brief Represents an edge in the graph. + */ + struct Edge { + // The starting node ID. + Id start_; - // Properties associated with the edge. - std::vector edgeProperties_; -}; + // The ending node ID. + Id end_; -/** - * @brief Represents a path consisting of multiple edges. - */ -struct Path { - // The edges that make up the path. - std::vector edges_; + // Properties associated with the edge. + std::vector edgeProperties_; + }; /** - * @brief Checks if the path is empty. - * @return True if the path is empty, false otherwise. + * @brief Represents a path consisting of multiple edges. */ - bool empty() const { return edges_.empty(); } + struct Path { + // The edges that make up the path. 
+ std::vector edges_; + + /** + * @brief Checks if the path is empty. + * @return True if the path is empty, false otherwise. + */ + bool empty() const { return edges_.empty(); } + + /** + * @brief Returns the number of edges in the path. + * @return The number of edges in the path. + */ + size_t size() const { return edges_.size(); } + + /** + * @brief Adds an edge to the end of the path. + * @param edge The edge to add. + */ + void push_back(const Edge& edge) { edges_.push_back(edge); } + + void pop_back() { edges_.pop_back(); } + + /** + * @brief Reverses the order of the edges in the path. + */ + void reverse() { std::ranges::reverse(edges_); } + + Path concat(const Path& other) const { + Path path; + path.edges_ = edges_; + path.edges_.insert(path.edges_.end(), other.edges_.begin(), + other.edges_.end()); + return path; + } - /** - * @brief Returns the number of edges in the path. - * @return The number of edges in the path. - */ - size_t size() const { return edges_.size(); } + const Id& end() { return edges_.back().end_; } + const Id& first() { return edges_.front().start_; } - /** - * @brief Adds an edge to the end of the path. - * @param edge The edge to add. - */ - void push_back(const Edge& edge) { edges_.push_back(edge); } + Path startingAt(size_t index) const { + std::vector edges; + for (size_t i = index; i < edges_.size(); i++) { + edges.push_back(edges_[i]); + } + return Path{edges}; + } + }; - void pop_back() { edges_.pop_back(); } + class BinSearchWrapper { + const IdTable& table_; + size_t startCol_; + size_t endCol_; + std::vector edgeCols_; - /** - * @brief Reverses the order of the edges in the path. 
- */ - void reverse() { std::ranges::reverse(edges_); } - - Path concat(const Path& other) const { - Path path; - path.edges_ = edges_; - path.edges_.insert(path.edges_.end(), other.edges_.begin(), - other.edges_.end()); - return path; - } + public: + BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, + std::vector edgeCols); - const Id& end() { return edges_.back().end_; } - const Id& first() { return edges_.front().start_; } + std::vector outgoingEdes(const Id node) const; - Path startingAt(size_t index) const { - std::vector edges; - for (size_t i = index; i < edges_.size(); i++) { - edges.push_back(edges_[i]); - } - return Path{edges}; - } -}; + std::span getSources() const; + + private: + Edge makeEdgeFromRow(size_t row) const; + }; +} + +using namespace pathSearch; /** * @brief Struct to hold configuration parameters for the path search. @@ -152,24 +169,6 @@ struct PathSearchConfiguration { } }; -class BinSearchWrapper { - const IdTable& table_; - size_t startCol_; - size_t endCol_; - std::vector edgeCols_; - - public: - BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, - std::vector edgeCols); - - std::vector outgoingEdes(const Id node) const; - - std::span getSources() const; - - private: - Edge makeEdgeFromRow(size_t row) const; -}; - /** * @brief Class to perform various path search algorithms on a graph. 
*/ From a6a8957343311e09fd813b0b8f9a1837bbc142d7 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sat, 7 Sep 2024 08:32:13 +0200 Subject: [PATCH 55/96] Use already started timer --- src/engine/PathSearch.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 7195d6e23c..2e604f3e99 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -181,8 +181,7 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { const IdTable& dynSub = subRes->idTable(); if (!dynSub.empty()) { - auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); - timer.start(); + auto timer = ad_utility::Timer(ad_utility::Timer::Started); auto subStartColumn = subtree_->getVariableColumn(config_.start_); auto subEndColumn = subtree_->getVariableColumn(config_.end_); From 71d891dc193c6f0cb90f78a2fee558fe4d7e9198 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 18 Sep 2024 14:54:28 +0200 Subject: [PATCH 56/96] Added bound sides to children --- src/engine/PathSearch.cpp | 18 +++++++++++++++++- test/QueryPlannerTest.cpp | 6 +++--- test/QueryPlannerTestHelpers.h | 6 ++++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 2e604f3e99..18de0808a9 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -114,6 +114,15 @@ PathSearch::PathSearch(QueryExecutionContext* qec, std::vector PathSearch::getChildren() { std::vector res; res.push_back(subtree_.get()); + + if (boundSources_.has_value()) { + res.push_back(boundSources_->first.get()); + } + + if (boundTargets_.has_value()) { + res.push_back(boundTargets_->first.get()); + } + return res; }; @@ -156,7 +165,14 @@ float PathSearch::getMultiplicity(size_t col) { }; // _____________________________________________________________________________ -bool PathSearch::knownEmptyResult() { return subtree_->knownEmptyResult(); }; +bool 
PathSearch::knownEmptyResult() { + for (auto child: getChildren()) { + if (child->knownEmptyResult()) { + return true; + } + } + return false; +}; // _____________________________________________________________________________ vector PathSearch::resultSortedOn() const { return {}; }; diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 738f310297..a22fc31d31 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -957,7 +957,7 @@ TEST(QueryPlanner, PathSearchSourceBound) { "{SELECT * WHERE {" "?start

?end." "}}}}", - h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); + h::PathSearch(config, true, true, scan("?start", "

", "?end"), h::ValuesClause("VALUES (?source) { () }")), qec); } TEST(QueryPlanner, PathSearchTargetBound) { @@ -990,7 +990,7 @@ TEST(QueryPlanner, PathSearchTargetBound) { "{SELECT * WHERE {" "?start

?end." "}}}}", - h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); + h::PathSearch(config, true, true, scan("?start", "

", "?end"), h::ValuesClause("VALUES (?target) { () }")), qec); } TEST(QueryPlanner, PathSearchBothBound) { @@ -1023,7 +1023,7 @@ TEST(QueryPlanner, PathSearchBothBound) { "{SELECT * WHERE {" "?start

?end." "}}}}", - h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); + h::PathSearch(config, true, true, scan("?start", "

", "?end"), h::ValuesClause("VALUES (?source\t?target) { ( ) }"), h::ValuesClause("VALUES (?source\t?target) { ( ) }")), qec); } // __________________________________________________________________________ diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index a206d74a00..da483fa644 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -304,6 +304,12 @@ inline auto PathSearch = AD_PROPERTY(PathSearch, isTargetBound, Eq(targetBound)))); }; +inline auto ValuesClause = [](string cacheKey){ + return RootOperation<::Values>(AllOf( + AD_PROPERTY(Values, getCacheKey, cacheKey) + )); +}; + // Match a sort operation. Currently, this is only required by the binary search // version of the transitive path operation. This matcher checks only the // children of the sort operation. From 0af23f03ee5920306c9510c2ad86233eaa6ed56f Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 20 Sep 2024 09:47:56 +0200 Subject: [PATCH 57/96] Refactored search side handling --- src/engine/PathSearch.cpp | 70 +++++++++++++++++++++++++++------------ src/engine/PathSearch.h | 18 ++++++---- test/QueryPlannerTest.cpp | 34 +++++++++++++++++++ 3 files changed, 94 insertions(+), 28 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 18de0808a9..05a5c33cf1 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -115,12 +115,16 @@ std::vector PathSearch::getChildren() { std::vector res; res.push_back(subtree_.get()); - if (boundSources_.has_value()) { - res.push_back(boundSources_->first.get()); - } + if (sourceAndTargetTree_.has_value()) { + res.push_back(sourceAndTargetTree_.value().get()); + } else { + if (sourceTree_.has_value()) { + res.push_back(sourceTree_.value().get()); + } - if (boundTargets_.has_value()) { - res.push_back(boundTargets_->first.get()); + if (targetTree_.has_value()) { + res.push_back(targetTree_.value().get()); + } } return res; @@ -180,13 +184,23 @@ vector 
PathSearch::resultSortedOn() const { return {}; }; // _____________________________________________________________________________ void PathSearch::bindSourceSide(std::shared_ptr sourcesOp, size_t inputCol) { - boundSources_ = {sourcesOp, inputCol}; + sourceTree_ = sourcesOp; + sourceCol_ = inputCol; } // _____________________________________________________________________________ void PathSearch::bindTargetSide(std::shared_ptr targetsOp, size_t inputCol) { - boundTargets_ = {targetsOp, inputCol}; + targetTree_ = targetsOp; + targetCol_ = inputCol; +} + +// _____________________________________________________________________________ +void PathSearch::bindSourceAndTargetSide(std::shared_ptr sourceAndTargetOp, + size_t sourceCol, size_t targetCol) { + sourceAndTargetTree_ = sourceAndTargetOp; + sourceCol_ = sourceCol; + targetCol_ = targetCol; } // _____________________________________________________________________________ @@ -212,10 +226,7 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto buildingTime = timer.msecs(); timer.start(); - std::span sources = - handleSearchSide(config_.sources_, boundSources_); - std::span targets = - handleSearchSide(config_.targets_, boundTargets_); + auto [sources, targets] = handleSearchSides(); timer.stop(); auto sideTime = timer.msecs(); @@ -249,18 +260,35 @@ VariableToColumnMap PathSearch::computeVariableToColumnMap() const { return variableColumns_; }; -std::span PathSearch::handleSearchSide( - const SearchSide& side, const std::optional& binding) const { - std::span ids; - bool isVariable = std::holds_alternative(side); - if (isVariable && binding.has_value()) { - ids = binding->first->getResult()->idTable().getColumn(binding->second); - } else if (isVariable) { - return {}; +// _____________________________________________________________________________ +std::pair, std::span> PathSearch::handleSearchSides() const { + std::span sourceIds; + std::span targetIds; + + if 
(sourceAndTargetTree_.has_value()) { + auto resultTable = sourceAndTargetTree_.value()->getResult(); + sourceIds = resultTable->idTable().getColumn(sourceCol_.value()); + targetIds = resultTable->idTable().getColumn(targetCol_.value()); + return {sourceIds, targetIds}; + } + + if (sourceTree_.has_value()) { + sourceIds = sourceTree_.value()->getResult()->idTable().getColumn(sourceCol_.value()); + } else if (config_.sourceIsVariable()) { + sourceIds = {}; + } else { + sourceIds = std::get>(config_.sources_); + } + + if (targetTree_.has_value()) { + targetIds = targetTree_.value()->getResult()->idTable().getColumn(targetCol_.value()); + } else if (config_.targetIsVariable()) { + targetIds = {}; } else { - ids = std::get>(side); + targetIds = std::get>(config_.targets_); } - return ids; + + return {sourceIds, targetIds}; } // _____________________________________________________________________________ diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index c13c9d86c8..411796cafa 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -14,7 +14,6 @@ enum class PathSearchAlgorithm { ALL_PATHS }; -using TreeAndCol = std::pair, size_t>; using SearchSide = std::variant>; namespace pathSearch { @@ -180,8 +179,12 @@ class PathSearch : public Operation { // Configuration for the path search. 
PathSearchConfiguration config_; - std::optional boundSources_; - std::optional boundTargets_; + std::optional sourceCol_; + std::optional targetCol_; + + std::optional> sourceTree_; + std::optional> targetTree_; + std::optional> sourceAndTargetTree_; public: PathSearch(QueryExecutionContext* qec, @@ -222,12 +225,14 @@ class PathSearch : public Operation { void bindTargetSide(std::shared_ptr targetsOp, size_t inputCol); + void bindSourceAndTargetSide(std::shared_ptr sourceAndTargetOp, size_t sourceCol, size_t targetCol); + bool isSourceBound() const { - return boundSources_.has_value() || !config_.sourceIsVariable(); + return sourceTree_.has_value() || sourceAndTargetTree_.has_value() || !config_.sourceIsVariable(); } bool isTargetBound() const { - return boundTargets_.has_value() || !config_.targetIsVariable(); + return targetTree_.has_value() || sourceAndTargetTree_.has_value() || !config_.targetIsVariable(); } std::optional getSourceColumn() const { @@ -252,8 +257,7 @@ class PathSearch : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; private: - std::span handleSearchSide( - const SearchSide& side, const std::optional& binding) const; + std::pair, std::span> handleSearchSides() const; /** * @brief Finds paths based on the configured algorithm. diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index a22fc31d31..656ac5cb90 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -1026,6 +1026,40 @@ TEST(QueryPlanner, PathSearchBothBound) { h::PathSearch(config, true, true, scan("?start", "

", "?end"), h::ValuesClause("VALUES (?source\t?target) { ( ) }"), h::ValuesClause("VALUES (?source\t?target) { ( ) }")), qec); } +TEST(QueryPlanner, PathSearchBothBoundIndividually) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + Variable sources{"?source"}; + Variable targets{"?target"}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES (?source) {()}" + "VALUES (?target) {()}" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source ;" + "pathSearch:target ?target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end"), h::ValuesClause("VALUES (?source) { () }"), h::ValuesClause("VALUES (?target) { () }")), qec); +} + // __________________________________________________________________________ TEST(QueryPlanner, PathSearchMissingStart) { auto qec = ad_utility::testing::getQec("

.

"); From 1b3efa66c7db202ba534ba51b90e25b23a0c771b Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 20 Sep 2024 10:47:37 +0200 Subject: [PATCH 58/96] fixed create join with two columns at once --- src/engine/QueryPlanner.cpp | 29 +++++++++++++++++++++++++++-- test/QueryPlannerTest.cpp | 2 +- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 208ea79e55..4abb1dfe17 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1839,7 +1839,30 @@ auto QueryPlanner::createJoinWithPathSearch( auto sourceColumn = pathSearch->getSourceColumn(); auto targetColumn = pathSearch->getTargetColumn(); - for (auto jc : jcs) { + if (jcs.size() == 2) { + // To join source and target, both must be variables + if (!sourceColumn || !targetColumn) { + return std::nullopt; + } + + auto firstJc = jcs[0]; + auto firstCol = aRootOp ? firstJc[0] : firstJc[1]; + auto firstOtherCol = aRootOp ? firstJc[1]: firstJc[0]; + + auto secondJc = jcs[1]; + auto secondCol = aRootOp ? secondJc[0] : secondJc[1]; + auto secondOtherCol = aRootOp ? secondJc[1]: secondJc[0]; + + if (sourceColumn == firstCol && targetColumn == secondCol) { + pathSearch->bindSourceAndTargetSide( + sibling._qet, firstOtherCol, secondOtherCol); + } else if (sourceColumn == secondCol && targetColumn == firstCol) { + pathSearch->bindSourceAndTargetSide(sibling._qet, secondOtherCol, firstOtherCol); + } else { + return std::nullopt; + } + } else if (jcs.size() == 1) { + auto jc = jcs[0]; const size_t thisCol = aRootOp ? jc[0] : jc[1]; const size_t otherCol = aRootOp ? 
jc[1] : jc[0]; @@ -1849,7 +1872,9 @@ auto QueryPlanner::createJoinWithPathSearch( } else if (targetColumn && targetColumn == thisCol && !pathSearch->isTargetBound()) { pathSearch->bindTargetSide(sibling._qet, otherCol); - } + } + } else { + return std::nullopt; } SubtreePlan plan = makeSubtreePlan(pathSearch); diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 656ac5cb90..e9372a4121 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -1023,7 +1023,7 @@ TEST(QueryPlanner, PathSearchBothBound) { "{SELECT * WHERE {" "?start

?end." "}}}}", - h::PathSearch(config, true, true, scan("?start", "

", "?end"), h::ValuesClause("VALUES (?source\t?target) { ( ) }"), h::ValuesClause("VALUES (?source\t?target) { ( ) }")), qec); + h::PathSearch(config, true, true, scan("?start", "

", "?end"), h::ValuesClause("VALUES (?source\t?target) { ( ) }")), qec); } TEST(QueryPlanner, PathSearchBothBoundIndividually) { From 3433bd72ced8eafe0ee9033825fdb9c8c876f687 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 20 Sep 2024 15:34:53 +0200 Subject: [PATCH 59/96] Added option to do non-cartesian path search --- src/engine/PathSearch.cpp | 31 ++++++++++++++++++---------- src/engine/PathSearch.h | 4 +++- src/parser/GraphPatternOperation.cpp | 7 ++++++- src/parser/GraphPatternOperation.h | 1 + src/parser/TripleComponent.h | 6 ++++++ test/PathSearchTest.cpp | 28 +++++++++++++++++++++++++ 6 files changed, 64 insertions(+), 13 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 05a5c33cf1..d67d42b782 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -6,6 +6,7 @@ #include #include +#include #include "engine/CallFixedSize.h" #include "engine/QueryExecutionTree.h" @@ -232,7 +233,7 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto sideTime = timer.msecs(); timer.start(); - auto paths = allPaths(sources, targets, binSearch); + auto paths = allPaths(sources, targets, binSearch, config_.cartesian_); timer.stop(); auto searchTime = timer.msecs(); @@ -336,22 +337,30 @@ std::vector PathSearch::findPaths( // _____________________________________________________________________________ std::vector PathSearch::allPaths(std::span sources, std::span targets, - BinSearchWrapper& binSearch) const { + BinSearchWrapper& binSearch, + bool cartesian) const { std::vector paths; Path path; - std::unordered_set targetSet; - for (auto target : targets) { - targetSet.insert(target.getBits()); - } - if (sources.empty()) { sources = binSearch.getSources(); } - for (auto source : sources) { - for (auto path : findPaths(source, targetSet, binSearch)) { - // path.reverse(); - paths.push_back(path); + + if (cartesian || sources.size() != targets.size()) { + std::unordered_set targetSet; + for 
(auto target : targets) { + targetSet.insert(target.getBits()); + } + for (auto source : sources) { + for (auto path : findPaths(source, targetSet, binSearch)) { + paths.push_back(path); + } + } + } else { + for (size_t i = 0; i < sources.size(); i++){ + for (auto path : findPaths(sources[i], {targets[i].getBits()}, binSearch)) { + paths.push_back(path); + } } } return paths; diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 411796cafa..0869b09787 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -124,6 +124,7 @@ struct PathSearchConfiguration { Variable edgeColumn_; // Variables representing edge property columns. std::vector edgeProperties_; + bool cartesian_ = true; bool sourceIsVariable() const { return std::holds_alternative(sources_); @@ -273,7 +274,8 @@ class PathSearch : public Operation { */ std::vector allPaths(std::span sources, std::span targets, - BinSearchWrapper& binSearch) const; + BinSearchWrapper& binSearch, + bool cartesian) const; /** * @brief Converts paths to a result table with a specified width. 
diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index e5ea06481d..6154aabdcc 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -121,6 +121,11 @@ void PathQuery::addParameter(const SparqlTriple& triple) { setVariable("edgeColumn", object, edgeColumn_); } else if (predString.ends_with("edgeProperty>")) { edgeProperties_.push_back(getVariable("edgeProperty", object)); + } else if (predString.ends_with("cartesian>")) { + if (!object.isBool()) { + throw PathSearchException("The parameter 'cartesian' expects a boolean"); + } + cartesian_ = object.getBool(); } else if (predString.ends_with("algorithm>")) { if (!object.isIri()) { throw PathSearchException("The 'algorithm' value has to be an Iri"); @@ -193,7 +198,7 @@ PathSearchConfiguration PathQuery::toPathSearchConfiguration( return PathSearchConfiguration{ algorithm_, sources, targets, start_.value(), end_.value(), pathColumn_.value(), - edgeColumn_.value(), edgeProperties_}; + edgeColumn_.value(), edgeProperties_, cartesian_}; } // ____________________________________________________________________________ diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 40e5c38fd1..9ecc251790 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -165,6 +165,7 @@ struct PathQuery { PathSearchAlgorithm algorithm_; GraphPattern childGraphPattern_; + bool cartesian_ = true; void addParameter(const SparqlTriple& triple); void fromBasicPattern(const BasicGraphPattern& pattern); diff --git a/src/parser/TripleComponent.h b/src/parser/TripleComponent.h index 85fe3788f7..f71ff36f1f 100644 --- a/src/parser/TripleComponent.h +++ b/src/parser/TripleComponent.h @@ -136,6 +136,12 @@ class TripleComponent { return std::holds_alternative(_variant); } + [[nodiscard]] bool isBool() const { + return std::holds_alternative(_variant); + } + + bool getBool() const { return std::get(_variant); 
} + bool isLiteral() const { return std::holds_alternative(_variant); } Literal& getLiteral() { return std::get(_variant); } const Literal& getLiteral() const { return std::get(_variant); } diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index fb9af52d69..c0fe00cc2f 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -500,3 +500,31 @@ TEST(PathSearchTest, multiSourceMultiTargetallPaths) { ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } + +TEST(PathSearchTest, multiSourceMultiTargetallPathsNotCartesian) { + auto sub = makeIdTableFromVector({{0, 2}, {1, 2}, {2, 3}, {3, 4}, {3, 5}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0)}, + {V(2), V(3), I(0), I(1)}, + {V(3), V(4), I(0), I(2)}, + {V(1), V(2), I(1), I(0)}, + {V(2), V(3), I(1), I(1)}, + {V(3), V(5), I(1), I(2)}, + }); + + std::vector sources{V(0), V(1)}; + std::vector targets{V(4), V(5)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}, false}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} From 3f5ab2f56822553a474e2ebcb3344ddff37bb4bf Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 20 Sep 2024 15:38:05 +0200 Subject: [PATCH 60/96] Removed unused e2e test --- e2e/scientists_queries.yaml | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/e2e/scientists_queries.yaml b/e2e/scientists_queries.yaml index 9253104ef1..6d6309f3f1 100644 --- a/e2e/scientists_queries.yaml +++ b/e2e/scientists_queries.yaml @@ -1040,33 +1040,6 @@ queries: - num_cols: 4 - selected: ["?path", "?edge", "?start", "?end"] - # - query: path_search_shortest_paths - # type: no-text - # sparql: | - # PREFIX pathSearch: - # SELECT * WHERE 
{ - # SERVICE pathSearch: { - # pathSearch: pathSearch:algorithm pathSearch:shortestPaths; - # pathSearch:source ; - # pathSearch:target ; - # pathSearch:pathColumn ?path; - # pathSearch:edgeColumn ?edge; - # pathSearch:start ?start; - # pathSearch:end ?end; - # {SELECT * WHERE { - # ?start ?end - # }} - # } - # } - # checks: - # - num_rows: 4 - # - num_cols: 4 - # - selected: ["?path", "?edge", "?start", "?end"] - # - contains_row: [0, 0, "", ""] - # - contains_row: [0, 1, "", ""] - # - contains_row: [1, 0, "", ""] - # - contains_row: [1, 1, "", ""] - - query : property_path_inverse type: no-text From 48d7d2ccd6bc640f215f4a3dc60d5be7da991d78 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 20 Sep 2024 15:38:54 +0200 Subject: [PATCH 61/96] Adjusted exception string when parsing --- src/parser/sparqlParser/SparqlQleverVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 28abb56cb4..7f5a9a344d 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -711,7 +711,7 @@ GraphPatternOperation Visitor::visitPathQuery( pathQuery.childGraphPattern_ = std::move(pattern._child); } else { throw parsedQuery::PathSearchException( - "Unsupported subquery in pathSearch." + "Unsupported element in pathSearch." 
"PathQuery may only consist of triples for configuration" "And a subquery specifying edges."); } From 27d8257fd34db8b8b386e16e5781a25c0dd497c3 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 20 Sep 2024 15:50:03 +0200 Subject: [PATCH 62/96] Renamed PathQuery::fromBasicPattern to addBasicPattern --- src/parser/GraphPatternOperation.cpp | 2 +- src/parser/GraphPatternOperation.h | 2 +- src/parser/sparqlParser/SparqlQleverVisitor.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index 6154aabdcc..92d91413b1 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -173,7 +173,7 @@ std::variant> PathQuery::toSearchSide( } // ____________________________________________________________________________ -void PathQuery::fromBasicPattern(const BasicGraphPattern& pattern) { +void PathQuery::addBasicPattern(const BasicGraphPattern& pattern) { for (SparqlTriple triple : pattern._triples) { addParameter(triple); } diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 9ecc251790..e495719f5d 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -168,7 +168,7 @@ struct PathQuery { bool cartesian_ = true; void addParameter(const SparqlTriple& triple); - void fromBasicPattern(const BasicGraphPattern& pattern); + void addBasicPattern(const BasicGraphPattern& pattern); std::variant> toSearchSide( std::vector side, const Index::Vocab& vocab) const; PathSearchConfiguration toPathSearchConfiguration( diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 7f5a9a344d..e3604d7a61 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -705,7 +705,7 @@ GraphPatternOperation Visitor::visitPathQuery( auto parsePathQuery = [](parsedQuery::PathQuery& 
pathQuery, const parsedQuery::GraphPatternOperation& op) { if (std::holds_alternative(op)) { - pathQuery.fromBasicPattern(std::get(op)); + pathQuery.addBasicPattern(std::get(op)); } else if (std::holds_alternative(op)) { auto pattern = std::get(op); pathQuery.childGraphPattern_ = std::move(pattern._child); From 0bd7622f184e72ace9119fc9fa30b18d0e07671d Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 20 Sep 2024 16:00:29 +0200 Subject: [PATCH 63/96] Made edgeproperty cols possibly undefined --- src/engine/PathSearch.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index d67d42b782..547d4f9e59 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -106,7 +106,9 @@ PathSearch::PathSearch(QueryExecutionContext* qec, } for (const auto& edgeProperty : config_.edgeProperties_) { - variableColumns_[edgeProperty] = makeAlwaysDefinedColumn(colIndex); + auto subVarCols = subtree_->getVariableColumns(); + auto colInfo = subVarCols[edgeProperty]; + variableColumns_[edgeProperty] = {colIndex, colInfo.mightContainUndef_}; colIndex++; } } From 495bfd8749f59b06f39f0b9446d960f60e4197c9 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 20 Sep 2024 16:01:15 +0200 Subject: [PATCH 64/96] Added pathSearch identifier to cache key --- src/engine/PathSearch.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 547d4f9e59..46f5e774ee 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -136,6 +136,7 @@ std::vector PathSearch::getChildren() { // _____________________________________________________________________________ std::string PathSearch::getCacheKeyImpl() const { std::ostringstream os; + os << "PathSearch:\n"; os << config_.toString(); AD_CORRECTNESS_CHECK(subtree_); From 7a082b72d41d9beefb8a03be016449026629509c Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 20 Sep 2024 
16:13:03 +0200 Subject: [PATCH 65/96] Simplified PathSearchConfig test matcher --- test/QueryPlannerTest.cpp | 2 +- test/QueryPlannerTestHelpers.h | 20 ++------------------ 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index e9372a4121..e604cc1b3f 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -890,7 +890,7 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { auto getId = ad_utility::testing::makeGetId(qec->getIndex()); std::vector sources{getId("")}; - std::vector targets{getId(""), getId("")}; + std::vector targets{getId(""), getId("")}; PathSearchConfiguration config{ PathSearchAlgorithm::ALL_PATHS, sources, diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index da483fa644..5938a4f087 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -263,24 +263,8 @@ inline auto TransitivePath = }; inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { - auto sourceMatcher = - std::holds_alternative(config.sources_) - ? AD_FIELD( - PathSearchConfiguration, sources_, - VariantWith(Eq(std::get(config.sources_)))) - : AD_FIELD( - PathSearchConfiguration, sources_, - VariantWith>(UnorderedElementsAreArray( - std::get>(config.sources_)))); - auto targetMatcher = - std::holds_alternative(config.targets_) - ? 
AD_FIELD( - PathSearchConfiguration, targets_, - VariantWith(Eq(std::get(config.targets_)))) - : AD_FIELD( - PathSearchConfiguration, targets_, - VariantWith>(UnorderedElementsAreArray( - std::get>(config.targets_)))); + auto sourceMatcher = AD_FIELD(PathSearchConfiguration, sources_, Eq(config.sources_)); + auto targetMatcher = AD_FIELD(PathSearchConfiguration, targets_, Eq(config.targets_)); return AllOf( AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)), sourceMatcher, targetMatcher, From 54b2be203229b0ea1fc703d793334fe76df1d339 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 20 Sep 2024 16:24:09 +0200 Subject: [PATCH 66/96] Use string_views for parameters --- src/parser/GraphPatternOperation.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index 92d91413b1..2bdfb1c4ee 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -6,6 +6,7 @@ #include "parser/GraphPatternOperation.h" #include +#include #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" @@ -81,26 +82,26 @@ void PathQuery::addParameter(const SparqlTriple& triple) { throw PathSearchException("Predicates must be IRIs"); } - auto getVariable = [](std::string parameter, const TripleComponent& object) { + auto getVariable = [](std::string_view parameter, const TripleComponent& object) { if (!object.isVariable()) { - throw PathSearchException("The value " + object.toString() + - " for parameter '" + parameter + - "' has to be a variable"); + throw PathSearchException(absl::StrCat("The value ", object.toString(), + " for parameter '", parameter, + "' has to be a variable")); } return object.getVariable(); }; - auto setVariable = [&](std::string parameter, const TripleComponent& object, + auto setVariable = [&](std::string_view parameter, const TripleComponent& object, std::optional& existingValue) { auto 
variable = getVariable(parameter, object); if (existingValue.has_value()) { - throw PathSearchException("The parameter '" + parameter + - "' has already been set to variable: '" + - existingValue.value().toSparql() + - "'. New variable: '" + object.toString() + - "'."); + throw PathSearchException(absl::StrCat("The parameter '", parameter, + "' has already been set to variable: '", + existingValue.value().toSparql(), + "'. New variable: '", object.toString(), + "'.")); } existingValue = object.getVariable(); From ee811e4622077c8c32228a129d5fff342098c7b0 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 20 Sep 2024 17:49:41 +0200 Subject: [PATCH 67/96] implement addGraph --- src/parser/GraphPatternOperation.cpp | 8 ++++++++ src/parser/GraphPatternOperation.h | 1 + src/parser/sparqlParser/SparqlQleverVisitor.cpp | 3 +-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index 2bdfb1c4ee..11670c6b02 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -180,6 +180,14 @@ void PathQuery::addBasicPattern(const BasicGraphPattern& pattern) { } } +// ____________________________________________________________________________ +void PathQuery::addGraph(const GraphPatternOperation& op) { + if (childGraphPattern_._graphPatterns.empty()) { + auto pattern = std::get(op); + childGraphPattern_ = std::move(pattern._child); + } +} + // ____________________________________________________________________________ PathSearchConfiguration PathQuery::toPathSearchConfiguration( const Index::Vocab& vocab) const { diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index e495719f5d..b1b7ef73e9 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -169,6 +169,7 @@ struct PathQuery { void addParameter(const SparqlTriple& triple); void addBasicPattern(const BasicGraphPattern& pattern); +
void addGraph(const GraphPatternOperation& childGraphPattern); std::variant> toSearchSide( std::vector side, const Index::Vocab& vocab) const; PathSearchConfiguration toPathSearchConfiguration( diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index e3604d7a61..397c4930a9 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -707,8 +707,7 @@ GraphPatternOperation Visitor::visitPathQuery( if (std::holds_alternative(op)) { pathQuery.addBasicPattern(std::get(op)); } else if (std::holds_alternative(op)) { - auto pattern = std::get(op); - pathQuery.childGraphPattern_ = std::move(pattern._child); + pathQuery.addGraph(op); } else { throw parsedQuery::PathSearchException( "Unsupported element in pathSearch." From 6520019da1d1fd1384aab2efc99ea8649a8c3be4 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 25 Sep 2024 11:03:44 +0200 Subject: [PATCH 68/96] Improved documentation --- src/engine/PathSearch.h | 67 ++++++++++++---------------- src/parser/GraphPatternOperation.cpp | 1 + src/parser/GraphPatternOperation.h | 45 +++++++++++++++++++ 3 files changed, 75 insertions(+), 38 deletions(-) diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 0869b09787..9fd8e847c0 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -14,53 +14,32 @@ enum class PathSearchAlgorithm { ALL_PATHS }; +/** + * @brief Represents the source or target side of a PathSearch. + * The side can either be a variable or a list of Ids. + */ using SearchSide = std::variant>; namespace pathSearch { - /** - * @brief Represents an edge in the graph. - */ struct Edge { - // The starting node ID. Id start_; - // The ending node ID. Id end_; - // Properties associated with the edge. std::vector edgeProperties_; }; - /** - * @brief Represents a path consisting of multiple edges. - */ struct Path { - // The edges that make up the path. 
std::vector edges_; - /** - * @brief Checks if the path is empty. - * @return True if the path is empty, false otherwise. - */ bool empty() const { return edges_.empty(); } - /** - * @brief Returns the number of edges in the path. - * @return The number of edges in the path. - */ size_t size() const { return edges_.size(); } - /** - * @brief Adds an edge to the end of the path. - * @param edge The edge to add. - */ void push_back(const Edge& edge) { edges_.push_back(edge); } void pop_back() { edges_.pop_back(); } - /** - * @brief Reverses the order of the edges in the path. - */ void reverse() { std::ranges::reverse(edges_); } Path concat(const Path& other) const { @@ -83,6 +62,13 @@ namespace pathSearch { } }; + /** + * @class BinSearchWrapper + * @brief Encapsulates logic for binary search of edges in + * an IdTable. It provides methods to find outgoing edges from + * a node and retrie + * + */ class BinSearchWrapper { const IdTable& table_; size_t startCol_; @@ -93,8 +79,19 @@ namespace pathSearch { BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, std::vector edgeCols); + /** + * @brief Return all outgoing edges of a node + * + * @param node The start node of the outgoing edges + */ std::vector outgoingEdes(const Id node) const; + /** + * @brief Returns the start nodes of all edges. + * In case the sources field for the path search is empty, + * the search starts from all possible sources (i.e. all + * start nodes). Returns only unique start nodes. + */ std::span getSources() const; private: @@ -104,25 +101,14 @@ namespace pathSearch { using namespace pathSearch; -/** - * @brief Struct to hold configuration parameters for the path search. - */ struct PathSearchConfiguration { - // The path search algorithm to use. PathSearchAlgorithm algorithm_; - // The source node ID. SearchSide sources_; - // A list of target node IDs. SearchSide targets_; - // Variable representing the start column in the result. 
Variable start_; - // Variable representing the end column in the result. Variable end_; - // Variable representing the path column in the result. Variable pathColumn_; - // Variable representing the edge column in the result. Variable edgeColumn_; - // Variables representing edge property columns. std::vector edgeProperties_; bool cartesian_ = true; @@ -170,16 +156,21 @@ struct PathSearchConfiguration { }; /** - * @brief Class to perform various path search algorithms on a graph. + * @class PathSearch + * @brief Main class implementing the path search operation. + * It manages the configuration, executes the search and + * builds the ResultTable. + * */ class PathSearch : public Operation { std::shared_ptr subtree_; size_t resultWidth_; VariableToColumnMap variableColumns_; - // Configuration for the path search. PathSearchConfiguration config_; + // The following optional fields are filled, depending + // on how the PathSearch is bound. std::optional sourceCol_; std::optional targetCol_; diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index 11670c6b02..d65612d87a 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -151,6 +151,7 @@ void PathQuery::addParameter(const SparqlTriple& triple) { } } +// ____________________________________________________________________________ std::variant> PathQuery::toSearchSide( std::vector side, const Index::Vocab& vocab) const { if (side.size() == 1 && side[0].isVariable()) { diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index b1b7ef73e9..143f0219ba 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -167,11 +167,56 @@ struct PathQuery { GraphPattern childGraphPattern_; bool cartesian_ = true; + /** + * @brief Add a parameter to the PathQuery from the given triple. 
+ * The predicate of the triple determines the parameter name and the object + * of the triple determines the parameter value. The subject is ignored. + * Throws a PathSearchException if an unsupported algorithm is given or if the + * predicate contains an unknown parameter name. + * + * @param triple A SparqlTriple that contains the parameter info + */ void addParameter(const SparqlTriple& triple); + + /** + * @brief Add the parameters from a BasicGraphPattern to the PathQuery + * + * @param pattern + */ void addBasicPattern(const BasicGraphPattern& pattern); + + /** + * @brief Add a GraphPatternOperation to the PathQuery. The pattern specifies + * the edges of the graph that is used by the path search + * + * @param childGraphPattern + */ void addGraph(const GraphPatternOperation& childGraphPattern); + + /** + * @brief Convert the vector of triple components into a SearchSide + * The SeachSide can either be a variable or a list of Ids. + * A PathSearchException is thrown if more than one variable is given. + * + * @param side A vector of TripleComponents, containing either exactly one + * Variable or zero or more ValueIds + * @param vocab A Vocabulary containing the Ids of the TripleComponents. + * The Vocab is only used if the given vector contains IRIs. + */ std::variant> toSearchSide( std::vector side, const Index::Vocab& vocab) const; + + /** + * @brief Convert this PathQuery into a PathSearchConfiguration object. + * This method checks if all required parameters are set and converts + * the PathSearch sources and targets into SearchSides. + * A PathSearchException is thrown if required paramaters are missing. + * The required parameters are start, end, pathColumn and edgeColumn. 
+ * + * @param vocab A vocab containing the Ids of the IRIs in + * sources_ and targets_ + * @return A valid PathSearchConfiguration + */ PathSearchConfiguration toPathSearchConfiguration( const Index::Vocab& vocab) const; }; From 0a833dc975cd0088dee2369e96da459d12e25ada Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 25 Sep 2024 12:43:37 +0200 Subject: [PATCH 69/96] Fixed join on edge property --- src/engine/QueryPlanner.cpp | 40 ++++++++++++++++++++++++++++-------- test/QueryPlannerTest.cpp | 41 +++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 9 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 4abb1dfe17..5aa56f5800 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -35,6 +35,7 @@ #include "engine/TransitivePathBase.h" #include "engine/Union.h" #include "engine/Values.h" +#include "global/Id.h" #include "parser/Alias.h" #include "parser/SparqlParserHelpers.h" #include "util/Exception.h" @@ -1832,6 +1833,13 @@ auto QueryPlanner::createJoinWithPathSearch( auto pathSearch = aRootOp ? aRootOp : bRootOp; auto sibling = bRootOp ? a : b; + auto decideColumns = [aRootOp](std::array joinColumns) -> std::pair { + auto thisCol = aRootOp ? joinColumns[0] : joinColumns[1]; + auto otherCol = aRootOp ? 
joinColumns[1] : joinColumns[0]; + return {thisCol, otherCol}; + }; + + // Only source and target may be bound directly if (jcs.size() > 2) { return std::nullopt; @@ -1839,19 +1847,31 @@ auto QueryPlanner::createJoinWithPathSearch( auto sourceColumn = pathSearch->getSourceColumn(); auto targetColumn = pathSearch->getTargetColumn(); + + // Either source or target column have to be a variable to create a join + if (!sourceColumn && !targetColumn) { + return std::nullopt; + } + + // A join on an edge property column should not create any candidates + auto isJoinOnSourceOrTarget = [sourceColumn, targetColumn](size_t joinColumn) -> bool { + return ((sourceColumn && sourceColumn.value() == joinColumn) || + (targetColumn && targetColumn.value() == joinColumn)); + }; + if (jcs.size() == 2) { // To join source and target, both must be variables if (!sourceColumn || !targetColumn) { return std::nullopt; } - auto firstJc = jcs[0]; - auto firstCol = aRootOp ? firstJc[0] : firstJc[1]; - auto firstOtherCol = aRootOp ? firstJc[1]: firstJc[0]; + auto [firstCol, firstOtherCol] = decideColumns(jcs[0]); + + auto [secondCol, secondOtherCol] = decideColumns(jcs[1]); - auto secondJc = jcs[1]; - auto secondCol = aRootOp ? secondJc[0] : secondJc[1]; - auto secondOtherCol = aRootOp ? secondJc[1]: secondJc[0]; + if (!isJoinOnSourceOrTarget(firstCol) && !isJoinOnSourceOrTarget(secondCol)) { + return std::nullopt; + } if (sourceColumn == firstCol && targetColumn == secondCol) { pathSearch->bindSourceAndTargetSide( @@ -1862,9 +1882,11 @@ auto QueryPlanner::createJoinWithPathSearch( return std::nullopt; } } else if (jcs.size() == 1) { - auto jc = jcs[0]; - const size_t thisCol = aRootOp ? jc[0] : jc[1]; - const size_t otherCol = aRootOp ? 
jc[1] : jc[0]; + auto [thisCol, otherCol] = decideColumns(jcs[0]); + + if (!isJoinOnSourceOrTarget(thisCol)) { + return std::nullopt; + } if (sourceColumn && sourceColumn == thisCol && !pathSearch->isSourceBound()) { diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index e604cc1b3f..88a70f8c4d 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -927,6 +927,47 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { qec); } +TEST(QueryPlanner, PathSearchJoinOnEdgeProperty) { + auto scan = h::IndexScanFromStrings; + auto join = h::Join; + auto qec = ad_utility::testing::getQec( + " . . . "); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + std::vector targets{getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {Variable("?middle")}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES ?middle {} " + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:edgeProperty ?middle;" + "{SELECT * WHERE {" + "?start ?middle." + "?middle ?end." + "}}}}", + join(h::Sort(h::ValuesClause("VALUES (?middle) { () }")), + h::Sort(h::PathSearch(config, true, true, + h::Sort(join(scan("?start", "", "?middle"), + scan("?middle", "", "?end")))))), + qec); +} + TEST(QueryPlanner, PathSearchSourceBound) { auto scan = h::IndexScanFromStrings; auto qec = ad_utility::testing::getQec("

.

"); From f3aa985b4536c0a2a089473ffe123065b38e4eb9 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 25 Sep 2024 13:14:46 +0200 Subject: [PATCH 70/96] Added row check to path search e2e tests --- e2e/scientists_queries.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/e2e/scientists_queries.yaml b/e2e/scientists_queries.yaml index 6d6309f3f1..1fc78430be 100644 --- a/e2e/scientists_queries.yaml +++ b/e2e/scientists_queries.yaml @@ -1039,6 +1039,11 @@ queries: - num_rows: 17 - num_cols: 4 - selected: ["?path", "?edge", "?start", "?end"] + - contains_row: ["0", "0", "", ""] + - contains_row: ["0", "1", "", ""] + - contains_row: ["0", "2", "", ""] + - contains_row: ["4", "0", "", ""] + - contains_row: ["4", "1", "", ""] - query : property_path_inverse From e34f0ae8f3adcfd2aeb5e4c8901dabb8b5480cf6 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 25 Sep 2024 14:26:26 +0200 Subject: [PATCH 71/96] spell fix --- src/parser/GraphPatternOperation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index c783158525..eadc19f5c2 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -212,7 +212,7 @@ struct PathQuery { * @brief Convert this PathQuery into a PathSearchConfiguration object. * This method checks if all required parameters are set and converts * the PathSearch sources and targets into SearchSides. - * A PathSearchException is thrown if required paramaters are missing. + * A PathSearchException is thrown if required parameters are missing. * The required parameters are start, end, pathColumn and edgeColumn. 
* * @param vocab A vocab containing the Ids of the IRIs in From cf2eaae6036ba5325acfd8d97e0d2c51cc51895d Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 25 Sep 2024 14:28:51 +0200 Subject: [PATCH 72/96] Format fix --- src/engine/PathSearch.cpp | 29 ++-- src/engine/PathSearch.h | 135 +++++++++--------- src/engine/QueryPlanner.cpp | 19 +-- src/parser/GraphPatternOperation.cpp | 23 +-- src/parser/GraphPatternOperation.h | 6 +- .../sparqlParser/SparqlQleverVisitor.cpp | 9 +- test/PathSearchTest.cpp | 3 +- test/QueryPlannerTest.cpp | 24 +++- test/QueryPlannerTestHelpers.h | 13 +- 9 files changed, 143 insertions(+), 118 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 46f5e774ee..4805adbc58 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -4,9 +4,9 @@ #include "PathSearch.h" +#include #include #include -#include #include "engine/CallFixedSize.h" #include "engine/QueryExecutionTree.h" @@ -173,8 +173,8 @@ float PathSearch::getMultiplicity(size_t col) { }; // _____________________________________________________________________________ -bool PathSearch::knownEmptyResult() { - for (auto child: getChildren()) { +bool PathSearch::knownEmptyResult() { + for (auto child : getChildren()) { if (child->knownEmptyResult()) { return true; } @@ -200,8 +200,9 @@ void PathSearch::bindTargetSide(std::shared_ptr targetsOp, } // _____________________________________________________________________________ -void PathSearch::bindSourceAndTargetSide(std::shared_ptr sourceAndTargetOp, - size_t sourceCol, size_t targetCol) { +void PathSearch::bindSourceAndTargetSide( + std::shared_ptr sourceAndTargetOp, size_t sourceCol, + size_t targetCol) { sourceAndTargetTree_ = sourceAndTargetOp; sourceCol_ = sourceCol; targetCol_ = targetCol; @@ -265,7 +266,8 @@ VariableToColumnMap PathSearch::computeVariableToColumnMap() const { }; // _____________________________________________________________________________ -std::pair, 
std::span> PathSearch::handleSearchSides() const { +std::pair, std::span> +PathSearch::handleSearchSides() const { std::span sourceIds; std::span targetIds; @@ -277,7 +279,8 @@ std::pair, std::span> PathSearch::handleSearchSide } if (sourceTree_.has_value()) { - sourceIds = sourceTree_.value()->getResult()->idTable().getColumn(sourceCol_.value()); + sourceIds = sourceTree_.value()->getResult()->idTable().getColumn( + sourceCol_.value()); } else if (config_.sourceIsVariable()) { sourceIds = {}; } else { @@ -285,13 +288,14 @@ std::pair, std::span> PathSearch::handleSearchSide } if (targetTree_.has_value()) { - targetIds = targetTree_.value()->getResult()->idTable().getColumn(targetCol_.value()); + targetIds = targetTree_.value()->getResult()->idTable().getColumn( + targetCol_.value()); } else if (config_.targetIsVariable()) { targetIds = {}; } else { targetIds = std::get>(config_.targets_); } - + return {sourceIds, targetIds}; } @@ -328,7 +332,7 @@ std::vector PathSearch::findPaths( } for (auto outgoingEdge : binSearch.outgoingEdes(edge.end_)) { - if (!visited.contains(outgoingEdge.end_.getBits())){ + if (!visited.contains(outgoingEdge.end_.getBits())) { edgeStack.push_back(outgoingEdge); } } @@ -360,8 +364,9 @@ std::vector PathSearch::allPaths(std::span sources, } } } else { - for (size_t i = 0; i < sources.size(); i++){ - for (auto path : findPaths(sources[i], {targets[i].getBits()}, binSearch)) { + for (size_t i = 0; i < sources.size(); i++) { + for (auto path : + findPaths(sources[i], {targets[i].getBits()}, binSearch)) { paths.push_back(path); } } diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 9fd8e847c0..0bdb060c02 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -21,83 +21,83 @@ enum class PathSearchAlgorithm { ALL_PATHS }; using SearchSide = std::variant>; namespace pathSearch { - struct Edge { - Id start_; +struct Edge { + Id start_; - Id end_; + Id end_; - std::vector edgeProperties_; - }; + std::vector 
edgeProperties_; +}; - struct Path { - std::vector edges_; +struct Path { + std::vector edges_; - bool empty() const { return edges_.empty(); } + bool empty() const { return edges_.empty(); } - size_t size() const { return edges_.size(); } + size_t size() const { return edges_.size(); } - void push_back(const Edge& edge) { edges_.push_back(edge); } + void push_back(const Edge& edge) { edges_.push_back(edge); } - void pop_back() { edges_.pop_back(); } + void pop_back() { edges_.pop_back(); } - void reverse() { std::ranges::reverse(edges_); } + void reverse() { std::ranges::reverse(edges_); } - Path concat(const Path& other) const { - Path path; - path.edges_ = edges_; - path.edges_.insert(path.edges_.end(), other.edges_.begin(), - other.edges_.end()); - return path; - } + Path concat(const Path& other) const { + Path path; + path.edges_ = edges_; + path.edges_.insert(path.edges_.end(), other.edges_.begin(), + other.edges_.end()); + return path; + } - const Id& end() { return edges_.back().end_; } - const Id& first() { return edges_.front().start_; } + const Id& end() { return edges_.back().end_; } + const Id& first() { return edges_.front().start_; } - Path startingAt(size_t index) const { - std::vector edges; - for (size_t i = index; i < edges_.size(); i++) { - edges.push_back(edges_[i]); - } - return Path{edges}; + Path startingAt(size_t index) const { + std::vector edges; + for (size_t i = index; i < edges_.size(); i++) { + edges.push_back(edges_[i]); } - }; + return Path{edges}; + } +}; + +/** + * @class BinSearchWrapper + * @brief Encapsulates logic for binary search of edges in + * an IdTable. 
It provides methods to find outgoing edges from + * a node and retrie + * + */ +class BinSearchWrapper { + const IdTable& table_; + size_t startCol_; + size_t endCol_; + std::vector edgeCols_; + + public: + BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, + std::vector edgeCols); /** - * @class BinSearchWrapper - * @brief Encapsulates logic for binary search of edges in - * an IdTable. It provides methods to find outgoing edges from - * a node and retrie + * @brief Return all outgoing edges of a node * + * @param node The start node of the outgoing edges + */ + std::vector outgoingEdes(const Id node) const; + + /** + * @brief Returns the start nodes of all edges. + * In case the sources field for the path search is empty, + * the search starts from all possible sources (i.e. all + * start nodes). Returns only unique start nodes. */ - class BinSearchWrapper { - const IdTable& table_; - size_t startCol_; - size_t endCol_; - std::vector edgeCols_; - - public: - BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, - std::vector edgeCols); - - /** - * @brief Return all outgoing edges of a node - * - * @param node The start node of the outgoing edges - */ - std::vector outgoingEdes(const Id node) const; - - /** - * @brief Returns the start nodes of all edges. - * In case the sources field for the path search is empty, - * the search starts from all possible sources (i.e. all - * start nodes). Returns only unique start nodes. 
- */ - std::span getSources() const; - - private: - Edge makeEdgeFromRow(size_t row) const; - }; -} + std::span getSources() const; + + private: + Edge makeEdgeFromRow(size_t row) const; +}; +} // namespace pathSearch using namespace pathSearch; @@ -217,14 +217,18 @@ class PathSearch : public Operation { void bindTargetSide(std::shared_ptr targetsOp, size_t inputCol); - void bindSourceAndTargetSide(std::shared_ptr sourceAndTargetOp, size_t sourceCol, size_t targetCol); + void bindSourceAndTargetSide( + std::shared_ptr sourceAndTargetOp, size_t sourceCol, + size_t targetCol); bool isSourceBound() const { - return sourceTree_.has_value() || sourceAndTargetTree_.has_value() || !config_.sourceIsVariable(); + return sourceTree_.has_value() || sourceAndTargetTree_.has_value() || + !config_.sourceIsVariable(); } bool isTargetBound() const { - return targetTree_.has_value() || sourceAndTargetTree_.has_value() || !config_.targetIsVariable(); + return targetTree_.has_value() || sourceAndTargetTree_.has_value() || + !config_.targetIsVariable(); } std::optional getSourceColumn() const { @@ -265,8 +269,7 @@ class PathSearch : public Operation { */ std::vector allPaths(std::span sources, std::span targets, - BinSearchWrapper& binSearch, - bool cartesian) const; + BinSearchWrapper& binSearch, bool cartesian) const; /** * @brief Converts paths to a result table with a specified width. diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index b7f0b553b6..c36a5b1fbd 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1876,13 +1876,13 @@ auto QueryPlanner::createJoinWithPathSearch( auto pathSearch = aRootOp ? aRootOp : bRootOp; auto sibling = bRootOp ? a : b; - auto decideColumns = [aRootOp](std::array joinColumns) -> std::pair { + auto decideColumns = [aRootOp](std::array joinColumns) + -> std::pair { auto thisCol = aRootOp ? joinColumns[0] : joinColumns[1]; auto otherCol = aRootOp ? 
joinColumns[1] : joinColumns[0]; return {thisCol, otherCol}; }; - // Only source and target may be bound directly if (jcs.size() > 2) { return std::nullopt; @@ -1897,7 +1897,8 @@ auto QueryPlanner::createJoinWithPathSearch( } // A join on an edge property column should not create any candidates - auto isJoinOnSourceOrTarget = [sourceColumn, targetColumn](size_t joinColumn) -> bool { + auto isJoinOnSourceOrTarget = [sourceColumn, + targetColumn](size_t joinColumn) -> bool { return ((sourceColumn && sourceColumn.value() == joinColumn) || (targetColumn && targetColumn.value() == joinColumn)); }; @@ -1912,15 +1913,17 @@ auto QueryPlanner::createJoinWithPathSearch( auto [secondCol, secondOtherCol] = decideColumns(jcs[1]); - if (!isJoinOnSourceOrTarget(firstCol) && !isJoinOnSourceOrTarget(secondCol)) { + if (!isJoinOnSourceOrTarget(firstCol) && + !isJoinOnSourceOrTarget(secondCol)) { return std::nullopt; } if (sourceColumn == firstCol && targetColumn == secondCol) { - pathSearch->bindSourceAndTargetSide( - sibling._qet, firstOtherCol, secondOtherCol); + pathSearch->bindSourceAndTargetSide(sibling._qet, firstOtherCol, + secondOtherCol); } else if (sourceColumn == secondCol && targetColumn == firstCol) { - pathSearch->bindSourceAndTargetSide(sibling._qet, secondOtherCol, firstOtherCol); + pathSearch->bindSourceAndTargetSide(sibling._qet, secondOtherCol, + firstOtherCol); } else { return std::nullopt; } @@ -1937,7 +1940,7 @@ auto QueryPlanner::createJoinWithPathSearch( } else if (targetColumn && targetColumn == thisCol && !pathSearch->isTargetBound()) { pathSearch->bindTargetSide(sibling._qet, otherCol); - } + } } else { return std::nullopt; } diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index d65612d87a..efceda159c 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -82,26 +82,27 @@ void PathQuery::addParameter(const SparqlTriple& triple) { throw PathSearchException("Predicates must be 
IRIs"); } - auto getVariable = [](std::string_view parameter, const TripleComponent& object) { + auto getVariable = [](std::string_view parameter, + const TripleComponent& object) { if (!object.isVariable()) { throw PathSearchException(absl::StrCat("The value ", object.toString(), - " for parameter '", parameter, - "' has to be a variable")); + " for parameter '", parameter, + "' has to be a variable")); } return object.getVariable(); }; - auto setVariable = [&](std::string_view parameter, const TripleComponent& object, + auto setVariable = [&](std::string_view parameter, + const TripleComponent& object, std::optional& existingValue) { auto variable = getVariable(parameter, object); if (existingValue.has_value()) { - throw PathSearchException(absl::StrCat("The parameter '", parameter, - "' has already been set to variable: '", - existingValue.value().toSparql(), - "'. New variable: '", object.toString(), - "'.")); + throw PathSearchException(absl::StrCat( + "The parameter '", parameter, "' has already been set to variable: '", + existingValue.value().toSparql(), "'. 
New variable: '", + object.toString(), "'.")); } existingValue = object.getVariable(); @@ -206,8 +207,8 @@ PathSearchConfiguration PathQuery::toPathSearchConfiguration( } return PathSearchConfiguration{ - algorithm_, sources, targets, - start_.value(), end_.value(), pathColumn_.value(), + algorithm_, sources, targets, + start_.value(), end_.value(), pathColumn_.value(), edgeColumn_.value(), edgeProperties_, cartesian_}; } diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index eadc19f5c2..2bb951b376 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -183,7 +183,7 @@ struct PathQuery { /** * @brief Add the parameters from a BasicGraphPattern to the PathQuery * - * @param pattern + * @param pattern */ void addBasicPattern(const BasicGraphPattern& pattern); @@ -191,7 +191,7 @@ struct PathQuery { * @brief Add a GraphPatternOperation to the PathQuery. The pattern specifies * the edges of the graph that is used by the path search * - * @param childGraphPattern + * @param childGraphPattern */ void addGraph(const GraphPatternOperation& childGraphPattern); @@ -203,7 +203,7 @@ struct PathQuery { * @param side A vector of TripleComponents, containing either exactly one * Variable or zero or more ValueIds * @param vocab A Vocabulary containing the Ids of the TripleComponents. - * The Vocab is only used if the given vector contains IRIs. + * The Vocab is only used if the given vector contains IRIs. 
*/ std::variant> toSearchSide( std::vector side, const Index::Vocab& vocab) const; diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index f337f5af2b..159873970a 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -711,7 +711,7 @@ GraphPatternOperation Visitor::visitPathQuery( if (std::holds_alternative(op)) { pathQuery.addBasicPattern(std::get(op)); } else if (std::holds_alternative(op)) { - pathQuery.addGraph(op); + pathQuery.addGraph(op); } else { throw parsedQuery::PathSearchException( "Unsupported element in pathSearch." @@ -768,9 +768,10 @@ GraphPatternOperation Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { visibleVariablesServiceQuery.begin(), visibleVariablesServiceQuery.end()); // Create suitable `parsedQuery::Service` object and return it. - return parsedQuery::Service{std::move(visibleVariablesServiceQuery), std::move(serviceIri), - prologueString_, getOriginalInputForContext(ctx->groupGraphPattern()), - static_cast(ctx->SILENT())}; + return parsedQuery::Service{ + std::move(visibleVariablesServiceQuery), std::move(serviceIri), + prologueString_, getOriginalInputForContext(ctx->groupGraphPattern()), + static_cast(ctx->SILENT())}; } // ____________________________________________________________________________ diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index c0fe00cc2f..ed518c62ed 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -522,7 +522,8 @@ TEST(PathSearchTest, multiSourceMultiTargetallPathsNotCartesian) { Var{"?end"}, Var{"?edgeIndex"}, Var{"?pathIndex"}, - {}, false}; + {}, + false}; auto resultTable = performPathSearch(config, std::move(sub), vars); ASSERT_THAT(resultTable.idTable(), diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 87992f584b..38e1d35e6d 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -964,9 +964,10 @@ 
TEST(QueryPlanner, PathSearchJoinOnEdgeProperty) { "?middle ?end." "}}}}", join(h::Sort(h::ValuesClause("VALUES (?middle) { () }")), - h::Sort(h::PathSearch(config, true, true, - h::Sort(join(scan("?start", "", "?middle"), - scan("?middle", "", "?end")))))), + h::Sort( + h::PathSearch(config, true, true, + h::Sort(join(scan("?start", "", "?middle"), + scan("?middle", "", "?end")))))), qec); } @@ -1000,7 +1001,9 @@ TEST(QueryPlanner, PathSearchSourceBound) { "{SELECT * WHERE {" "?start

?end." "}}}}", - h::PathSearch(config, true, true, scan("?start", "

", "?end"), h::ValuesClause("VALUES (?source) { () }")), qec); + h::PathSearch(config, true, true, scan("?start", "

", "?end"), + h::ValuesClause("VALUES (?source) { () }")), + qec); } TEST(QueryPlanner, PathSearchTargetBound) { @@ -1033,7 +1036,9 @@ TEST(QueryPlanner, PathSearchTargetBound) { "{SELECT * WHERE {" "?start

?end." "}}}}", - h::PathSearch(config, true, true, scan("?start", "

", "?end"), h::ValuesClause("VALUES (?target) { () }")), qec); + h::PathSearch(config, true, true, scan("?start", "

", "?end"), + h::ValuesClause("VALUES (?target) { () }")), + qec); } TEST(QueryPlanner, PathSearchBothBound) { @@ -1066,7 +1071,9 @@ TEST(QueryPlanner, PathSearchBothBound) { "{SELECT * WHERE {" "?start

?end." "}}}}", - h::PathSearch(config, true, true, scan("?start", "

", "?end"), h::ValuesClause("VALUES (?source\t?target) { ( ) }")), qec); + h::PathSearch(config, true, true, scan("?start", "

", "?end"), + h::ValuesClause("VALUES (?source\t?target) { ( ) }")), + qec); } TEST(QueryPlanner, PathSearchBothBoundIndividually) { @@ -1100,7 +1107,10 @@ TEST(QueryPlanner, PathSearchBothBoundIndividually) { "{SELECT * WHERE {" "?start

?end." "}}}}", - h::PathSearch(config, true, true, scan("?start", "

", "?end"), h::ValuesClause("VALUES (?source) { () }"), h::ValuesClause("VALUES (?target) { () }")), qec); + h::PathSearch(config, true, true, scan("?start", "

", "?end"), + h::ValuesClause("VALUES (?source) { () }"), + h::ValuesClause("VALUES (?target) { () }")), + qec); } // __________________________________________________________________________ diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index 30e6e06013..a0a2e7a324 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -269,8 +269,10 @@ inline auto TransitivePath = }; inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { - auto sourceMatcher = AD_FIELD(PathSearchConfiguration, sources_, Eq(config.sources_)); - auto targetMatcher = AD_FIELD(PathSearchConfiguration, targets_, Eq(config.targets_)); + auto sourceMatcher = + AD_FIELD(PathSearchConfiguration, sources_, Eq(config.sources_)); + auto targetMatcher = + AD_FIELD(PathSearchConfiguration, targets_, Eq(config.targets_)); return AllOf( AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)), sourceMatcher, targetMatcher, @@ -294,10 +296,9 @@ inline auto PathSearch = AD_PROPERTY(PathSearch, isTargetBound, Eq(targetBound)))); }; -inline auto ValuesClause = [](string cacheKey){ - return RootOperation<::Values>(AllOf( - AD_PROPERTY(Values, getCacheKey, cacheKey) - )); +inline auto ValuesClause = [](string cacheKey) { + return RootOperation<::Values>( + AllOf(AD_PROPERTY(Values, getCacheKey, cacheKey))); }; // Match a sort operation. 
Currently, this is only required by the binary search From 2fcacd50720a7fe742994c612ec05b7fbcdc0f46 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 25 Sep 2024 17:05:08 +0200 Subject: [PATCH 73/96] Added PathSearch tests --- test/QueryPlannerTest.cpp | 50 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 38e1d35e6d..519148ba90 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -1163,6 +1163,56 @@ TEST(QueryPlanner, PathSearchMultipleStarts) { parsedQuery::PathSearchException); } +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchMissingEnd) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(h::parseAndPlan(std::move(query), qec), + HasSubstr("Missing parameter 'end'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchMultipleEnds) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end1;" + "pathSearch:end ?end2;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("parameter 'end' has already been set " + "to variable: '?end1'. New variable: '?end2'"), + parsedQuery::PathSearchException); +} + // __________________________________________________________________________ TEST(QueryPlanner, PathSearchStartNotVariable) { auto qec = ad_utility::testing::getQec("

.

"); From 24975e8135dbdcc51df5c99cbb9901e475c36444 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 27 Sep 2024 08:33:11 +0200 Subject: [PATCH 74/96] Sonar fixes --- src/engine/PathSearch.cpp | 8 ++++---- src/engine/PathSearch.h | 18 +++++++----------- src/engine/QueryPlanner.cpp | 2 +- src/parser/GraphPatternOperation.h | 2 +- 4 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 4805adbc58..03e68917d2 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -331,7 +331,7 @@ std::vector PathSearch::findPaths( result.push_back(currentPath); } - for (auto outgoingEdge : binSearch.outgoingEdes(edge.end_)) { + for (const auto& outgoingEdge : binSearch.outgoingEdes(edge.end_)) { if (!visited.contains(outgoingEdge.end_.getBits())) { edgeStack.push_back(outgoingEdge); } @@ -344,7 +344,7 @@ std::vector PathSearch::findPaths( // _____________________________________________________________________________ std::vector PathSearch::allPaths(std::span sources, std::span targets, - BinSearchWrapper& binSearch, + const BinSearchWrapper& binSearch, bool cartesian) const { std::vector paths; Path path; @@ -359,13 +359,13 @@ std::vector PathSearch::allPaths(std::span sources, targetSet.insert(target.getBits()); } for (auto source : sources) { - for (auto path : findPaths(source, targetSet, binSearch)) { + for (const auto& path : findPaths(source, targetSet, binSearch)) { paths.push_back(path); } } } else { for (size_t i = 0; i < sources.size(); i++) { - for (auto path : + for (const auto& path : findPaths(sources[i], {targets[i].getBits()}, binSearch)) { paths.push_back(path); } diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 0bdb060c02..dfe0d316dd 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -99,8 +99,6 @@ class BinSearchWrapper { }; } // namespace pathSearch -using namespace pathSearch; - struct PathSearchConfiguration { 
PathSearchAlgorithm algorithm_; SearchSide sources_; @@ -132,10 +130,8 @@ struct PathSearchConfiguration { std::string toString() const { std::ostringstream os; - switch (algorithm_) { - case PathSearchAlgorithm::ALL_PATHS: - os << "Algorithm: All paths" << '\n'; - break; + if (algorithm_ == PathSearchAlgorithm::ALL_PATHS) { + os << "Algorithm: All paths" << '\n'; } os << "Source: " << searchSideToString(sources_) << '\n'; @@ -259,17 +255,17 @@ class PathSearch : public Operation { * @brief Finds paths based on the configured algorithm. * @return A vector of paths. */ - std::vector findPaths(const Id source, + std::vector findPaths(const Id source, const std::unordered_set& targets, - const BinSearchWrapper& binSearch) const; + const pathSearch::BinSearchWrapper& binSearch) const; /** * @brief Finds all paths in the graph. * @return A vector of all paths. */ - std::vector allPaths(std::span sources, + std::vector allPaths(std::span sources, std::span targets, - BinSearchWrapper& binSearch, bool cartesian) const; + const pathSearch::BinSearchWrapper& binSearch, bool cartesian) const; /** * @brief Converts paths to a result table with a specified width. @@ -278,5 +274,5 @@ class PathSearch : public Operation { * @param paths The vector of paths to convert. 
*/ template - void pathsToResultTable(IdTable& tableDyn, std::vector& paths) const; + void pathsToResultTable(IdTable& tableDyn, std::vector& paths) const; }; diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index c36a5b1fbd..caf0d54c93 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1898,7 +1898,7 @@ auto QueryPlanner::createJoinWithPathSearch( // A join on an edge property column should not create any candidates auto isJoinOnSourceOrTarget = [sourceColumn, - targetColumn](size_t joinColumn) -> bool { + targetColumn](size_t joinColumn) { return ((sourceColumn && sourceColumn.value() == joinColumn) || (targetColumn && targetColumn.value() == joinColumn)); }; diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 2bb951b376..ed7f0d8df5 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -143,7 +143,7 @@ class PathSearchException : public std::exception { std::string message_; public: - PathSearchException(std::string message) : message_(message) {} + explicit PathSearchException(const std::string& message) : message_(message) {} const char* what() const noexcept override { return message_.data(); } }; From 949455bcab8e947500f44b3223e3156871eaa9bf Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 1 Oct 2024 15:45:16 +0200 Subject: [PATCH 75/96] Fixed merge error --- test/QueryPlannerTestHelpers.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index d9c85fb08e..e3cb240005 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -290,7 +290,6 @@ inline auto TransitivePath = TransitivePathSideMatcher(right)))); }; -<<<<<<< HEAD inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { auto sourceMatcher = AD_FIELD(PathSearchConfiguration, sources_, Eq(config.sources_)); @@ -324,8 +323,6 @@ inline auto ValuesClause = 
[](string cacheKey) { AllOf(AD_PROPERTY(Values, getCacheKey, cacheKey))); }; -||||||| bed8b297 -======= // Match a SpatialJoin operation inline auto SpatialJoin = [](long long maxDist, @@ -336,7 +333,6 @@ inline auto SpatialJoin = AD_PROPERTY(SpatialJoin, getMaxDist, Eq(maxDist)))); }; ->>>>>>> master // Match a sort operation. Currently, this is only required by the binary search // version of the transitive path operation. This matcher checks only the // children of the sort operation. From b539bcf3f87d16400d684635116deede6156bad8 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 1 Oct 2024 16:11:18 +0200 Subject: [PATCH 76/96] Remove unused functions --- src/engine/PathSearch.h | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index dfe0d316dd..cc6c73767c 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -40,26 +40,7 @@ struct Path { void pop_back() { edges_.pop_back(); } - void reverse() { std::ranges::reverse(edges_); } - - Path concat(const Path& other) const { - Path path; - path.edges_ = edges_; - path.edges_.insert(path.edges_.end(), other.edges_.begin(), - other.edges_.end()); - return path; - } - const Id& end() { return edges_.back().end_; } - const Id& first() { return edges_.front().start_; } - - Path startingAt(size_t index) const { - std::vector edges; - for (size_t i = index; i < edges_.size(); i++) { - edges.push_back(edges_[i]); - } - return Path{edges}; - } }; /** From cad6f51f656703b766b81c818ccf00902b6d6f62 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 1 Oct 2024 16:12:56 +0200 Subject: [PATCH 77/96] Fix paths to result table --- src/engine/PathSearch.cpp | 28 +++++++++++++++++++++++++++- src/engine/PathSearch.h | 15 +++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 03e68917d2..0bbf51a610 100644 --- a/src/engine/PathSearch.cpp +++ 
b/src/engine/PathSearch.cpp @@ -4,6 +4,7 @@ #include "PathSearch.h" +#include #include #include #include @@ -380,9 +381,26 @@ void PathSearch::pathsToResultTable(IdTable& tableDyn, std::vector& paths) const { IdTableStatic table = std::move(tableDyn).toStatic(); + std::vector edgePropertyCols; + for (const auto& edgeVar: config_.edgeProperties_) { + auto edgePropertyCol = variableColumns_.at(edgeVar).columnIndex_; + edgePropertyCols.push_back(edgePropertyCol); + } + size_t rowIndex = 0; for (size_t pathIndex = 0; pathIndex < paths.size(); pathIndex++) { auto path = paths[pathIndex]; + + std::optional sourceId = std::nullopt; + if (config_.sourceIsVariable()) { + sourceId = path.edges_.front().start_; + } + + std::optional targetId = std::nullopt; + if (config_.targetIsVariable()) { + targetId = path.edges_.back().end_; + } + for (size_t edgeIndex = 0; edgeIndex < path.size(); edgeIndex++) { auto edge = path.edges_[edgeIndex]; table.emplace_back(); @@ -391,10 +409,18 @@ void PathSearch::pathsToResultTable(IdTable& tableDyn, table(rowIndex, getPathIndex()) = Id::makeFromInt(pathIndex); table(rowIndex, getEdgeIndex()) = Id::makeFromInt(edgeIndex); + if (sourceId) { + table(rowIndex, getSourceIndex().value()) = sourceId.value(); + } + + if (targetId) { + table(rowIndex, getTargetIndex().value()) = targetId.value(); + } + for (size_t edgePropertyIndex = 0; edgePropertyIndex < edge.edgeProperties_.size(); edgePropertyIndex++) { - table(rowIndex, 4 + edgePropertyIndex) = + table(rowIndex, edgePropertyCols[edgePropertyIndex]) = edge.edgeProperties_[edgePropertyIndex]; } diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index cc6c73767c..928e412e02 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include @@ -176,6 +177,20 @@ class PathSearch : public Operation { ColumnIndex getEdgeIndex() const { return variableColumns_.at(config_.edgeColumn_).columnIndex_; } + std::optional 
getSourceIndex() const { + if (!config_.sourceIsVariable()) { + return std::nullopt; + } + auto sourceVar = std::get(config_.sources_); + return variableColumns_.at(sourceVar).columnIndex_; + } + std::optional getTargetIndex() const { + if (!config_.targetIsVariable()) { + return std::nullopt; + } + auto targetVar = std::get(config_.targets_); + return variableColumns_.at(targetVar).columnIndex_; + } string getCacheKeyImpl() const override; string getDescriptor() const override; From ede7fd35f80a8d1b1162931e955a1c119333dfa1 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 1 Oct 2024 16:13:55 +0200 Subject: [PATCH 78/96] Fix source as variable case --- src/engine/PathSearch.cpp | 27 ++---- src/engine/PathSearch.h | 4 +- test/PathSearchTest.cpp | 172 ++++++++++++++++++++++++++++---------- 3 files changed, 136 insertions(+), 67 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 0bbf51a610..5e9d507e6d 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -40,20 +40,7 @@ std::vector BinSearchWrapper::outgoingEdes(const Id node) const { // _____________________________________________________________________________ std::span BinSearchWrapper::getSources() const { - auto startIds = table_.getColumn(startCol_); - std::vector sources; - - size_t index = 0; - Id lastId; - while (index < startIds.size()) { - lastId = startIds[index]; - sources.push_back(lastId); - while (lastId == startIds[index]) { - index++; - } - } - - return sources; + return table_.getColumn(startCol_); } // _____________________________________________________________________________ @@ -232,7 +219,7 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto buildingTime = timer.msecs(); timer.start(); - auto [sources, targets] = handleSearchSides(); + auto [sources, targets] = handleSearchSides(binSearch); timer.stop(); auto sideTime = timer.msecs(); @@ -268,7 +255,7 @@ VariableToColumnMap 
PathSearch::computeVariableToColumnMap() const { // _____________________________________________________________________________ std::pair, std::span> -PathSearch::handleSearchSides() const { +PathSearch::handleSearchSides(const BinSearchWrapper& binSearch) const { std::span sourceIds; std::span targetIds; @@ -283,7 +270,7 @@ PathSearch::handleSearchSides() const { sourceIds = sourceTree_.value()->getResult()->idTable().getColumn( sourceCol_.value()); } else if (config_.sourceIsVariable()) { - sourceIds = {}; + sourceIds = binSearch.getSources(); } else { sourceIds = std::get>(config_.sources_); } @@ -302,7 +289,7 @@ PathSearch::handleSearchSides() const { // _____________________________________________________________________________ std::vector PathSearch::findPaths( - const Id source, const std::unordered_set& targets, + const Id& source, const std::unordered_set& targets, const BinSearchWrapper& binSearch) const { std::vector edgeStack; Path currentPath; @@ -350,10 +337,6 @@ std::vector PathSearch::allPaths(std::span sources, std::vector paths; Path path; - if (sources.empty()) { - sources = binSearch.getSources(); - } - if (cartesian || sources.size() != targets.size()) { std::unordered_set targetSet; for (auto target : targets) { diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 928e412e02..df6865e9d7 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -245,13 +245,13 @@ class PathSearch : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; private: - std::pair, std::span> handleSearchSides() const; + std::pair, std::span> handleSearchSides(const pathSearch::BinSearchWrapper& binSearch) const; /** * @brief Finds paths based on the configured algorithm. * @return A vector of paths. 
*/ - std::vector findPaths(const Id source, + std::vector findPaths(const Id& source, const std::unordered_set& targets, const pathSearch::BinSearchWrapper& binSearch) const; diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index ed518c62ed..a79473454f 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -133,6 +133,68 @@ TEST(PathSearchTest, singlePathWithProperties) { ::testing::UnorderedElementsAreArray(expected)); } +TEST(PathSearchTest, singlePathAllSources) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(0)}, + {V(1), V(2), I(0), I(1), V(0)}, + {V(2), V(3), I(0), I(2), V(0)}, + {V(3), V(4), I(0), I(3), V(0)}, + {V(1), V(2), I(1), I(0), V(1)}, + {V(2), V(3), I(1), I(1), V(1)}, + {V(3), V(4), I(1), I(2), V(1)}, + {V(2), V(3), I(2), I(0), V(2)}, + {V(3), V(4), I(2), I(1), V(2)}, + {V(3), V(4), I(3), I(0), V(3)}, + }); + + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + Var{"?sources"}, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, singlePathAllTargets) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(1)}, + {V(0), V(1), I(1), I(0), V(2)}, + {V(1), V(2), I(1), I(1), V(2)}, + {V(0), V(1), I(2), I(0), V(3)}, + {V(1), V(2), I(2), I(1), V(3)}, + {V(2), V(3), I(2), I(2), V(3)}, + {V(0), V(1), I(3), I(0), V(4)}, + {V(1), V(2), I(3), I(1), V(4)}, + {V(2), V(3), I(3), I(2), V(4)}, + {V(3), V(4), I(3), I(3), V(4)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration 
config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + /** * Graph: * 0 @@ -212,17 +274,16 @@ TEST(PathSearchTest, twoPathsTwoTargets) { TEST(PathSearchTest, cycle) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}}); auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0)}, - {V(0), V(1), I(1), I(0)}, - {V(1), V(2), I(1), I(1)}, + {V(0), V(1), I(0), I(0), V(1)}, + {V(0), V(1), I(1), I(0), V(2)}, + {V(1), V(2), I(1), I(1), V(2)}, }); std::vector sources{V(0)}; - std::vector targets{}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, sources, - targets, + Var{"?targets"}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, @@ -245,19 +306,18 @@ TEST(PathSearchTest, cycle) { TEST(PathSearchTest, twoCycle) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}, {1, 3}, {3, 0}}); auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0)}, - {V(0), V(1), I(1), I(0)}, - {V(1), V(3), I(1), I(1)}, - {V(0), V(1), I(2), I(0)}, - {V(1), V(2), I(2), I(1)}, + {V(0), V(1), I(0), I(0), V(1)}, + {V(0), V(1), I(1), I(0), V(3)}, + {V(1), V(3), I(1), I(1), V(3)}, + {V(0), V(1), I(2), I(0), V(2)}, + {V(1), V(2), I(2), I(1), V(2)} }); std::vector sources{V(0)}; - std::vector targets{}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, sources, - targets, + Var{"?targets"}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, @@ -281,22 +341,21 @@ TEST(PathSearchTest, twoCycle) { TEST(PathSearchTest, allPaths) { auto sub = makeIdTableFromVector({{0, 1}, {0, 2}, {1, 3}, {2, 3}, {2, 4}}); auto expected = makeIdTableFromVector({ - {V(0), V(2), I(0), I(0)}, - {V(0), V(2), I(1), I(0)}, - 
{V(2), V(4), I(1), I(1)}, - {V(0), V(2), I(2), I(0)}, - {V(2), V(3), I(2), I(1)}, - {V(0), V(1), I(3), I(0)}, - {V(0), V(1), I(4), I(0)}, - {V(1), V(3), I(4), I(1)}, + {V(0), V(2), I(0), I(0), V(2)}, + {V(0), V(2), I(1), I(0), V(4)}, + {V(2), V(4), I(1), I(1), V(4)}, + {V(0), V(2), I(2), I(0), V(3)}, + {V(2), V(3), I(2), I(1), V(3)}, + {V(0), V(1), I(3), I(0), V(1)}, + {V(0), V(1), I(4), I(0), V(3)}, + {V(1), V(3), I(4), I(1), V(3)}, }); std::vector sources{V(0)}; - std::vector targets{}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, sources, - targets, + Var{"?targets"}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, @@ -315,24 +374,23 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { {2, 3, 40, 41}, {2, 4, 50, 51}}); auto expected = makeIdTableFromVector({ - {V(0), V(2), I(0), I(0), V(31), V(30)}, - {V(0), V(2), I(1), I(0), V(31), V(30)}, - {V(2), V(4), I(1), I(1), V(51), V(50)}, - {V(0), V(2), I(2), I(0), V(31), V(30)}, - {V(2), V(3), I(2), I(1), V(41), V(40)}, - {V(0), V(1), I(3), I(0), V(11), V(10)}, - {V(0), V(1), I(4), I(0), V(11), V(10)}, - {V(1), V(3), I(4), I(1), V(21), V(20)}, + {V(0), V(2), I(0), I(0), V(2), V(31), V(30)}, + {V(0), V(2), I(1), I(0), V(4), V(31), V(30)}, + {V(2), V(4), I(1), I(1), V(4), V(51), V(50)}, + {V(0), V(2), I(2), I(0), V(3), V(31), V(30)}, + {V(2), V(3), I(2), I(1), V(3), V(41), V(40)}, + {V(0), V(1), I(3), I(0), V(1), V(11), V(10)}, + {V(0), V(1), I(4), I(0), V(3), V(11), V(10)}, + {V(1), V(3), I(4), I(1), V(3), V(21), V(20)}, }); std::vector sources{V(0)}; - std::vector targets{}; Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty1"}, Variable{"?edgeProperty2"}}; PathSearchConfiguration config{ PathSearchAlgorithm::ALL_PATHS, sources, - targets, + Var{"?targets"}, Var{"?start"}, Var{"?end"}, Var{"?edgeIndex"}, @@ -352,25 +410,53 @@ TEST(PathSearchTest, allPathsWithPropertiesSwitched) { * | \ * 1->2->3 */ -TEST(PathSearchTest, 
allPathsPartial) { +TEST(PathSearchTest, allPathsPartialAllTargets) { auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 2}, {2, 3}}); auto expected = makeIdTableFromVector({ - {V(0), V(2), I(0), I(0)}, - {V(0), V(2), I(1), I(0)}, - {V(2), V(3), I(1), I(1)}, - {V(0), V(1), I(2), I(0)}, - {V(0), V(1), I(3), I(0)}, - {V(1), V(2), I(3), I(1)}, - {V(0), V(1), I(4), I(0)}, - {V(1), V(2), I(4), I(1)}, - {V(2), V(3), I(4), I(2)}, + {V(0), V(2), I(0), I(0), V(2)}, + {V(0), V(2), I(1), I(0), V(3)}, + {V(2), V(3), I(1), I(1), V(3)}, + {V(0), V(1), I(2), I(0), V(1)}, + {V(0), V(1), I(3), I(0), V(2)}, + {V(1), V(2), I(3), I(1), V(2)}, + {V(0), V(1), I(4), I(0), V(3)}, + {V(1), V(2), I(4), I(1), V(3)}, + {V(2), V(3), I(4), I(2), V(3)}, }); std::vector sources{V(0)}; - std::vector targets{}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, allPathsPartialAllSources) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 2}, {2, 3}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0), V(0)}, + {V(2), V(3), I(0), I(1), V(0)}, + {V(0), V(1), I(1), I(0), V(0)}, + {V(1), V(2), I(1), I(1), V(0)}, + {V(2), V(3), I(1), I(2), V(0)}, + {V(1), V(2), I(2), I(0), V(1)}, + {V(2), V(3), I(2), I(1), V(1)}, + {V(2), V(3), I(3), I(0), V(2)}, + }); + + std::vector targets{V(3)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + Var{"?sources"}, targets, Var{"?start"}, Var{"?end"}, From e91adf21bafd17f5c5fbc05f77bdaadecd9478b5 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 2 Oct 2024 11:49:04 +0200 Subject: [PATCH 79/96] Fixed lifetime 
issue with empty sources --- src/engine/PathSearch.cpp | 37 +++++++++++++++++++++++++++++++------ src/engine/PathSearch.h | 4 ++-- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 5e9d507e6d..1214b28bfd 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -5,6 +5,8 @@ #include "PathSearch.h" #include +#include +#include #include #include #include @@ -39,8 +41,25 @@ std::vector BinSearchWrapper::outgoingEdes(const Id node) const { } // _____________________________________________________________________________ -std::span BinSearchWrapper::getSources() const { - return table_.getColumn(startCol_); +std::vector BinSearchWrapper::getSources() const { + auto startIds = table_.getColumn(startCol_); + // std::vector sources; + // std::ranges::unique_copy(startIds, std::back_inserter(sources)); + // + // return sources; auto startIds = table_.getColumn(startCol_); + std::vector sources; + + size_t index = 0; + Id lastId; + while (index < startIds.size()) { + lastId = startIds[index]; + sources.push_back(lastId); + while (lastId == startIds[index]) { + index++; + } + } + + return sources; } // _____________________________________________________________________________ @@ -219,13 +238,19 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto buildingTime = timer.msecs(); timer.start(); - auto [sources, targets] = handleSearchSides(binSearch); + auto [sources, targets] = handleSearchSides(); timer.stop(); auto sideTime = timer.msecs(); timer.start(); - auto paths = allPaths(sources, targets, binSearch, config_.cartesian_); + std::vector paths; + if (sources.empty()) { + paths = allPaths(binSearch.getSources(), targets, binSearch, config_.cartesian_); + } else { + paths = allPaths(sources, targets, binSearch, config_.cartesian_); + } + timer.stop(); auto searchTime = timer.msecs(); @@ -255,7 +280,7 @@ VariableToColumnMap 
PathSearch::computeVariableToColumnMap() const { // _____________________________________________________________________________ std::pair, std::span> -PathSearch::handleSearchSides(const BinSearchWrapper& binSearch) const { +PathSearch::handleSearchSides() const { std::span sourceIds; std::span targetIds; @@ -270,7 +295,7 @@ PathSearch::handleSearchSides(const BinSearchWrapper& binSearch) const { sourceIds = sourceTree_.value()->getResult()->idTable().getColumn( sourceCol_.value()); } else if (config_.sourceIsVariable()) { - sourceIds = binSearch.getSources(); + sourceIds = {}; } else { sourceIds = std::get>(config_.sources_); } diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index df6865e9d7..b949698d52 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -74,7 +74,7 @@ class BinSearchWrapper { * the search starts from all possible sources (i.e. all * start nodes). Returns only unique start nodes. */ - std::span getSources() const; + std::vector getSources() const; private: Edge makeEdgeFromRow(size_t row) const; @@ -245,7 +245,7 @@ class PathSearch : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; private: - std::pair, std::span> handleSearchSides(const pathSearch::BinSearchWrapper& binSearch) const; + std::pair, std::span> handleSearchSides() const; /** * @brief Finds paths based on the configured algorithm. 
From dcc5925e6a15f9427db1fbc9df8d701009504d86 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 2 Oct 2024 12:24:13 +0200 Subject: [PATCH 80/96] Improved error message --- src/parser/sparqlParser/SparqlQleverVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index a78b0d2195..37f3a1e4ac 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -731,7 +731,7 @@ GraphPatternOperation Visitor::visitPathQuery( throw parsedQuery::PathSearchException( "Unsupported element in pathSearch." "PathQuery may only consist of triples for configuration" - "And a subquery specifying edges."); + "And a { group graph pattern } specifying edges."); } }; From 54d40a9284d081266ab7056271f1eaef9a5c8b6b Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 2 Oct 2024 12:24:26 +0200 Subject: [PATCH 81/96] Added allocator at important points --- src/engine/PathSearch.cpp | 16 ++++++++-------- src/engine/PathSearch.h | 13 +++++++++---- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 1214b28bfd..e6308edd83 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -244,7 +244,7 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { auto sideTime = timer.msecs(); timer.start(); - std::vector paths; + PathsLimited paths{allocator()}; if (sources.empty()) { paths = allPaths(binSearch.getSources(), targets, binSearch, config_.cartesian_); } else { @@ -313,13 +313,13 @@ PathSearch::handleSearchSides() const { } // _____________________________________________________________________________ -std::vector PathSearch::findPaths( +PathsLimited PathSearch::findPaths( const Id& source, const std::unordered_set& targets, const BinSearchWrapper& binSearch) const { std::vector edgeStack; - Path 
currentPath; + Path currentPath{EdgesLimited(allocator())}; std::unordered_map> pathCache; - std::vector result; + PathsLimited result{allocator()}; std::unordered_set visited; visited.insert(source.getBits()); @@ -355,12 +355,12 @@ std::vector PathSearch::findPaths( } // _____________________________________________________________________________ -std::vector PathSearch::allPaths(std::span sources, +PathsLimited PathSearch::allPaths(std::span sources, std::span targets, const BinSearchWrapper& binSearch, bool cartesian) const { - std::vector paths; - Path path; + PathsLimited paths{allocator()}; + Path path{EdgesLimited(allocator())}; if (cartesian || sources.size() != targets.size()) { std::unordered_set targetSet; @@ -386,7 +386,7 @@ std::vector PathSearch::allPaths(std::span sources, // _____________________________________________________________________________ template void PathSearch::pathsToResultTable(IdTable& tableDyn, - std::vector& paths) const { + PathsLimited& paths) const { IdTableStatic table = std::move(tableDyn).toStatic(); std::vector edgePropertyCols; diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index b949698d52..fef63faa09 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -12,6 +12,7 @@ #include "engine/Operation.h" #include "global/Id.h" +#include "util/AllocatorWithLimit.h" enum class PathSearchAlgorithm { ALL_PATHS }; @@ -30,8 +31,10 @@ struct Edge { std::vector edgeProperties_; }; +using EdgesLimited = std::vector>; + struct Path { - std::vector edges_; + EdgesLimited edges_; bool empty() const { return edges_.empty(); } @@ -44,6 +47,8 @@ struct Path { const Id& end() { return edges_.back().end_; } }; +using PathsLimited = std::vector>; + /** * @class BinSearchWrapper * @brief Encapsulates logic for binary search of edges in @@ -251,7 +256,7 @@ class PathSearch : public Operation { * @brief Finds paths based on the configured algorithm. * @return A vector of paths. 
*/ - std::vector findPaths(const Id& source, + pathSearch::PathsLimited findPaths(const Id& source, const std::unordered_set& targets, const pathSearch::BinSearchWrapper& binSearch) const; @@ -259,7 +264,7 @@ class PathSearch : public Operation { * @brief Finds all paths in the graph. * @return A vector of all paths. */ - std::vector allPaths(std::span sources, + pathSearch::PathsLimited allPaths(std::span sources, std::span targets, const pathSearch::BinSearchWrapper& binSearch, bool cartesian) const; @@ -270,5 +275,5 @@ class PathSearch : public Operation { * @param paths The vector of paths to convert. */ template - void pathsToResultTable(IdTable& tableDyn, std::vector& paths) const; + void pathsToResultTable(IdTable& tableDyn, pathSearch::PathsLimited& paths) const; }; From d26da4d2d71c9ff431363f688395dd97c89426a8 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 2 Oct 2024 12:25:49 +0200 Subject: [PATCH 82/96] Format fix --- src/engine/PathSearch.cpp | 20 ++++++++++---------- src/engine/PathSearch.h | 15 ++++++++------- src/engine/QueryPlanner.cpp | 2 +- src/parser/GraphPatternOperation.h | 3 ++- test/PathSearchTest.cpp | 12 +++++------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index e6308edd83..873556099b 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -4,9 +4,9 @@ #include "PathSearch.h" -#include #include #include +#include #include #include #include @@ -246,12 +246,12 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { PathsLimited paths{allocator()}; if (sources.empty()) { - paths = allPaths(binSearch.getSources(), targets, binSearch, config_.cartesian_); + paths = allPaths(binSearch.getSources(), targets, binSearch, + config_.cartesian_); } else { paths = allPaths(sources, targets, binSearch, config_.cartesian_); } - timer.stop(); auto searchTime = timer.msecs(); timer.start(); @@ -313,9 +313,9 @@ 
PathSearch::handleSearchSides() const { } // _____________________________________________________________________________ -PathsLimited PathSearch::findPaths( - const Id& source, const std::unordered_set& targets, - const BinSearchWrapper& binSearch) const { +PathsLimited PathSearch::findPaths(const Id& source, + const std::unordered_set& targets, + const BinSearchWrapper& binSearch) const { std::vector edgeStack; Path currentPath{EdgesLimited(allocator())}; std::unordered_map> pathCache; @@ -356,9 +356,9 @@ PathsLimited PathSearch::findPaths( // _____________________________________________________________________________ PathsLimited PathSearch::allPaths(std::span sources, - std::span targets, - const BinSearchWrapper& binSearch, - bool cartesian) const { + std::span targets, + const BinSearchWrapper& binSearch, + bool cartesian) const { PathsLimited paths{allocator()}; Path path{EdgesLimited(allocator())}; @@ -390,7 +390,7 @@ void PathSearch::pathsToResultTable(IdTable& tableDyn, IdTableStatic table = std::move(tableDyn).toStatic(); std::vector edgePropertyCols; - for (const auto& edgeVar: config_.edgeProperties_) { + for (const auto& edgeVar : config_.edgeProperties_) { auto edgePropertyCol = variableColumns_.at(edgeVar).columnIndex_; edgePropertyCols.push_back(edgePropertyCol); } diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index fef63faa09..422fd233c6 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -256,17 +256,17 @@ class PathSearch : public Operation { * @brief Finds paths based on the configured algorithm. * @return A vector of paths. */ - pathSearch::PathsLimited findPaths(const Id& source, - const std::unordered_set& targets, - const pathSearch::BinSearchWrapper& binSearch) const; + pathSearch::PathsLimited findPaths( + const Id& source, const std::unordered_set& targets, + const pathSearch::BinSearchWrapper& binSearch) const; /** * @brief Finds all paths in the graph. * @return A vector of all paths. 
*/ - pathSearch::PathsLimited allPaths(std::span sources, - std::span targets, - const pathSearch::BinSearchWrapper& binSearch, bool cartesian) const; + pathSearch::PathsLimited allPaths( + std::span sources, std::span targets, + const pathSearch::BinSearchWrapper& binSearch, bool cartesian) const; /** * @brief Converts paths to a result table with a specified width. @@ -275,5 +275,6 @@ class PathSearch : public Operation { * @param paths The vector of paths to convert. */ template - void pathsToResultTable(IdTable& tableDyn, pathSearch::PathsLimited& paths) const; + void pathsToResultTable(IdTable& tableDyn, + pathSearch::PathsLimited& paths) const; }; diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 5e53391061..fcbc37a278 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -36,9 +36,9 @@ #include "engine/TransitivePathBase.h" #include "engine/Union.h" #include "engine/Values.h" -#include "global/Id.h" #include "engine/sparqlExpressions/LiteralExpression.h" #include "engine/sparqlExpressions/RelationalExpressions.h" +#include "global/Id.h" #include "parser/Alias.h" #include "parser/SparqlParserHelpers.h" #include "util/Exception.h" diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index 0673a6b53f..6367d4e510 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -148,7 +148,8 @@ class PathSearchException : public std::exception { std::string message_; public: - explicit PathSearchException(const std::string& message) : message_(message) {} + explicit PathSearchException(const std::string& message) + : message_(message) {} const char* what() const noexcept override { return message_.data(); } }; diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index a79473454f..326495b58d 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -305,13 +305,11 @@ TEST(PathSearchTest, cycle) { */ TEST(PathSearchTest, twoCycle) { 
auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}, {1, 3}, {3, 0}}); - auto expected = makeIdTableFromVector({ - {V(0), V(1), I(0), I(0), V(1)}, - {V(0), V(1), I(1), I(0), V(3)}, - {V(1), V(3), I(1), I(1), V(3)}, - {V(0), V(1), I(2), I(0), V(2)}, - {V(1), V(2), I(2), I(1), V(2)} - }); + auto expected = makeIdTableFromVector({{V(0), V(1), I(0), I(0), V(1)}, + {V(0), V(1), I(1), I(0), V(3)}, + {V(1), V(3), I(1), I(1), V(3)}, + {V(0), V(1), I(2), I(0), V(2)}, + {V(1), V(2), I(2), I(1), V(2)}}); std::vector sources{V(0)}; Vars vars = {Variable{"?start"}, Variable{"?end"}}; From 5c9c8c0a5d2e1a562c8307afb9a0f06c382ac365 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 2 Oct 2024 16:10:45 +0200 Subject: [PATCH 83/96] Fix lifetime issue for certain platforms --- src/engine/PathSearch.cpp | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 873556099b..f953e44857 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -43,21 +43,8 @@ std::vector BinSearchWrapper::outgoingEdes(const Id node) const { // _____________________________________________________________________________ std::vector BinSearchWrapper::getSources() const { auto startIds = table_.getColumn(startCol_); - // std::vector sources; - // std::ranges::unique_copy(startIds, std::back_inserter(sources)); - // - // return sources; auto startIds = table_.getColumn(startCol_); std::vector sources; - - size_t index = 0; - Id lastId; - while (index < startIds.size()) { - lastId = startIds[index]; - sources.push_back(lastId); - while (lastId == startIds[index]) { - index++; - } - } + std::ranges::unique_copy(startIds, std::back_inserter(sources)); return sources; } @@ -245,12 +232,12 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { timer.start(); PathsLimited paths{allocator()}; + std::vector allSources; if (sources.empty()) { - paths = 
allPaths(binSearch.getSources(), targets, binSearch, - config_.cartesian_); - } else { - paths = allPaths(sources, targets, binSearch, config_.cartesian_); + allSources = binSearch.getSources(); + sources = allSources; } + paths = allPaths(sources, targets, binSearch, config_.cartesian_); timer.stop(); auto searchTime = timer.msecs(); From 73f4ead7c249fa0fb893c85c7f18dbfb174ca9e4 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 4 Oct 2024 09:54:44 +0200 Subject: [PATCH 84/96] Added tests for multi source query planning --- test/QueryPlannerTest.cpp | 103 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index f1c355d6cc..1cd71832da 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -840,6 +840,109 @@ TEST(QueryPlanner, PathSearchMultipleTargets) { h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); } +TEST(QueryPlanner, PathSearchMultipleSourcesAndTargets) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId(""), getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + +TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsCartesian) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId(""), getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:cartesian true;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} +TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsNonCartesian) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId(""), getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}, false}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:cartesian false;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + TEST(QueryPlanner, PathSearchWithEdgeProperties) { auto scan = h::IndexScanFromStrings; auto join = h::Join; From 57ffd2ac149162220e8b4fd80bde77c2b8cf39af Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 4 Oct 2024 10:03:52 +0200 Subject: [PATCH 85/96] format fix --- test/QueryPlannerTest.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 1cd71832da..9c101fdc69 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -842,7 +842,8 @@ TEST(QueryPlanner, PathSearchMultipleTargets) { TEST(QueryPlanner, PathSearchMultipleSourcesAndTargets) { auto scan = h::IndexScanFromStrings; - auto qec = ad_utility::testing::getQec("

.

.

"); + auto qec = + ad_utility::testing::getQec("

.

.

"); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); std::vector sources{getId(""), getId("")}; @@ -876,7 +877,8 @@ TEST(QueryPlanner, PathSearchMultipleSourcesAndTargets) { TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsCartesian) { auto scan = h::IndexScanFromStrings; - auto qec = ad_utility::testing::getQec("

.

.

"); + auto qec = + ad_utility::testing::getQec("

.

.

"); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); std::vector sources{getId(""), getId("")}; @@ -910,7 +912,8 @@ TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsCartesian) { } TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsNonCartesian) { auto scan = h::IndexScanFromStrings; - auto qec = ad_utility::testing::getQec("

.

.

"); + auto qec = + ad_utility::testing::getQec("

.

.

"); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); std::vector sources{getId(""), getId("")}; @@ -922,7 +925,8 @@ TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsNonCartesian) { Variable("?end"), Variable("?path"), Variable("?edge"), - {}, false}; + {}, + false}; h::expect( "PREFIX pathSearch: " "SELECT ?start ?end ?path ?edge WHERE {" From 7aaee345d7127834f9ea7d339fbbe4b3607e1195 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 4 Oct 2024 10:13:32 +0200 Subject: [PATCH 86/96] Sonar fixes --- src/engine/PathSearch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 422fd233c6..a43786e75b 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -186,14 +186,14 @@ class PathSearch : public Operation { if (!config_.sourceIsVariable()) { return std::nullopt; } - auto sourceVar = std::get(config_.sources_); + const auto& sourceVar = std::get(config_.sources_); return variableColumns_.at(sourceVar).columnIndex_; } std::optional getTargetIndex() const { if (!config_.targetIsVariable()) { return std::nullopt; } - auto targetVar = std::get(config_.targets_); + const auto& targetVar = std::get(config_.targets_); return variableColumns_.at(targetVar).columnIndex_; } From 8cb4c0404afc957eb29ff0dc6ab90869391a9764 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 4 Oct 2024 12:14:01 +0200 Subject: [PATCH 87/96] Added PathSearch documentation --- docs/path_search.md | 290 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 290 insertions(+) create mode 100644 docs/path_search.md diff --git a/docs/path_search.md b/docs/path_search.md new file mode 100644 index 0000000000..10ae4e0f51 --- /dev/null +++ b/docs/path_search.md @@ -0,0 +1,290 @@ +# Path Search Feature Documentation for SPARQL Engine + +## Overview + +The Path Search feature in this SPARQL engine allows users to perform advanced queries +to find paths between sources and targets in a 
graph. It supports a variety of configurations, +including single or multiple source and target nodes, optional edge properties, and +custom algorithms for path discovery. This feature is accessed using the `SERVICE` keyword +and the service IRI ``. + +## Basic Syntax + +The general structure of a Path Search query is as follows: + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; # Specify the algorithm + pathSearch:source ; # Specify the source node(s) + pathSearch:target ; # Specify the target node(s) + pathSearch:pathColumn ?path ; # Bind the path variable + pathSearch:edgeColumn ?edge ; # Bind the edge variable + pathSearch:start ?start ; # Bind the edge start variable + pathSearch:end ?end ; # Bind the edge end variable + {SELECT * WHERE { + ?start ?end. # Define the edge pattern + }} + } +} +``` + +### Parameters + +- **pathSearch:algorithm**: Defines the algorithm used to search paths. Currently, only `pathSearch:allPaths` is supported. +- **pathSearch:source**: Defines the source node(s) of the search. +- **pathSearch:target** (optional): Defines the target node(s) of the search. +- **pathSearch:pathColumn**: Defines the variable for the path. +- **pathSearch:edgeColumn**: Defines the variable for the edge. +- **pathSearch:start**: Defines the variable for the start of the edges. +- **pathSearch:end**: Defines the variable for the end of the edges. +- **pathSearch:edgeProperty** (optional): Specifies properties for the edges in the path. +- **pathSearch:cartesian** (optional): Controls the behaviour of path searches between + source and target nodes. Expects a boolean. The default is `true`. + - If set to `true`, the search will compute the paths from each source to **all targets** + - If set to `false`, the search will compute the paths from each source to exactly + **one target**. Sources and targets are paired based on their index (i.e. 
the paths + from the first source to the first target are searched, then the second source and + target, and so on). + + +### Example 1: Single Source and Target + +The simplest case is searching for paths between a single source and a single target: + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start ?end. + } + } + } +} +``` + +### Example 2: Multiple Sources or Targets + +It is possible to specify a set of sources or targets for the path search. + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:source ; + pathSearch:target ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start ?end. + } + } + } +} +``` + +This query will search for all paths between all sources and all targets, i.e. +- (``, ``) +- (``, ``) +- (``, ``) +- (``, ``) + +It is possible to specify whether the sources and targets should be combined according +to the cartesian product (as seen above) or if they should be matched up pairwise, i.e. +- (``, ``) +- (``, ``) + +This can be done with the parameter `pathSearch:cartesian`. This parameter expects a +boolean. If set to `true`, then the cartesian product is used to match the sources with +the targets. +If set to `false`, then the sources and targets are matched pairwise. If left +unspecified, then the default `true` is used. 
+ +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:source ; + pathSearch:target ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + pathSearch:cartesian false; + { + SELECT * WHERE { + ?start ?end. + } + } + } +} +``` + +### Example 3: Edge Properties + +You can also include edge properties in the path search to further refine the results: + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:edgeProperty ?middle ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start ?middle. + ?middle ?end. + } + } + } +} +``` + +This is especially useful for [N-ary relations](https://www.w3.org/TR/swbp-n-aryRelations/). +Considering the example above, it is possible to query additional relations of `?middle`: + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:edgeProperty ?middle ; + pathSearch:edgeProperty ?edgeInfo ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start ?middle. + ?middle ?end. + ?middle ?edgeInfo. + } + } + } +} +``` + +This makes it possible to query additional properties of the edge between `?start` and `?end` (such as `?edgeInfo` in the example above). + + +### Example 4: Source or Target as Variables + +You can also bind the source and/or target dynamically using variables. 
The examples +below use `VALUES` clauses, which can be convenient to specify sources and targets. +However, the source/target variables can also be bound using any regular SPARQL construct. + +#### Source Variable + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + VALUES ?source {} + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ?source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start

?end. + } + } + } +} +``` + +#### Target Variable + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + VALUES ?target {} + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ?target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start

?end. + } + } + } +} +``` + +## Error Handling + +The Path Search feature will throw errors in the following scenarios: + +- **Missing Start Parameter**: If the `start` parameter is not specified, an error will be raised. +- **Multiple Start or End Variables**: If multiple `start` or `end` variables are defined, an error is raised. +- **Invalid Non-Variable Start/End**: If the `start` or `end` parameter is not bound to a variable, the query will fail. +- **Unsupported Argument**: Arguments other than those listed (like custom user arguments) will cause an error. +- **Non-IRI Predicate**: Predicates must be IRIs. If not, an error will occur. + +### Example: Missing Start Parameter + +```sparql +PREFIX pathSearch: +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:end ?end ; # Missing start + { + SELECT * WHERE { + ?start

?end. + } + } + } +} +``` + +This query would fail with a "Missing parameter 'start'" error. + From bf2f3a4130fe7a708e3ae3bcaaa3f74b356c500c Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 4 Oct 2024 18:07:37 +0200 Subject: [PATCH 88/96] Added PathTests for bound path search --- test/PathSearchTest.cpp | 107 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp index 326495b58d..da8bd31c94 100644 --- a/test/PathSearchTest.cpp +++ b/test/PathSearchTest.cpp @@ -613,3 +613,110 @@ TEST(PathSearchTest, multiSourceMultiTargetallPathsNotCartesian) { ASSERT_THAT(resultTable.idTable(), ::testing::UnorderedElementsAreArray(expected)); } + +TEST(PathSearchTest, sourceBound) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto sourceTable = makeIdTableFromVector({{0}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(0)}, + {V(1), V(2), I(0), I(1), V(0)}, + {V(2), V(3), I(0), I(2), V(0)}, + {V(3), V(4), I(0), I(3), V(0)}, + }); + + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + Var{"?source"}, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), vars); + auto pathSearch = PathSearch(qec, std::move(subtree), std::move(config)); + + Vars sourceTreeVars = {Var{"?source"}}; + auto sourceTree = ad_utility::makeExecutionTree( + qec, std::move(sourceTable), sourceTreeVars); + pathSearch.bindSourceSide(sourceTree, 0); + + auto resultTable = pathSearch.computeResult(false); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, targetBound) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto targetTable = 
makeIdTableFromVector({{4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(4)}, + {V(1), V(2), I(0), I(1), V(4)}, + {V(2), V(3), I(0), I(2), V(4)}, + {V(3), V(4), I(0), I(3), V(4)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?target"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), vars); + auto pathSearch = PathSearch(qec, std::move(subtree), std::move(config)); + + Vars targetTreeVars = {Var{"?target"}}; + auto targetTree = ad_utility::makeExecutionTree( + qec, std::move(targetTable), targetTreeVars); + pathSearch.bindTargetSide(targetTree, 0); + + auto resultTable = pathSearch.computeResult(false); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, sourceAndTargetBound) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto sideTable = makeIdTableFromVector({{0, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(0), V(4)}, + {V(1), V(2), I(0), I(1), V(0), V(4)}, + {V(2), V(3), I(0), I(2), V(0), V(4)}, + {V(3), V(4), I(0), I(3), V(0), V(4)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + Var{"?source"}, + Var{"?target"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), vars); + auto pathSearch = PathSearch(qec, std::move(subtree), std::move(config)); + + Vars sideTreeVars = {Var{"?source"}, Var{"?target"}}; + auto sideTree = ad_utility::makeExecutionTree( + qec, std::move(sideTable), sideTreeVars); + pathSearch.bindSourceAndTargetSide(sideTree, 0, 1); + + auto 
resultTable = pathSearch.computeResult(false); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} From bc4732f2daf159a531f16cbec357ed513fb7360c Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 15 Oct 2024 11:53:47 +0200 Subject: [PATCH 89/96] Fixed merge and format --- src/engine/CMakeLists.txt | 4 ++-- test/QueryPlannerTest.cpp | 2 +- test/QueryPlannerTestHelpers.h | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 85537cee0f..7540b1bbac 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -12,7 +12,7 @@ add_library(engine Values.cpp Bind.cpp Minus.cpp RuntimeInformation.cpp CheckUsePatternTrick.cpp VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp - TextLimit.cpp LocalVocabEntry.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp - SpatialJoin.cpp CountConnectedSubgraphs.cpp PathSearch.cpp) + TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp + CountConnectedSubgraphs.cpp PathSearch.cpp) qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2) diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 95879fc0cf..1a0a01dbcf 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -6,8 +6,8 @@ #include "QueryPlannerTestHelpers.h" #include "engine/QueryPlanner.h" -#include "parser/GraphPatternOperation.h" #include "engine/SpatialJoin.h" +#include "parser/GraphPatternOperation.h" #include "parser/SparqlParser.h" #include "parser/data/Variable.h" #include "util/TripleComponentTestHelpers.h" diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index 63e6b20376..98a8db78d6 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -7,9 +7,8 @@ #include #include -#include - 
#include +#include #include "./util/GTestHelpers.h" #include "engine/Bind.h" From dd76bdb5e28567b9ba156dff65ae30e7f2022a1b Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 15 Oct 2024 13:12:14 +0200 Subject: [PATCH 90/96] Added allocators to visited and path cache --- src/engine/PathSearch.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index f953e44857..ae86ba1c9c 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -5,6 +5,7 @@ #include "PathSearch.h" #include +#include #include #include #include @@ -14,6 +15,7 @@ #include "engine/CallFixedSize.h" #include "engine/QueryExecutionTree.h" #include "engine/VariableToColumnMap.h" +#include "util/AllocatorWithLimit.h" using namespace pathSearch; @@ -305,9 +307,14 @@ PathsLimited PathSearch::findPaths(const Id& source, const BinSearchWrapper& binSearch) const { std::vector edgeStack; Path currentPath{EdgesLimited(allocator())}; - std::unordered_map> pathCache; + std::unordered_map< + uint64_t, PathsLimited, std::hash, std::equal_to, + ad_utility::AllocatorWithLimit>> + pathCache{allocator()}; PathsLimited result{allocator()}; - std::unordered_set visited; + std::unordered_set, std::equal_to, + ad_utility::AllocatorWithLimit> + visited{allocator()}; visited.insert(source.getBits()); for (auto edge : binSearch.outgoingEdes(source)) { From 42be542995c7e7c7b877a6318d0bb40f1ec528be Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 15 Oct 2024 13:16:41 +0200 Subject: [PATCH 91/96] Added cancellation checks --- src/engine/PathSearch.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index ae86ba1c9c..f3dc646aac 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -322,6 +322,7 @@ PathsLimited PathSearch::findPaths(const Id& source, } while (!edgeStack.empty()) { + checkCancellation(); auto edge = edgeStack.back(); 
edgeStack.pop_back(); @@ -404,6 +405,7 @@ void PathSearch::pathsToResultTable(IdTable& tableDyn, } for (size_t edgeIndex = 0; edgeIndex < path.size(); edgeIndex++) { + checkCancellation(); auto edge = path.edges_[edgeIndex]; table.emplace_back(); table(rowIndex, getStartIndex()) = edge.start_; From dcdeb033fb372471a5cbf618e609edbfc0202de2 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 15 Oct 2024 14:02:56 +0200 Subject: [PATCH 92/96] Fixed test --- test/QueryPlannerTest.cpp | 6 +++--- test/QueryPlannerTestHelpers.h | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 1a0a01dbcf..bd10b3534a 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -998,7 +998,7 @@ TEST(QueryPlanner, PathSearchWithEdgeProperties) { TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { auto scan = h::IndexScanFromStrings; - auto join = h::Join; + auto join = h::UnorderedJoins; auto qec = ad_utility::testing::getQec( " ." " ." @@ -1041,8 +1041,8 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { h::PathSearch( config, true, true, h::Sort(join(scan("?start", "", "?middle"), - join(scan("?middle", "", "?middleAttribute"), - scan("?middle", "", "?end"))))), + scan("?middle", "", "?middleAttribute"), + scan("?middle", "", "?end")))), qec); } diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index 98a8db78d6..2457284c87 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -315,8 +315,7 @@ inline auto PathSearch = [](PathSearchConfiguration config, bool sourceBound, bool targetBound, const std::same_as auto&... 
childMatchers) { return RootOperation<::PathSearch>(AllOf( - Property("getChildren", &Operation::getChildren, - ElementsAre(Pointee(childMatchers)...)), + children(childMatchers...), AD_PROPERTY(PathSearch, getConfig, PathSearchConfigMatcher(config)), AD_PROPERTY(PathSearch, isSourceBound, Eq(sourceBound)), AD_PROPERTY(PathSearch, isTargetBound, Eq(targetBound)))); From 79a8bcda44021560e770c95aef745732e22d4b0b Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 15 Oct 2024 14:05:18 +0200 Subject: [PATCH 93/96] format fix --- test/QueryPlannerTest.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index bd10b3534a..029c3ad4ee 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -1038,11 +1038,10 @@ TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { "?middle ?middleAttribute." "?middle ?end." "}}}}", - h::PathSearch( - config, true, true, - h::Sort(join(scan("?start", "", "?middle"), - scan("?middle", "", "?middleAttribute"), - scan("?middle", "", "?end")))), + h::PathSearch(config, true, true, + h::Sort(join(scan("?start", "", "?middle"), + scan("?middle", "", "?middleAttribute"), + scan("?middle", "", "?end")))), qec); } From c710553c81a5e5dffd172e61d12519520a2ac601 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 16 Oct 2024 00:28:10 +0200 Subject: [PATCH 94/96] Replaced std::vector in Edge with row index --- src/engine/PathSearch.cpp | 26 +++++++++++++++++--------- src/engine/PathSearch.h | 8 +++++--- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index f3dc646aac..af80a3769a 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -51,15 +51,22 @@ std::vector BinSearchWrapper::getSources() const { return sources; } +// _____________________________________________________________________________ +std::vector 
BinSearchWrapper::getEdgeProperties(const Edge& edge) const { + std::vector edgeProperties; + for (auto edgeCol : edgeCols_) { + edgeProperties.push_back(table_(edge.edgeRow_, edgeCol)); + } + return edgeProperties; +} + // _____________________________________________________________________________ Edge BinSearchWrapper::makeEdgeFromRow(size_t row) const { Edge edge; edge.start_ = table_(row, startCol_); edge.end_ = table_(row, endCol_); + edge.edgeRow_ = row; - for (auto edgeCol : edgeCols_) { - edge.edgeProperties_.push_back(table_(row, edgeCol)); - } return edge; } @@ -246,7 +253,8 @@ Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { timer.start(); CALL_FIXED_SIZE(std::array{getResultWidth()}, - &PathSearch::pathsToResultTable, this, idTable, paths); + &PathSearch::pathsToResultTable, this, idTable, paths, + binSearch); timer.stop(); auto fillTime = timer.msecs(); @@ -380,8 +388,8 @@ PathsLimited PathSearch::allPaths(std::span sources, // _____________________________________________________________________________ template -void PathSearch::pathsToResultTable(IdTable& tableDyn, - PathsLimited& paths) const { +void PathSearch::pathsToResultTable(IdTable& tableDyn, PathsLimited& paths, + const BinSearchWrapper& binSearch) const { IdTableStatic table = std::move(tableDyn).toStatic(); std::vector edgePropertyCols; @@ -421,11 +429,11 @@ void PathSearch::pathsToResultTable(IdTable& tableDyn, table(rowIndex, getTargetIndex().value()) = targetId.value(); } + auto edgeProperties = binSearch.getEdgeProperties(edge); for (size_t edgePropertyIndex = 0; - edgePropertyIndex < edge.edgeProperties_.size(); - edgePropertyIndex++) { + edgePropertyIndex < edgeProperties.size(); edgePropertyIndex++) { table(rowIndex, edgePropertyCols[edgePropertyIndex]) = - edge.edgeProperties_[edgePropertyIndex]; + edgeProperties[edgePropertyIndex]; } rowIndex++; diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index a43786e75b..9e330d1d4e 100644 --- 
a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -28,7 +28,7 @@ struct Edge { Id end_; - std::vector edgeProperties_; + size_t edgeRow_; }; using EdgesLimited = std::vector>; @@ -81,6 +81,8 @@ class BinSearchWrapper { */ std::vector getSources() const; + std::vector getEdgeProperties(const Edge& edge) const; + private: Edge makeEdgeFromRow(size_t row) const; }; @@ -275,6 +277,6 @@ class PathSearch : public Operation { * @param paths The vector of paths to convert. */ template - void pathsToResultTable(IdTable& tableDyn, - pathSearch::PathsLimited& paths) const; + void pathsToResultTable(IdTable& tableDyn, pathSearch::PathsLimited& paths, + const pathSearch::BinSearchWrapper& binSearch) const; }; From 08ef8744d131253c29989c61b397c6338467d878 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 16 Oct 2024 02:20:28 +0200 Subject: [PATCH 95/96] Added tests to improve coverage --- test/QueryPlannerTest.cpp | 53 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 029c3ad4ee..5db2454f1a 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -1430,6 +1430,59 @@ TEST(QueryPlanner, PathSearchTwoVariablesForSource) { parsedQuery::PathSearchException); } +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchUnsupportedElement) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source1 ;" + "pathSearch:source ?source2 ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "VALUES ?middle {}" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("Unsupported element in pathSearch"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchUnsupportedAlgorithm) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:shortestPath ;" + "pathSearch:source ?source1 ;" + "pathSearch:source ?source2 ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("Unsupported algorithm in pathSearch"), + parsedQuery::PathSearchException); +} + TEST(QueryPlanner, SpatialJoinViaMaxDistPredicate) { auto scan = h::IndexScanFromStrings; h::expect( From aa632a1802ca4c230493585b9b8e622d22f560fa Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 16 Oct 2024 10:58:16 +0200 Subject: [PATCH 96/96] Added source/target subtrees to cachekey --- src/engine/PathSearch.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index af80a3769a..50f10210a6 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -144,6 +144,22 @@ std::string PathSearch::getCacheKeyImpl() const { AD_CORRECTNESS_CHECK(subtree_); os << "Subtree:\n" << subtree_->getCacheKey() << '\n'; + + if (sourceTree_.has_value()) { + os << "Source Side subtree:\n" + << sourceTree_.value()->getCacheKey() << '\n'; + } + + if (targetTree_.has_value()) { + os << "Target Side subtree:\n" + << targetTree_.value()->getCacheKey() << '\n'; + } + + if (sourceAndTargetTree_.has_value()) { + os << "Source And Target Side subtree:\n" + << sourceAndTargetTree_.value()->getCacheKey() << '\n'; + } + return std::move(os).str(); };