diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 239e10dc56..403e90c342 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -7,7 +7,8 @@ add_library(engine Distinct.cpp OrderBy.cpp Filter.cpp Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan.cpp - Union.cpp MultiColumnJoin.cpp TransitivePath.cpp Service.cpp + Union.cpp MultiColumnJoin.cpp TransitivePathBase.cpp + TransitivePathHashMap.cpp TransitivePathBinSearch.cpp Service.cpp Values.cpp Bind.cpp Minus.cpp RuntimeInformation.cpp CheckUsePatternTrick.cpp VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index 67b6b05e0e..831193d386 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -30,7 +30,7 @@ #include "engine/Sort.h" #include "engine/TextIndexScanForEntity.h" #include "engine/TextIndexScanForWord.h" -#include "engine/TransitivePath.h" +#include "engine/TransitivePathBase.h" #include "engine/Union.h" #include "engine/Values.h" #include "engine/ValuesForTesting.h" @@ -158,7 +158,7 @@ void QueryExecutionTree::setOperation(std::shared_ptr operation) { type_ = VALUES; } else if constexpr (std::is_same_v) { type_ = SERVICE; - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { type_ = TRANSITIVE_PATH; } else if constexpr (std::is_same_v) { type_ = ORDER_BY; @@ -204,7 +204,8 @@ template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); +template void QueryExecutionTree::setOperation( + std::shared_ptr); 
template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation( diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 38db32771a..3f56dcaeea 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -28,7 +28,7 @@ #include "engine/Sort.h" #include "engine/TextIndexScanForEntity.h" #include "engine/TextIndexScanForWord.h" -#include "engine/TransitivePath.h" +#include "engine/TransitivePathBase.h" #include "engine/Union.h" #include "engine/Values.h" #include "parser/Alias.h" @@ -435,8 +435,11 @@ std::vector QueryPlanner::optimize( right.value_ = getSideValue(arg._right); size_t min = arg._min; size_t max = arg._max; - auto plan = makeSubtreePlan(_qec, sub._qet, left, - right, min, max); + auto transitivePath = TransitivePathBase::makeTransitivePath( + _qec, std::move(sub._qet), std::move(left), std::move(right), min, + max); + auto plan = + makeSubtreePlan(std::move(transitivePath)); candidatesOut.push_back(std::move(plan)); } joinCandidates(std::move(candidatesOut)); @@ -1890,7 +1893,7 @@ auto QueryPlanner::createJoinWithTransitivePath( std::shared_ptr otherTree = aIsTransPath ? b._qet : a._qet; auto& transPathTree = aIsTransPath ? a._qet : b._qet; - auto transPathOperation = std::dynamic_pointer_cast( + auto transPathOperation = std::dynamic_pointer_cast( transPathTree->getRootOperation()); // TODO: Handle the case of two or more common variables diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp deleted file mode 100644 index 073085b89a..0000000000 --- a/src/engine/TransitivePath.cpp +++ /dev/null @@ -1,564 +0,0 @@ -// Copyright 2019, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) - -#include "TransitivePath.h" - -#include - -#include "engine/CallFixedSize.h" -#include "engine/ExportQueryExecutionTrees.h" -#include "engine/IndexScan.h" -#include "util/Exception.h" - -// _____________________________________________________________________________ -TransitivePath::TransitivePath(QueryExecutionContext* qec, - std::shared_ptr child, - TransitivePathSide leftSide, - TransitivePathSide rightSide, size_t minDist, - size_t maxDist) - : Operation(qec), - subtree_(child - ? QueryExecutionTree::createSortedTree(std::move(child), {0}) - : nullptr), - lhs_(std::move(leftSide)), - rhs_(std::move(rightSide)), - minDist_(minDist), - maxDist_(maxDist) { - AD_CORRECTNESS_CHECK(qec != nullptr); - if (lhs_.isVariable()) { - variableColumns_[std::get(lhs_.value_)] = - makeAlwaysDefinedColumn(0); - } - if (rhs_.isVariable()) { - variableColumns_[std::get(rhs_.value_)] = - makeAlwaysDefinedColumn(1); - } - - lhs_.outputCol_ = 0; - rhs_.outputCol_ = 1; -} - -// _____________________________________________________________________________ -std::string TransitivePath::getCacheKeyImpl() const { - std::ostringstream os; - os << " minDist " << minDist_ << " maxDist " << maxDist_ << "\n"; - - os << "Left side:\n"; - os << lhs_.getCacheKey(); - - os << "Right side:\n"; - os << rhs_.getCacheKey(); - - AD_CORRECTNESS_CHECK(subtree_); - os << "Subtree:\n" << subtree_->getCacheKey() << '\n'; - - return std::move(os).str(); -} - -// _____________________________________________________________________________ -std::string TransitivePath::getDescriptor() const { - std::ostringstream os; - os << "TransitivePath "; - // If not full transitive hull, show interval as [min, max]. 
- if (minDist_ > 1 || maxDist_ < std::numeric_limits::max()) { - os << "[" << minDist_ << ", " << maxDist_ << "] "; - } - auto getName = [this](ValueId id) { - auto optStringAndType = - ExportQueryExecutionTrees::idToStringAndType(getIndex(), id, {}); - if (optStringAndType.has_value()) { - return optStringAndType.value().first; - } else { - return absl::StrCat("#", id.getBits()); - } - }; - // Left variable or entity name. - if (lhs_.isVariable()) { - os << std::get(lhs_.value_).name(); - } else { - os << getName(std::get(lhs_.value_)); - } - // The predicate. - auto scanOperation = - std::dynamic_pointer_cast(subtree_->getRootOperation()); - if (scanOperation != nullptr) { - os << " " << scanOperation->getPredicate() << " "; - } else { - // Escaped the question marks to avoid a warning about ignored trigraphs. - os << R"( )"; - } - // Right variable or entity name. - if (rhs_.isVariable()) { - os << std::get(rhs_.value_).name(); - } else { - os << getName(std::get(rhs_.value_)); - } - return std::move(os).str(); -} - -// _____________________________________________________________________________ -size_t TransitivePath::getResultWidth() const { return resultWidth_; } - -// _____________________________________________________________________________ -vector TransitivePath::resultSortedOn() const { - if (lhs_.isSortedOnInputCol()) { - return {0}; - } - if (rhs_.isSortedOnInputCol()) { - return {1}; - } - - return {}; -} - -// _____________________________________________________________________________ -VariableToColumnMap TransitivePath::computeVariableToColumnMap() const { - return variableColumns_; -} - -// _____________________________________________________________________________ -void TransitivePath::setTextLimit(size_t limit) { - for (auto child : getChildren()) { - child->setTextLimit(limit); - } -} - -// _____________________________________________________________________________ -bool TransitivePath::knownEmptyResult() { return 
subtree_->knownEmptyResult(); } - -// _____________________________________________________________________________ -float TransitivePath::getMultiplicity(size_t col) { - (void)col; - // The multiplicities are not known. - return 1; -} - -// _____________________________________________________________________________ -uint64_t TransitivePath::getSizeEstimateBeforeLimit() { - if (std::holds_alternative(lhs_.value_) || - std::holds_alternative(rhs_.value_)) { - // If the subject or object is fixed, assume that the number of matching - // triples is 1000. This will usually be an overestimate, but it will do the - // job of avoiding query plans that first generate large intermediate - // results and only then merge them with a triple such as this. In the - // lhs_.isVar && rhs_.isVar case below, we assume a worst-case blowup of - // 10000; see the comment there. - return 1000; - } - if (lhs_.treeAndCol_.has_value()) { - return lhs_.treeAndCol_.value().first->getSizeEstimate(); - } - if (rhs_.treeAndCol_.has_value()) { - return rhs_.treeAndCol_.value().first->getSizeEstimate(); - } - // Set costs to something very large, so that we never compute the complete - // transitive hull (unless the variables on both sides are not bound in any - // other way, so that the only possible query plan is to compute the complete - // transitive hull). - // - // NOTE: _subtree->getSizeEstimateBeforeLimit() is the number of triples of - // the predicate, for which the transitive hull operator (+) is specified. On - // Wikidata, the predicate with the largest blowup when taking the - // transitive hull is wdt:P2789 (connects with). The blowup is then from 90K - // (without +) to 110M (with +), so about 1000 times larger. 
- AD_CORRECTNESS_CHECK(lhs_.isVariable() && rhs_.isVariable()); - return subtree_->getSizeEstimate() * 10000; -} - -// _____________________________________________________________________________ -size_t TransitivePath::getCostEstimate() { - // We assume that the cost of computing the transitive path is proportional to - // the result size. - auto costEstimate = getSizeEstimateBeforeLimit(); - // Add the cost for the index scan of the predicate involved. - for (auto* ptr : getChildren()) { - if (ptr) { - costEstimate += ptr->getCostEstimate(); - } - } - return costEstimate; -} - -// _____________________________________________________________________________ -template -void TransitivePath::computeTransitivePathBound( - IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, const IdTable& startSideTable) const { - IdTableStatic res = std::move(*dynRes).toStatic(); - - auto [edges, nodes] = setupMapAndNodes( - dynSub, startSide, targetSide, startSideTable); - - Map hull(allocator()); - if (!targetSide.isVariable()) { - hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); - } else { - hull = transitiveHull(edges, nodes, std::nullopt); - } - - TransitivePath::fillTableWithHull( - res, hull, nodes, startSide.outputCol_, targetSide.outputCol_, - startSideTable, startSide.treeAndCol_.value().second); - - *dynRes = std::move(res).toDynamic(); -} - -// _____________________________________________________________________________ -template -void TransitivePath::computeTransitivePath( - IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const { - IdTableStatic res = std::move(*dynRes).toStatic(); - - auto [edges, nodes] = - setupMapAndNodes(dynSub, startSide, targetSide); - - Map hull{allocator()}; - if (!targetSide.isVariable()) { - hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); - } else { - hull = 
transitiveHull(edges, nodes, std::nullopt); - } - - TransitivePath::fillTableWithHull(res, hull, startSide.outputCol_, - targetSide.outputCol_); - - *dynRes = std::move(res).toDynamic(); -} - -// _____________________________________________________________________________ -ResultTable TransitivePath::computeResult() { - if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && - rhs_.isVariable()) { - AD_THROW( - "This query might have to evalute the empty path, which is currently " - "not supported"); - } - shared_ptr subRes = subtree_->getResult(); - - IdTable idTable{allocator()}; - - idTable.setNumColumns(getResultWidth()); - - size_t subWidth = subRes->idTable().numColumns(); - - auto computeForOneSide = [this, &idTable, subRes, subWidth]( - auto& boundSide, - auto& otherSide) -> ResultTable { - shared_ptr sideRes = - boundSide.treeAndCol_.value().first->getResult(); - size_t sideWidth = sideRes->idTable().numColumns(); - - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), - &TransitivePath::computeTransitivePathBound, this, &idTable, - subRes->idTable(), boundSide, otherSide, - sideRes->idTable()); - - return {std::move(idTable), resultSortedOn(), - ResultTable::getMergedLocalVocab(*sideRes, *subRes)}; - }; - - if (lhs_.isBoundVariable()) { - return computeForOneSide(lhs_, rhs_); - } else if (rhs_.isBoundVariable()) { - return computeForOneSide(rhs_, lhs_); - // Right side is an Id - } else if (!rhs_.isVariable()) { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePath::computeTransitivePath, this, &idTable, - subRes->idTable(), rhs_, lhs_); - // No side is a bound variable, the right side is an unbound variable - // and the left side is either an unbound Variable or an ID. 
- } else { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePath::computeTransitivePath, this, &idTable, - subRes->idTable(), lhs_, rhs_); - } - - // NOTE: The only place, where the input to a transitive path operation is not - // an index scan (which has an empty local vocabulary by default) is the - // `LocalVocabTest`. But it doesn't harm to propagate the local vocab here - // either. - return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; -} - -// _____________________________________________________________________________ -std::shared_ptr TransitivePath::bindLeftSide( - std::shared_ptr leftop, size_t inputCol) const { - return bindLeftOrRightSide(std::move(leftop), inputCol, true); -} - -// _____________________________________________________________________________ -std::shared_ptr TransitivePath::bindRightSide( - std::shared_ptr rightop, size_t inputCol) const { - return bindLeftOrRightSide(std::move(rightop), inputCol, false); -} - -// _____________________________________________________________________________ -std::shared_ptr TransitivePath::bindLeftOrRightSide( - std::shared_ptr leftOrRightOp, size_t inputCol, - bool isLeft) const { - // Enforce required sorting of `leftOrRightOp`. - leftOrRightOp = QueryExecutionTree::createSortedTree(std::move(leftOrRightOp), - {inputCol}); - // Create a copy of this. - // - // NOTE: The RHS used to be `std::make_shared()`, which is - // wrong because it first calls the copy constructor of the base class - // `Operation`, which would then ignore the changes in `variableColumnMap_` - // made below (see `Operation::getInternallyVisibleVariableColumns` and - // `Operation::getExternallyVariableColumns`). 
- std::shared_ptr p = std::make_shared( - getExecutionContext(), subtree_, lhs_, rhs_, minDist_, maxDist_); - if (isLeft) { - p->lhs_.treeAndCol_ = {leftOrRightOp, inputCol}; - } else { - p->rhs_.treeAndCol_ = {leftOrRightOp, inputCol}; - } - - // Note: The `variable` in the following structured binding is `const`, even - // if we bind by value. We deliberately make one unnecessary copy of the - // `variable` to keep the code simpler. - for (auto [variable, columnIndexWithType] : - leftOrRightOp->getVariableColumns()) { - ColumnIndex columnIndex = columnIndexWithType.columnIndex_; - if (columnIndex == inputCol) { - continue; - } - - columnIndexWithType.columnIndex_ += columnIndex > inputCol ? 1 : 2; - - p->variableColumns_[variable] = columnIndexWithType; - p->resultWidth_++; - } - return p; -} - -// _____________________________________________________________________________ -bool TransitivePath::isBoundOrId() const { - return lhs_.isBoundVariable() || rhs_.isBoundVariable() || - !lhs_.isVariable() || !rhs_.isVariable(); -} - -// _____________________________________________________________________________ -TransitivePath::Map TransitivePath::transitiveHull( - const Map& edges, const std::vector& startNodes, - std::optional target) const { - using MapIt = Map::const_iterator; - // For every node do a dfs on the graph - Map hull{allocator()}; - - // Stores nodes we already have a path to. This avoids cycles. - ad_utility::HashSetWithMemoryLimit marks{ - getExecutionContext()->getAllocator()}; - - // The stack used to store the dfs' progress - std::vector positions; - - // Used to store all edges leading away from a node for every level. - // Reduces access to the hashmap, and is safe as the map will not - // be modified after this point. 
- std::vector edgeCache; - - for (Id currentStartNode : startNodes) { - if (hull.contains(currentStartNode)) { - // We have already computed the hull for this node - continue; - } - - // Reset for this iteration - marks.clear(); - - MapIt rootEdges = edges.find(currentStartNode); - if (rootEdges != edges.end()) { - positions.push_back(rootEdges->second.begin()); - edgeCache.push_back(&rootEdges->second); - } - if (minDist_ == 0 && - (!target.has_value() || currentStartNode == target.value())) { - insertIntoMap(hull, currentStartNode, currentStartNode); - } - - // While we have not found the entire transitive hull and have not reached - // the max step limit - while (!positions.empty()) { - checkCancellation(); - size_t stackIndex = positions.size() - 1; - // Process the next child of the node at the top of the stack - Set::const_iterator& pos = positions[stackIndex]; - const Set* nodeEdges = edgeCache.back(); - - if (pos == nodeEdges->end()) { - // We finished processing this node - positions.pop_back(); - edgeCache.pop_back(); - continue; - } - - Id child = *pos; - ++pos; - size_t childDepth = positions.size(); - if (childDepth <= maxDist_ && marks.count(child) == 0) { - // process the child - if (childDepth >= minDist_) { - marks.insert(child); - if (!target.has_value() || child == target.value()) { - insertIntoMap(hull, currentStartNode, child); - } - } - // Add the child to the stack - MapIt it = edges.find(child); - if (it != edges.end()) { - positions.push_back(it->second.begin()); - edgeCache.push_back(&it->second); - } - } - } - } - return hull; -} - -// _____________________________________________________________________________ -template -void TransitivePath::fillTableWithHull(IdTableStatic& table, - const Map& hull, std::vector& nodes, - size_t startSideCol, - size_t targetSideCol, - const IdTable& startSideTable, - size_t skipCol) { - IdTableView startView = - startSideTable.asStaticView(); - - size_t rowIndex = 0; - for (size_t i = 0; i < 
nodes.size(); i++) { - Id node = nodes[i]; - auto it = hull.find(node); - if (it == hull.end()) { - continue; - } - - for (Id otherNode : it->second) { - table.emplace_back(); - table(rowIndex, startSideCol) = node; - table(rowIndex, targetSideCol) = otherNode; - - TransitivePath::copyColumns(startView, table, i, - rowIndex, skipCol); - - rowIndex++; - } - } -} - -// _____________________________________________________________________________ -template -void TransitivePath::fillTableWithHull(IdTableStatic& table, - const Map& hull, size_t startSideCol, - size_t targetSideCol) { - size_t rowIndex = 0; - for (auto const& [node, linkedNodes] : hull) { - for (Id linkedNode : linkedNodes) { - table.emplace_back(); - table(rowIndex, startSideCol) = node; - table(rowIndex, targetSideCol) = linkedNode; - - rowIndex++; - } - } -} - -// _____________________________________________________________________________ -template -std::pair> -TransitivePath::setupMapAndNodes(const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, - const IdTable& startSideTable) const { - std::vector nodes; - Map edges = setupEdgesMap(sub, startSide, targetSide); - - // Bound -> var|id - std::span startNodes = setupNodes( - startSideTable, startSide.treeAndCol_.value().second); - nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); - - return {std::move(edges), std::move(nodes)}; -} - -// _____________________________________________________________________________ -template -std::pair> -TransitivePath::setupMapAndNodes(const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const { - std::vector nodes; - Map edges = setupEdgesMap(sub, startSide, targetSide); - - // id -> var|id - if (!startSide.isVariable()) { - nodes.push_back(std::get(startSide.value_)); - // var -> var - } else { - std::span startNodes = - setupNodes(sub, startSide.subCol_); - nodes.insert(nodes.end(), startNodes.begin(), 
startNodes.end()); - if (minDist_ == 0) { - std::span targetNodes = - setupNodes(sub, targetSide.subCol_); - nodes.insert(nodes.end(), targetNodes.begin(), targetNodes.end()); - } - } - - return {std::move(edges), std::move(nodes)}; -} - -// _____________________________________________________________________________ -template -TransitivePath::Map TransitivePath::setupEdgesMap( - const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const { - const IdTableView sub = dynSub.asStaticView(); - Map edges{allocator()}; - decltype(auto) startCol = sub.getColumn(startSide.subCol_); - decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); - - for (size_t i = 0; i < sub.size(); i++) { - checkCancellation(); - insertIntoMap(edges, startCol[i], targetCol[i]); - } - return edges; -} - -// _____________________________________________________________________________ -template -std::span TransitivePath::setupNodes(const IdTable& table, - size_t col) { - return table.getColumn(col); -} - -// _____________________________________________________________________________ -template -void TransitivePath::copyColumns(const IdTableView& inputTable, - IdTableStatic& outputTable, - size_t inputRow, size_t outputRow, - size_t skipCol) { - size_t inCol = 0; - size_t outCol = 2; - while (inCol < inputTable.numColumns() && outCol < outputTable.numColumns()) { - if (skipCol == inCol) { - inCol++; - continue; - } - - outputTable(outputRow, outCol) = inputTable(inputRow, inCol); - inCol++; - outCol++; - } -} - -// _____________________________________________________________________________ -void TransitivePath::insertIntoMap(Map& map, Id key, Id value) const { - auto [it, success] = map.try_emplace(key, allocator()); - it->second.insert(value); -} diff --git a/src/engine/TransitivePath.h b/src/engine/TransitivePath.h deleted file mode 100644 index ebae62d3fa..0000000000 --- a/src/engine/TransitivePath.h +++ /dev/null @@ -1,320 +0,0 @@ -// 
Copyright 2019, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) - -#pragma once - -#include - -#include "engine/Operation.h" -#include "engine/QueryExecutionTree.h" -#include "engine/idTable/IdTable.h" - -using TreeAndCol = std::pair, size_t>; -struct TransitivePathSide { - // treeAndCol contains the QueryExecutionTree of this side and the column - // where the Ids of this side are located. This member only has a value if - // this side was bound. - std::optional treeAndCol_; - // Column of the sub table where the Ids of this side are located - size_t subCol_; - std::variant value_; - // The column in the ouput table where this side Ids are written to. - // This member is set by the TransitivePath class - size_t outputCol_ = 0; - - bool isVariable() const { return std::holds_alternative(value_); }; - - bool isBoundVariable() const { return treeAndCol_.has_value(); }; - - std::string getCacheKey() const { - std::ostringstream os; - if (!isVariable()) { - os << "Id: " << std::get(value_); - } - - os << ", subColumn: " << subCol_ << "to " << outputCol_; - - if (treeAndCol_.has_value()) { - const auto& [tree, col] = treeAndCol_.value(); - os << ", Subtree:\n"; - os << tree->getCacheKey() << "with join column " << col << "\n"; - } - return std::move(os).str(); - } - - bool isSortedOnInputCol() const { - if (!treeAndCol_.has_value()) { - return false; - } - - auto [tree, col] = treeAndCol_.value(); - const std::vector& sortedOn = - tree->getRootOperation()->getResultSortedOn(); - // TODO use std::ranges::starts_with - return (!sortedOn.empty() && sortedOn[0] == col); - } -}; - -class TransitivePath : public Operation { - // We deliberately use the `std::` variants of a hash set and hash map because - // `absl`s types are not exception safe. 
- constexpr static auto hash = [](Id id) { - return std::hash{}(id.getBits()); - }; - using Set = std::unordered_set, - ad_utility::AllocatorWithLimit>; - using Map = std::unordered_map< - Id, Set, decltype(hash), std::equal_to, - ad_utility::AllocatorWithLimit>>; - - std::shared_ptr subtree_; - TransitivePathSide lhs_; - TransitivePathSide rhs_; - size_t resultWidth_ = 2; - size_t minDist_; - size_t maxDist_; - VariableToColumnMap variableColumns_; - - public: - TransitivePath(QueryExecutionContext* qec, - std::shared_ptr child, - TransitivePathSide leftSide, TransitivePathSide rightSide, - size_t minDist, size_t maxDist); - - /** - * Returns a new TransitivePath operation that uses the fact that leftop - * generates all possible values for the left side of the paths. If the - * results of leftop is smaller than all possible values this will result in a - * faster transitive path operation (as the transitive paths has to be - * computed for fewer elements). - */ - std::shared_ptr bindLeftSide( - std::shared_ptr leftop, size_t inputCol) const; - - /** - * Returns a new TransitivePath operation that uses the fact that rightop - * generates all possible values for the right side of the paths. If the - * results of rightop is smaller than all possible values this will result in - * a faster transitive path operation (as the transitive paths has to be - * computed for fewer elements). 
- */ - std::shared_ptr bindRightSide( - std::shared_ptr rightop, size_t inputCol) const; - - bool isBoundOrId() const; - - /** - * Getters, mainly necessary for testing - */ - size_t getMinDist() const { return minDist_; } - size_t getMaxDist() const { return maxDist_; } - const TransitivePathSide& getLeft() const { return lhs_; } - const TransitivePathSide& getRight() const { return rhs_; } - - protected: - virtual std::string getCacheKeyImpl() const override; - - public: - virtual std::string getDescriptor() const override; - - virtual size_t getResultWidth() const override; - - virtual vector resultSortedOn() const override; - - virtual void setTextLimit(size_t limit) override; - - virtual bool knownEmptyResult() override; - - virtual float getMultiplicity(size_t col) override; - - private: - uint64_t getSizeEstimateBeforeLimit() override; - - public: - virtual size_t getCostEstimate() override; - - vector getChildren() override { - std::vector res; - auto addChildren = [](std::vector& res, - TransitivePathSide side) { - if (side.treeAndCol_.has_value()) { - res.push_back(side.treeAndCol_.value().first.get()); - } - }; - addChildren(res, lhs_); - addChildren(res, rhs_); - res.push_back(subtree_.get()); - return res; - } - - /** - * @brief Compute the transitive hull with a bound side. - * This function is called when the startSide is bound and - * it is a variable. The other IdTable contains the result - * of the start side and will be used to get the start nodes. 
- * - * @tparam RES_WIDTH Number of columns of the result table - * @tparam SUB_WIDTH Number of columns of the sub table - * @tparam SIDE_WIDTH Number of columns of the - * @param res The result table which will be filled in-place - * @param sub The IdTable for the sub result - * @param startSide The start side for the transitive hull - * @param targetSide The target side for the transitive hull - * @param startSideTable The IdTable of the startSide - */ - template - void computeTransitivePathBound(IdTable* res, const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, - const IdTable& startSideTable) const; - - /** - * @brief Compute the transitive hull. - * This function is called when no side is bound (or an id). - * - * @tparam RES_WIDTH Number of columns of the result table - * @tparam SUB_WIDTH Number of columns of the sub table - * @param res The result table which will be filled in-place - * @param sub The IdTable for the sub result - * @param startSide The start side for the transitive hull - * @param targetSide The target side for the transitive hull - */ - template - void computeTransitivePath(IdTable* res, const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const; - - private: - /** - * @brief Compute the result for this TransitivePath operation - * This function chooses the start and target side for the transitive - * hull computation. This choice of the start side has a large impact - * on the time it takes to compute the hull. The set of nodes on the - * start side should be as small as possible. - * - * @return ResultTable The result of the TransitivePath operation - */ - virtual ResultTable computeResult() override; - - VariableToColumnMap computeVariableToColumnMap() const override; - - // The internal implementation of `bindLeftSide` and `bindRightSide` which - // share a lot of code. 
- std::shared_ptr bindLeftOrRightSide( - std::shared_ptr leftOrRightOp, size_t inputCol, - bool isLeft) const; - - /** - * @brief Compute the transitive hull starting at the given nodes, - * using the given Map. - * - * @param edges Adjacency lists, mapping Ids (nodes) to their connected - * Ids. - * @param nodes A list of Ids. These Ids are used as starting points for the - * transitive hull. Thus, this parameter guides the performance of this - * algorithm. - * @param target Optional target Id. If supplied, only paths which end - * in this Id are added to the hull. - * @return Map Maps each Id to its connected Ids in the transitive hull - */ - Map transitiveHull(const Map& edges, const std::vector& startNodes, - std::optional target) const; - - /** - * @brief Fill the given table with the transitive hull and use the - * startSideTable to fill in the rest of the columns. - * This function is called if the start side is bound and a variable. - * - * @tparam WIDTH The number of columns of the result table. - * @tparam START_WIDTH The number of columns of the start table. - * @param table The result table which will be filled. - * @param hull The transitive hull. - * @param nodes The start nodes of the transitive hull. These need to be in - * the same order and amount as the starting side nodes in the startTable. - * @param startSideCol The column of the result table for the startSide of the - * hull - * @param targetSideCol The column of the result table for the targetSide of - * the hull - * @param startSideTable An IdTable that holds other results. The other - * results will be transferred to the new result table. - * @param skipCol This column contains the Ids of the start side in the - * startSideTable and will be skipped. 
- */ - template - static void fillTableWithHull(IdTableStatic& table, const Map& hull, - std::vector& nodes, size_t startSideCol, - size_t targetSideCol, - const IdTable& startSideTable, size_t skipCol); - - /** - * @brief Fill the given table with the transitive hull. - * This function is called if the sides are unbound or ids. - * - * @tparam WIDTH The number of columns of the result table. - * @param table The result table which will be filled. - * @param hull The transitive hull. - * @param startSideCol The column of the result table for the startSide of the - * hull - * @param targetSideCol The column of the result table for the targetSide of - * the hull - */ - template - static void fillTableWithHull(IdTableStatic& table, const Map& hull, - size_t startSideCol, size_t targetSideCol); - - /** - * @brief Prepare a Map and a nodes vector for the transitive hull - * computation. - * - * @tparam SUB_WIDTH Number of columns of the sub table - * @tparam SIDE_WIDTH Number of columns of the startSideTable - * @param sub The sub table result - * @param startSide The TransitivePathSide where the edges start - * @param targetSide The TransitivePathSide where the edges end - * @param startSideTable An IdTable containing the Ids for the startSide - * @return std::pair> A Map and Id vector (nodes) for the - * transitive hull computation - */ - template - std::pair> setupMapAndNodes( - const IdTable& sub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, - const IdTable& startSideTable) const; - - /** - * @brief Prepare a Map and a nodes vector for the transitive hull - * computation. 
- * - * @tparam SUB_WIDTH Number of columns of the sub table - * @param sub The sub table result - * @param startSide The TransitivePathSide where the edges start - * @param targetSide The TransitivePathSide where the edges end - * @return std::pair> A Map and Id vector (nodes) for the - * transitive hull computation - */ - template - std::pair> setupMapAndNodes( - const IdTable& sub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const; - - // initialize the map from the subresult - template - Map setupEdgesMap(const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const; - - // initialize a vector for the starting nodes (Ids) - template - static std::span setupNodes(const IdTable& table, size_t col); - - // Copy the columns from the input table to the output table - template - static void copyColumns(const IdTableView& inputTable, - IdTableStatic& outputTable, - size_t inputRow, size_t outputRow, size_t skipCol); - - // A small helper function: Insert the `value` to the set at `map[key]`. - // As the sets all have an allocator with memory limit, this construction is a - // little bit more involved, so this can be a separate helper function. - void insertIntoMap(Map& map, Id key, Id value) const; -}; diff --git a/src/engine/TransitivePathBase.cpp b/src/engine/TransitivePathBase.cpp new file mode 100644 index 0000000000..df3a0ac5d0 --- /dev/null +++ b/src/engine/TransitivePathBase.cpp @@ -0,0 +1,410 @@ +// Copyright 2019, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) +// Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#include "TransitivePathBase.h" + +#include +#include +#include +#include + +#include "engine/CallFixedSize.h" +#include "engine/ExportQueryExecutionTrees.h" +#include "engine/IndexScan.h" +#include "engine/TransitivePathBinSearch.h" +#include "engine/TransitivePathHashMap.h" +#include "global/RuntimeParameters.h" +#include "util/Exception.h" + +// _____________________________________________________________________________ +TransitivePathBase::TransitivePathBase( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist) + : Operation(qec), + subtree_(child + ? QueryExecutionTree::createSortedTree(std::move(child), {0}) + : nullptr), + lhs_(std::move(leftSide)), + rhs_(std::move(rightSide)), + minDist_(minDist), + maxDist_(maxDist) { + AD_CORRECTNESS_CHECK(qec != nullptr); + if (lhs_.isVariable()) { + variableColumns_[std::get(lhs_.value_)] = + makeAlwaysDefinedColumn(0); + } + if (rhs_.isVariable()) { + variableColumns_[std::get(rhs_.value_)] = + makeAlwaysDefinedColumn(1); + } + + lhs_.outputCol_ = 0; + rhs_.outputCol_ = 1; +} + +// _____________________________________________________________________________ +TransitivePathBase::~TransitivePathBase() = default; + +// _____________________________________________________________________________ +std::pair +TransitivePathBase::decideDirection() { + if (lhs_.isBoundVariable()) { + LOG(DEBUG) << "Computing TransitivePath left to right" << std::endl; + return {lhs_, rhs_}; + } else if (rhs_.isBoundVariable() || !rhs_.isVariable()) { + LOG(DEBUG) << "Computing TransitivePath right to left" << std::endl; + return {rhs_, lhs_}; + } + LOG(DEBUG) << "Computing TransitivePath left to right" << std::endl; + return {lhs_, rhs_}; +} + +// 
_____________________________________________________________________________ +void TransitivePathBase::fillTableWithHull(IdTable& table, const Map& hull, + std::vector& nodes, + size_t startSideCol, + size_t targetSideCol, + const IdTable& startSideTable, + size_t skipCol) const { + CALL_FIXED_SIZE((std::array{table.numColumns(), startSideTable.numColumns()}), + &TransitivePathBase::fillTableWithHullImpl, this, table, hull, + nodes, startSideCol, targetSideCol, startSideTable, skipCol); +} + +// _____________________________________________________________________________ +template +void TransitivePathBase::fillTableWithHullImpl( + IdTable& tableDyn, const Map& hull, std::vector& nodes, + size_t startSideCol, size_t targetSideCol, const IdTable& startSideTable, + size_t skipCol) const { + IdTableStatic table = std::move(tableDyn).toStatic(); + IdTableView startView = + startSideTable.asStaticView(); + + size_t rowIndex = 0; + for (size_t i = 0; i < nodes.size(); i++) { + Id node = nodes[i]; + auto it = hull.find(node); + if (it == hull.end()) { + continue; + } + + for (Id otherNode : it->second) { + table.emplace_back(); + table(rowIndex, startSideCol) = node; + table(rowIndex, targetSideCol) = otherNode; + + copyColumns(startView, table, i, rowIndex, skipCol); + + rowIndex++; + } + } + + tableDyn = std::move(table).toDynamic(); +} + +// _____________________________________________________________________________ +void TransitivePathBase::fillTableWithHull(IdTable& table, const Map& hull, + size_t startSideCol, + size_t targetSideCol) const { + CALL_FIXED_SIZE((std::array{table.numColumns()}), + &TransitivePathBase::fillTableWithHullImpl, this, table, hull, + startSideCol, targetSideCol); +} + +// _____________________________________________________________________________ +template +void TransitivePathBase::fillTableWithHullImpl(IdTable& tableDyn, + const Map& hull, + size_t startSideCol, + size_t targetSideCol) const { + IdTableStatic table = 
std::move(tableDyn).toStatic(); + size_t rowIndex = 0; + for (auto const& [node, linkedNodes] : hull) { + for (Id linkedNode : linkedNodes) { + table.emplace_back(); + table(rowIndex, startSideCol) = node; + table(rowIndex, targetSideCol) = linkedNode; + + rowIndex++; + } + } + tableDyn = std::move(table).toDynamic(); +} + +// _____________________________________________________________________________ +std::string TransitivePathBase::getCacheKeyImpl() const { + std::ostringstream os; + os << " minDist " << minDist_ << " maxDist " << maxDist_ << "\n"; + + os << "Left side:\n"; + os << lhs_.getCacheKey(); + + os << "Right side:\n"; + os << rhs_.getCacheKey(); + + AD_CORRECTNESS_CHECK(subtree_); + os << "Subtree:\n" << subtree_->getCacheKey() << '\n'; + + return std::move(os).str(); +} + +// _____________________________________________________________________________ +std::string TransitivePathBase::getDescriptor() const { + std::ostringstream os; + os << "TransitivePath "; + // If not full transitive hull, show interval as [min, max]. + if (minDist_ > 1 || maxDist_ < std::numeric_limits::max()) { + os << "[" << minDist_ << ", " << maxDist_ << "] "; + } + auto getName = [this](ValueId id) { + auto optStringAndType = + ExportQueryExecutionTrees::idToStringAndType(getIndex(), id, {}); + if (optStringAndType.has_value()) { + return optStringAndType.value().first; + } else { + return absl::StrCat("#", id.getBits()); + } + }; + // Left variable or entity name. + if (lhs_.isVariable()) { + os << std::get(lhs_.value_).name(); + } else { + os << getName(std::get(lhs_.value_)); + } + // The predicate. + auto scanOperation = + std::dynamic_pointer_cast(subtree_->getRootOperation()); + if (scanOperation != nullptr) { + os << " " << scanOperation->getPredicate() << " "; + } else { + // Escaped the question marks to avoid a warning about ignored trigraphs. + os << R"( )"; + } + // Right variable or entity name. 
+ if (rhs_.isVariable()) { + os << std::get(rhs_.value_).name(); + } else { + os << getName(std::get(rhs_.value_)); + } + return std::move(os).str(); +} + +// _____________________________________________________________________________ +size_t TransitivePathBase::getResultWidth() const { return resultWidth_; } + +// _____________________________________________________________________________ +vector TransitivePathBase::resultSortedOn() const { + if (lhs_.isSortedOnInputCol()) { + return {0}; + } + if (rhs_.isSortedOnInputCol()) { + return {1}; + } + + return {}; +} + +// _____________________________________________________________________________ +VariableToColumnMap TransitivePathBase::computeVariableToColumnMap() const { + return variableColumns_; +} + +// _____________________________________________________________________________ +void TransitivePathBase::setTextLimit(size_t limit) { + for (auto child : getChildren()) { + child->setTextLimit(limit); + } +} + +// _____________________________________________________________________________ +bool TransitivePathBase::knownEmptyResult() { + return subtree_->knownEmptyResult(); +} + +// _____________________________________________________________________________ +float TransitivePathBase::getMultiplicity(size_t col) { + (void)col; + // The multiplicities are not known. + return 1; +} + +// _____________________________________________________________________________ +uint64_t TransitivePathBase::getSizeEstimateBeforeLimit() { + if (std::holds_alternative(lhs_.value_) || + std::holds_alternative(rhs_.value_)) { + // If the subject or object is fixed, assume that the number of matching + // triples is 1000. This will usually be an overestimate, but it will do the + // job of avoiding query plans that first generate large intermediate + // results and only then merge them with a triple such as this. In the + // lhs_.isVar && rhs_.isVar case below, we assume a worst-case blowup of + // 10000; see the comment there. 
+ return 1000; + } + if (lhs_.treeAndCol_.has_value()) { + return lhs_.treeAndCol_.value().first->getSizeEstimate(); + } + if (rhs_.treeAndCol_.has_value()) { + return rhs_.treeAndCol_.value().first->getSizeEstimate(); + } + // Set costs to something very large, so that we never compute the complete + // transitive hull (unless the variables on both sides are not bound in any + // other way, so that the only possible query plan is to compute the complete + // transitive hull). + // + // NOTE: subtree_->getSizeEstimate() is the number of triples of + // the predicate, for which the transitive hull operator (+) is specified. On + // Wikidata, the predicate with the largest blowup when taking the + // transitive hull is wdt:P2789 (connects with). The blowup is then from 90K + // (without +) to 110M (with +), so about 1000 times larger. + AD_CORRECTNESS_CHECK(lhs_.isVariable() && rhs_.isVariable()); + return subtree_->getSizeEstimate() * 10000; +} + +// _____________________________________________________________________________ +size_t TransitivePathBase::getCostEstimate() { + // We assume that the cost of computing the transitive path is proportional to + // the result size. + auto costEstimate = getSizeEstimateBeforeLimit(); + // Add the cost for the index scan of the predicate involved. 
+ for (auto* ptr : getChildren()) { + if (ptr) { + costEstimate += ptr->getCostEstimate(); + } + } + return costEstimate; +} + +// _____________________________________________________________________________ +std::shared_ptr TransitivePathBase::makeTransitivePath( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist) { + bool useBinSearch = + RuntimeParameters().get<"use-binsearch-transitive-path">(); + return makeTransitivePath(qec, std::move(child), std::move(leftSide), + std::move(rightSide), minDist, maxDist, + useBinSearch); +} + +// _____________________________________________________________________________ +std::shared_ptr TransitivePathBase::makeTransitivePath( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist, bool useBinSearch) { + if (useBinSearch) { + return std::make_shared( + qec, std::move(child), std::move(leftSide), std::move(rightSide), + minDist, maxDist); + } else { + return std::make_shared( + qec, std::move(child), std::move(leftSide), std::move(rightSide), + minDist, maxDist); + } +} + +// _____________________________________________________________________________ +vector TransitivePathBase::getChildren() { + std::vector res; + auto addChildren = [](std::vector& res, + TransitivePathSide side) { + if (side.treeAndCol_.has_value()) { + res.push_back(side.treeAndCol_.value().first.get()); + } + }; + addChildren(res, lhs_); + addChildren(res, rhs_); + res.push_back(subtree_.get()); + return res; +} + +// _____________________________________________________________________________ +std::shared_ptr TransitivePathBase::bindLeftSide( + std::shared_ptr leftop, size_t inputCol) const { + return bindLeftOrRightSide(std::move(leftop), inputCol, true); +} + +// _____________________________________________________________________________ +std::shared_ptr 
TransitivePathBase::bindRightSide( + std::shared_ptr rightop, size_t inputCol) const { + return bindLeftOrRightSide(std::move(rightop), inputCol, false); +} + +// _____________________________________________________________________________ +std::shared_ptr TransitivePathBase::bindLeftOrRightSide( + std::shared_ptr leftOrRightOp, size_t inputCol, + bool isLeft) const { + // Enforce required sorting of `leftOrRightOp`. + leftOrRightOp = QueryExecutionTree::createSortedTree(std::move(leftOrRightOp), + {inputCol}); + // Create a copy of this. + // + // NOTE: The RHS used to be `std::make_shared()`, which is + // wrong because it first calls the copy constructor of the base class + // `Operation`, which would then ignore the changes in `variableColumns_` + // made below (see `Operation::getInternallyVisibleVariableColumns` and + // `Operation::getExternallyVariableColumns`). + auto lhs = lhs_; + auto rhs = rhs_; + if (isLeft) { + lhs.treeAndCol_ = {leftOrRightOp, inputCol}; + } else { + rhs.treeAndCol_ = {leftOrRightOp, inputCol}; + } + std::shared_ptr p = + TransitivePathBase::makeTransitivePath(getExecutionContext(), subtree_, + lhs, rhs, minDist_, maxDist_); + + // Note: The `variable` in the following structured binding is `const`, even + // if we bind by value. We deliberately make one unnecessary copy of the + // `variable` to keep the code simpler. + for (auto [variable, columnIndexWithType] : + leftOrRightOp->getVariableColumns()) { + ColumnIndex columnIndex = columnIndexWithType.columnIndex_; + if (columnIndex == inputCol) { + continue; + } + + columnIndexWithType.columnIndex_ += columnIndex > inputCol ? 
1 : 2; + + AD_CORRECTNESS_CHECK(!p->variableColumns_.contains(variable)); + p->variableColumns_[variable] = columnIndexWithType; + p->resultWidth_++; + } + return p; +} + +// _____________________________________________________________________________ +bool TransitivePathBase::isBoundOrId() const { + return lhs_.isBoundVariable() || rhs_.isBoundVariable() || + !lhs_.isVariable() || !rhs_.isVariable(); +} + +// _____________________________________________________________________________ +template +void TransitivePathBase::copyColumns(const IdTableView& inputTable, + IdTableStatic& outputTable, + size_t inputRow, size_t outputRow, + size_t skipCol) const { + size_t inCol = 0; + size_t outCol = 2; + while (inCol < inputTable.numColumns() && outCol < outputTable.numColumns()) { + if (skipCol == inCol) { + inCol++; + continue; + } + + outputTable(outputRow, outCol) = inputTable(inputRow, inCol); + inCol++; + outCol++; + } +} + +// _____________________________________________________________________________ +void TransitivePathBase::insertIntoMap(Map& map, Id key, Id value) const { + auto [it, success] = map.try_emplace(key, allocator()); + it->second.insert(value); +} diff --git a/src/engine/TransitivePathBase.h b/src/engine/TransitivePathBase.h new file mode 100644 index 0000000000..d107a54709 --- /dev/null +++ b/src/engine/TransitivePathBase.h @@ -0,0 +1,272 @@ +// Copyright 2019, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) +// Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#pragma once + +#include +#include + +#include "engine/Operation.h" +#include "engine/QueryExecutionTree.h" + +using TreeAndCol = std::pair, size_t>; +struct TransitivePathSide { + // treeAndCol contains the QueryExecutionTree of this side and the column + // where the Ids of this side are located. This member only has a value if + // this side was bound. 
+ std::optional treeAndCol_; + // Column of the sub table where the Ids of this side are located + size_t subCol_; + std::variant value_; + // The column in the output table where this side's Ids are written to. + // This member is set by the TransitivePath class + size_t outputCol_ = 0; + + bool isVariable() const { return std::holds_alternative(value_); }; + + bool isBoundVariable() const { return treeAndCol_.has_value(); }; + + std::string getCacheKey() const { + std::ostringstream os; + if (!isVariable()) { + os << "Id: " << std::get(value_); + } + + os << ", subColumn: " << subCol_ << "to " << outputCol_; + + if (treeAndCol_.has_value()) { + const auto& [tree, col] = treeAndCol_.value(); + os << ", Subtree:\n"; + os << tree->getCacheKey() << "with join column " << col << "\n"; + } + return std::move(os).str(); + } + + bool isSortedOnInputCol() const { + if (!treeAndCol_.has_value()) { + return false; + } + + auto [tree, col] = treeAndCol_.value(); + const std::vector& sortedOn = + tree->getRootOperation()->getResultSortedOn(); + // TODO use std::ranges::starts_with + return (!sortedOn.empty() && sortedOn[0] == col); + } +}; + +// We deliberately use the `std::` variants of a hash set and hash map because +// `absl`s types are not exception safe. +struct HashId { + auto operator()(Id id) const { return std::hash{}(id.getBits()); } +}; + +using Set = std::unordered_set, + ad_utility::AllocatorWithLimit>; +using Map = std::unordered_map< + Id, Set, HashId, std::equal_to, + ad_utility::AllocatorWithLimit>>; + +/** + * @class TransitivePathBase + * @brief A common base class for different implementations of the Transitive + * Path operation. To create an actual object use the static factory function + * `makeTransitivePath`. 
+ * + */ +class TransitivePathBase : public Operation { + protected: + std::shared_ptr subtree_; + TransitivePathSide lhs_; + TransitivePathSide rhs_; + size_t resultWidth_ = 2; + size_t minDist_; + size_t maxDist_; + VariableToColumnMap variableColumns_; + + public: + TransitivePathBase(QueryExecutionContext* qec, + std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, + size_t minDist, size_t maxDist); + + virtual ~TransitivePathBase() = 0; + + /** + * Returns a new TransitivePath operation that uses the fact that leftop + * generates all possible values for the left side of the paths. If the + * results of leftop is smaller than all possible values this will result in a + * faster transitive path operation (as the transitive paths has to be + * computed for fewer elements). + */ + std::shared_ptr bindLeftSide( + std::shared_ptr leftop, size_t inputCol) const; + + /** + * Returns a new TransitivePath operation that uses the fact that rightop + * generates all possible values for the right side of the paths. If the + * results of rightop is smaller than all possible values this will result in + * a faster transitive path operation (as the transitive paths has to be + * computed for fewer elements). + */ + std::shared_ptr bindRightSide( + std::shared_ptr rightop, size_t inputCol) const; + + bool isBoundOrId() const; + + /** + * Getters, mainly necessary for testing + */ + size_t getMinDist() const { return minDist_; } + size_t getMaxDist() const { return maxDist_; } + const TransitivePathSide& getLeft() const { return lhs_; } + const TransitivePathSide& getRight() const { return rhs_; } + + protected: + std::string getCacheKeyImpl() const override; + + /** + * @brief Decide on which transitive path side the hull computation should + * start and where it should end. The start and target side are chosen by + * the following criteria: + * + * 1. If a side is bound, then this side will be the start side. + * 2. 
If a side is an id, then this side will be the start side. + * 3. If both sides are variables, the left side is chosen as start + * (arbitrarily). + * + * @return std::pair The first entry + * of the pair is the start side, the second entry is the target side. + */ + std::pair decideDirection(); + + /** + * @brief Fill the given table with the transitive hull and use the + * startSideTable to fill in the rest of the columns. + * This function is called if the start side is bound and a variable. + * + * @param table The result table which will be filled. + * @param hull The transitive hull. + * @param nodes The start nodes of the transitive hull. These need to be in + * the same order and amount as the starting side nodes in the startTable. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + * @param startSideTable An IdTable that holds other results. The other + * results will be transferred to the new result table. + * @param skipCol This column contains the Ids of the start side in the + * startSideTable and will be skipped. + */ + void fillTableWithHull(IdTable& table, const Map& hull, + std::vector& nodes, size_t startSideCol, + size_t targetSideCol, const IdTable& startSideTable, + size_t skipCol) const; + + /** + * @brief Fill the given table with the transitive hull. + * This function is called if the sides are unbound or ids. + * + * @param table The result table which will be filled. + * @param hull The transitive hull. 
+ * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + */ + void fillTableWithHull(IdTable& table, const Map& hull, size_t startSideCol, + size_t targetSideCol) const; + + // Copy the columns from the input table to the output table + template + void copyColumns(const IdTableView& inputTable, + IdTableStatic& outputTable, size_t inputRow, + size_t outputRow, size_t skipCol) const; + + // A small helper function: Insert the `value` to the set at `map[key]`. + // As the sets all have an allocator with memory limit, this construction is a + // little bit more involved, so this can be a separate helper function. + void insertIntoMap(Map& map, Id key, Id value) const; + + public: + std::string getDescriptor() const override; + + size_t getResultWidth() const override; + + vector resultSortedOn() const override; + + void setTextLimit(size_t limit) override; + + bool knownEmptyResult() override; + + float getMultiplicity(size_t col) override; + + private: + uint64_t getSizeEstimateBeforeLimit() override; + + template + void fillTableWithHullImpl(IdTable& table, const Map& hull, + std::vector& nodes, size_t startSideCol, + size_t targetSideCol, + const IdTable& startSideTable, + size_t skipCol) const; + + template + void fillTableWithHullImpl(IdTable& table, const Map& hull, + size_t startSideCol, size_t targetSideCol) const; + + public: + size_t getCostEstimate() override; + + /** + * @brief Make a concrete TransitivePath object using the given parameters. + * The concrete object will either be TransitivePathHashMap or + * TransitivePathBinSearch, depending on the useBinSearch flag. 
+ * + * @param qec QueryExecutionContext for the TransitivePath Operation + * @param child QueryExecutionTree for the subquery of the TransitivePath + * @param leftSide Settings for the left side of the TransitivePath + * @param rightSide Settings for the right side of the TransitivePath + * @param minDist Minimum distance a resulting path may have (distance = + * number of nodes) + * @param maxDist Maximum distance a resulting path may have (distance = + * number of nodes) + * @param useBinSearch If true, the returned object will be a + * TransitivePathBinSearch. Else it will be a TransitivePathHashMap + */ + static std::shared_ptr makeTransitivePath( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist, bool useBinSearch); + + /** + * @brief Make a concrete TransitivePath object using the given parameters. + * The concrete object will either be TransitivePathHashMap or + * TransitivePathBinSearch, depending on the runtime parameter + * "use-binsearch-transitive-path". + * + * @param qec QueryExecutionContext for the TransitivePath Operation + * @param child QueryExecutionTree for the subquery of the TransitivePath + * @param leftSide Settings for the left side of the TransitivePath + * @param rightSide Settings for the right side of the TransitivePath + * @param minDist Minimum distance a resulting path may have (distance = + * number of nodes) + * @param maxDist Maximum distance a resulting path may have (distance = + * number of nodes) + */ + static std::shared_ptr makeTransitivePath( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist); + + vector getChildren() override; + + VariableToColumnMap computeVariableToColumnMap() const override; + + // The internal implementation of `bindLeftSide` and `bindRightSide` which + // share a lot of code. 
+ std::shared_ptr bindLeftOrRightSide( + std::shared_ptr leftOrRightOp, size_t inputCol, + bool isLeft) const; +}; diff --git a/src/engine/TransitivePathBinSearch.cpp b/src/engine/TransitivePathBinSearch.cpp new file mode 100644 index 0000000000..f12df9829e --- /dev/null +++ b/src/engine/TransitivePathBinSearch.cpp @@ -0,0 +1,31 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#include "TransitivePathBinSearch.h" + +#include +#include + +#include "engine/TransitivePathBase.h" + +// _____________________________________________________________________________ +TransitivePathBinSearch::TransitivePathBinSearch( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist) + : TransitivePathImpl(qec, std::move(child), + std::move(leftSide), + std::move(rightSide), minDist, maxDist) { + auto [startSide, targetSide] = decideDirection(); + subtree_ = QueryExecutionTree::createSortedTree( + subtree_, {startSide.subCol_, targetSide.subCol_}); +} + +// _____________________________________________________________________________ +BinSearchMap TransitivePathBinSearch::setupEdgesMap( + const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + return BinSearchMap{dynSub.getColumn(startSide.subCol_), + dynSub.getColumn(targetSide.subCol_)}; +} diff --git a/src/engine/TransitivePathBinSearch.h b/src/engine/TransitivePathBinSearch.h new file mode 100644 index 0000000000..8e4501579b --- /dev/null +++ b/src/engine/TransitivePathBinSearch.h @@ -0,0 +1,77 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#pragma once + +#include +#include + +#include "engine/Operation.h" +#include "engine/QueryExecutionTree.h" +#include "engine/TransitivePathImpl.h" +#include "engine/idTable/IdTable.h" + +/** + * @class BinSearchMap + * @brief This struct represents a simple Binary Search Map. Given an Id, the + * BinSearchMap can return a list (i.e. span) of successor Ids. + * + * It is expected that the two input spans startIds_ and targetIds_ are sorted + * first by start id and then by target id. + * Example: + * + * startId | targetId + * ------------------ + * 1 | 1 + * 1 | 2 + * 2 | 4 + * 3 | 2 + * 3 | 4 + * 3 | 6 + * 5 | 2 + * 5 | 6 + * + */ +struct BinSearchMap { + std::span startIds_; + std::span targetIds_; + + /** + * @brief Return the successors for the given id. + * The successors are all target ids, where the corresponding start id is + * equal to the given id `node`. + * + * @param node The input id, which will be looked up in startIds_ + * @return A std::span, which consists of all targetIds_ where + * startIds_ == node. + */ + auto successors(const Id node) const { + auto range = std::ranges::equal_range(startIds_, node); + + auto startIndex = std::distance(startIds_.begin(), range.begin()); + + return targetIds_.subspan(startIndex, range.size()); + } +}; + +/** + * @class TransitivePathBinSearch + * @brief This class implements the transitive path operation. The + * implementation represents the graph as adjacency lists and uses binary search + * to find successors of given nodes. 
+ */ +class TransitivePathBinSearch : public TransitivePathImpl { + public: + TransitivePathBinSearch(QueryExecutionContext* qec, + std::shared_ptr child, + TransitivePathSide leftSide, + TransitivePathSide rightSide, size_t minDist, + size_t maxDist); + + private: + // initialize the map from the subresult + BinSearchMap setupEdgesMap( + const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const override; +}; diff --git a/src/engine/TransitivePathHashMap.cpp b/src/engine/TransitivePathHashMap.cpp new file mode 100644 index 0000000000..a48bcc39a8 --- /dev/null +++ b/src/engine/TransitivePathHashMap.cpp @@ -0,0 +1,46 @@ +// Copyright 2019, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) +// Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#include "TransitivePathHashMap.h" + +#include + +#include "engine/CallFixedSize.h" +#include "engine/TransitivePathBase.h" + +// _____________________________________________________________________________ +TransitivePathHashMap::TransitivePathHashMap( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist) + : TransitivePathImpl( + qec, std::move(child), std::move(leftSide), std::move(rightSide), + minDist, maxDist) {} + +// _____________________________________________________________________________ +HashMapWrapper TransitivePathHashMap::setupEdgesMap( + const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + return CALL_FIXED_SIZE((std::array{dynSub.numColumns()}), + &TransitivePathHashMap::setupEdgesMap, this, dynSub, + startSide, targetSide); +} + +// _____________________________________________________________________________ +template +HashMapWrapper TransitivePathHashMap::setupEdgesMap( + const IdTable& dynSub, const TransitivePathSide& 
startSide, + const TransitivePathSide& targetSide) const { + const IdTableView sub = dynSub.asStaticView(); + Map edges{allocator()}; + decltype(auto) startCol = sub.getColumn(startSide.subCol_); + decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); + + for (size_t i = 0; i < sub.size(); i++) { + checkCancellation(); + insertIntoMap(edges, startCol[i], targetCol[i]); + } + return HashMapWrapper{std::move(edges), allocator()}; +} diff --git a/src/engine/TransitivePathHashMap.h b/src/engine/TransitivePathHashMap.h new file mode 100644 index 0000000000..3bda2c2117 --- /dev/null +++ b/src/engine/TransitivePathHashMap.h @@ -0,0 +1,106 @@ +// Copyright 2019, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) +// Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#pragma once + +#include + +#include "engine/Operation.h" +#include "engine/QueryExecutionTree.h" +#include "engine/TransitivePathImpl.h" +#include "engine/idTable/IdTable.h" + +/** + * @class HashMapWrapper + * @brief A wrapper for the Map class from TransitivePathBase. This wrapper + * implements the successors function, which is used in transitiveHull function. + * + */ +struct HashMapWrapper { + Map map_; + Set emptySet_; + + HashMapWrapper(Map map, ad_utility::AllocatorWithLimit allocator) + : map_(std::move(map)), emptySet_(allocator){}; + + /** + * @brief Return the successors for the given Id. The successors are all ids, + * which are stored under the key 'node' + * + * @param node The input id + * @return A const Set&, consisting of all target ids which have an ingoing + * edge from 'node' + */ + const auto& successors(const Id node) const { + auto iterator = map_.find(node); + if (iterator == map_.end()) { + return emptySet_; + } + return iterator->second; + } +}; + +/** + * @class TransitivePathHashMap + * @brief This class implements the transitive path operation. 
The + * implementation uses a hash map to represent the graph and find successors + * of given nodes. + * + * + */ +class TransitivePathHashMap : public TransitivePathImpl { + public: + TransitivePathHashMap(QueryExecutionContext* qec, + std::shared_ptr child, + TransitivePathSide leftSide, + TransitivePathSide rightSide, size_t minDist, + size_t maxDist); + + private: + /** + * @brief Prepare a Map and a nodes vector for the transitive hull + * computation. + * + * @tparam SUB_WIDTH Number of columns of the sub table + * @tparam SIDE_WIDTH Number of columns of the startSideTable + * @param sub The sub table result + * @param startSide The TransitivePathSide where the edges start + * @param targetSide The TransitivePathSide where the edges end + * @param startSideTable An IdTable containing the Ids for the startSide + * @return std::pair> A Map and Id vector (nodes) for the + * transitive hull computation + */ + template + std::pair> setupMapAndNodes( + const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const; + + /** + * @brief Prepare a Map and a nodes vector for the transitive hull + * computation. 
+ * + * @tparam SUB_WIDTH Number of columns of the sub table + * @param sub The sub table result + * @param startSide The TransitivePathSide where the edges start + * @param targetSide The TransitivePathSide where the edges end + * @return std::pair> A Map and Id vector (nodes) for the + * transitive hull computation + */ + template + std::pair> setupMapAndNodes( + const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const; + + // initialize the map from the subresult + HashMapWrapper setupEdgesMap( + const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const override; + + template + HashMapWrapper setupEdgesMap(const IdTable& dynSub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const; +}; diff --git a/src/engine/TransitivePathImpl.h b/src/engine/TransitivePathImpl.h new file mode 100644 index 0000000000..16fe987dbb --- /dev/null +++ b/src/engine/TransitivePathImpl.h @@ -0,0 +1,313 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#pragma once + +#include + +#include "engine/CallFixedSize.h" +#include "engine/TransitivePathBase.h" +#include "util/Exception.h" +#include "util/Timer.h" + +/** + * @class TransitivePathImpl + * @brief This class implements common functions for the concrete TransitivePath + * classes TransitivePathBinSearch and TransitivePathHashMap. The template can + * be set to a map data structure which is used for the transitive hull + * computation. + * + * @tparam T A map data structure for the transitive hull computation. 
+ */ +template +class TransitivePathImpl : public TransitivePathBase { + public: + TransitivePathImpl(QueryExecutionContext* qec, + std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, + size_t minDist, size_t maxDist) + : TransitivePathBase(qec, std::move(child), std::move(leftSide), + std::move(rightSide), minDist, maxDist){}; + + /** + * @brief Compute the transitive hull with a bound side. + * This function is called when the startSide is bound and + * it is a variable. The other IdTable contains the result + * of the start side and will be used to get the start nodes. + * + * @tparam RES_WIDTH Number of columns of the result table + * @tparam SUB_WIDTH Number of columns of the sub table + * @tparam SIDE_WIDTH Number of columns of the + * @param res The result table which will be filled in-place + * @param sub The IdTable for the sub result + * @param startSide The start side for the transitive hull + * @param targetSide The target side for the transitive hull + * @param startSideTable The IdTable of the startSide + */ + template + void computeTransitivePathBound(IdTable* dynRes, const IdTable& dynSub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const { + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + + auto [edges, nodes] = setupMapAndNodes( + dynSub, startSide, targetSide, startSideTable); + + timer.stop(); + auto initTime = timer.msecs(); + timer.start(); + + Map hull(allocator()); + if (!targetSide.isVariable()) { + hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); + } else { + hull = transitiveHull(edges, nodes, std::nullopt); + } + + timer.stop(); + auto hullTime = timer.msecs(); + timer.start(); + + fillTableWithHull(*dynRes, hull, nodes, startSide.outputCol_, + targetSide.outputCol_, startSideTable, + startSide.treeAndCol_.value().second); + + timer.stop(); + auto fillTime = timer.msecs(); + + auto& info = 
runtimeInfo(); + info.addDetail("Initialization time", initTime.count()); + info.addDetail("Hull time", hullTime.count()); + info.addDetail("IdTable fill time", fillTime.count()); + }; + + /** + * @brief Compute the transitive hull. + * This function is called when no side is bound (or an id). + * + * @tparam RES_WIDTH Number of columns of the result table + * @tparam SUB_WIDTH Number of columns of the sub table + * @param res The result table which will be filled in-place + * @param sub The IdTable for the sub result + * @param startSide The start side for the transitive hull + * @param targetSide The target side for the transitive hull + */ + + template + void computeTransitivePath(IdTable* dynRes, const IdTable& dynSub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + + auto [edges, nodes] = + setupMapAndNodes(dynSub, startSide, targetSide); + + timer.stop(); + auto initTime = timer.msecs(); + timer.start(); + + Map hull{allocator()}; + if (!targetSide.isVariable()) { + hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); + } else { + hull = transitiveHull(edges, nodes, std::nullopt); + } + + timer.stop(); + auto hullTime = timer.msecs(); + timer.start(); + + fillTableWithHull(*dynRes, hull, startSide.outputCol_, + targetSide.outputCol_); + timer.stop(); + auto fillTime = timer.msecs(); + + auto& info = runtimeInfo(); + info.addDetail("Initialization time", initTime.count()); + info.addDetail("Hull time", hullTime.count()); + info.addDetail("IdTable fill time", fillTime.count()); + }; + + protected: + /** + * @brief Compute the result for this TransitivePath operation + * This function chooses the start and target side for the transitive + * hull computation. This choice of the start side has a large impact + * on the time it takes to compute the hull. The set of nodes on the + * start side should be as small as possible. 
+ * + * @return ResultTable The result of the TransitivePath operation + */ + ResultTable computeResult() override { + if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && + rhs_.isVariable()) { + AD_THROW( + "This query might have to evalute the empty path, which is currently " + "not supported"); + } + auto [startSide, targetSide] = decideDirection(); + shared_ptr subRes = subtree_->getResult(); + + IdTable idTable{allocator()}; + + idTable.setNumColumns(getResultWidth()); + + size_t subWidth = subRes->idTable().numColumns(); + + if (startSide.isBoundVariable()) { + shared_ptr sideRes = + startSide.treeAndCol_.value().first->getResult(); + size_t sideWidth = sideRes->idTable().numColumns(); + + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), + &TransitivePathImpl::computeTransitivePathBound, this, + &idTable, subRes->idTable(), startSide, targetSide, + sideRes->idTable()); + + return {std::move(idTable), resultSortedOn(), + ResultTable::getMergedLocalVocab(*sideRes, *subRes)}; + } + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePathImpl::computeTransitivePath, this, + &idTable, subRes->idTable(), startSide, targetSide); + + // NOTE: The only place, where the input to a transitive path operation is + // not an index scan (which has an empty local vocabulary by default) is the + // `LocalVocabTest`. But it doesn't harm to propagate the local vocab here + // either. + return {std::move(idTable), resultSortedOn(), + subRes->getSharedLocalVocab()}; + }; + + /** + * @brief Compute the transitive hull starting at the given nodes, + * using the given Map. + * + * @param edges Adjacency lists, mapping Ids (nodes) to their connected + * Ids. + * @param nodes A list of Ids. These Ids are used as starting points for the + * transitive hull. Thus, this parameter guides the performance of this + * algorithm. + * @param target Optional target Id. If supplied, only paths which end + * in this Id are added to the hull. 
+ * @return Map Maps each Id to its connected Ids in the transitive hull + */ + Map transitiveHull(const T& edges, const std::vector& startNodes, + std::optional target) const { + // For every node do a dfs on the graph + Map hull{allocator()}; + + std::vector> stack; + ad_utility::HashSetWithMemoryLimit marks{ + getExecutionContext()->getAllocator()}; + for (auto startNode : startNodes) { + if (hull.contains(startNode)) { + // We have already computed the hull for this node + continue; + } + + marks.clear(); + stack.clear(); + stack.push_back({startNode, 0}); + + if (minDist_ == 0 && + (!target.has_value() || startNode == target.value())) { + insertIntoMap(hull, startNode, startNode); + } + + while (!stack.empty()) { + checkCancellation(); + auto [node, steps] = stack.back(); + stack.pop_back(); + + if (steps <= maxDist_ && marks.count(node) == 0) { + if (steps >= minDist_) { + marks.insert(node); + if (!target.has_value() || node == target.value()) { + insertIntoMap(hull, startNode, node); + } + } + + const auto& successors = edges.successors(node); + for (auto successor : successors) { + stack.push_back({successor, steps + 1}); + } + } + } + } + return hull; + } + + /** + * @brief Prepare a Map and a nodes vector for the transitive hull + * computation. 
+ * + * @tparam SUB_WIDTH Number of columns of the sub table + * @param sub The sub table result + * @param startSide The TransitivePathSide where the edges start + * @param targetSide The TransitivePathSide where the edges end + * @return std::pair> A Map and Id vector (nodes) for the + * transitive hull computation + */ + template + std::pair> setupMapAndNodes( + const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + std::vector nodes; + auto edges = setupEdgesMap(sub, startSide, targetSide); + + // id -> var|id + if (!startSide.isVariable()) { + nodes.push_back(std::get(startSide.value_)); + // var -> var + } else { + std::span startNodes = sub.getColumn(startSide.subCol_); + // TODO Use ranges::to. + nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); + if (minDist_ == 0) { + std::span targetNodes = sub.getColumn(targetSide.subCol_); + nodes.insert(nodes.end(), targetNodes.begin(), targetNodes.end()); + } + } + + return {std::move(edges), std::move(nodes)}; + }; + + /** + * @brief Prepare a Map and a nodes vector for the transitive hull + * computation. 
+ * + * @tparam SUB_WIDTH Number of columns of the sub table + * @tparam SIDE_WIDTH Number of columns of the startSideTable + * @param sub The sub table result + * @param startSide The TransitivePathSide where the edges start + * @param targetSide The TransitivePathSide where the edges end + * @param startSideTable An IdTable containing the Ids for the startSide + * @return std::pair> A Map and Id vector (nodes) for the + * transitive hull computation + */ + template + std::pair> setupMapAndNodes( + const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const { + std::vector nodes; + auto edges = setupEdgesMap(sub, startSide, targetSide); + + // Bound -> var|id + std::span startNodes = + startSideTable.getColumn(startSide.treeAndCol_.value().second); + // TODO Use ranges::to. + nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); + + return {std::move(edges), std::move(nodes)}; + }; + + virtual T setupEdgesMap(const IdTable& dynSub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const = 0; +}; diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h index 930b32082b..5bea460cc9 100644 --- a/src/global/RuntimeParameters.h +++ b/src/global/RuntimeParameters.h @@ -45,6 +45,7 @@ inline auto& RuntimeParameters() { DurationParameter{ 30s}), SizeT<"lazy-index-scan-max-size-materialization">{1'000'000}, + Bool<"use-binsearch-transitive-path">{true}, Bool<"group-by-hash-map-enabled">{false}}; }(); return params; diff --git a/test/LocalVocabTest.cpp b/test/LocalVocabTest.cpp index 1f76ddb2bc..bdba4a9b68 100644 --- a/test/LocalVocabTest.cpp +++ b/test/LocalVocabTest.cpp @@ -23,7 +23,7 @@ #include "engine/QueryExecutionTree.h" #include "engine/ResultTable.h" #include "engine/Sort.h" -#include "engine/TransitivePath.h" +#include "engine/TransitivePathBase.h" #include "engine/Union.h" #include "engine/Values.h" #include 
"engine/sparqlExpressions/GroupConcatExpression.h" @@ -324,8 +324,9 @@ TEST(LocalVocab, propagation) { // local-vocabulary. Still, it doesn't harm to test this. TransitivePathSide left(std::nullopt, 0, Variable{"?x"}); TransitivePathSide right(std::nullopt, 1, Variable{"?y"}); - TransitivePath transitivePath(testQec, qet(values1), left, right, 1, 1); - checkLocalVocab(transitivePath, localVocab1); + auto transitivePath = TransitivePathBase::makeTransitivePath( + testQec, qet(values1), std::move(left), std::move(right), 1, 1); + checkLocalVocab(*transitivePath, localVocab1); // PATTERN TRICK operations. HasPredicateScan hasPredicateScan(testQec, qet(values1), 0, Variable{"?z"}); diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 0316ddc3fb..a6cb7ed68a 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -727,8 +727,8 @@ TEST(QueryPlanner, TransitivePathRightId) { auto getId = ad_utility::testing::makeGetId(qec->getIndex()); - TransitivePathSide left{std::nullopt, 0, Variable("?x"), 0}; - TransitivePathSide right{std::nullopt, 1, getId(""), 1}; + TransitivePathSide left{std::nullopt, 1, Variable("?x"), 0}; + TransitivePathSide right{std::nullopt, 0, getId(""), 1}; h::expect( "SELECT ?y WHERE {" "?x

+ }", @@ -765,8 +765,9 @@ TEST(QueryPlanner, TransitivePathBindRight) { h::TransitivePath( left, right, 0, std::numeric_limits::max(), scan("?y", "

", ""), - scan("?_qlever_internal_variable_query_planner_0", "

", - "?_qlever_internal_variable_query_planner_1"))); + // TODO Get rid of this sort operation + h::Sort(scan("?_qlever_internal_variable_query_planner_0", "

", + "?_qlever_internal_variable_query_planner_1")))); } // __________________________________________________________________________ diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index bc61b070d9..dce66eeabc 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -18,7 +18,7 @@ #include "engine/Sort.h" #include "engine/TextIndexScanForEntity.h" #include "engine/TextIndexScanForWord.h" -#include "engine/TransitivePath.h" +#include "engine/TransitivePathBase.h" #include "engine/Union.h" #include "gmock/gmock-matchers.h" #include "gmock/gmock.h" @@ -234,16 +234,22 @@ inline auto TransitivePathSideMatcher = [](TransitivePathSide side) { inline auto TransitivePath = [](TransitivePathSide left, TransitivePathSide right, size_t minDist, size_t maxDist, const std::same_as auto&... childMatchers) { - return RootOperation<::TransitivePath>(AllOf( - Property("getChildren", &Operation::getChildren, - ElementsAre(Pointee(childMatchers)...)), - AD_PROPERTY(TransitivePath, getMinDist, Eq(minDist)), - AD_PROPERTY(TransitivePath, getMaxDist, Eq(maxDist)), - AD_PROPERTY(TransitivePath, getLeft, TransitivePathSideMatcher(left)), - AD_PROPERTY(TransitivePath, getRight, - TransitivePathSideMatcher(right)))); + return RootOperation<::TransitivePathBase>( + AllOf(Property("getChildren", &Operation::getChildren, + ElementsAre(Pointee(childMatchers)...)), + AD_PROPERTY(TransitivePathBase, getMinDist, Eq(minDist)), + AD_PROPERTY(TransitivePathBase, getMaxDist, Eq(maxDist)), + AD_PROPERTY(TransitivePathBase, getLeft, + TransitivePathSideMatcher(left)), + AD_PROPERTY(TransitivePathBase, getRight, + TransitivePathSideMatcher(right)))); }; +// Match a sort operation. Currently, this is only required by the binary search +// version of the transitive path operation. This matcher checks only the +// children of the sort operation. +inline auto Sort = MatchTypeAndUnorderedChildren<::Sort>; + // Match a `Filter` operation. 
The matching of the expression is currently only // done via the descriptor. constexpr auto Filter = [](std::string_view descriptor, diff --git a/test/TransitivePathTest.cpp b/test/TransitivePathTest.cpp index 7fdb976eb4..4e82a65202 100644 --- a/test/TransitivePathTest.cpp +++ b/test/TransitivePathTest.cpp @@ -1,209 +1,489 @@ // Copyright 2018, University of Freiburg, // Chair of Algorithms and Data Structures. // Author: Florian Kramer (florian.kramer@mail.uni-freiburg.de) +// Johannes Herrmann (johannes.r.herrmann(at)gmail.com) +#include #include -#include +#include +#include -#include "./util/AllocatorTestHelpers.h" #include "./util/IdTestHelpers.h" -#include "engine/TransitivePath.h" -#include "global/Id.h" +#include "./util/IndexTestHelpers.h" +#include "engine/QueryExecutionTree.h" +#include "engine/TransitivePathBase.h" +#include "engine/ValuesForTesting.h" +#include "gtest/gtest.h" +#include "util/GTestHelpers.h" +#include "util/IdTableHelpers.h" #include "util/IndexTestHelpers.h" using ad_utility::testing::getQec; -using ad_utility::testing::makeAllocator; namespace { auto V = ad_utility::testing::VocabId; +using Vars = std::vector>; -// First sort both of the inputs and then ASSERT their equality. Needed for -// results of the TransitivePath operations which have a non-deterministic order -// because of the hash maps which are used internally. 
-void assertSameUnorderedContent(const IdTable& a, const IdTable& b) { - auto aCpy = a.clone(); - auto bCpy = b.clone(); - ASSERT_EQ(a.numColumns(), b.numColumns()); - auto sorter = [](const auto& rowFromA, const auto& rowFromB) { - for (size_t i = 0; i < rowFromA.numColumns(); ++i) { - if (rowFromA[i] != rowFromB[i]) { - return rowFromA[i] < rowFromB[i]; - } - } - // equal means "not smaller" - return false; - }; - std::sort(aCpy.begin(), aCpy.end(), sorter); - std::sort(bCpy.begin(), bCpy.end(), sorter); - ASSERT_EQ(aCpy, bCpy); -} } // namespace -TEST(TransitivePathTest, idToId) { - IdTable sub(2, makeAllocator()); - sub.push_back({V(0), V(1)}); - sub.push_back({V(1), V(2)}); - sub.push_back({V(1), V(3)}); - sub.push_back({V(2), V(3)}); +class TransitivePathTest : public testing::TestWithParam { + public: + [[nodiscard]] static std::pair, + QueryExecutionContext*> + makePath(IdTable input, Vars vars, TransitivePathSide left, + TransitivePathSide right, size_t minDist, size_t maxDist) { + bool useBinSearch = GetParam(); + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(input), vars); + return {TransitivePathBase::makeTransitivePath( + qec, std::move(subtree), std::move(left), std::move(right), + minDist, maxDist, useBinSearch), + qec}; + } + + [[nodiscard]] static std::shared_ptr makePathUnbound( + IdTable input, Vars vars, TransitivePathSide left, + TransitivePathSide right, size_t minDist, size_t maxDist) { + auto [T, qec] = makePath(std::move(input), vars, std::move(left), + std::move(right), minDist, maxDist); + return T; + } + + [[nodiscard]] static std::shared_ptr makePathLeftBound( + IdTable input, Vars vars, IdTable sideTable, size_t sideTableCol, + Vars sideVars, TransitivePathSide left, TransitivePathSide right, + size_t minDist, size_t maxDist) { + auto [T, qec] = makePath(std::move(input), vars, std::move(left), + std::move(right), minDist, maxDist); + auto leftOp = ad_utility::makeExecutionTree( + qec, 
std::move(sideTable), sideVars); + return T->bindLeftSide(leftOp, sideTableCol); + } + + [[nodiscard]] static std::shared_ptr makePathRightBound( + IdTable input, Vars vars, IdTable sideTable, size_t sideTableCol, + Vars sideVars, TransitivePathSide left, TransitivePathSide right, + size_t minDist, size_t maxDist) { + auto [T, qec] = makePath(std::move(input), vars, std::move(left), + std::move(right), minDist, maxDist); + auto rightOp = ad_utility::makeExecutionTree( + qec, std::move(sideTable), sideVars); + return T->bindRightSide(rightOp, sideTableCol); + } +}; + +TEST_P(TransitivePathTest, idToId) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 3}}); + + auto expected = makeIdTableFromVector({{0, 3}}); - IdTable result(2, makeAllocator()); + TransitivePathSide left(std::nullopt, 0, V(0), 0); + TransitivePathSide right(std::nullopt, 1, V(3), 1); + auto T = + makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, std::numeric_limits::max()); - IdTable expected(2, makeAllocator()); - expected.push_back({V(0), V(3)}); + auto resultTable = T->computeResultOnlyForTesting(); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST_P(TransitivePathTest, idToVar) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 3}}); + + auto expected = makeIdTableFromVector({{0, 1}, {0, 2}, {0, 3}}); TransitivePathSide left(std::nullopt, 0, V(0), 0); - TransitivePathSide right(std::nullopt, 1, V(3), 1); - TransitivePath T(getQec(), nullptr, left, right, 1, - std::numeric_limits::max()); + TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); + auto T = + makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, std::numeric_limits::max()); - T.computeTransitivePath<2, 2>(&result, sub, left, right); - assertSameUnorderedContent(expected, result); + auto resultTable = T->computeResultOnlyForTesting(); + 
ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); } -TEST(TransitivePathTest, idToVar) { - IdTable sub(2, makeAllocator()); - sub.push_back({V(0), V(1)}); - sub.push_back({V(1), V(2)}); - sub.push_back({V(1), V(3)}); - sub.push_back({V(2), V(3)}); +TEST_P(TransitivePathTest, varToId) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 3}}); + + auto expected = makeIdTableFromVector({ + {2, 3}, + {1, 3}, + {0, 3}, + }); + + TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); + TransitivePathSide right(std::nullopt, 1, V(3), 1); + auto T = + makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, std::numeric_limits::max()); + + auto resultTable = T->computeResultOnlyForTesting(); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} - IdTable result(2, makeAllocator()); +TEST_P(TransitivePathTest, idToVarMinLengthZero) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 3}}); - IdTable expected(2, makeAllocator()); - expected.push_back({V(0), V(1)}); - expected.push_back({V(0), V(2)}); - expected.push_back({V(0), V(3)}); + auto expected = makeIdTableFromVector({{0, 0}, {0, 1}, {0, 2}, {0, 3}}); TransitivePathSide left(std::nullopt, 0, V(0), 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - TransitivePath T(getQec(), nullptr, left, right, 1, - std::numeric_limits::max()); + auto T = + makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 0, std::numeric_limits::max()); - T.computeTransitivePath<2, 2>(&result, sub, left, right); - assertSameUnorderedContent(expected, result); + auto resultTable = T->computeResultOnlyForTesting(); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); } -TEST(TransitivePathTest, varTovar) { - IdTable sub(2, makeAllocator()); - sub.push_back({V(0), V(1)}); - sub.push_back({V(1), V(2)}); - 
sub.push_back({V(1), V(3)}); - sub.push_back({V(2), V(3)}); +TEST_P(TransitivePathTest, varToIdMinLengthZero) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 3}}); - IdTable result(2, makeAllocator()); + auto expected = makeIdTableFromVector({ + {3, 3}, + {2, 3}, + {1, 3}, + {0, 3}, + }); - IdTable expected(2, makeAllocator()); - expected.push_back({V(0), V(1)}); - expected.push_back({V(0), V(2)}); - expected.push_back({V(0), V(3)}); - expected.push_back({V(1), V(2)}); - expected.push_back({V(1), V(3)}); - expected.push_back({V(2), V(3)}); + TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); + TransitivePathSide right(std::nullopt, 1, V(3), 1); + auto T = + makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 0, std::numeric_limits::max()); + + auto resultTable = T->computeResultOnlyForTesting(); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST_P(TransitivePathTest, varTovar) { + auto sub = makeIdTableFromVector({ + {0, 1}, + {1, 2}, + {1, 3}, + {2, 3}, + }); + + auto expected = makeIdTableFromVector({ + {0, 1}, + {0, 2}, + {0, 3}, + {1, 2}, + {1, 3}, + {2, 3}, + }); TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - TransitivePath T(getQec(), nullptr, right, left, 1, - std::numeric_limits::max()); + auto T = + makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, std::numeric_limits::max()); - T.computeTransitivePath<2, 2>(&result, sub, left, right); - assertSameUnorderedContent(expected, result); + auto resultTable = T->computeResultOnlyForTesting(); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); } -TEST(TransitivePathTest, unlimitedMaxLength) { - IdTable sub(2, makeAllocator()); - sub.push_back({V(0), V(2)}); - sub.push_back({V(2), V(4)}); - sub.push_back({V(4), V(7)}); - 
sub.push_back({V(0), V(7)}); - sub.push_back({V(3), V(3)}); - sub.push_back({V(7), V(0)}); - // Disconnected component. - sub.push_back({V(10), V(11)}); - - IdTable result(2, makeAllocator()); - - IdTable expected(2, makeAllocator()); - expected.push_back({V(0), V(2)}); - expected.push_back({V(0), V(4)}); - expected.push_back({V(0), V(7)}); - expected.push_back({V(0), V(0)}); - expected.push_back({V(2), V(4)}); - expected.push_back({V(2), V(7)}); - expected.push_back({V(2), V(0)}); - expected.push_back({V(2), V(2)}); - expected.push_back({V(4), V(7)}); - expected.push_back({V(4), V(0)}); - expected.push_back({V(4), V(2)}); - expected.push_back({V(4), V(4)}); - expected.push_back({V(3), V(3)}); - expected.push_back({V(7), V(0)}); - expected.push_back({V(7), V(2)}); - expected.push_back({V(7), V(4)}); - expected.push_back({V(7), V(7)}); - expected.push_back({V(10), V(11)}); +TEST_P(TransitivePathTest, unlimitedMaxLength) { + auto sub = makeIdTableFromVector({{0, 2}, + {2, 4}, + {4, 7}, + {0, 7}, + {3, 3}, + {7, 0}, + // Disconnected component. 
+ {V(10), V(11)}}); + + auto expected = makeIdTableFromVector({{0, 2}, + {0, 4}, + {0, 7}, + {0, 0}, + {2, 4}, + {2, 7}, + {2, 0}, + {2, 2}, + {4, 7}, + {4, 0}, + {4, 2}, + {4, 4}, + {3, 3}, + {7, 0}, + {7, 2}, + {7, 4}, + {7, 7}, + {V(10), V(11)}}); TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - TransitivePath T(getQec(), nullptr, left, right, 1, - std::numeric_limits::max()); + auto T = + makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, std::numeric_limits::max()); - T.computeTransitivePath<2, 2>(&result, sub, left, right); - assertSameUnorderedContent(expected, result); + auto resultTable = T->computeResultOnlyForTesting(); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); } -TEST(TransitivePathTest, maxLength2) { - IdTable sub(2, makeAllocator()); - sub.push_back({V(0), V(2)}); - sub.push_back({V(2), V(4)}); - sub.push_back({V(4), V(7)}); - sub.push_back({V(0), V(7)}); - sub.push_back({V(3), V(3)}); - sub.push_back({V(7), V(0)}); - // Disconnected component. 
-  sub.push_back({V(10), V(11)});
-
-  IdTable result(2, makeAllocator());
-
-  IdTable expected(2, makeAllocator());
-
-  expected.push_back({V(0), V(2)});
-  expected.push_back({V(0), V(4)});
-  expected.push_back({V(0), V(7)});
-  expected.push_back({V(0), V(0)});
-  expected.push_back({V(2), V(4)});
-  expected.push_back({V(2), V(7)});
-  expected.push_back({V(4), V(7)});
-  expected.push_back({V(4), V(0)});
-  expected.push_back({V(3), V(3)});
-  expected.push_back({V(7), V(0)});
-  expected.push_back({V(7), V(2)});
-  expected.push_back({V(7), V(7)});
-  expected.push_back({V(10), V(11)});
+// Builds the path with `makePathLeftBound`, passing `leftOpTable` as the left
+// input; the right side is the fixed id V(4) and the length range is
+// [0, max of size_t]. The result is compared to `expected` ignoring row order.
+TEST_P(TransitivePathTest, idToLeftBound) {
+  auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 3}, {3, 4}});
+
+  auto leftOpTable = makeIdTableFromVector({
+      {0, 1},
+      {0, 2},
+      {0, 3},
+  });
+
+  auto expected = makeIdTableFromVector({
+      {1, 4, 0},
+      {2, 4, 0},
+      {3, 4, 0},
+  });

   TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0);
+  TransitivePathSide right(std::nullopt, 1, V(4), 1);
+  auto T = makePathLeftBound(
+      std::move(sub), {Variable{"?start"}, Variable{"?target"}},
+      std::move(leftOpTable), 1, {Variable{"?x"}, Variable{"?start"}},
+      std::move(left), std::move(right), 0, std::numeric_limits<size_t>::max());
+
+  auto resultTable = T->computeResultOnlyForTesting();
+  ASSERT_THAT(resultTable.idTable(),
+              ::testing::UnorderedElementsAreArray(expected));
+}
+
+// Mirror of `idToLeftBound`: the left side is the fixed id V(0) and
+// `rightOpTable` is passed to `makePathRightBound` as the right input.
+TEST_P(TransitivePathTest, idToRightBound) {
+  auto sub = makeIdTableFromVector({
+      {0, 1},
+      {1, 2},
+      {1, 3},
+      {2, 3},
+      {3, 4},
+  });
+
+  auto rightOpTable = makeIdTableFromVector({
+      {2, 5},
+      {3, 5},
+      {4, 5},
+  });
+
+  auto expected = makeIdTableFromVector({
+      {0, 2, 5},
+      {0, 3, 5},
+      {0, 4, 5},
+  });
+
+  TransitivePathSide left(std::nullopt, 0, V(0), 0);
   TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1);
-  TransitivePath T(getQec(), nullptr, left, right, 1, 2);
-  T.computeTransitivePath<2, 2>(&result, sub, left, right);
-  assertSameUnorderedContent(expected, result);
-
-  result.clear();
-  expected.clear();
-  expected.push_back({V(7), V(0)});
-  expected.push_back({V(7), V(2)});
-  expected.push_back({V(7), V(7)});
-
-  left.value_ = V(7);
-  right.value_ = Variable{"?target"};
-  T.computeTransitivePath<2, 2>(&result, sub, left, right);
-  assertSameUnorderedContent(expected, result);
-
-  result.clear();
-  expected.clear();
-  expected.push_back({V(0), V(2)});
-  expected.push_back({V(7), V(2)});
-
-  left.value_ = Variable{"?start"};
-  right.value_ = V(2);
-  T.computeTransitivePath<2, 2>(&result, sub, right, left);
-  assertSameUnorderedContent(expected, result);
+  auto T = makePathRightBound(
+      std::move(sub), {Variable{"?start"}, Variable{"?target"}},
+      std::move(rightOpTable), 0, {Variable{"?target"}, Variable{"?x"}},
+      std::move(left), std::move(right), 0, std::numeric_limits<size_t>::max());
+
+  auto resultTable = T->computeResultOnlyForTesting();
+  ASSERT_THAT(resultTable.idTable(),
+              ::testing::UnorderedElementsAreArray(expected));
 }
+
+// Both sides are variables; `leftOpTable` is passed to `makePathLeftBound`
+// with a length range of [0, max of size_t].
+TEST_P(TransitivePathTest, leftBoundToVar) {
+  auto sub = makeIdTableFromVector({
+      {1, 2},
+      {2, 3},
+      {2, 4},
+      {3, 4},
+  });
+
+  auto leftOpTable = makeIdTableFromVector({
+      {0, 1},
+      {0, 2},
+      {0, 3},
+  });
+
+  auto expected = makeIdTableFromVector({
+      {1, 1, 0},
+      {1, 2, 0},
+      {1, 3, 0},
+      {1, 4, 0},
+      {2, 2, 0},
+      {2, 3, 0},
+      {2, 4, 0},
+      {3, 3, 0},
+      {3, 4, 0},
+  });
+
+  TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0);
+  TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1);
+  auto T = makePathLeftBound(
+      std::move(sub), {Variable{"?start"}, Variable{"?target"}},
+      std::move(leftOpTable), 1, {Variable{"?x"}, Variable{"?start"}},
+      std::move(left), std::move(right), 0, std::numeric_limits<size_t>::max());
+
+  auto resultTable = T->computeResultOnlyForTesting();
+  ASSERT_THAT(resultTable.idTable(),
+              ::testing::UnorderedElementsAreArray(expected));
+}
+
+// Both sides are variables; `rightOpTable` is passed to `makePathRightBound`
+// with a length range of [0, max of size_t].
+TEST_P(TransitivePathTest, rightBoundToVar) {
+  auto sub = makeIdTableFromVector({
+      {1, 2},
+      {2, 3},
+      {2, 4},
+      {3, 4},
+  });
+
+  auto rightOpTable = makeIdTableFromVector({
+      {2, 5},
+      {3, 5},
+      {4, 5},
+  });
+
+  auto expected = makeIdTableFromVector({
+      {1, 2, 5},
+      {1, 3, 5},
+      {1, 4, 5},
+      {2, 2, 5},
+      {2, 3, 5},
+      {2, 4, 5},
+      {3, 3, 5},
+      {3, 4, 5},
+      {4, 4, 5},
+  });
+
+  TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0);
+  TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1);
+  auto T = makePathRightBound(
+      std::move(sub), {Variable{"?start"}, Variable{"?target"}},
+      std::move(rightOpTable), 0, {Variable{"?target"}, Variable{"?x"}},
+      std::move(left), std::move(right), 0, std::numeric_limits<size_t>::max());
+
+  auto resultTable = T->computeResultOnlyForTesting();
+  ASSERT_THAT(resultTable.idTable(),
+              ::testing::UnorderedElementsAreArray(expected));
+}
+
+// Unbound path over `sub` with both sides variable and length range [1, 2].
+TEST_P(TransitivePathTest, maxLength2FromVariable) {
+  auto sub = makeIdTableFromVector({
+      {0, 2},
+      {2, 4},
+      {4, 7},
+      {0, 7},
+      {3, 3},
+      {7, 0},
+      // Disconnected component.
+      {10, 11},
+  });
+
+  auto expected = makeIdTableFromVector({{0, 2},
+                                         {0, 4},
+                                         {0, 7},
+                                         {0, 0},
+                                         {2, 4},
+                                         {2, 7},
+                                         {4, 7},
+                                         {4, 0},
+                                         {3, 3},
+                                         {7, 0},
+                                         {7, 2},
+                                         {7, 7},
+                                         {10, 11}});
+
+  TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0);
+  TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1);
+  auto T =
+      makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
+                      left, right, 1, 2);
+  auto resultTable = T->computeResultOnlyForTesting();
+  ASSERT_THAT(resultTable.idTable(),
+              ::testing::UnorderedElementsAreArray(expected));
+}
+
+// Same graph and length range [1, 2], but the left side is the fixed id V(7).
+TEST_P(TransitivePathTest, maxLength2FromId) {
+  auto sub = makeIdTableFromVector({
+      {0, 2},
+      {2, 4},
+      {4, 7},
+      {0, 7},
+      {3, 3},
+      {7, 0},
+      // Disconnected component.
+      {10, 11},
+  });
+
+  auto expected = makeIdTableFromVector({
+      {7, 0},
+      {7, 2},
+      {7, 7},
+  });
+
+  TransitivePathSide left(std::nullopt, 0, V(7), 0);
+  TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1);
+  auto T =
+      makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
+                      left, right, 1, 2);
+  auto resultTable = T->computeResultOnlyForTesting();
+  ASSERT_THAT(resultTable.idTable(),
+              ::testing::UnorderedElementsAreArray(expected));
+}
+
+// Same graph and length range [1, 2], but the right side is the fixed id V(2).
+TEST_P(TransitivePathTest, maxLength2ToId) {
+  auto sub = makeIdTableFromVector({
+      {0, 2},
+      {2, 4},
+      {4, 7},
+      {0, 7},
+      {3, 3},
+      {7, 0},
+      // Disconnected component.
+      {10, 11},
+  });
+
+  auto expected = makeIdTableFromVector({
+      {0, 2},
+      {7, 2},
+  });
+
+  TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0);
+  TransitivePathSide right(std::nullopt, 1, V(2), 1);
+  auto T =
+      makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
+                      left, right, 1, 2);
+  auto resultTable = T->computeResultOnlyForTesting();
+  ASSERT_THAT(resultTable.idTable(),
+              ::testing::UnorderedElementsAreArray(expected));
+}
+
+// Both sides variable with minimum length 0: computing the result is expected
+// to throw with the message matched below.
+// NOTE(review): "evalute" looks like a typo; the regex presumably mirrors the
+// thrown message, so correct both together or not at all.
+TEST_P(TransitivePathTest, zeroLengthException) {
+  auto sub = makeIdTableFromVector({
+      {0, 2},
+      {2, 4},
+      {4, 7},
+      {0, 7},
+      {3, 3},
+      {7, 0},
+      // Disconnected component.
+      {10, 11},
+  });
+
+  TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0);
+  TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1);
+  auto T =
+      makePathUnbound(std::move(sub), {Variable{"?start"}, Variable{"?target"}},
+                      left, right, 0, std::numeric_limits<size_t>::max());
+  AD_EXPECT_THROW_WITH_MESSAGE(
+      T->computeResultOnlyForTesting(),
+      ::testing::ContainsRegex(
+          "This query might have to evalute the empty path, which is currently "
+          "not supported"));
+}
+
+// Runs every TEST_P above once per bool parameter; the name generator labels
+// `true` as "TransitivePathBinSearch" and `false` as "TransitivePathHashMap".
+INSTANTIATE_TEST_SUITE_P(TransitivePathTestSuite, TransitivePathTest,
+                         testing::Bool(),
+                         [](const testing::TestParamInfo<bool>& info) {
+                           return info.param ? "TransitivePathBinSearch"
+                                             : "TransitivePathHashMap";
+                         });