From a6ec4f1ec21a216b6d77d5c8dce8f4be9be1f2ba Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Mon, 15 Jan 2024 12:16:59 +0100 Subject: [PATCH] Make the has-predicate scans work again. --- src/engine/CMakeLists.txt | 2 +- src/engine/CountAvailablePredicates.cpp | 20 ++++- src/engine/HasPredicateScan.cpp | 99 +++++++++++-------------- src/engine/HasPredicateScan.h | 15 +--- src/engine/QueryExecutionTree.cpp | 5 ++ src/engine/QueryExecutionTree.h | 1 + src/engine/QueryPlanner.cpp | 49 ++++++++++++ src/engine/QueryPlanner.h | 10 +++ 8 files changed, 132 insertions(+), 69 deletions(-) diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 22246ee6e5..8e37590511 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -6,7 +6,7 @@ add_library(engine IndexScan.cpp Join.cpp Sort.cpp TextOperationWithoutFilter.cpp TextOperationWithFilter.cpp Distinct.cpp OrderBy.cpp Filter.cpp Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp - OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp + OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan.cpp Union.cpp MultiColumnJoin.cpp TransitivePath.cpp Service.cpp Values.cpp Bind.cpp Minus.cpp RuntimeInformation.cpp CheckUsePatternTrick.cpp VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp diff --git a/src/engine/CountAvailablePredicates.cpp b/src/engine/CountAvailablePredicates.cpp index 38e4ee3e20..0339227ffb 100644 --- a/src/engine/CountAvailablePredicates.cpp +++ b/src/engine/CountAvailablePredicates.cpp @@ -5,6 +5,7 @@ #include "engine/CountAvailablePredicates.h" #include "engine/CallFixedSize.h" +#include "engine/IndexScan.h" #include "index/IndexImpl.h" // _____________________________________________________________________________ @@ -117,7 +118,24 @@ ResultTable CountAvailablePredicates::computeResult() { const CompactVectorOfStrings& patterns = _executionContext->getIndex().getPatterns(); - if (_subtree == nullptr) { + AD_CORRECTNESS_CHECK(_subtree); 
+ bool isFullScan = [&]() { + auto indexScan = + dynamic_cast(_subtree->getRootOperation().get()); + if (!indexScan) { + return false; + } + if (!indexScan->getSubject().isVariable() || + !indexScan->getObject().isVariable()) { + return false; + } + + return indexScan->getPredicate() == HAS_PATTERN_PREDICATE; + }(); + + if (isFullScan) { + _subtree->getRootOperation()->updateRuntimeInformationWhenOptimizedOut( + RuntimeInformation::Status::lazilyMaterialized); // Compute the predicates for all entities CountAvailablePredicates::computePatternTrickAllEntities(&idTable, patterns); diff --git a/src/engine/HasPredicateScan.cpp b/src/engine/HasPredicateScan.cpp index 4a5b1aedf8..8d85a3bc66 100644 --- a/src/engine/HasPredicateScan.cpp +++ b/src/engine/HasPredicateScan.cpp @@ -2,9 +2,10 @@ // Chair of Algorithms and Data Structures. // Author: Florian Kramer (florian.kramer@mail.uni-freiburg.de) -#include "HasPredicateScan.h" +#include "engine/HasPredicateScan.h" -#include "CallFixedSize.h" +#include "engine/CallFixedSize.h" +#include "index/IndexImpl.h" HasPredicateScan::HasPredicateScan(QueryExecutionContext* qec, std::shared_ptr subtree, @@ -213,9 +214,14 @@ ResultTable HasPredicateScan::computeResult() { IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(getResultWidth()); - const std::vector& hasPattern = getIndex().getHasPattern(); - const CompactVectorOfStrings& hasPredicate = getIndex().getHasPredicate(); const CompactVectorOfStrings& patterns = getIndex().getPatterns(); + auto hasPattern = + getExecutionContext() + ->getIndex() + .getImpl() + .getPermutation(Permutation::Enum::PSO) + .lazyScan(qlever::specialIds.at(HAS_PATTERN_PREDICATE), std::nullopt, + std::nullopt, {}, cancellationHandle_); switch (_type) { case ScanType::FREE_S: { @@ -223,8 +229,7 @@ ResultTable HasPredicateScan::computeResult() { if (!getIndex().getId(_object, &objectId)) { AD_THROW("The predicate '" + _object + "' is not in the vocabulary."); } - 
HasPredicateScan::computeFreeS(&idTable, objectId, hasPattern, - hasPredicate, patterns); + HasPredicateScan::computeFreeS(&idTable, objectId, hasPattern, patterns); return {std::move(idTable), resultSortedOn(), LocalVocab{}}; }; case ScanType::FREE_O: { @@ -232,13 +237,12 @@ ResultTable HasPredicateScan::computeResult() { if (!getIndex().getId(_subject, &subjectId)) { AD_THROW("The subject " + _subject + " is not in the vocabulary."); } - HasPredicateScan::computeFreeO(&idTable, subjectId, hasPattern, - hasPredicate, patterns); + HasPredicateScan::computeFreeO(&idTable, subjectId, hasPattern, patterns); return {std::move(idTable), resultSortedOn(), LocalVocab{}}; }; case ScanType::FULL_SCAN: HasPredicateScan::computeFullScan( - &idTable, hasPattern, hasPredicate, patterns, + &idTable, hasPattern, patterns, getIndex().getNumDistinctSubjectPredicatePairs()); return {std::move(idTable), resultSortedOn(), LocalVocab{}}; case ScanType::SUBQUERY_S: @@ -246,10 +250,15 @@ ResultTable HasPredicateScan::computeResult() { std::shared_ptr subresult = _subtree->getResult(); int inWidth = subresult->idTable().numColumns(); int outWidth = idTable.numColumns(); + HasPredicateScan::computeSubqueryS<0, 0>(&idTable, subresult->idTable(), + _subtreeJoinColumn, hasPattern, + patterns); + /* CALL_FIXED_SIZE((std::array{inWidth, outWidth}), HasPredicateScan::computeSubqueryS, &idTable, subresult->idTable(), _subtreeJoinColumn, hasPattern, - hasPredicate, patterns); + patterns); + */ return {std::move(idTable), resultSortedOn(), subresult->getSharedLocalVocab()}; } @@ -257,41 +266,30 @@ ResultTable HasPredicateScan::computeResult() { } void HasPredicateScan::computeFreeS( - IdTable* resultTable, Id objectId, const std::vector& hasPattern, - const CompactVectorOfStrings& hasPredicate, + IdTable* resultTable, Id objectId, auto&& hasPattern, const CompactVectorOfStrings& patterns) { IdTableStatic<1> result = std::move(*resultTable).toStatic<1>(); - uint64_t entityIndex = 0; - while 
(entityIndex < hasPattern.size() || entityIndex < hasPredicate.size()) { - if (entityIndex < hasPattern.size() && - hasPattern[entityIndex] != NO_PATTERN) { - // add the pattern - const auto& pattern = patterns[hasPattern[entityIndex]]; + for (const auto& block : hasPattern) { + auto patternColumn = block.getColumn(1); + auto subjects = block.getColumn(0); + for (size_t i : ad_utility::integerRange(block.numRows())) { + const auto& pattern = patterns[patternColumn[i].getInt()]; for (const auto& predicate : pattern) { if (predicate == objectId) { - result.push_back( - {Id::makeFromVocabIndex(VocabIndex::make(entityIndex))}); - } - } - } else if (entityIndex < hasPredicate.size()) { - // add the relations - for (const auto& predicate : hasPredicate[entityIndex]) { - if (predicate == objectId) { - result.push_back( - {Id::makeFromVocabIndex(VocabIndex::make(entityIndex))}); + result.push_back({subjects[i]}); } + break; } } - entityIndex++; } *resultTable = std::move(result).toDynamic(); } void HasPredicateScan::computeFreeO( - IdTable* resultTable, Id subjectAsId, - const std::vector& hasPattern, - const CompactVectorOfStrings& hasPredicate, + IdTable* resultTable, Id subjectAsId, auto&& hasPattern, const CompactVectorOfStrings& patterns) { + AD_FAIL(); + /* // Subjects always have to be from the vocabulary if (subjectAsId.getDatatype() != Datatype::VocabIndex) { return; @@ -313,35 +311,23 @@ void HasPredicateScan::computeFreeO( } } *resultTable = std::move(result).toDynamic(); + */ } void HasPredicateScan::computeFullScan( - IdTable* resultTable, const std::vector& hasPattern, - const CompactVectorOfStrings& hasPredicate, + IdTable* resultTable, auto&& hasPattern, const CompactVectorOfStrings& patterns, size_t resultSize) { IdTableStatic<2> result = std::move(*resultTable).toStatic<2>(); result.reserve(resultSize); - - uint64_t subjectIndex = 0; - while (subjectIndex < hasPattern.size() || - subjectIndex < hasPredicate.size()) { - if (subjectIndex < hasPattern.size() 
&& - hasPattern[subjectIndex] != NO_PATTERN) { - // add the pattern - for (const auto& predicate : patterns[hasPattern[subjectIndex]]) { - result.push_back( - {Id::makeFromVocabIndex(VocabIndex::make(subjectIndex)), - predicate}); - } - } else if (subjectIndex < hasPredicate.size()) { - // add the relations - for (const auto& predicate : hasPredicate[subjectIndex]) { - result.push_back( - {Id::makeFromVocabIndex(VocabIndex::make(subjectIndex)), - predicate}); + for (const auto& block : hasPattern) { + auto patternColumn = block.getColumn(1); + auto subjects = block.getColumn(0); + for (size_t i : ad_utility::integerRange(block.numRows())) { + const auto& pattern = patterns[patternColumn[i].getInt()]; + for (const auto& predicate : pattern) { + result.push_back({subjects[i], predicate}); } } - subjectIndex++; } *resultTable = std::move(result).toDynamic(); } @@ -349,9 +335,9 @@ void HasPredicateScan::computeFullScan( template void HasPredicateScan::computeSubqueryS( IdTable* dynResult, const IdTable& dynInput, const size_t subtreeColIndex, - const std::vector& hasPattern, - const CompactVectorOfStrings& hasPredicate, - const CompactVectorOfStrings& patterns) { + auto&& hasPattern, const CompactVectorOfStrings& patterns) { + AD_FAIL(); + /* IdTableStatic result = std::move(*dynResult).toStatic(); const IdTableView input = dynInput.asStaticView(); @@ -389,6 +375,7 @@ void HasPredicateScan::computeSubqueryS( } } *dynResult = std::move(result).toDynamic(); + */ } void HasPredicateScan::setSubject(const TripleComponent& subject) { diff --git a/src/engine/HasPredicateScan.h b/src/engine/HasPredicateScan.h index 2cd6bc9959..98481e2299 100644 --- a/src/engine/HasPredicateScan.h +++ b/src/engine/HasPredicateScan.h @@ -84,27 +84,20 @@ class HasPredicateScan : public Operation { } // These are made static and public mainly for easier testing - static void computeFreeS(IdTable* resultTable, Id objectId, - const std::vector& hasPattern, - const CompactVectorOfStrings& 
hasPredicate, + static void computeFreeS(IdTable* resultTable, Id objectId, auto&& hasPattern, const CompactVectorOfStrings& patterns); static void computeFreeO(IdTable* resultTable, Id subjectAsId, - const std::vector& hasPattern, - const CompactVectorOfStrings& hasPredicate, + auto&& hasPattern, const CompactVectorOfStrings& patterns); - static void computeFullScan(IdTable* resultTable, - const std::vector& hasPattern, - const CompactVectorOfStrings& hasPredicate, + static void computeFullScan(IdTable* resultTable, auto&& hasPattern, const CompactVectorOfStrings& patterns, size_t resultSize); template static void computeSubqueryS(IdTable* result, const IdTable& _subtree, - size_t subtreeColIndex, - const std::vector& hasPattern, - const CompactVectorOfStrings& hasPredicate, + size_t subtreeColIndex, auto&& hasPattern, const CompactVectorOfStrings& patterns); private: diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index 86b146616c..b7a13a6af3 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -18,6 +18,7 @@ #include "engine/ExportQueryExecutionTrees.h" #include "engine/Filter.h" #include "engine/GroupBy.h" +#include "engine/HasPredicateScan.h" #include "engine/IndexScan.h" #include "engine/Join.h" #include "engine/Minus.h" @@ -163,6 +164,8 @@ void QueryExecutionTree::setOperation(std::shared_ptr operation) { type_ = ORDER_BY; } else if constexpr (std::is_same_v) { type_ = GROUP_BY; + } else if constexpr (std::is_same_v) { + type_ = HAS_PREDICATE_SCAN; } else if constexpr (std::is_same_v) { type_ = FILTER; } else if constexpr (std::is_same_v) { @@ -204,6 +207,8 @@ template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation(std::shared_ptr); +template void QueryExecutionTree::setOperation( + std::shared_ptr); 
template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation( std::shared_ptr); diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h index f74644c0a9..8a533ce91c 100644 --- a/src/engine/QueryExecutionTree.h +++ b/src/engine/QueryExecutionTree.h @@ -48,6 +48,7 @@ class QueryExecutionTree { OPTIONAL_JOIN, COUNT_AVAILABLE_PREDICATES, GROUP_BY, + HAS_PREDICATE_SCAN, UNION, MULTICOLUMN_JOIN, TRANSITIVE_PATH, diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 56ba54d26d..142f34d26e 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -735,6 +736,11 @@ vector QueryPlanner::seedWithScansAndText( "necessary also rebuild the index."); } + if (node._triple._p._iri == HAS_PREDICATE_PREDICATE) { + pushPlan(makeSubtreePlan(_qec, node._triple)); + continue; + } + if (node._variables.size() == 1) { // There is exactly one variable in the triple (may occur twice). if (isVariable(node._triple._s) && isVariable(node._triple._o) && @@ -1792,6 +1798,14 @@ std::vector QueryPlanner::createJoinCandidates( candidates.push_back(std::move(opt.value())); } + // Check if one of the two operations is a HAS_PREDICATE_SCAN. + // If the join column corresponds to the has-predicate scan's + // subject column we can use a specialized join that avoids + // loading the full has-predicate predicate. + if (auto opt = createJoinWithHasPredicateScan(a, b, jcs)) { + candidates.push_back(std::move(opt.value())); + } + // Test if one of `a` or `b` is a transitive path to which we can bind the // other one. 
if (auto opt = createJoinWithTransitivePath(a, b, jcs)) { @@ -1855,6 +1869,41 @@ auto QueryPlanner::createJoinWithTransitivePath( return plan; } +// ______________________________________________________________________________________ +auto QueryPlanner::createJoinWithHasPredicateScan( + SubtreePlan a, SubtreePlan b, + const std::vector>& jcs) + -> std::optional { + // Check if one of the two operations is a HAS_PREDICATE_SCAN. + // If the join column corresponds to the has-predicate scan's + // subject column we can use a specialized join that avoids + // loading the full has-predicate predicate. + using enum QueryExecutionTree::OperationType; + auto isSuitablePredicateScan = [](const auto& tree, size_t joinColumn) { + return tree._qet->getType() == HAS_PREDICATE_SCAN && joinColumn == 0 && + static_cast(tree._qet->getRootOperation().get()) + ->getType() == HasPredicateScan::ScanType::FULL_SCAN; + }; + + const bool aIsSuitablePredicateScan = isSuitablePredicateScan(a, jcs[0][0]); + const bool bIsSuitablePredicateScan = isSuitablePredicateScan(b, jcs[0][1]); + if (!(aIsSuitablePredicateScan || bIsSuitablePredicateScan)) { + return std::nullopt; + } + auto hasPredicateScanTree = aIsSuitablePredicateScan ? a._qet : b._qet; + auto otherTree = aIsSuitablePredicateScan ? b._qet : a._qet; + size_t otherTreeJoinColumn = aIsSuitablePredicateScan ? jcs[0][1] : jcs[0][0]; + auto qec = otherTree->getRootOperation()->getExecutionContext(); + // Note that this is a new operation. 
+ auto object = static_cast( hasPredicateScanTree->getRootOperation().get()) ->getObject(); + auto plan = makeSubtreePlan( + qec, std::move(otherTree), otherTreeJoinColumn, std::move(object)); + mergeSubtreePlanIds(plan, a, b); + return plan; +} + // ______________________________________________________________________________________ auto QueryPlanner::createJoinAsTextFilter( SubtreePlan a, SubtreePlan b, diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 8b67f1e9bc..e09be794ad 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -309,6 +309,16 @@ class QueryPlanner { SubtreePlan a, SubtreePlan b, const std::vector>& jcs); + // Used internally by `createJoinCandidates`. If `a` or `b` is a + // `HasPredicateScan` with a variable as a subject (`?x ql:has-predicate + // <object>`) and `a` and `b` can be joined on that subject variable, + // then returns a `HasPredicateScan` that takes the other input as a subtree. + // Else returns `std::nullopt`. + [[nodiscard]] static std::optional + createJoinWithHasPredicateScan( + SubtreePlan a, SubtreePlan b, + const std::vector>& jcs); + // Used internally by `createJoinCandidates`. If `a` or `b` is a // `TextOperationWithoutFilter` create a `TextOperationWithFilter` that takes // the result of the other input as the filter input. Else return