From 45cc1b460ca0d1b5c7019e83e449d16acf6ff7a7 Mon Sep 17 00:00:00 2001 From: realHannes Date: Mon, 18 Nov 2024 20:23:57 +0100 Subject: [PATCH] changes w.r.t. comments from #1573 --- src/engine/Filter.cpp | 26 +++-- src/engine/Filter.h | 6 +- src/engine/IndexScan.cpp | 94 +++++++++++++------ src/engine/IndexScan.h | 40 +++++--- src/engine/Operation.cpp | 18 ++-- src/engine/Operation.h | 31 +++--- src/engine/QueryExecutionTree.cpp | 2 +- src/engine/QueryExecutionTree.h | 12 +-- .../PrefilterExpressionIndex.cpp | 17 ++++ .../PrefilterExpressionIndex.h | 8 ++ test/PrefilterExpressionIndexTest.cpp | 21 +++-- 11 files changed, 185 insertions(+), 90 deletions(-) diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index be8009b617..ed9f3ac88b 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -28,7 +28,7 @@ Filter::Filter(QueryExecutionContext* qec, : Operation(qec), _subtree(std::move(subtree)), _expression{std::move(expression)} { - setPrefilterExpressionForIndexScanChildren(); + setPrefilterExpressionForDirectIndexScanChild(); } // _____________________________________________________________________________ @@ -45,14 +45,26 @@ string Filter::getDescriptor() const { } //______________________________________________________________________________ -void Filter::setPrefilterExpressionForIndexScanChildren() { - const std::vector& prefilterVec = +void Filter::setPrefilterExpressionForDirectIndexScanChild() { + std::vector prefilterPairs = _expression.getPrefilterExpressionForMetadata(); - this->forAllDescendants([&prefilterVec](const QueryExecutionTree* ptr) { - if (ptr) { - ptr->setPrefilterExpression(prefilterVec); + std::vector relevantPairs; + relevantPairs.reserve(prefilterPairs.size()); + VariableToColumnMap varToColMap = _subtree->getVariableColumns(); + // Add all the PrefilterVariable values whose Variable value is + // contained in the VariableToColumnMap. This is done to avoid that certain + // subqueries filter out too much. + for (auto& prefilterPair : prefilterPairs) { + if (varToColMap.find(prefilterPair.second) != varToColMap.end()) { + relevantPairs.emplace_back(std::move(prefilterPair)); } - }); + } + auto optNewSubTree = + _subtree->getRootOperation()->setPrefilterExprGetUpdatedQetPtr( + std::move(relevantPairs)); + if (optNewSubTree.has_value()) { + _subtree = std::move(optNewSubTree.value()); + } } // _____________________________________________________________________________ diff --git a/src/engine/Filter.h b/src/engine/Filter.h index a475758e0f..4dd1933fc5 100644 --- a/src/engine/Filter.h +++ b/src/engine/Filter.h @@ -59,7 +59,11 @@ class Filter : public Operation { return _subtree->getVariableColumns(); } - void setPrefilterExpressionForIndexScanChildren(); + // This method is directly called by the constructor. + // It sets the appropriate `` pair for each + // `IndexScan` child by invoking `setPrefilterExpression` on all descendants + // in the `QueryExecutionTree`. + void setPrefilterExpressionForDirectIndexScanChild(); ProtoResult computeResult(bool requestLaziness) override; diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 2040d3d8de..a45ba069a7 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -18,14 +18,14 @@ using std::string; // _____________________________________________________________________________ IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation, const SparqlTripleSimple& triple, Graphs graphsToFilter, - PrefilterIndexPairs prefilters) + PrefilterIndexPair prefilter) : Operation(qec), permutation_(permutation), subject_(triple.s_), predicate_(triple.p_), object_(triple.o_), graphsToFilter_{std::move(graphsToFilter)}, - prefilters_{std::move(prefilters)}, + prefilter_{std::move(prefilter)}, numVariables_(static_cast(subject_.isVariable()) + static_cast(predicate_.isVariable()) + static_cast(object_.isVariable())) { @@ -54,9 +54,26 @@ IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation, // _____________________________________________________________________________ IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation, const SparqlTriple& triple, Graphs graphsToFilter, - PrefilterIndexPairs prefilters) + PrefilterIndexPair prefilter) : IndexScan(qec, permutation, triple.getSimple(), std::move(graphsToFilter), - std::move(prefilters)) {} + std::move(prefilter)) {} + +// _____________________________________________________________________________ +IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation, + const TripleComponent& s, const TripleComponent& p, + const TripleComponent& o, + const std::vector& additionalColumns, + const std::vector& additionalVariables, + Graphs graphsToFilter, PrefilterIndexPair prefilter) + : Operation(qec), + permutation_(permutation), + subject_(s), + predicate_(p), + object_(o), + graphsToFilter_(std::move(graphsToFilter)), + prefilter_(std::move(prefilter)), + additionalColumns_(additionalColumns), + additionalVariables_(additionalVariables) {} // _____________________________________________________________________________ string IndexScan::getCacheKeyImpl() const { @@ -119,21 +136,34 @@ vector IndexScan::resultSortedOn() const { } // _____________________________________________________________________________ -void IndexScan::setPrefilterExpression( +std::optional> +IndexScan::setPrefilterExprGetUpdatedQetPtr( const std::vector& prefilterVariablePairs) { - const std::vector& sortedColumns = resultSortedOn(); - VariableToColumnMap varToColMap = computeVariableToColumnMap(); + // The column index of the first sorted column. + const ColumnIndex sortedIdx = 0; + if (numVariables_ < 1) { + return std::nullopt; + } - const auto addPrefilterIfSorted = [&](const PrefilterVariablePair& pair) { - const Variable& variable = pair.second; - if (varToColMap.find(variable) != varToColMap.end()) { - const ColumnIndex colIdx = varToColMap[variable].columnIndex_; - if (std::ranges::find(sortedColumns, colIdx) != sortedColumns.end()) { - prefilters_.push_back(std::make_pair(pair.first->clone(), colIdx)); - } + VariableToColumnMap varToColMap = computeVariableToColumnMap(); + // Search for a Variable key-value given the sortedIdx (index of first sorted + // column) in the VariableToColumnMap. + auto mapIt = + std::ranges::find_if(varToColMap, [&sortedIdx](const auto& keyValuePair) { + return keyValuePair.second.columnIndex_ == sortedIdx; + }); + if (mapIt != varToColMap.end()) { + // Check if the previously found Variable (key-value from varToColMap) + // matches with a pair. + auto itPairs = std::ranges::find_if( + prefilterVariablePairs, + [&mapIt](const auto& pair) { return pair.second == mapIt->first; }); + if (itPairs != prefilterVariablePairs.end()) { + return makeCopyWithAddedPrefilters( + std::make_pair(itPairs->first->clone(), sortedIdx)); } - }; - std::ranges::for_each(prefilterVariablePairs, addPrefilterIfSorted); + } + return std::nullopt; } // _____________________________________________________________________________ @@ -155,12 +185,23 @@ VariableToColumnMap IndexScan::computeVariableToColumnMap() const { return variableToColumnMap; } +//______________________________________________________________________________ +std::shared_ptr IndexScan::makeCopyWithAddedPrefilters( + PrefilterIndexPair prefilter) { + return ad_utility::makeExecutionTree( + getExecutionContext(), permutation_, subject_, predicate_, object_, + additionalColumns_, additionalVariables_, std::move(graphsToFilter_), + std::move(prefilter)); +} + //______________________________________________________________________________ std::vector IndexScan::applyFilterBlockMetadata( - std::vector&& blocks) const { - std::ranges::for_each(prefilters_, [&blocks](const PrefilterIndexPair& pair) { - pair.first->evaluate(blocks, pair.second); - }); + const std::vector blocks) const { + if (prefilter_.has_value()) { + auto& prefilterIndexPair = prefilter_.value(); + return prefilterExpressions::detail::evaluatePrefilterExpressionOnMetadata( + prefilterIndexPair.first->clone(), blocks, prefilterIndexPair.second); + } return blocks; }; @@ -178,7 +219,7 @@ Result::Generator IndexScan::chunkedIndexScan() const { } // _____________________________________________________________________________ -IdTable IndexScan::completeIndexScan() const { +IdTable IndexScan::materializedIndexScan() const { // Get the blocks. auto metadata = getMetadataForScan(); auto blockSpan = @@ -189,7 +230,7 @@ IdTable IndexScan::completeIndexScan() const { ? std::optional{applyFilterBlockMetadata( {blockSpan.begin(), blockSpan.end()})} : std::nullopt; - // Create IdTable, fill it with content by performing scan(). + // Create the IdTable and fill it with content by performing scan(). using enum Permutation::Enum; IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(numVariables_); @@ -211,7 +252,7 @@ ProtoResult IndexScan::computeResult(bool requestLaziness) { if (requestLaziness) { return {chunkedIndexScan(), resultSortedOn()}; } - return {completeIndexScan(), getResultSortedOn(), LocalVocab{}}; + return {materializedIndexScan(), getResultSortedOn(), LocalVocab{}}; } // _____________________________________________________________________________ @@ -287,7 +328,7 @@ ScanSpecificationAsTripleComponent IndexScan::getScanSpecificationTc() const { // _____________________________________________________________________________ Permutation::IdTableGenerator IndexScan::getLazyScan( - std::vector&& blocks) const { + std::vector blocks) const { // If there is a LIMIT or OFFSET clause that constrains the scan // (which can happen with an explicit subquery), we cannot use the prefiltered // blocks, as we currently have no mechanism to include limits and offsets @@ -355,8 +396,7 @@ IndexScan::lazyScanForJoinOfTwoScans(const IndexScan& s1, const IndexScan& s2) { auto [blocks1, blocks2] = CompressedRelationReader::getBlocksForJoin( metaBlocks1.value(), metaBlocks2.value()); - std::array result{s1.getLazyScan(std::move(blocks1)), - s2.getLazyScan(std::move(blocks2))}; + std::array result{s1.getLazyScan(blocks1), s2.getLazyScan(blocks2)}; result[0].details().numBlocksAll_ = metaBlocks1.value().blockMetadata_.size(); result[1].details().numBlocksAll_ = metaBlocks2.value().blockMetadata_.size(); return result; @@ -377,7 +417,7 @@ Permutation::IdTableGenerator IndexScan::lazyScanForJoinOfColumnWithScan( auto blocks = CompressedRelationReader::getBlocksForJoin(joinColumn, metaBlocks1.value()); - auto result = getLazyScan(std::move(blocks)); + auto result = getLazyScan(blocks); result.details().numBlocksAll_ = metaBlocks1.value().blockMetadata_.size(); return result; } diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index c5efb552f1..ee89dd5aa9 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -12,12 +12,10 @@ class SparqlTripleSimple; class IndexScan final : public Operation { using Graphs = ScanSpecificationAsTripleComponent::Graphs; - // Pair containing a `PrefilterExpression` with `ColumnIndex` (eval. index) - using PrefilterIndexPair = - std::pair, - ColumnIndex>; - // Vector with `PrefilterIndexPair` values. - using PrefilterIndexPairs = std::vector; + // Optional pair containing a `PrefilterExpression` with `ColumnIndex` (eval. + // index) + using PrefilterIndexPair = std::optional, ColumnIndex>>; private: Permutation::Enum permutation_; @@ -25,7 +23,7 @@ class IndexScan final : public Operation { TripleComponent predicate_; TripleComponent object_; Graphs graphsToFilter_; - PrefilterIndexPairs prefilters_; + PrefilterIndexPair prefilter_; size_t numVariables_; size_t sizeEstimate_; bool sizeEstimateIsExact_; @@ -40,11 +38,18 @@ class IndexScan final : public Operation { public: IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation, const SparqlTriple& triple, Graphs graphsToFilter = std::nullopt, - PrefilterIndexPairs prefilters = {}); + PrefilterIndexPair prefilter = std::nullopt); IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation, const SparqlTripleSimple& triple, Graphs graphsToFilter = std::nullopt, - PrefilterIndexPairs prefilters = {}); + PrefilterIndexPair prefilter = std::nullopt); + // Constructor to simplify copy creation of an `IndexScan`. + IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation, + const TripleComponent& s, const TripleComponent& p, + const TripleComponent& o, + const std::vector& additionalColumns, + const std::vector& additionalVariables, + Graphs graphsToFilter, PrefilterIndexPair prefilter); ~IndexScan() override = default; @@ -66,9 +71,11 @@ class IndexScan final : public Operation { vector resultSortedOn() const override; - // Set `PrefilterExpression`s. - void setPrefilterExpression(const std::vector& - prefilterVariablePairs) override; + // Set `PrefilterExpression`s and return updated `QueryExecutionTree` pointer + // if necessary. + std::optional> + setPrefilterExprGetUpdatedQetPtr(const std::vector& + prefilterVariablePairs) override; size_t numVariables() const { return numVariables_; } @@ -151,19 +158,22 @@ class IndexScan final : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; + std::shared_ptr makeCopyWithAddedPrefilters( + PrefilterIndexPair prefilter); + // Filter relevant `CompressedBlockMetadata` blocks by applying the // `PrefilterExpression`s from `prefilters_`. std::vector applyFilterBlockMetadata( - std::vector&& blocks) const; + const std::vector blocks) const; // Return the (lazy) `IdTable` for this `IndexScan` in chunks. Result::Generator chunkedIndexScan() const; // Get the `IdTable` for this `IndexScan` in one piece. - IdTable completeIndexScan() const; + IdTable materializedIndexScan() const; // Helper functions for the public `getLazyScanFor...` methods and // `chunkedIndexScan` (see above). Permutation::IdTableGenerator getLazyScan( - std::vector&& blocks) const; + std::vector blocks) const; std::optional getMetadataForScan() const; }; diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 21a00cb458..27bb450c6a 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -12,36 +12,30 @@ using namespace std::chrono_literals; //______________________________________________________________________________ template -void Operation::forAllDescendantsImpl(F&& f) { +void Operation::forAllDescendantsImpl(F f) { static_assert( std::is_same_v>); for (auto ptr : getChildren()) { if (ptr) { - std::forward(f)(ptr); - ptr->forAllDescendants(std::forward(f)); + f(ptr); + ptr->forAllDescendants(f); } } } //______________________________________________________________________________ template -void Operation::forAllDescendantsImpl(F&& f) const { +void Operation::forAllDescendantsImpl(F f) const { static_assert( std::is_same_v>); for (auto ptr : getChildren()) { if (ptr) { - std::forward(f)(ptr); - ptr->forAllDescendants(std::forward(f)); + f(ptr); + ptr->forAllDescendants(f); } } } -// _____________________________________________________________________________ -void Operation::forAllDescendants( - std::function&& func) { - forAllDescendantsImpl(std::move(func)); -} - // _____________________________________________________________________________ vector Operation::collectWarnings() const { vector res = getWarnings(); diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 8e642ed442..bda63db8b4 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -78,21 +78,22 @@ class Operation { } // Set `PrefilterExpression`s (for `IndexScan`). - virtual void setPrefilterExpression( - const std::vector&){ - // The prefiltering procedure is implemented by applying the - // PrefilterExpressions directly on the CompressedBlockMetadata. - // This is currently done while performing the result computation for - // IndexScan. - // (1) Overwrite this method for the derived IndexScan class. - // (2) The default method for all other derived classes is implemented - // here, no PrefilterExpressions need to be set. + // The prefiltering procedure is implemented by applying the + // PrefilterExpressions directly on the CompressedBlockMetadata. + // This is currently done while performing the result computation for + // IndexScan. + // (1) Overwrite this method for the derived `IndexScan` class. Given + // the resulting changes w.r.t. `IndexScan`, we also have to return an updated + // `QueryExecutionTree` pointer. + // (2) The default method for all other derived classes is implemented here, + // no PrefilterExpressions need to be set. Given that no changes occur for an + // `Operation` object here, return std::nullopt to indicate that nothing has + // changed. + virtual std::optional> + setPrefilterExprGetUpdatedQetPtr(const std::vector&) { + return std::nullopt; }; - // Apply the provided void function for all descendants. The given function - // must be invocable with a `const QueryExecutionTree*` object. - void forAllDescendants(std::function&& func); - // Get a unique, not ambiguous string representation for a subtree. // This should act like an ID for each subtree. // Calls `getCacheKeyImpl` and adds the information about the `LIMIT` clause. @@ -374,11 +375,11 @@ class Operation { // Recursively call a function on all children. template - void forAllDescendantsImpl(F&& f); + void forAllDescendantsImpl(F f); // Recursively call a function on all children. template - void forAllDescendantsImpl(F&& f) const; + void forAllDescendantsImpl(F f) const; // Holds a precomputed Result of this operation if it is the sibling of a // Service operation. diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index 6f499b5ef8..2f42cf51b2 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -99,7 +99,7 @@ size_t QueryExecutionTree::getSizeEstimate() { void QueryExecutionTree::setPrefilterExpression( const std::vector& prefilterVec) const { AD_CONTRACT_CHECK(rootOperation_); - rootOperation_->setPrefilterExpression(prefilterVec); + rootOperation_->setPrefilterExprGetUpdatedQetPtr(prefilterVec); } // _____________________________________________________________________________ diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h index c08864e3aa..d2cc452b1d 100644 --- a/src/engine/QueryExecutionTree.h +++ b/src/engine/QueryExecutionTree.h @@ -111,26 +111,26 @@ class QueryExecutionTree { } template - void forAllDescendants(F&& f) { + void forAllDescendants(F f) { static_assert( std::is_same_v>); for (auto ptr : rootOperation_->getChildren()) { if (ptr) { - std::forward(f)(ptr); - ptr->forAllDescendants(std::forward(f)); + f(ptr); + ptr->forAllDescendants(f); } } } template - void forAllDescendants(F&& f) const { + void forAllDescendants(F f) const { static_assert( std::is_same_v>); for (auto ptr : rootOperation_->getChildren()) { if (ptr) { - std::forward(f)(ptr); - ptr->forAllDescendants(std::forward(f)); + f(ptr); + ptr->forAllDescendants(f); } } } diff --git a/src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp b/src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp index b7a7c2fd0e..5e5b73cbd1 100644 --- a/src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp +++ b/src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp @@ -417,6 +417,23 @@ template class LogicalExpression; template class LogicalExpression; namespace detail { +//______________________________________________________________________________ +std::vector evaluatePrefilterExpressionOnMetadata( + const std::unique_ptr prefilterExpr, + const std::vector& blocks, const size_t evaluationColumn) { + if (blocks.size() <= 2) { + return blocks; + } + BlockMetadata firstBlock = blocks.front(); + BlockMetadata lastBlock = blocks.back(); + std::vector blocksAdjusted(blocks.begin() + 1, + blocks.end() - 1); + blocksAdjusted = prefilterExpr->evaluate(blocksAdjusted, evaluationColumn); + blocksAdjusted.insert(blocksAdjusted.begin(), firstBlock); + blocksAdjusted.push_back(lastBlock); + return blocksAdjusted; +} + //______________________________________________________________________________ void checkPropertiesForPrefilterConstruction( const std::vector& vec) { diff --git a/src/engine/sparqlExpressions/PrefilterExpressionIndex.h b/src/engine/sparqlExpressions/PrefilterExpressionIndex.h index a5ca2c8d8d..4100d0fe74 100644 --- a/src/engine/sparqlExpressions/PrefilterExpressionIndex.h +++ b/src/engine/sparqlExpressions/PrefilterExpressionIndex.h @@ -206,6 +206,14 @@ using OrExpression = prefilterExpressions::LogicalExpression< prefilterExpressions::LogicalOperator::OR>; namespace detail { +//______________________________________________________________________________ +// Helper function to perform the evaluation for the provided +// `PrefilterExpression` on the given `BlockMetadata` values, while taking into +// account possible incomplete blocks. +std::vector evaluatePrefilterExpressionOnMetadata( + const std::unique_ptr prefilterExpr, + const std::vector& blocks, const size_t evaluationColumn); + //______________________________________________________________________________ // Pair containing a `PrefilterExpression` and its corresponding `Variable`. using PrefilterExprVariablePair = diff --git a/test/PrefilterExpressionIndexTest.cpp b/test/PrefilterExpressionIndexTest.cpp index a941afab6a..82d3386fb6 100644 --- a/test/PrefilterExpressionIndexTest.cpp +++ b/test/PrefilterExpressionIndexTest.cpp @@ -84,8 +84,10 @@ class PrefilterExpressionOnMetadataTest : public ::testing::Test { const BlockMetadata b24 = makeBlock(DateId(DateParser, "2024-10-08"), BlankNodeId(10)); - // All blocks that contain mixed (ValueId) types over column 0 - const std::vector mixedBlocks = {b2, b4, b11, b18, b22, b24}; + // All blocks that contain mixed (ValueId) types over column 0, + // or possibly incomplete ones. + const std::vector mixedAndPossiblyIncompleteBlocks = { + b1, b2, b4, b11, b18, b22, b24}; // Ordered and unique vector with BlockMetadata const std::vector blocks = { @@ -142,14 +144,18 @@ class PrefilterExpressionOnMetadataTest : public ::testing::Test { auto makeTest(std::unique_ptr expr, std::vector&& expected) { std::vector expectedAdjusted; - // This is for convenience, we automatically insert all mixed blocks - // which must be always returned. + // This is for convenience, we automatically insert all mixed and possibly + // incomplete blocks which must be always returned. std::ranges::set_union( - expected, mixedBlocks, std::back_inserter(expectedAdjusted), + expected, mixedAndPossiblyIncompleteBlocks, + std::back_inserter(expectedAdjusted), [](const BlockMetadata& b1, const BlockMetadata& b2) { return b1.blockIndex_ < b2.blockIndex_; }); - ASSERT_EQ(expr->evaluate(blocks, 0), expectedAdjusted); + ASSERT_EQ( + prefilterExpressions::detail::evaluatePrefilterExpressionOnMetadata( + std::move(expr), blocks, 0), + expectedAdjusted); } }; @@ -525,6 +531,9 @@ TEST_F(PrefilterExpressionOnMetadataTest, testWithOneBlockMetadataValue) { EXPECT_EQ(expr->evaluate(input, 0), input); EXPECT_EQ(expr->evaluate(input, 1), std::vector{}); EXPECT_EQ(expr->evaluate(input, 2), std::vector{}); + EXPECT_EQ(prefilterExpressions::detail::evaluatePrefilterExpressionOnMetadata( + std::move(expr), input, 0), + input); } //______________________________________________________________________________