Skip to content

Commit

Permalink
Make the has-predicate scans work again.
Browse files Browse the repository at this point in the history
  • Loading branch information
joka921 committed Jan 15, 2024
1 parent c79d25f commit a6ec4f1
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 69 deletions.
2 changes: 1 addition & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ add_library(engine
IndexScan.cpp Join.cpp Sort.cpp TextOperationWithoutFilter.cpp
TextOperationWithFilter.cpp Distinct.cpp OrderBy.cpp Filter.cpp
Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp
OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp
OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan.cpp
Union.cpp MultiColumnJoin.cpp TransitivePath.cpp Service.cpp
Values.cpp Bind.cpp Minus.cpp RuntimeInformation.cpp CheckUsePatternTrick.cpp
VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp
Expand Down
20 changes: 19 additions & 1 deletion src/engine/CountAvailablePredicates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "engine/CountAvailablePredicates.h"

#include "engine/CallFixedSize.h"
#include "engine/IndexScan.h"
#include "index/IndexImpl.h"

// _____________________________________________________________________________
Expand Down Expand Up @@ -117,7 +118,24 @@ ResultTable CountAvailablePredicates::computeResult() {
const CompactVectorOfStrings<Id>& patterns =
_executionContext->getIndex().getPatterns();

if (_subtree == nullptr) {
AD_CORRECTNESS_CHECK(_subtree);
bool isFullScan = [&]() {
auto indexScan =
dynamic_cast<const IndexScan*>(_subtree->getRootOperation().get());
if (!indexScan) {
return false;
}
if (!indexScan->getSubject().isVariable() ||
!indexScan->getObject().isVariable()) {
return false;
}

return indexScan->getPredicate() == HAS_PATTERN_PREDICATE;
}();

if (isFullScan) {
_subtree->getRootOperation()->updateRuntimeInformationWhenOptimizedOut(
RuntimeInformation::Status::lazilyMaterialized);
// Compute the predicates for all entities
CountAvailablePredicates::computePatternTrickAllEntities(&idTable,
patterns);
Expand Down
99 changes: 43 additions & 56 deletions src/engine/HasPredicateScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
// Chair of Algorithms and Data Structures.
// Author: Florian Kramer ([email protected])

#include "HasPredicateScan.h"
#include "engine/HasPredicateScan.h"

#include "CallFixedSize.h"
#include "engine/CallFixedSize.h"
#include "index/IndexImpl.h"

HasPredicateScan::HasPredicateScan(QueryExecutionContext* qec,
std::shared_ptr<QueryExecutionTree> subtree,
Expand Down Expand Up @@ -213,85 +214,82 @@ ResultTable HasPredicateScan::computeResult() {
IdTable idTable{getExecutionContext()->getAllocator()};
idTable.setNumColumns(getResultWidth());

const std::vector<PatternID>& hasPattern = getIndex().getHasPattern();
const CompactVectorOfStrings<Id>& hasPredicate = getIndex().getHasPredicate();
const CompactVectorOfStrings<Id>& patterns = getIndex().getPatterns();
auto hasPattern =
getExecutionContext()
->getIndex()
.getImpl()
.getPermutation(Permutation::Enum::PSO)
.lazyScan(qlever::specialIds.at(HAS_PATTERN_PREDICATE), std::nullopt,
std::nullopt, {}, cancellationHandle_);

switch (_type) {
case ScanType::FREE_S: {
Id objectId;
if (!getIndex().getId(_object, &objectId)) {
AD_THROW("The predicate '" + _object + "' is not in the vocabulary.");
}
HasPredicateScan::computeFreeS(&idTable, objectId, hasPattern,
hasPredicate, patterns);
HasPredicateScan::computeFreeS(&idTable, objectId, hasPattern, patterns);
return {std::move(idTable), resultSortedOn(), LocalVocab{}};
};
case ScanType::FREE_O: {
Id subjectId;
if (!getIndex().getId(_subject, &subjectId)) {
AD_THROW("The subject " + _subject + " is not in the vocabulary.");
}
HasPredicateScan::computeFreeO(&idTable, subjectId, hasPattern,
hasPredicate, patterns);
HasPredicateScan::computeFreeO(&idTable, subjectId, hasPattern, patterns);
return {std::move(idTable), resultSortedOn(), LocalVocab{}};
};
case ScanType::FULL_SCAN:
HasPredicateScan::computeFullScan(
&idTable, hasPattern, hasPredicate, patterns,
&idTable, hasPattern, patterns,
getIndex().getNumDistinctSubjectPredicatePairs());
return {std::move(idTable), resultSortedOn(), LocalVocab{}};
case ScanType::SUBQUERY_S:

std::shared_ptr<const ResultTable> subresult = _subtree->getResult();
int inWidth = subresult->idTable().numColumns();
int outWidth = idTable.numColumns();
HasPredicateScan::computeSubqueryS<0, 0>(&idTable, subresult->idTable(),
_subtreeJoinColumn, hasPattern,
patterns);
/*
CALL_FIXED_SIZE((std::array{inWidth, outWidth}),
HasPredicateScan::computeSubqueryS, &idTable,
subresult->idTable(), _subtreeJoinColumn, hasPattern,
hasPredicate, patterns);
patterns);
*/
return {std::move(idTable), resultSortedOn(),
subresult->getSharedLocalVocab()};
}
AD_FAIL();
}

void HasPredicateScan::computeFreeS(
IdTable* resultTable, Id objectId, const std::vector<PatternID>& hasPattern,
const CompactVectorOfStrings<Id>& hasPredicate,
IdTable* resultTable, Id objectId, auto&& hasPattern,
const CompactVectorOfStrings<Id>& patterns) {
IdTableStatic<1> result = std::move(*resultTable).toStatic<1>();
uint64_t entityIndex = 0;
while (entityIndex < hasPattern.size() || entityIndex < hasPredicate.size()) {
if (entityIndex < hasPattern.size() &&
hasPattern[entityIndex] != NO_PATTERN) {
// add the pattern
const auto& pattern = patterns[hasPattern[entityIndex]];
for (const auto& block : hasPattern) {
auto patternColumn = block.getColumn(1);
auto subjects = block.getColumn(0);
for (size_t i : ad_utility::integerRange(block.numRows())) {
const auto& pattern = patterns[patternColumn[i].getInt()];
for (const auto& predicate : pattern) {
if (predicate == objectId) {
result.push_back(
{Id::makeFromVocabIndex(VocabIndex::make(entityIndex))});
}
}
} else if (entityIndex < hasPredicate.size()) {
// add the relations
for (const auto& predicate : hasPredicate[entityIndex]) {
if (predicate == objectId) {
result.push_back(
{Id::makeFromVocabIndex(VocabIndex::make(entityIndex))});
result.push_back({subjects[i]});
}
break;
}
}
entityIndex++;
}
*resultTable = std::move(result).toDynamic();
}

void HasPredicateScan::computeFreeO(
IdTable* resultTable, Id subjectAsId,
const std::vector<PatternID>& hasPattern,
const CompactVectorOfStrings<Id>& hasPredicate,
IdTable* resultTable, Id subjectAsId, auto&& hasPattern,
const CompactVectorOfStrings<Id>& patterns) {
AD_FAIL();
/*
// Subjects always have to be from the vocabulary
if (subjectAsId.getDatatype() != Datatype::VocabIndex) {
return;
Expand All @@ -313,45 +311,33 @@ void HasPredicateScan::computeFreeO(
}
}
*resultTable = std::move(result).toDynamic();
*/
}

void HasPredicateScan::computeFullScan(
IdTable* resultTable, const std::vector<PatternID>& hasPattern,
const CompactVectorOfStrings<Id>& hasPredicate,
IdTable* resultTable, auto&& hasPattern,
const CompactVectorOfStrings<Id>& patterns, size_t resultSize) {
IdTableStatic<2> result = std::move(*resultTable).toStatic<2>();
result.reserve(resultSize);

uint64_t subjectIndex = 0;
while (subjectIndex < hasPattern.size() ||
subjectIndex < hasPredicate.size()) {
if (subjectIndex < hasPattern.size() &&
hasPattern[subjectIndex] != NO_PATTERN) {
// add the pattern
for (const auto& predicate : patterns[hasPattern[subjectIndex]]) {
result.push_back(
{Id::makeFromVocabIndex(VocabIndex::make(subjectIndex)),
predicate});
}
} else if (subjectIndex < hasPredicate.size()) {
// add the relations
for (const auto& predicate : hasPredicate[subjectIndex]) {
result.push_back(
{Id::makeFromVocabIndex(VocabIndex::make(subjectIndex)),
predicate});
for (const auto& block : hasPattern) {
auto patternColumn = block.getColumn(1);
auto subjects = block.getColumn(0);
for (size_t i : ad_utility::integerRange(block.numRows())) {
const auto& pattern = patterns[patternColumn[i].getInt()];
for (const auto& predicate : pattern) {
result.push_back({subjects[i], predicate});
}
}
subjectIndex++;
}
*resultTable = std::move(result).toDynamic();
}

template <int IN_WIDTH, int OUT_WIDTH>
void HasPredicateScan::computeSubqueryS(
IdTable* dynResult, const IdTable& dynInput, const size_t subtreeColIndex,
const std::vector<PatternID>& hasPattern,
const CompactVectorOfStrings<Id>& hasPredicate,
const CompactVectorOfStrings<Id>& patterns) {
auto&& hasPattern, const CompactVectorOfStrings<Id>& patterns) {
AD_FAIL();
/*
IdTableStatic<OUT_WIDTH> result = std::move(*dynResult).toStatic<OUT_WIDTH>();
const IdTableView<IN_WIDTH> input = dynInput.asStaticView<IN_WIDTH>();
Expand Down Expand Up @@ -389,6 +375,7 @@ void HasPredicateScan::computeSubqueryS(
}
}
*dynResult = std::move(result).toDynamic();
*/
}

void HasPredicateScan::setSubject(const TripleComponent& subject) {
Expand Down
15 changes: 4 additions & 11 deletions src/engine/HasPredicateScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,27 +84,20 @@ class HasPredicateScan : public Operation {
}

// These are made static and public mainly for easier testing
static void computeFreeS(IdTable* resultTable, Id objectId,
const std::vector<PatternID>& hasPattern,
const CompactVectorOfStrings<Id>& hasPredicate,
static void computeFreeS(IdTable* resultTable, Id objectId, auto&& hasPattern,
const CompactVectorOfStrings<Id>& patterns);

static void computeFreeO(IdTable* resultTable, Id subjectAsId,
const std::vector<PatternID>& hasPattern,
const CompactVectorOfStrings<Id>& hasPredicate,
auto&& hasPattern,
const CompactVectorOfStrings<Id>& patterns);

static void computeFullScan(IdTable* resultTable,
const std::vector<PatternID>& hasPattern,
const CompactVectorOfStrings<Id>& hasPredicate,
static void computeFullScan(IdTable* resultTable, auto&& hasPattern,
const CompactVectorOfStrings<Id>& patterns,
size_t resultSize);

template <int IN_WIDTH, int OUT_WIDTH>
static void computeSubqueryS(IdTable* result, const IdTable& _subtree,
size_t subtreeColIndex,
const std::vector<PatternID>& hasPattern,
const CompactVectorOfStrings<Id>& hasPredicate,
size_t subtreeColIndex, auto&& hasPattern,
const CompactVectorOfStrings<Id>& patterns);

private:
Expand Down
5 changes: 5 additions & 0 deletions src/engine/QueryExecutionTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "engine/ExportQueryExecutionTrees.h"
#include "engine/Filter.h"
#include "engine/GroupBy.h"
#include "engine/HasPredicateScan.h"
#include "engine/IndexScan.h"
#include "engine/Join.h"
#include "engine/Minus.h"
Expand Down Expand Up @@ -163,6 +164,8 @@ void QueryExecutionTree::setOperation(std::shared_ptr<Op> operation) {
type_ = ORDER_BY;
} else if constexpr (std::is_same_v<Op, GroupBy>) {
type_ = GROUP_BY;
} else if constexpr (std::is_same_v<Op, HasPredicateScan>) {
type_ = HAS_PREDICATE_SCAN;
} else if constexpr (std::is_same_v<Op, Filter>) {
type_ = FILTER;
} else if constexpr (std::is_same_v<Op, NeutralElementOperation>) {
Expand Down Expand Up @@ -204,6 +207,8 @@ template void QueryExecutionTree::setOperation(std::shared_ptr<Service>);
template void QueryExecutionTree::setOperation(std::shared_ptr<TransitivePath>);
template void QueryExecutionTree::setOperation(std::shared_ptr<OrderBy>);
template void QueryExecutionTree::setOperation(std::shared_ptr<GroupBy>);
template void QueryExecutionTree::setOperation(
std::shared_ptr<HasPredicateScan>);
template void QueryExecutionTree::setOperation(std::shared_ptr<Filter>);
template void QueryExecutionTree::setOperation(
std::shared_ptr<NeutralElementOperation>);
Expand Down
1 change: 1 addition & 0 deletions src/engine/QueryExecutionTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class QueryExecutionTree {
OPTIONAL_JOIN,
COUNT_AVAILABLE_PREDICATES,
GROUP_BY,
HAS_PREDICATE_SCAN,
UNION,
MULTICOLUMN_JOIN,
TRANSITIVE_PATH,
Expand Down
49 changes: 49 additions & 0 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <engine/Distinct.h>
#include <engine/Filter.h>
#include <engine/GroupBy.h>
#include <engine/HasPredicateScan.h>
#include <engine/IndexScan.h>
#include <engine/Join.h>
#include <engine/Minus.h>
Expand Down Expand Up @@ -735,6 +736,11 @@ vector<QueryPlanner::SubtreePlan> QueryPlanner::seedWithScansAndText(
"necessary also rebuild the index.");
}

if (node._triple._p._iri == HAS_PREDICATE_PREDICATE) {
pushPlan(makeSubtreePlan<HasPredicateScan>(_qec, node._triple));
continue;
}

if (node._variables.size() == 1) {
// There is exactly one variable in the triple (may occur twice).
if (isVariable(node._triple._s) && isVariable(node._triple._o) &&
Expand Down Expand Up @@ -1792,6 +1798,14 @@ std::vector<QueryPlanner::SubtreePlan> QueryPlanner::createJoinCandidates(
candidates.push_back(std::move(opt.value()));
}

// Check if one of the two operations is a HAS_PREDICATE_SCAN.
// If the join column corresponds to the has-predicate scan's
// subject column we can use a specialized join that avoids
// loading the full has-predicate predicate.
if (auto opt = createJoinWithHasPredicateScan(a, b, jcs)) {
candidates.push_back(std::move(opt.value()));
}

// Test if one of `a` or `b` is a transitive path to which we can bind the
// other one.
if (auto opt = createJoinWithTransitivePath(a, b, jcs)) {
Expand Down Expand Up @@ -1855,6 +1869,41 @@ auto QueryPlanner::createJoinWithTransitivePath(
return plan;
}

// ______________________________________________________________________________________
auto QueryPlanner::createJoinWithHasPredicateScan(
    SubtreePlan a, SubtreePlan b,
    const std::vector<std::array<ColumnIndex, 2>>& jcs)
    -> std::optional<SubtreePlan> {
  // Try to replace the join of `a` and `b` by a single `HasPredicateScan`
  // that takes the non-scan input as its subtree. Returns `std::nullopt` if
  // this specialization is not applicable.
  //
  // `jcs` holds the join columns as pairs `{columnInA, columnInB}`.

  // The specialized operation joins on exactly one column (the subject column
  // of the scan). With no join column there is nothing to index below, and
  // with several join columns the additional join conditions would silently
  // be dropped, so in both cases this specialization must not be used.
  if (jcs.size() != 1) {
    return std::nullopt;
  }
  const auto [joinColumnA, joinColumnB] = jcs[0];

  using enum QueryExecutionTree::OperationType;
  // A tree is suitable if its root is a full has-predicate scan and the join
  // is on column 0 of that scan. The `static_cast` is only evaluated after
  // the type check on the tree has succeeded (short-circuit `&&`).
  auto isSuitablePredicateScan = [](const auto& tree, size_t joinColumn) {
    return tree._qet->getType() == HAS_PREDICATE_SCAN && joinColumn == 0 &&
           static_cast<HasPredicateScan*>(tree._qet->getRootOperation().get())
                   ->getType() == HasPredicateScan::ScanType::FULL_SCAN;
  };

  const bool aIsSuitablePredicateScan = isSuitablePredicateScan(a, joinColumnA);
  const bool bIsSuitablePredicateScan = isSuitablePredicateScan(b, joinColumnB);
  if (!(aIsSuitablePredicateScan || bIsSuitablePredicateScan)) {
    return std::nullopt;
  }

  // If both inputs are suitable, `a` is treated as the scan and `b` as the
  // subtree.
  auto hasPredicateScanTree = aIsSuitablePredicateScan ? a._qet : b._qet;
  auto otherTree = aIsSuitablePredicateScan ? b._qet : a._qet;
  const size_t otherTreeJoinColumn =
      aIsSuitablePredicateScan ? joinColumnB : joinColumnA;
  auto qec = otherTree->getRootOperation()->getExecutionContext();

  // Note that this is a new operation: the original full scan is not reused,
  // only its object is carried over.
  auto object = static_cast<HasPredicateScan*>(
                    hasPredicateScanTree->getRootOperation().get())
                    ->getObject();
  auto plan = makeSubtreePlan<HasPredicateScan>(
      qec, std::move(otherTree), otherTreeJoinColumn, std::move(object));
  mergeSubtreePlanIds(plan, a, b);
  return plan;
}

// ______________________________________________________________________________________
auto QueryPlanner::createJoinAsTextFilter(
SubtreePlan a, SubtreePlan b,
Expand Down
10 changes: 10 additions & 0 deletions src/engine/QueryPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,16 @@ class QueryPlanner {
SubtreePlan a, SubtreePlan b,
const std::vector<std::array<ColumnIndex, 2>>& jcs);

// Used internally by `createJoinCandidates`. If `a` or `b` is a
// `HasPredicateScan` of type `FULL_SCAN` (`?x ql:has-predicate ?p`, i.e. both
// subject and object are variables) and the join column of that scan is its
// subject column, then returns a `HasPredicateScan` that takes the other
// input as a subtree. Else returns `std::nullopt`.
[[nodiscard]] static std::optional<SubtreePlan>
createJoinWithHasPredicateScan(
SubtreePlan a, SubtreePlan b,
const std::vector<std::array<ColumnIndex, 2>>& jcs);

// Used internally by `createJoinCandidates`. If `a` or `b` is a
// `TextOperationWithoutFilter` create a `TextOperationWithFilter` that takes
// the result of the other input as the filter input. Else return
Expand Down

0 comments on commit a6ec4f1

Please sign in to comment.