diff --git a/src/engine/TextIndexScanForEntity.h b/src/engine/TextIndexScanForEntity.h index c6c0ac3bc0..e40db8f28f 100644 --- a/src/engine/TextIndexScanForEntity.h +++ b/src/engine/TextIndexScanForEntity.h @@ -7,7 +7,6 @@ #include #include "engine/Operation.h" -#include "parser/MagicServiceQuery.h" #include "parser/TextSearchQuery.h" // This operation retrieves all text records and their corresponding @@ -60,8 +59,6 @@ class TextIndexScanForEntity : public Operation { vector resultSortedOn() const override; - void setVariableToColumnMap(); - VariableToColumnMap computeVariableToColumnMap() const override; const TextIndexScanForEntityConfiguration& getConfig() const { @@ -79,4 +76,6 @@ class TextIndexScanForEntity : public Operation { ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; vector getChildren() override { return {}; } + + void setVariableToColumnMap(); }; diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index 1672808c24..3f1d3af8dd 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -71,6 +71,8 @@ void TextIndexScanForWord::setVariableToColumnMap() { "Text index scan for word shouldn't have a variable to bind match " "defined when the word is not a prefix."); } + AD_CORRECTNESS_CHECK(config_.isPrefix_ || + !config_.varToBindMatch_.has_value()); if (config_.varToBindScore_.has_value()) { config_.variableColumns_.value()[config_.varToBindScore_.value()] = makeAlwaysDefinedColumn(index); diff --git a/src/engine/TextIndexScanForWord.h b/src/engine/TextIndexScanForWord.h index 2d9b54bafb..9826200fa3 100644 --- a/src/engine/TextIndexScanForWord.h +++ b/src/engine/TextIndexScanForWord.h @@ -7,7 +7,6 @@ #include #include "engine/Operation.h" -#include "parser/MagicServiceQuery.h" #include "parser/TextSearchQuery.h" // This operation retrieves all text records from the fulltext index that @@ -45,8 +44,6 @@ class TextIndexScanForWord : public Operation { vector resultSortedOn() const override; - void setVariableToColumnMap(); - VariableToColumnMap computeVariableToColumnMap() const override; const TextIndexScanForWordConfiguration& getConfig() const { return config_; } @@ -59,4 +56,6 @@ class TextIndexScanForWord : public Operation { ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; vector getChildren() override { return {}; } + + void setVariableToColumnMap(); }; diff --git a/src/parser/TextSearchQuery.cpp b/src/parser/TextSearchQuery.cpp index c97a1b6dba..dfd79fdfdc 100644 --- a/src/parser/TextSearchQuery.cpp +++ b/src/parser/TextSearchQuery.cpp @@ -9,6 +9,24 @@ #include "parser/MagicServiceIriConstants.h" #include "util/http/HttpParser/AcceptHeaderQleverVisitor.h" +std::variant VarOrFixedEntity::makeEntityVariant( + const QueryExecutionContext* qec, + std::variant entity) { + if (std::holds_alternative(entity)) { + VocabIndex index; + std::string fixedEntity = std::move(std::get(entity)); + bool success = qec->getIndex().getVocab().getId(fixedEntity, &index); + if (!success) { + throw std::runtime_error( + "The entity " + fixedEntity + + " is not part of the underlying knowledge graph and can " + "therefore not be used as the object of ql:contains-entity"); + } + return FixedEntity(std::move(fixedEntity), std::move(index)); + } + return std::get(entity); +}; + namespace parsedQuery { // ____________________________________________________________________________ @@ -17,7 +35,7 @@ void TextSearchQuery::throwSubjectVariableException( if (!subject.isVariable()) { throw TextSearchException( absl::StrCat("The predicate <", predString, - "> needs a Variable as subject. The subject given was: ", + "> needs a variable as subject. The subject given was: ", subject.toString())); } } @@ -29,7 +47,7 @@ void TextSearchQuery::throwSubjectAndObjectVariableException( if (!(subject.isVariable() && object.isVariable())) { throw TextSearchException(absl::StrCat( "The predicate <", predString, - "> needs a Variable as subject and one as object. The subject given " + "> needs a variable as subject and one as object. The subject given " "was: ", subject.toString(), ". The object given was: ", object.toString())); } @@ -51,7 +69,7 @@ void TextSearchQuery::throwObjectLiteralException( if (!object.isLiteral()) { throw TextSearchException( absl::StrCat("The predicate <", predString, - "> needs a Literal as object. The object given was: ", + "> needs a literal as object. The object given was: ", object.toString())); } } @@ -62,7 +80,7 @@ void TextSearchQuery::predStringTextSearch(const Variable& subjectVar, if (configVarToConfigs_[objectVar].textVar_.has_value()) { throw TextSearchException(absl::StrCat( "Each text search config should only be linked to a single text " - "Variable. The second text variable given was: ", + "variable. The second text variable given was: ", subjectVar.name(), ". The config variable was: ", objectVar.name())); } configVarToConfigs_[objectVar].textVar_ = subjectVar; @@ -75,7 +93,7 @@ void TextSearchQuery::predStringContainsWord( std::string_view literal = asStringViewUnsafe(objectLiteral.getContent()); if (literal.empty()) { throw TextSearchException( - "The predicate shouldn't have an empty Literal as " + "The predicate shouldn't have an empty literal as " "object."); } configVarToConfigs_[subjectVar].word_ = literal; @@ -95,8 +113,8 @@ void TextSearchQuery::predStringContainsEntity(const Variable& subjectVar, object.getIri().toStringRepresentation(); } else { throw TextSearchException(absl::StrCat( - "The predicate needs a Variable as subject and an " - "IRI, Literal or Variable as object. The object given was: ", + "The predicate needs a variable as subject and an " + "IRI, literal or variable as object. The object given was: ", object.toString())); } } @@ -105,8 +123,10 @@ void TextSearchQuery::predStringContainsEntity(const Variable& subjectVar, void TextSearchQuery::predStringBindMatch(const Variable& subjectVar, const Variable& objectVar) { if (configVarToConfigs_[subjectVar].varToBindMatch_.has_value()) { - throw TextSearchException( - "A text search should only contain one ."); + throw TextSearchException(absl::StrCat( + "Each text search config should only contain at most one . " + "The second match variable given was: ", + objectVar.name(), ". The config variable was: ", subjectVar.name())); } configVarToConfigs_[subjectVar].varToBindMatch_ = objectVar; } @@ -115,8 +135,10 @@ void TextSearchQuery::predStringBindMatch(const Variable& subjectVar, void TextSearchQuery::predStringBindScore(const Variable& subjectVar, const Variable& objectVar) { if (configVarToConfigs_[subjectVar].varToBindScore_.has_value()) { - throw TextSearchException( - "A text search should only contain one ."); + throw TextSearchException(absl::StrCat( + "Each text search config should only contain at most one . " + "The second match variable given was: ", + objectVar.name(), ". The config variable was: ", subjectVar.name())); } configVarToConfigs_[subjectVar].varToBindScore_ = objectVar; } @@ -129,7 +151,6 @@ void TextSearchQuery::addParameter(const SparqlTriple& triple) { const TripleComponent& object = simpleTriple.o_; auto predString = extractParameterName(predicate, TEXT_SEARCH_IRI); - if (predString == "text-search") { throwSubjectAndObjectVariableException("text-search", subject, object); predStringTextSearch(subject.getVariable(), object.getVariable()); @@ -162,15 +183,49 @@ TextSearchQuery::toConfigs(const QueryExecutionContext* qec) const { ad_utility::HashMap> potentialTermsForCvar; for (const auto& [var, conf] : configVarToConfigs_) { if (!conf.isWordSearch_.has_value()) { - throw TextSearchException( + throw TextSearchException(absl::StrCat( "Text search service needs configs with exactly one occurrence of " - "either or " - "."); + "either or . The config variable " + "was: ", + var.name())); } if (!conf.textVar_.has_value()) { - AD_FAIL(); + // Last updated this error message 24.02.2025 + throw TextSearchException(absl::StrCat( + "Text search service needs a text variable that is linked to one or " + "multiple text search config variables with the predicate " + ". \n" + "The config variable can then be used with the predicates: " + ", , , . \n" + ": This predicate needs a literal as object which has " + "one word with optionally a * at the end. This word or prefix is " + "then used to search the text index. \n" + ": This predicate needs a variable, IRI or literal " + "as object. If a variable is given this variable can be used outside " + "of this service. If an IRI or literal is given the entity is fixed. " + "The entity given is then used to search the text index. \n" + "A config should contain exactly one occurrence of either " + " or . \n" + ": This predicate should only be used in a text search " + "config with a word that is a prefix. The object should be a " + "variable. That variable specifies the column name for the column " + "with prefix matches of the word search.\n" + ": The object of this predicate should be a variable. " + "That variable specifies the column name for the column containing " + "the scores of the respective word or entity search. \n", + "The config variable was: ", var.name())); } if (conf.isWordSearch_.value()) { + if (conf.varToBindMatch_.has_value() && + !conf.word_.value().ends_with("*")) { + throw TextSearchException( + absl::StrCat("The text search config shouldn't define a variable " + "for the prefix match column if the word isn't a " + "prefix. The config variable was: ", + var.name(), ". The word was: \"", conf.word_.value(), + "\". The text variable bound to was: ", + conf.textVar_.value().name())); + } potentialTermsForCvar[conf.textVar_.value()].push_back( conf.word_.value()); } @@ -185,8 +240,8 @@ TextSearchQuery::toConfigs(const QueryExecutionContext* qec) const { auto it = potentialTermsForCvar.find(conf.textVar_.value()); if (it == potentialTermsForCvar.end()) { throw TextSearchException(absl::StrCat( - "Entity search has to happen on a text Variable that is also " - "contained in a word search. Text Variable: ", + "Entity search has to happen on a text variable that is also " + "contained in a word search. Text variable: ", conf.textVar_.value().name(), " is not contained in a word search.")); } diff --git a/src/parser/TextSearchQuery.h b/src/parser/TextSearchQuery.h index 2f21be13f3..531f15bab0 100644 --- a/src/parser/TextSearchQuery.h +++ b/src/parser/TextSearchQuery.h @@ -25,29 +25,12 @@ struct VarOrFixedEntity { static std::variant makeEntityVariant( const QueryExecutionContext* qec, - std::variant entity) { - if (std::holds_alternative(entity)) { - VocabIndex index; - std::string fixedEntity = std::move(std::get(entity)); - bool success = qec->getIndex().getVocab().getId(fixedEntity, &index); - if (!success) { - throw std::runtime_error( - "The entity " + fixedEntity + - " is not part of the underlying knowledge graph and can " - "therefore not be used as the object of ql:contains-entity"); - } - return FixedEntity(std::move(fixedEntity), std::move(index)); - } else { - return std::get(entity); - } - }; + std::variant entity); VarOrFixedEntity(const QueryExecutionContext* qec, std::variant entity) : entity_(makeEntityVariant(qec, std::move(entity))) {} - ~VarOrFixedEntity() = default; - bool hasFixedEntity() const { return std::holds_alternative(entity_); } diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 84faa4e1cc..b8749951cc 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -2592,7 +2592,7 @@ TEST(QueryPlanner, TextSearchService) { "}" "}"), ::testing::HasSubstr( - "The predicate needs a Variable as subject and one as " + "The predicate needs a variable as subject and one as " "object. The subject given was: ?t. The object given was: \"fail\"")); AD_EXPECT_THROW_WITH_MESSAGE( @@ -2605,7 +2605,7 @@ TEST(QueryPlanner, TextSearchService) { "}" "}"), ::testing::HasSubstr("Each text search config should only be linked to a " - "single text Variable. The second text variable " + "single text variable. The second text variable " "given was: ?t2. The config variable was: ?conf")); // Predicate contains-word @@ -2618,7 +2618,7 @@ TEST(QueryPlanner, TextSearchService) { "\"fail\" qlts:contains-word \"test\" ." "}" "}"), - ::testing::HasSubstr("The predicate needs a Variable as " + ::testing::HasSubstr("The predicate needs a variable as " "subject. The subject given was: \"fail\"")); AD_EXPECT_THROW_WITH_MESSAGE( @@ -2644,7 +2644,7 @@ TEST(QueryPlanner, TextSearchService) { "?conf qlts:contains-word ?word ." "}" "}"), - ::testing::HasSubstr("The predicate needs a Literal as " + ::testing::HasSubstr("The predicate needs a literal as " "object. The object given was: ?word")); AD_EXPECT_THROW_WITH_MESSAGE( @@ -2657,7 +2657,7 @@ TEST(QueryPlanner, TextSearchService) { "}" "}"), ::testing::HasSubstr("The predicate shouldn't have an " - "empty Literal as object.")); + "empty literal as object.")); // Predicate contains-entity AD_EXPECT_THROW_WITH_MESSAGE( @@ -2669,7 +2669,7 @@ TEST(QueryPlanner, TextSearchService) { "\"fail\" qlts:contains-entity ?e ." "}" "}"), - ::testing::HasSubstr("The predicate needs a Variable " + ::testing::HasSubstr("The predicate needs a variable " "as subject. The subject given was: \"fail\"")); AD_EXPECT_THROW_WITH_MESSAGE( @@ -2696,12 +2696,75 @@ TEST(QueryPlanner, TextSearchService) { "}" "}"), ::testing::HasSubstr( - "The predicate needs a Variable as subject and an " - "IRI, Literal or Variable as object. The object given was: 13")); + "The predicate needs a variable as subject and an " + "IRI, literal or variable as object. The object given was: 13")); - // Predicate bind-match and bind-score not covered. + // Predicate bind-match + AD_EXPECT_THROW_WITH_MESSAGE( + SparqlParser::parseQuery( + "PREFIX qlts: " + "SELECT * WHERE {" + "SERVICE qlts: {" + "?t qlts:text-search ?conf ." + "?conf qlts:contains-word \"test\" ." + "?conf qlts:bind-match \"fail\" ." + "}" + "}"), + ::testing::HasSubstr( + "The predicate needs a variable as subject and one " + "as object. The subject given was: ?conf. The object given was: " + "\"fail\"")); + + AD_EXPECT_THROW_WITH_MESSAGE( + SparqlParser::parseQuery( + "PREFIX qlts: " + "SELECT * WHERE {" + "SERVICE qlts: {" + "?t qlts:text-search ?conf ." + "?conf qlts:contains-word \"test\" ." + "?conf qlts:bind-match ?match1 ." + "?conf qlts:bind-match ?match2 ." + "}" + "}"), + ::testing::HasSubstr( + "Each text search config should only contain at most one " + ". The second match variable given was: ?match2. The " + "config variable was: ?conf")); + + // Predicate bind-score + AD_EXPECT_THROW_WITH_MESSAGE( + SparqlParser::parseQuery( + "PREFIX qlts: " + "SELECT * WHERE {" + "SERVICE qlts: {" + "?t qlts:text-search ?conf ." + "?conf qlts:contains-word \"test\" ." + "?conf qlts:bind-score 100 ." + "}" + "}"), + ::testing::HasSubstr( + "The predicate needs a variable as subject and one " + "as object. The subject given was: ?conf. The object given was: " + "100")); + + AD_EXPECT_THROW_WITH_MESSAGE( + SparqlParser::parseQuery( + "PREFIX qlts: " + "SELECT * WHERE {" + "SERVICE qlts: {" + "?t qlts:text-search ?conf ." + "?conf qlts:contains-word \"test\" ." + "?conf qlts:bind-score ?score1 ." + "?conf qlts:bind-score ?score2 ." + "}" + "}"), + ::testing::HasSubstr( + "Each text search config should only contain at most one " + ". The second match variable given was: ?score2. The " + "config variable was: ?conf")); // toConfigs errors + // No contains-word or contains-entity ParsedQuery pq = SparqlParser::parseQuery( "PREFIX qlts: " "SELECT * WHERE {" @@ -2716,6 +2779,60 @@ TEST(QueryPlanner, TextSearchService) { "Text search service needs configs with exactly one occurrence of " "either or .")); + // No text variable defined + pq = SparqlParser::parseQuery( + "PREFIX qlts: " + "SELECT * WHERE {" + "SERVICE qlts: {" + "?t qlts:contains-word \"test\" ." + "}" + "}"); + qp = makeQueryPlanner(); + AD_EXPECT_THROW_WITH_MESSAGE( + qp.createExecutionTree(pq), + ::testing::HasSubstr( + "Text search service needs a text variable that is linked to one or " + "multiple text search config variables with the predicate " + ". \n" + "The config variable can then be used with the predicates: " + ", , , . \n" + ": This predicate needs a literal as object which has " + "one word with optionally a * at the end. This word or prefix is " + "then used to search the text index. \n" + ": This predicate needs a variable, IRI or literal " + "as object. If a variable is given this variable can be used outside " + "of this service. If an IRI or literal is given the entity is fixed. " + "The entity given is then used to search the text index. \n" + "A config should contain exactly one occurrence of either " + " or . \n" + ": This predicate should only be used in a text search " + "config with a word that is a prefix. The object should be a " + "variable. That variable specifies the column name for the column " + "with prefix matches of the word search.\n" + ": The object of this predicate should be a variable. " + "That variable specifies the column name for the column containing " + "the scores of the respective word or entity search. \n" + "The config variable was: ?t")); + + // in a word search on a non prefix + pq = SparqlParser::parseQuery( + "PREFIX qlts: " + "SELECT * WHERE {" + "SERVICE qlts: {" + "?t qlts:text-search ?conf ." + "?conf qlts:contains-word \"test\" ." + "?conf qlts:bind-match ?test_match ." + "}" + "}"); + qp = makeQueryPlanner(); + AD_EXPECT_THROW_WITH_MESSAGE( + qp.createExecutionTree(pq), + ::testing::HasSubstr( + "The text search config shouldn't define a variable for the prefix " + "match column if the word isn't a prefix. The config variable was: " + "?conf. The word was: \"test\". The text variable bound to was: ?t")); + + // Entity search on a text variable that has no word-search pq = SparqlParser::parseQuery( "PREFIX qlts: " "SELECT * WHERE {" @@ -2727,11 +2844,11 @@ TEST(QueryPlanner, TextSearchService) { qp = makeQueryPlanner(); AD_EXPECT_THROW_WITH_MESSAGE( qp.createExecutionTree(pq), - ::testing::HasSubstr("Entity search has to happen on a text Variable " + ::testing::HasSubstr("Entity search has to happen on a text variable " "that is also contained in a word search. Text " - "Variable: ?t2 is not contained in a word search.")); + "variable: ?t2 is not contained in a word search.")); - // Begin expect tests + // Begin checking query execution trees auto qec = ad_utility::testing::getQec( "

\"this text contains some words and is part of the test\" . " "

.

\"picking the right text can be a hard "