Skip to content

Commit

Permalink
Own classes for both Literal and Iri (#1301)
Browse files Browse the repository at this point in the history
So far, literals and IRIs were stored as a `std::string` (in a normalized way, that is, without escaping) and the distinction between literals and IRIs was made via the first character (`"` or `<`). The code to deal with datatypes (the stuff after `^^`) and language tags (e.g., `@en`) was also ad hoc, using low-level string operations. Now there are proper classes `Literal` and `Iri`, which internally still store their data as normalized strings starting with `"` or `<` (just like before), but they are now two different types, each with a proper interface. This continues work started with #1186.

The new classes are used in the `TripleComponent` class, which is used by the Turtle and SPARQL parsers. As a consequence, a lot of code is affected by this change. The new classes are not yet used by `LocalVocab` and `ExportQueryExecutionTrees`.
  • Loading branch information
joka921 authored Mar 22, 2024
1 parent bb9959a commit 7ad3f58
Show file tree
Hide file tree
Showing 52 changed files with 970 additions and 924 deletions.
3 changes: 0 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -414,9 +414,6 @@ add_executable(ServerMain src/ServerMain.cpp)
qlever_target_link_libraries (ServerMain engine ${CMAKE_THREAD_LIBS_INIT} Boost::program_options)
target_precompile_headers(ServerMain REUSE_FROM engine)

add_executable(TurtleParserMain src/TurtleParserMain.cpp)
qlever_target_link_libraries(TurtleParserMain parser ${CMAKE_THREAD_LIBS_INIT})

add_executable(VocabularyMergerMain src/VocabularyMergerMain.cpp)
qlever_target_link_libraries(VocabularyMergerMain index ${CMAKE_THREAD_LIBS_INIT})

Expand Down
201 changes: 0 additions & 201 deletions src/TurtleParserMain.cpp

This file was deleted.

10 changes: 7 additions & 3 deletions src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,15 +208,19 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id,
std::optional<string> entity =
index.idToOptionalString(id.getVocabIndex());
AD_CONTRACT_CHECK(entity.has_value());
// TODO<joka921> make this more efficient AND more correct
auto litOrIri =
ad_utility::triple_component::LiteralOrIri::fromStringRepresentation(
entity.value());
if constexpr (onlyReturnLiterals) {
if (!entity.value().starts_with('"')) {
if (!litOrIri.isLiteral()) {
return std::nullopt;
}
}
if constexpr (removeQuotesAndAngleBrackets) {
entity = RdfEscaping::normalizedContentFromLiteralOrIri(
std::move(entity.value()));
entity = asStringViewUnsafe(litOrIri.getContent());
}
// TODO<joka921> handle the exporting of literals more correctly.
return std::pair{escapeFunction(std::move(entity.value())), nullptr};
}
case LocalVocabIndex: {
Expand Down
28 changes: 17 additions & 11 deletions src/engine/sparqlExpressions/LiteralExpression.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#pragma once

#include "engine/sparqlExpressions/SparqlExpression.h"
#include "util/TypeTraits.h"

namespace sparqlExpression {
namespace detail {
Expand Down Expand Up @@ -38,24 +39,27 @@ class LiteralExpression : public SparqlExpression {

// Evaluating just returns the constant/literal value.
ExpressionResult evaluate(EvaluationContext* context) const override {
// Common code for the `Literal` and `std::string` case.
auto getIdOrString = [this,
&context](const std::string& s) -> ExpressionResult {
// Common code for the `Literal` and `Iri` case.
auto getIdOrString =
[this,
&context](const ad_utility::SameAsAny<TripleComponent::Literal,
TripleComponent::Iri> auto& s)
-> ExpressionResult {
if (auto ptr = cachedResult_.load(std::memory_order_relaxed)) {
return *ptr;
}
Id id;
bool idWasFound = context->_qec.getIndex().getId(s, &id);
IdOrString result = idWasFound ? IdOrString{id} : IdOrString{s};
auto id = context->_qec.getIndex().getId(s);
IdOrString result =
id.has_value() ? IdOrString{id.value()}
: IdOrString{std::string{s.toStringRepresentation()}};
auto ptrForCache = std::make_unique<IdOrString>(result);
ptrForCache.reset(std::atomic_exchange_explicit(
&cachedResult_, ptrForCache.release(), std::memory_order_relaxed));
context->cancellationHandle_->throwIfCancelled();
return result;
};
if constexpr (std::is_same_v<TripleComponent::Literal, T>) {
return getIdOrString(_value.rawContent());
} else if constexpr (std::is_same_v<string, T>) {
if constexpr (ad_utility::SameAsAny<T, TripleComponent::Literal,
TripleComponent::Iri>) {
return getIdOrString(_value);
} else if constexpr (std::is_same_v<Variable, T>) {
return evaluateIfVariable(context, _value);
Expand Down Expand Up @@ -99,7 +103,9 @@ class LiteralExpression : public SparqlExpression {
} else if constexpr (std::is_same_v<T, ValueId>) {
return absl::StrCat("#valueId ", _value.getBits(), "#");
} else if constexpr (std::is_same_v<T, TripleComponent::Literal>) {
return absl::StrCat("#literal: ", _value.rawContent());
return absl::StrCat("#literal: ", _value.toStringRepresentation());
} else if constexpr (std::is_same_v<T, TripleComponent::Iri>) {
return absl::StrCat("#iri: ", _value.toStringRepresentation());
} else if constexpr (std::is_same_v<T, VectorWithMemoryLimit<ValueId>>) {
// We should never cache this, as objects of this type of expression are
// used exactly *once* in the HashMap optimization of the GROUP BY
Expand Down Expand Up @@ -175,7 +181,7 @@ class LiteralExpression : public SparqlExpression {

/// The actual instantiations and aliases of LiteralExpressions.
using VariableExpression = detail::LiteralExpression<::Variable>;
using IriExpression = detail::LiteralExpression<string>;
using IriExpression = detail::LiteralExpression<TripleComponent::Iri>;
using StringLiteralExpression =
detail::LiteralExpression<TripleComponent::Literal>;
using IdExpression = detail::LiteralExpression<ValueId>;
Expand Down
27 changes: 8 additions & 19 deletions src/engine/sparqlExpressions/RegexExpression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,6 @@ std::optional<std::string> getPrefixRegex(std::string regex) {
return regex;
}

// Return a copy of `input` without its enclosing double quotes. The input is
// required to have at least two characters and to both begin and end with `"`;
// this precondition is enforced via `AD_CORRECTNESS_CHECK`.
std::string removeQuotes(std::string_view input) {
  AD_CORRECTNESS_CHECK(input.size() >= 2 && input.starts_with('"') &&
                       input.ends_with('"'));
  // The check above guarantees `size() >= 2`, so the subtraction cannot
  // underflow and the resulting range `[1, size() - 1)` is valid.
  const std::string_view unquoted = input.substr(1, input.size() - 2);
  return std::string{unquoted};
}
} // namespace sparqlExpression::detail

namespace sparqlExpression {
Expand All @@ -93,16 +84,15 @@ RegexExpression::RegexExpression(
"REGEX expressions are currently supported only on variables.");
}
std::string regexString;
std::string originalRegexString;
if (auto regexPtr =
dynamic_cast<const StringLiteralExpression*>(regex.get())) {
originalRegexString = regexPtr->value().normalizedLiteralContent().get();
if (!regexPtr->value().datatypeOrLangtag().empty()) {
const auto& regexLiteral = regexPtr->value();
regexString = asStringViewUnsafe(regexLiteral.getContent());
if (regexLiteral.hasDatatype() || regexLiteral.hasLanguageTag()) {
throw std::runtime_error(
"The second argument to the REGEX function (which contains the "
"regular expression) must not contain a language tag or a datatype");
}
regexString = detail::removeQuotes(originalRegexString);
} else {
throw std::runtime_error(
"The second argument to the REGEX function must be a "
Expand All @@ -111,15 +101,14 @@ RegexExpression::RegexExpression(
if (optionalFlags.has_value()) {
if (auto flagsPtr = dynamic_cast<const StringLiteralExpression*>(
optionalFlags.value().get())) {
std::string_view originalFlags =
flagsPtr->value().normalizedLiteralContent().get();
if (!flagsPtr->value().datatypeOrLangtag().empty()) {
const auto& flagsLiteral = flagsPtr->value();
std::string_view flags = asStringViewUnsafe(flagsLiteral.getContent());
if (flagsLiteral.hasDatatype() || flagsLiteral.hasLanguageTag()) {
throw std::runtime_error(
"The third argument to the REGEX function (which contains optional "
"flags to configure the evaluation) must not contain a language "
"tag or a datatype");
}
auto flags = detail::removeQuotes(originalFlags);
auto firstInvalidFlag = flags.find_first_not_of("imsu");
if (firstInvalidFlag != std::string::npos) {
throw std::runtime_error{absl::StrCat(
Expand Down Expand Up @@ -148,8 +137,8 @@ RegexExpression::RegexExpression(
const auto& r = std::get<RE2>(regex_);
if (r.error_code() != RE2::NoError) {
throw std::runtime_error{absl::StrCat(
"The regex ", originalRegexString,
" is not supported by QLever (which uses Google's RE2 library). "
"The regex \"", regexString,
"\" is not supported by QLever (which uses Google's RE2 library). "
"Error from RE2 is: ",
r.error())};
}
Expand Down
2 changes: 1 addition & 1 deletion src/engine/sparqlExpressions/RelationalExpressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ RelationalExpression<Comp>::getLanguageFilterExpression() const {
// TODO<joka921> Is this even allowed by the grammar?
return LangFilterData{
varPtr->variable(),
std::string{langPtr->value().normalizedLiteralContent().get()}};
std::string{asStringViewUnsafe(langPtr->value().getContent())}};
};

const auto& child1 = children_[0];
Expand Down
4 changes: 2 additions & 2 deletions src/global/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ static const std::string HAS_PREDICATE_PREDICATE =
makeInternalIri("has-predicate");
static const std::string HAS_PATTERN_PREDICATE = makeInternalIri("has-pattern");
static constexpr std::pair<std::string_view, std::string_view> GEOF_PREFIX = {
"geof:", "<http://www.opengis.net/def/function/geosparql/"};
"geof:", "http://www.opengis.net/def/function/geosparql/"};
static constexpr std::pair<std::string_view, std::string_view> MATH_PREFIX = {
"math:", "<http://www.w3.org/2005/xpath-functions/math#"};
"math:", "http://www.w3.org/2005/xpath-functions/math#"};

static const std::string INTERNAL_VARIABLE_PREFIX =
"?_QLever_internal_variable_";
Expand Down
Loading

0 comments on commit 7ad3f58

Please sign in to comment.