diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index c0b9b572b9..409c1d75e7 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -207,8 +207,9 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id, index.idToOptionalString(id.getVocabIndex()); AD_CONTRACT_CHECK(entity.has_value()); // TODO make this more efficient AND more correct - auto litOrIri = ad_utility::triple_component::LiteralOrIri:: - fromInternalRepresentation(entity.value()); + auto litOrIri = + ad_utility::triple_component::LiteralOrIri::fromStringRepresentation( + entity.value()); if constexpr (onlyReturnLiterals) { if (!litOrIri.isLiteral()) { return std::nullopt; diff --git a/src/parser/Iri.cpp b/src/parser/Iri.cpp index ad09efd46d..0fd8254f89 100644 --- a/src/parser/Iri.cpp +++ b/src/parser/Iri.cpp @@ -11,17 +11,16 @@ namespace ad_utility::triple_component { // __________________________________________ -Iri::Iri(NormalizedString iri) : iri_{std::move(iri)} {} +Iri::Iri(std::string iri) : iri_{std::move(iri)} {} // __________________________________________ Iri::Iri(const Iri& prefix, NormalizedStringView suffix) - : iri_{asNormalizedStringViewUnsafe("<") + - NormalizedString{prefix.getContent()} + suffix + - asNormalizedStringViewUnsafe(">")} {}; + : iri_{absl::StrCat("<"sv, asStringViewUnsafe(prefix.getContent()), + asStringViewUnsafe(suffix), ">"sv)} {}; // __________________________________________ NormalizedStringView Iri::getContent() const { - return NormalizedStringView{iri_}.substr(1, iri_.size() - 2); + return asNormalizedStringViewUnsafe(iri_).substr(1, iri_.size() - 2); } // __________________________________________ @@ -29,9 +28,10 @@ Iri Iri::fromIriref(std::string_view stringWithBrackets) { auto first = stringWithBrackets.find('<'); AD_CORRECTNESS_CHECK(first != std::string_view::npos); return Iri{ - asNormalizedStringViewUnsafe(stringWithBrackets.substr(0, first + 1)) + - RdfEscaping::normalizeIriWithBrackets(stringWithBrackets.substr(first)) + - asNormalizedStringViewUnsafe(">")}; + absl::StrCat(stringWithBrackets.substr(0, first + 1), + asStringViewUnsafe(RdfEscaping::normalizeIriWithBrackets( + stringWithBrackets.substr(first))), + ">"sv)}; } // __________________________________________ @@ -41,14 +41,14 @@ Iri Iri::fromPrefixAndSuffix(const Iri& prefix, std::string_view suffix) { } // __________________________________________ -Iri Iri::fromStringRepresentation(std::string_view s) { +Iri Iri::fromStringRepresentation(std::string s) { AD_CORRECTNESS_CHECK(s.starts_with("<") || s.starts_with("@")); - return Iri{NormalizedString{asNormalizedStringViewUnsafe(s)}}; + return Iri{std::move(s)}; } // __________________________________________ -std::string_view Iri::toStringRepresentation() const { - return asStringViewUnsafe(iri_); -} +const std::string& Iri::toStringRepresentation() const { return iri_; } +// __________________________________________ +std::string& Iri::toStringRepresentation() { return iri_; } } // namespace ad_utility::triple_component diff --git a/src/parser/Iri.h b/src/parser/Iri.h index 606a440623..69f3ee8085 100644 --- a/src/parser/Iri.h +++ b/src/parser/Iri.h @@ -10,19 +10,15 @@ namespace ad_utility::triple_component { -// A class to hold IRIs. It does not store the leading or trailing -// angled bracket. -// -// E.g. For the input "", -// only "http://example.org/books/book1" is to be stored in the iri_ variable. +// A class to hold IRIs. class Iri { private: - // Store the string value of the IRI without any leading or trailing angled + // Store the string value of the IRI including the angle brackets. // brackets. - NormalizedString iri_; + std::string iri_; // Create a new iri object - explicit Iri(NormalizedString iri); + explicit Iri(std::string iri); // Create a new iri using a prefix Iri(const Iri& prefix, NormalizedStringView suffix); @@ -32,13 +28,13 @@ class Iri { Iri() = default; template friend H AbslHashValue(H h, const std::same_as auto& iri) { - return H::combine(std::move(h), - asStringViewUnsafe(NormalizedStringView(iri.iri_))); + return H::combine(std::move(h), iri.iri_); } bool operator==(const Iri&) const = default; - static Iri fromStringRepresentation(std::string_view s); + static Iri fromStringRepresentation(std::string s); - std::string_view toStringRepresentation() const; + const std::string& toStringRepresentation() const; + std::string& toStringRepresentation(); // Create a new iri given an iri with brackets static Iri fromIriref(std::string_view stringWithBrackets); diff --git a/src/parser/Literal.cpp b/src/parser/Literal.cpp index 482abe551c..dca504da59 100644 --- a/src/parser/Literal.cpp +++ b/src/parser/Literal.cpp @@ -9,13 +9,13 @@ #include "parser/LiteralOrIri.h" -static constexpr NormalizedChar quote{'"'}; -static constexpr NormalizedChar at{'@'}; -static constexpr NormalizedChar hat{'^'}; +static constexpr char quote{'"'}; +static constexpr char at{'@'}; +static constexpr char hat{'^'}; namespace ad_utility::triple_component { // __________________________________________ -Literal::Literal(NormalizedString content, size_t beginOfSuffix) +Literal::Literal(std::string content, size_t beginOfSuffix) : content_{std::move(content)}, beginOfSuffix_{beginOfSuffix} { AD_CORRECTNESS_CHECK(content_.starts_with(quote)); AD_CORRECTNESS_CHECK(beginOfSuffix_ >= 2); @@ -33,7 +33,7 @@ bool Literal::hasDatatype() const { return getSuffix().starts_with(hat); } // __________________________________________ NormalizedStringView Literal::getContent() const { - return NormalizedStringView{content_}.substr(1, beginOfSuffix_ - 2); + return content().substr(1, beginOfSuffix_ - 2); } // __________________________________________ @@ -42,7 +42,7 @@ NormalizedStringView Literal::getDatatype() const { AD_THROW("The literal does not have an explicit datatype."); } // We don't return the enclosing - NormalizedStringView result = content_; + NormalizedStringView result = content(); result.remove_prefix(beginOfSuffix_ + 3); result.remove_suffix(1); return result; @@ -53,7 +53,7 @@ NormalizedStringView Literal::getLanguageTag() const { if (!hasLanguageTag()) { AD_THROW("The literal does not have an explicit language tag."); } - return NormalizedStringView{content_}.substr(beginOfSuffix_ + 1); + return content().substr(beginOfSuffix_ + 1); } // __________________________________________ @@ -80,8 +80,9 @@ Literal Literal::literalWithoutQuotes( Literal Literal::literalWithNormalizedContent( NormalizedString normalizedRdfContent, std::optional> descriptor) { - auto quotes = asNormalizedStringViewUnsafe("\""); - auto actualContent = quotes + std::move(normalizedRdfContent) + quotes; + auto quotes = "\""sv; + auto actualContent = + absl::StrCat(quotes, asStringViewUnsafe(normalizedRdfContent), quotes); auto sz = actualContent.size(); auto literal = Literal{std::move(actualContent), sz}; if (!descriptor.has_value()) { @@ -104,31 +105,34 @@ Literal Literal::literalWithNormalizedContent( // __________________________________________ void Literal::addLanguageTag(std::string_view languageTag) { - content_.push_back(at); - content_.append(RdfEscaping::normalizeLanguageTag(languageTag)); + AD_CORRECTNESS_CHECK(!hasDatatype() && !hasLanguageTag()); + if (languageTag.starts_with('@')) { + absl::StrAppend(&content_, languageTag); + } else { + absl::StrAppend(&content_, "@"sv, languageTag); + } } // __________________________________________ void Literal::addDatatype(const Iri& datatype) { - content_.append(asNormalizedStringViewUnsafe("^^")); - content_.append( - asNormalizedStringViewUnsafe(datatype.toStringRepresentation())); + AD_CORRECTNESS_CHECK(!hasDatatype() && !hasLanguageTag()); + absl::StrAppend(&content_, "^^"sv, datatype.toStringRepresentation()); } // __________________________________________ -std::string_view Literal::toStringRepresentation() const { - return asStringViewUnsafe(content_); -} +const std::string& Literal::toStringRepresentation() const { return content_; } + +// __________________________________________ +std::string& Literal::toStringRepresentation() { return content_; } // __________________________________________ -Literal Literal::fromStringRepresentation(std::string_view internal) { +Literal Literal::fromStringRepresentation(std::string internal) { // TODO This is a little dangerous as there might be quotes in the // IRI which might lead to unexpected results here. AD_CORRECTNESS_CHECK(internal.starts_with('"')); auto endIdx = internal.rfind('"'); AD_CORRECTNESS_CHECK(endIdx > 0); - return Literal{NormalizedString{asNormalizedStringViewUnsafe(internal)}, - endIdx + 1}; + return Literal{std::move(internal), endIdx + 1}; } } // namespace ad_utility::triple_component diff --git a/src/parser/Literal.h b/src/parser/Literal.h index 659ab56f64..65ac735c86 100644 --- a/src/parser/Literal.h +++ b/src/parser/Literal.h @@ -16,14 +16,14 @@ class Literal { // For example `"Hello World"@en` or `"With"Quote"^^` (note // that the quote in the middle is unescaped because this is the normalized // form that QLever stores. - NormalizedString content_; + std::string content_; // The position after the closing `"`, so either the size of the string, or // the position of the `@` or `^^` for literals with language tags or // datatypes. std::size_t beginOfSuffix_; // Create a new literal without any descriptor - explicit Literal(NormalizedString content, size_t beginOfSuffix_); + explicit Literal(std::string content, size_t beginOfSuffix_); // Similar to `fromEscapedRdfLiteral`, except the rdfContent is expected to // already be normalized @@ -33,22 +33,27 @@ class Literal { // Internal helper function. Return either the empty string (for a plain // literal), `@langtag` or `^^`. - NormalizedStringView getSuffix() const { - NormalizedStringView result = content_; + std::string_view getSuffix() const { + std::string_view result = content_; result.remove_prefix(beginOfSuffix_); return result; } + NormalizedStringView content() const { + return asNormalizedStringViewUnsafe(content_); + } + public: template friend H AbslHashValue(H h, const std::same_as auto& literal) { - return H::combine(std::move(h), asStringViewUnsafe(literal.content_)); + return H::combine(std::move(h), literal.content_); } bool operator==(const Literal&) const = default; - std::string_view toStringRepresentation() const; + const std::string& toStringRepresentation() const; + std::string& toStringRepresentation(); - static Literal fromStringRepresentation(std::string_view internal); + static Literal fromStringRepresentation(std::string internal); // Return true if the literal has an assigned language tag bool hasLanguageTag() const; diff --git a/src/parser/LiteralOrIri.h b/src/parser/LiteralOrIri.h index 7d883b6c74..ba34b7c8e9 100644 --- a/src/parser/LiteralOrIri.h +++ b/src/parser/LiteralOrIri.h @@ -37,20 +37,21 @@ class LiteralOrIri { // Create a new LiteralOrIri based on an Iri object explicit LiteralOrIri(Iri iri); - std::string toInternalRepresentation() const { - auto impl = [](const auto& val) { - return absl::StrCat(val.toStringRepresentation()); + const std::string& toStringRepresentation() const { + auto impl = [](const auto& val) -> decltype(auto) { + return val.toStringRepresentation(); }; return std::visit(impl, data_); } - static LiteralOrIri fromInternalRepresentation(std::string_view internal) { + static LiteralOrIri fromStringRepresentation(std::string internal) { char tag = internal.front(); if (tag == iriPrefixChar) { - return LiteralOrIri{Iri::fromStringRepresentation(internal)}; + return LiteralOrIri{Iri::fromStringRepresentation(std::move(internal))}; } else { AD_CORRECTNESS_CHECK(tag == literalPrefixChar); - return LiteralOrIri{Literal::fromStringRepresentation(internal)}; + return LiteralOrIri{ + Literal::fromStringRepresentation(std::move(internal))}; } } template diff --git a/src/parser/TripleComponent.cpp b/src/parser/TripleComponent.cpp index 8a9d2e127e..4b77bbb198 100644 --- a/src/parser/TripleComponent.cpp +++ b/src/parser/TripleComponent.cpp @@ -72,9 +72,9 @@ std::string TripleComponent::toRdfLiteral() const { } else if (isString()) { return getString(); } else if (isLiteral()) { - return std::string{getLiteral().toStringRepresentation()}; + return getLiteral().toStringRepresentation(); } else if (isIri()) { - return std::string{getIri().toStringRepresentation()}; + return getIri().toStringRepresentation(); } else { auto [value, type] = ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue( diff --git a/src/parser/TripleComponent.h b/src/parser/TripleComponent.h index 239683752c..474a65f18e 100644 --- a/src/parser/TripleComponent.h +++ b/src/parser/TripleComponent.h @@ -191,7 +191,7 @@ class TripleComponent { AD_CONTRACT_CHECK(!isString()); if (isLiteral() || isIri()) { VocabIndex idx; - const std::string_view content = [&]() { + const std::string& content = [&]() -> const std::string& { if (isLiteral()) { return getLiteral().toStringRepresentation(); } else { @@ -224,16 +224,14 @@ class TripleComponent { // If `toValueId` could not convert to `Id`, we have a string, which we // look up in (and potentially add to) our local vocabulary. AD_CORRECTNESS_CHECK(isString() || isLiteral() || isIri()); - // TODO Make the internal representation of the `Literal` and - // `Iri` class movable. - std::string newWord = [&]() -> std::string { + std::string& newWord = [&]() -> std::string& { if (isString()) { return getString(); } else { if (isLiteral()) { - return std::string{getLiteral().toStringRepresentation()}; + return getLiteral().toStringRepresentation(); } else { - return std::string{getIri().toStringRepresentation()}; + return getIri().toStringRepresentation(); } } }(); diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index 02ee9e1631..c1274e8ff0 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -267,7 +267,7 @@ std::function makeGetId(const Index& index) { return triple_component::LiteralOrIri::iriref(el); } else { AD_CONTRACT_CHECK(el.starts_with('\"')); - return triple_component::LiteralOrIri::fromInternalRepresentation(el); + return triple_component::LiteralOrIri::fromStringRepresentation(el); } }(); auto id = index.getId(literalOrIri);