Skip to content

Commit

Permalink
Some improvements from a review with Hannah.
Browse files Browse the repository at this point in the history
  • Loading branch information
joka921 committed Mar 21, 2024
1 parent 7f8d909 commit f43dff9
Show file tree
Hide file tree
Showing 9 changed files with 74 additions and 69 deletions.
5 changes: 3 additions & 2 deletions src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,9 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id,
index.idToOptionalString(id.getVocabIndex());
AD_CONTRACT_CHECK(entity.has_value());
// TODO<joka921> make this more efficient AND more correct
auto litOrIri = ad_utility::triple_component::LiteralOrIri::
fromInternalRepresentation(entity.value());
auto litOrIri =
ad_utility::triple_component::LiteralOrIri::fromStringRepresentation(
entity.value());
if constexpr (onlyReturnLiterals) {
if (!litOrIri.isLiteral()) {
return std::nullopt;
Expand Down
26 changes: 13 additions & 13 deletions src/parser/Iri.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,27 @@

namespace ad_utility::triple_component {
// __________________________________________
Iri::Iri(NormalizedString iri) : iri_{std::move(iri)} {}
Iri::Iri(std::string iri) : iri_{std::move(iri)} {}

// __________________________________________
Iri::Iri(const Iri& prefix, NormalizedStringView suffix)
: iri_{asNormalizedStringViewUnsafe("<") +
NormalizedString{prefix.getContent()} + suffix +
asNormalizedStringViewUnsafe(">")} {};
: iri_{absl::StrCat("<"sv, asStringViewUnsafe(prefix.getContent()),
asStringViewUnsafe(suffix), ">"sv)} {};

// __________________________________________
NormalizedStringView Iri::getContent() const {
return NormalizedStringView{iri_}.substr(1, iri_.size() - 2);
return asNormalizedStringViewUnsafe(iri_).substr(1, iri_.size() - 2);
}

// __________________________________________
Iri Iri::fromIriref(std::string_view stringWithBrackets) {
auto first = stringWithBrackets.find('<');
AD_CORRECTNESS_CHECK(first != std::string_view::npos);
return Iri{
asNormalizedStringViewUnsafe(stringWithBrackets.substr(0, first + 1)) +
RdfEscaping::normalizeIriWithBrackets(stringWithBrackets.substr(first)) +
asNormalizedStringViewUnsafe(">")};
absl::StrCat(stringWithBrackets.substr(0, first + 1),
asStringViewUnsafe(RdfEscaping::normalizeIriWithBrackets(
stringWithBrackets.substr(first))),
">"sv)};
}

// __________________________________________
Expand All @@ -41,14 +41,14 @@ Iri Iri::fromPrefixAndSuffix(const Iri& prefix, std::string_view suffix) {
}

// __________________________________________
Iri Iri::fromStringRepresentation(std::string_view s) {
Iri Iri::fromStringRepresentation(std::string s) {
AD_CORRECTNESS_CHECK(s.starts_with("<") || s.starts_with("@"));
return Iri{NormalizedString{asNormalizedStringViewUnsafe(s)}};
return Iri{std::move(s)};
}

// __________________________________________
std::string_view Iri::toStringRepresentation() const {
return asStringViewUnsafe(iri_);
}
const std::string& Iri::toStringRepresentation() const { return iri_; }
// __________________________________________
std::string& Iri::toStringRepresentation() { return iri_; }

} // namespace ad_utility::triple_component
20 changes: 8 additions & 12 deletions src/parser/Iri.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,15 @@

namespace ad_utility::triple_component {

// A class to hold IRIs. It does not store the leading or trailing
// angled bracket.
//
// E.g. For the input "<http://example.org/books/book1>",
// only "http://example.org/books/book1" is to be stored in the iri_ variable.
// A class to hold IRIs.
class Iri {
private:
// Store the string value of the IRI without any leading or trailing angled
// Store the string value of the IRI including the angle brackets.
// brackets.
NormalizedString iri_;
std::string iri_;

// Create a new iri object
explicit Iri(NormalizedString iri);
explicit Iri(std::string iri);

// Create a new iri using a prefix
Iri(const Iri& prefix, NormalizedStringView suffix);
Expand All @@ -32,13 +28,13 @@ class Iri {
Iri() = default;
template <typename H>
friend H AbslHashValue(H h, const std::same_as<Iri> auto& iri) {
return H::combine(std::move(h),
asStringViewUnsafe(NormalizedStringView(iri.iri_)));
return H::combine(std::move(h), iri.iri_);
}
bool operator==(const Iri&) const = default;
static Iri fromStringRepresentation(std::string_view s);
static Iri fromStringRepresentation(std::string s);

std::string_view toStringRepresentation() const;
const std::string& toStringRepresentation() const;
std::string& toStringRepresentation();

// Create a new iri given an iri with brackets
static Iri fromIriref(std::string_view stringWithBrackets);
Expand Down
44 changes: 24 additions & 20 deletions src/parser/Literal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@

#include "parser/LiteralOrIri.h"

static constexpr NormalizedChar quote{'"'};
static constexpr NormalizedChar at{'@'};
static constexpr NormalizedChar hat{'^'};
static constexpr char quote{'"'};
static constexpr char at{'@'};
static constexpr char hat{'^'};

namespace ad_utility::triple_component {
// __________________________________________
Literal::Literal(NormalizedString content, size_t beginOfSuffix)
Literal::Literal(std::string content, size_t beginOfSuffix)
: content_{std::move(content)}, beginOfSuffix_{beginOfSuffix} {
AD_CORRECTNESS_CHECK(content_.starts_with(quote));
AD_CORRECTNESS_CHECK(beginOfSuffix_ >= 2);
Expand All @@ -33,7 +33,7 @@ bool Literal::hasDatatype() const { return getSuffix().starts_with(hat); }

// __________________________________________
NormalizedStringView Literal::getContent() const {
return NormalizedStringView{content_}.substr(1, beginOfSuffix_ - 2);
return content().substr(1, beginOfSuffix_ - 2);
}

// __________________________________________
Expand All @@ -42,7 +42,7 @@ NormalizedStringView Literal::getDatatype() const {
AD_THROW("The literal does not have an explicit datatype.");
}
// We don't return the enclosing <angle brackets>
NormalizedStringView result = content_;
NormalizedStringView result = content();
result.remove_prefix(beginOfSuffix_ + 3);
result.remove_suffix(1);
return result;
Expand All @@ -53,7 +53,7 @@ NormalizedStringView Literal::getLanguageTag() const {
if (!hasLanguageTag()) {
AD_THROW("The literal does not have an explicit language tag.");
}
return NormalizedStringView{content_}.substr(beginOfSuffix_ + 1);
return content().substr(beginOfSuffix_ + 1);
}

// __________________________________________
Expand All @@ -80,8 +80,9 @@ Literal Literal::literalWithoutQuotes(
Literal Literal::literalWithNormalizedContent(
NormalizedString normalizedRdfContent,
std::optional<std::variant<Iri, string>> descriptor) {
auto quotes = asNormalizedStringViewUnsafe("\"");
auto actualContent = quotes + std::move(normalizedRdfContent) + quotes;
auto quotes = "\""sv;
auto actualContent =
absl::StrCat(quotes, asStringViewUnsafe(normalizedRdfContent), quotes);
auto sz = actualContent.size();
auto literal = Literal{std::move(actualContent), sz};
if (!descriptor.has_value()) {
Expand All @@ -104,31 +105,34 @@ Literal Literal::literalWithNormalizedContent(

// __________________________________________
void Literal::addLanguageTag(std::string_view languageTag) {
content_.push_back(at);
content_.append(RdfEscaping::normalizeLanguageTag(languageTag));
AD_CORRECTNESS_CHECK(!hasDatatype() && !hasLanguageTag());
if (languageTag.starts_with('@')) {
absl::StrAppend(&content_, languageTag);
} else {
absl::StrAppend(&content_, "@"sv, languageTag);
}

Check warning on line 113 in src/parser/Literal.cpp

View check run for this annotation

Codecov / codecov/patch

src/parser/Literal.cpp#L112-L113

Added lines #L112 - L113 were not covered by tests
}

// __________________________________________
void Literal::addDatatype(const Iri& datatype) {
content_.append(asNormalizedStringViewUnsafe("^^"));
content_.append(
asNormalizedStringViewUnsafe(datatype.toStringRepresentation()));
AD_CORRECTNESS_CHECK(!hasDatatype() && !hasLanguageTag());
absl::StrAppend(&content_, "^^"sv, datatype.toStringRepresentation());
}

// __________________________________________
std::string_view Literal::toStringRepresentation() const {
return asStringViewUnsafe(content_);
}
const std::string& Literal::toStringRepresentation() const { return content_; }

// __________________________________________
std::string& Literal::toStringRepresentation() { return content_; }

// __________________________________________
Literal Literal::fromStringRepresentation(std::string_view internal) {
Literal Literal::fromStringRepresentation(std::string internal) {
// TODO<joka921> This is a little dangerous as there might be quotes in the
// IRI which might lead to unexpected results here.
AD_CORRECTNESS_CHECK(internal.starts_with('"'));
auto endIdx = internal.rfind('"');
AD_CORRECTNESS_CHECK(endIdx > 0);
return Literal{NormalizedString{asNormalizedStringViewUnsafe(internal)},
endIdx + 1};
return Literal{std::move(internal), endIdx + 1};
}

} // namespace ad_utility::triple_component
19 changes: 12 additions & 7 deletions src/parser/Literal.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ class Literal {
// For example `"Hello World"@en` or `"With"Quote"^^<someDatatype>` (note
// that the quote in the middle is unescaped because this is the normalized
// form that QLever stores.
NormalizedString content_;
std::string content_;
// The position after the closing `"`, so either the size of the string, or
// the position of the `@` or `^^` for literals with language tags or
// datatypes.
std::size_t beginOfSuffix_;

// Create a new literal without any descriptor
explicit Literal(NormalizedString content, size_t beginOfSuffix_);
explicit Literal(std::string content, size_t beginOfSuffix_);

// Similar to `fromEscapedRdfLiteral`, except the rdfContent is expected to
// already be normalized
Expand All @@ -33,22 +33,27 @@ class Literal {

// Internal helper function. Return either the empty string (for a plain
// literal), `@langtag` or `^^<datatypeIri>`.
NormalizedStringView getSuffix() const {
NormalizedStringView result = content_;
std::string_view getSuffix() const {
std::string_view result = content_;
result.remove_prefix(beginOfSuffix_);
return result;
}

NormalizedStringView content() const {
return asNormalizedStringViewUnsafe(content_);
}

public:
template <typename H>
friend H AbslHashValue(H h, const std::same_as<Literal> auto& literal) {
return H::combine(std::move(h), asStringViewUnsafe(literal.content_));
return H::combine(std::move(h), literal.content_);
}
bool operator==(const Literal&) const = default;

std::string_view toStringRepresentation() const;
const std::string& toStringRepresentation() const;
std::string& toStringRepresentation();

static Literal fromStringRepresentation(std::string_view internal);
static Literal fromStringRepresentation(std::string internal);

// Return true if the literal has an assigned language tag
bool hasLanguageTag() const;
Expand Down
13 changes: 7 additions & 6 deletions src/parser/LiteralOrIri.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,21 @@ class LiteralOrIri {
// Create a new LiteralOrIri based on an Iri object
explicit LiteralOrIri(Iri iri);

std::string toInternalRepresentation() const {
auto impl = [](const auto& val) {
return absl::StrCat(val.toStringRepresentation());
const std::string& toStringRepresentation() const {
auto impl = [](const auto& val) -> decltype(auto) {
return val.toStringRepresentation();
};
return std::visit(impl, data_);
}

static LiteralOrIri fromInternalRepresentation(std::string_view internal) {
static LiteralOrIri fromStringRepresentation(std::string internal) {
char tag = internal.front();
if (tag == iriPrefixChar) {
return LiteralOrIri{Iri::fromStringRepresentation(internal)};
return LiteralOrIri{Iri::fromStringRepresentation(std::move(internal))};
} else {
AD_CORRECTNESS_CHECK(tag == literalPrefixChar);
return LiteralOrIri{Literal::fromStringRepresentation(internal)};
return LiteralOrIri{
Literal::fromStringRepresentation(std::move(internal))};
}
}
template <typename H>
Expand Down
4 changes: 2 additions & 2 deletions src/parser/TripleComponent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ std::string TripleComponent::toRdfLiteral() const {
} else if (isString()) {
return getString();
} else if (isLiteral()) {
return std::string{getLiteral().toStringRepresentation()};
return getLiteral().toStringRepresentation();
} else if (isIri()) {
return std::string{getIri().toStringRepresentation()};
return getIri().toStringRepresentation();
} else {
auto [value, type] =
ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(
Expand Down
10 changes: 4 additions & 6 deletions src/parser/TripleComponent.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ class TripleComponent {
AD_CONTRACT_CHECK(!isString());
if (isLiteral() || isIri()) {
VocabIndex idx;
const std::string_view content = [&]() {
const std::string& content = [&]() -> const std::string& {
if (isLiteral()) {
return getLiteral().toStringRepresentation();
} else {
Expand Down Expand Up @@ -224,16 +224,14 @@ class TripleComponent {
// If `toValueId` could not convert to `Id`, we have a string, which we
// look up in (and potentially add to) our local vocabulary.
AD_CORRECTNESS_CHECK(isString() || isLiteral() || isIri());
// TODO<joka921> Make the internal representation of the `Literal` and
// `Iri` class movable.
std::string newWord = [&]() -> std::string {
std::string& newWord = [&]() -> std::string& {
if (isString()) {
return getString();
} else {
if (isLiteral()) {
return std::string{getLiteral().toStringRepresentation()};
return getLiteral().toStringRepresentation();
} else {
return std::string{getIri().toStringRepresentation()};
return getIri().toStringRepresentation();
}
}
}();
Expand Down
2 changes: 1 addition & 1 deletion test/util/IndexTestHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ std::function<Id(const std::string&)> makeGetId(const Index& index) {
return triple_component::LiteralOrIri::iriref(el);
} else {
AD_CONTRACT_CHECK(el.starts_with('\"'));
return triple_component::LiteralOrIri::fromInternalRepresentation(el);
return triple_component::LiteralOrIri::fromStringRepresentation(el);
}
}();
auto id = index.getId(literalOrIri);
Expand Down

0 comments on commit f43dff9

Please sign in to comment.