Skip to content

Commit

Permalink
Small changes from review.
Browse files Browse the repository at this point in the history
  • Loading branch information
joka921 committed May 2, 2024
1 parent 3270bcc commit 13a7542
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 31 deletions.
21 changes: 9 additions & 12 deletions src/engine/LocalVocab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,30 +31,26 @@ LocalVocab LocalVocab::merge(std::span<const LocalVocab*> vocabs) {
// _____________________________________________________________________________
template <typename WordT>
LocalVocabIndex LocalVocab::getIndexAndAddIfNotContainedImpl(WordT&& word) {
// TODO<joka921> As soon as we store `IdOrString` in the local vocab, we
// should definitely use `insert` instead of `emplace` here for some
// transparency optimizations. We currently need `emplace` because of the
// explicit conversion from `string` to `AlignedString16`.
auto [wordIterator, isNewWord] =
primaryWordSet().emplace(std::forward<WordT>(word));
auto [wordIterator, isNewWord] = primaryWordSet().insert(AD_FWD(word));
// TODO<Libc++18> Use std::to_address (more idiomatic, but currently breaks
// the MacOS build.
return &(*wordIterator);
}

// _____________________________________________________________________________
LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained(const Entry& word) {
LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained(
const LiteralOrIri& word) {
return getIndexAndAddIfNotContainedImpl(word);
}

// _____________________________________________________________________________
LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained(Entry&& word) {
LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained(LiteralOrIri&& word) {
return getIndexAndAddIfNotContainedImpl(std::move(word));
}

// _____________________________________________________________________________
std::optional<LocalVocabIndex> LocalVocab::getIndexOrNullopt(
const Entry& word) const {
const LiteralOrIri& word) const {
auto localVocabIndex = primaryWordSet().find(word);
if (localVocabIndex != primaryWordSet().end()) {
// TODO<Libc++18> Use std::to_address (more idiomatic, but currently breaks
Expand All @@ -66,14 +62,15 @@ std::optional<LocalVocabIndex> LocalVocab::getIndexOrNullopt(
}

// _____________________________________________________________________________
const LocalVocab::Entry& LocalVocab::getWord(
const LocalVocab::LiteralOrIri& LocalVocab::getWord(
LocalVocabIndex localVocabIndex) const {
return *localVocabIndex;
}

// _____________________________________________________________________________
std::vector<LocalVocab::Entry> LocalVocab::getAllWordsForTesting() const {
std::vector<Entry> result;
std::vector<LocalVocab::LiteralOrIri> LocalVocab::getAllWordsForTesting()
const {
std::vector<LiteralOrIri> result;
std::ranges::copy(primaryWordSet(), std::back_inserter(result));
for (const auto& previous : otherWordSets_) {
std::ranges::copy(*previous, std::back_inserter(result));
Expand Down
23 changes: 10 additions & 13 deletions src/engine/LocalVocab.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,14 @@
// meant for words that are not part of the normal vocabulary (constructed from
// the input data at indexing time).
//
// TODO: This is a first version of this class with basic functionality. Note
// that the local vocabulary used to be a simple `std::vector<Entry>`
// defined inside of the `ResultTable` class. You gotta start somewhere.

class LocalVocab {
private:
using Entry = ad_utility::triple_component::LiteralOrIri;
using LiteralOrIri = ad_utility::triple_component::LiteralOrIri;
// A map of the words in the local vocabulary to their local IDs. This is a
// node hash map because we need the addresses of the words (which are of type
// `Entry`) to remain stable over their lifetime in the hash map because
// we hand out pointers to them.
using Set = absl::node_hash_set<Entry>;
// `LiteralOrIri`) to remain stable over their lifetime in the hash map
// because we hand out pointers to them.
using Set = absl::node_hash_set<LiteralOrIri>;
std::shared_ptr<Set> primaryWordSet_ = std::make_shared<Set>();

// Local vocabularies from child operations that were merged into this
Expand Down Expand Up @@ -61,12 +57,13 @@ class LocalVocab {
// Get the index of a word in the local vocabulary. If the word was already
// contained, return the already existing index. If the word was not yet
// contained, add it, and return the new index.
LocalVocabIndex getIndexAndAddIfNotContained(const Entry& word);
LocalVocabIndex getIndexAndAddIfNotContained(Entry&& word);
LocalVocabIndex getIndexAndAddIfNotContained(const LiteralOrIri& word);
LocalVocabIndex getIndexAndAddIfNotContained(LiteralOrIri&& word);

// Get the index of a word in the local vocabulary, or std::nullopt if it is
// not contained. This is useful for testing.
std::optional<LocalVocabIndex> getIndexOrNullopt(const Entry& word) const;
std::optional<LocalVocabIndex> getIndexOrNullopt(
const LiteralOrIri& word) const;

// The number of words in the vocabulary.
// Note: This is not constant time, but linear in the number of word sets.
Expand All @@ -82,14 +79,14 @@ class LocalVocab {
bool empty() const { return size() == 0; }

// Return a const reference to the word.
const Entry& getWord(LocalVocabIndex localVocabIndex) const;
const LiteralOrIri& getWord(LocalVocabIndex localVocabIndex) const;

// Create a local vocab that contains and keeps alive all the words from each
// of the `vocabs`. The primary word set of the newly created vocab is empty.
static LocalVocab merge(std::span<const LocalVocab*> vocabs);

// Return all the words from all the word sets as a vector.
std::vector<Entry> getAllWordsForTesting() const;
std::vector<LiteralOrIri> getAllWordsForTesting() const;

private:
// Common implementation for the two variants of
Expand Down
12 changes: 6 additions & 6 deletions src/parser/TripleComponent.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,18 +220,18 @@ class TripleComponent {
// If `toValueId` could not convert to `Id`, we have a string, which we
// look up in (and potentially add to) our local vocabulary.
AD_CORRECTNESS_CHECK(isLiteral() || isIri());
using LoI = ad_utility::triple_component::LiteralOrIri;
LoI newWord = [&]() -> LoI {
using LiteralOrIri = ad_utility::triple_component::LiteralOrIri;
auto moveWord = [&]() -> LiteralOrIri {
if (isLiteral()) {
return LoI{std::move(getLiteral())};
return LiteralOrIri{std::move(getLiteral())};
} else {
return LoI{std::move(getIri())};
return LiteralOrIri{std::move(getIri())};
}
}();
};
// NOTE: There is a `&&` version of `getIndexAndAddIfNotContained`.
// Otherwise, `newWord` would be copied here despite the `std::move`.
id = Id::makeFromLocalVocabIndex(
localVocab.getIndexAndAddIfNotContained(std::move(newWord)));
localVocab.getIndexAndAddIfNotContained(moveWord()));
}
return id.value();
}
Expand Down

0 comments on commit 13a7542

Please sign in to comment.