diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index c0cc982c55..3c87ff5bdc 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -31,30 +31,26 @@ LocalVocab LocalVocab::merge(std::span vocabs) { // _____________________________________________________________________________ template LocalVocabIndex LocalVocab::getIndexAndAddIfNotContainedImpl(WordT&& word) { - // TODO As soon as we store `IdOrString` in the local vocab, we - // should definitely use `insert` instead of `emplace` here for some - // transparency optimizations. We currently need `emplace` because of the - // explicit conversion from `string` to `AlignedString16`. - auto [wordIterator, isNewWord] = - primaryWordSet().emplace(std::forward(word)); + auto [wordIterator, isNewWord] = primaryWordSet().insert(AD_FWD(word)); // TODO Use std::to_address (more idiomatic, but currently breaks // the MacOS build. return &(*wordIterator); } // _____________________________________________________________________________ -LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained(const Entry& word) { +LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained( + const LiteralOrIri& word) { return getIndexAndAddIfNotContainedImpl(word); } // _____________________________________________________________________________ -LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained(Entry&& word) { +LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained(LiteralOrIri&& word) { return getIndexAndAddIfNotContainedImpl(std::move(word)); } // _____________________________________________________________________________ std::optional LocalVocab::getIndexOrNullopt( - const Entry& word) const { + const LiteralOrIri& word) const { auto localVocabIndex = primaryWordSet().find(word); if (localVocabIndex != primaryWordSet().end()) { // TODO Use std::to_address (more idiomatic, but currently breaks @@ -66,14 +62,15 @@ std::optional LocalVocab::getIndexOrNullopt( } // 
_____________________________________________________________________________ -const LocalVocab::Entry& LocalVocab::getWord( +const LocalVocab::LiteralOrIri& LocalVocab::getWord( LocalVocabIndex localVocabIndex) const { return *localVocabIndex; } // _____________________________________________________________________________ -std::vector LocalVocab::getAllWordsForTesting() const { - std::vector result; +std::vector LocalVocab::getAllWordsForTesting() + const { + std::vector result; std::ranges::copy(primaryWordSet(), std::back_inserter(result)); for (const auto& previous : otherWordSets_) { std::ranges::copy(*previous, std::back_inserter(result)); diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h index 6981a8fb44..52ebf39f25 100644 --- a/src/engine/LocalVocab.h +++ b/src/engine/LocalVocab.h @@ -18,18 +18,14 @@ // meant for words that are not part of the normal vocabulary (constructed from // the input data at indexing time). // -// TODO: This is a first version of this class with basic functionality. Note -// that the local vocabulary used to be a simple `std::vector` -// defined inside of the `ResultTable` class. You gotta start somewhere. - class LocalVocab { private: - using Entry = ad_utility::triple_component::LiteralOrIri; + using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; // A map of the words in the local vocabulary to their local IDs. This is a // node hash map because we need the addresses of the words (which are of type - // `Entry`) to remain stable over their lifetime in the hash map because - // we hand out pointers to them. - using Set = absl::node_hash_set; + // `LiteralOrIri`) to remain stable over their lifetime in the hash map + // because we hand out pointers to them. + using Set = absl::node_hash_set; std::shared_ptr primaryWordSet_ = std::make_shared(); // Local vocabularies from child operations that were merged into this @@ -61,12 +57,13 @@ class LocalVocab { // Get the index of a word in the local vocabulary. 
If the word was already // contained, return the already existing index. If the word was not yet // contained, add it, and return the new index. - LocalVocabIndex getIndexAndAddIfNotContained(const Entry& word); - LocalVocabIndex getIndexAndAddIfNotContained(Entry&& word); + LocalVocabIndex getIndexAndAddIfNotContained(const LiteralOrIri& word); + LocalVocabIndex getIndexAndAddIfNotContained(LiteralOrIri&& word); // Get the index of a word in the local vocabulary, or std::nullopt if it is // not contained. This is useful for testing. - std::optional getIndexOrNullopt(const Entry& word) const; + std::optional getIndexOrNullopt( + const LiteralOrIri& word) const; // The number of words in the vocabulary. // Note: This is not constant time, but linear in the number of word sets. @@ -82,14 +79,14 @@ class LocalVocab { bool empty() const { return size() == 0; } // Return a const reference to the word. - const Entry& getWord(LocalVocabIndex localVocabIndex) const; + const LiteralOrIri& getWord(LocalVocabIndex localVocabIndex) const; // Create a local vocab that contains and keeps alive all the words from each // of the `vocabs`. The primary word set of the newly created vocab is empty. static LocalVocab merge(std::span vocabs); // Return all the words from all the word sets as a vector. - std::vector getAllWordsForTesting() const; + std::vector getAllWordsForTesting() const; private: // Common implementation for the two variants of diff --git a/src/parser/TripleComponent.h b/src/parser/TripleComponent.h index e48082aac4..85fe3788f7 100644 --- a/src/parser/TripleComponent.h +++ b/src/parser/TripleComponent.h @@ -220,18 +220,18 @@ class TripleComponent { // If `toValueId` could not convert to `Id`, we have a string, which we // look up in (and potentially add to) our local vocabulary. 
AD_CORRECTNESS_CHECK(isLiteral() || isIri()); - using LoI = ad_utility::triple_component::LiteralOrIri; - LoI newWord = [&]() -> LoI { + using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; + auto moveWord = [&]() -> LiteralOrIri { if (isLiteral()) { - return LoI{std::move(getLiteral())}; + return LiteralOrIri{std::move(getLiteral())}; } else { - return LoI{std::move(getIri())}; + return LiteralOrIri{std::move(getIri())}; } - }(); + }; // NOTE: There is a `&&` version of `getIndexAndAddIfNotContained`. - // Otherwise, `newWord` would be copied here despite the `std::move`. + // Otherwise, the word returned by `moveWord()` would be copied here. id = Id::makeFromLocalVocabIndex( - localVocab.getIndexAndAddIfNotContained(std::move(newWord))); + localVocab.getIndexAndAddIfNotContained(moveWord())); } return id.value(); }