Skip to content

Commit

Permalink
All tests pass.
Browse files Browse the repository at this point in the history
Lets review this and get on with it:)
  • Loading branch information
joka921 committed Apr 26, 2024
1 parent 7e47ab8 commit 3270bcc
Show file tree
Hide file tree
Showing 15 changed files with 121 additions and 72 deletions.
4 changes: 3 additions & 1 deletion benchmark/GroupByHashMapBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ auto generateRandomLocalVocabAndIndicesVec = [](size_t n, size_t m) {
for (size_t j = 0; j < m; j++) {
str += alphanum.at(gen());
}
indices.push_back(localVocab.getIndexAndAddIfNotContained(str));
using namespace ad_utility::triple_component;
indices.push_back(localVocab.getIndexAndAddIfNotContained(
LiteralOrIri::literalWithoutQuotes(str)));
}

return std::make_pair(std::move(localVocab), indices);
Expand Down
39 changes: 19 additions & 20 deletions src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,23 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id,
return std::nullopt;
}
}

using LiteralOrIri = ad_utility::triple_component::LiteralOrIri;
auto handleIriOrLiteral = [&escapeFunction](const LiteralOrIri& word)
-> std::optional<std::pair<std::string, const char*>> {
if constexpr (onlyReturnLiterals) {
if (!word.isLiteral()) {
return std::nullopt;
}
}
if constexpr (removeQuotesAndAngleBrackets) {
// TODO<joka921> Can we get rid of the string copying here?
return std::pair{
escapeFunction(std::string{asStringViewUnsafe(word.getContent())}),
nullptr};
}
return std::pair{escapeFunction(word.toStringRepresentation()), nullptr};

Check warning on line 215 in src/engine/ExportQueryExecutionTrees.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/ExportQueryExecutionTrees.cpp#L215

Added line #L215 was not covered by tests
};
switch (id.getDatatype()) {
case Datatype::WordVocabIndex: {
std::optional<string> entity =
Expand All @@ -212,28 +229,10 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id,
auto litOrIri =
ad_utility::triple_component::LiteralOrIri::fromStringRepresentation(
entity.value());
if constexpr (onlyReturnLiterals) {
if (!litOrIri.isLiteral()) {
return std::nullopt;
}
}
if constexpr (removeQuotesAndAngleBrackets) {
entity = asStringViewUnsafe(litOrIri.getContent());
}
// TODO<joka921> handle the exporting of literals more correctly.
return std::pair{escapeFunction(std::move(entity.value())), nullptr};
return handleIriOrLiteral(litOrIri);
}
case LocalVocabIndex: {
auto word = localVocab.getWord(id.getLocalVocabIndex());
if constexpr (onlyReturnLiterals) {
if (!word.isLiteral()) {
return std::nullopt;
}
}
if constexpr (removeQuotesAndAngleBrackets) {
word = RdfEscaping::normalizedContentFromLiteralOrIri(std::move(word));
}
return std::pair{escapeFunction(std::move(word)), nullptr};
return handleIriOrLiteral(localVocab.getWord(id.getLocalVocabIndex()));
}
case TextRecordIndex:
return std::pair{
Expand Down
5 changes: 3 additions & 2 deletions src/engine/GroupByHashMapOptimization.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
static constexpr auto valueAdder = []() {
auto numericValueAdder =
[]<typename T>(T value, double& sum, [[maybe_unused]] const bool& error)
requires std::is_arithmetic_v<T>
{ sum += static_cast<double>(value); };
requires std::is_arithmetic_v<T> {
sum += static_cast<double>(value);
};
auto nonNumericValueAdder = [](sparqlExpression::detail::NotNumeric,
[[maybe_unused]] const double& sum,
bool& error) { error = true; };
Expand Down
8 changes: 3 additions & 5 deletions src/engine/LocalVocab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include "global/Id.h"
#include "global/ValueId.h"


// _____________________________________________________________________________
LocalVocab LocalVocab::clone() const {
LocalVocab localVocabClone;
Expand Down Expand Up @@ -44,8 +43,7 @@ LocalVocabIndex LocalVocab::getIndexAndAddIfNotContainedImpl(WordT&& word) {
}

// _____________________________________________________________________________
LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained(
const Entry& word) {
LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained(const Entry& word) {
return getIndexAndAddIfNotContainedImpl(word);
}

Expand All @@ -68,11 +66,11 @@ std::optional<LocalVocabIndex> LocalVocab::getIndexOrNullopt(
}

// _____________________________________________________________________________
const LocalVocab::Entry& LocalVocab::getWord(LocalVocabIndex localVocabIndex) const {
const LocalVocab::Entry& LocalVocab::getWord(
LocalVocabIndex localVocabIndex) const {
return *localVocabIndex;
}


// _____________________________________________________________________________
std::vector<LocalVocab::Entry> LocalVocab::getAllWordsForTesting() const {
std::vector<Entry> result;
Expand Down
3 changes: 1 addition & 2 deletions src/engine/LocalVocab.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,7 @@ class LocalVocab {

// Get the index of a word in the local vocabulary, or std::nullopt if it is
// not contained. This is useful for testing.
std::optional<LocalVocabIndex> getIndexOrNullopt(
const Entry& word) const;
std::optional<LocalVocabIndex> getIndexOrNullopt(const Entry& word) const;

// The number of words in the vocabulary.
// Note: This is not constant time, but linear in the number of word sets.
Expand Down
3 changes: 1 addition & 2 deletions src/engine/sparqlExpressions/SparqlExpressionTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,7 @@ Id constantExpressionResultToId(T&& result, LocalVocabT& localVocab) {
if constexpr (ad_utility::isSimilar<
R, ad_utility::triple_component::LiteralOrIri>) {
return Id::makeFromLocalVocabIndex(
localVocab.getIndexAndAddIfNotContained(
AD_FWD(el)));
localVocab.getIndexAndAddIfNotContained(AD_FWD(el)));
} else {
static_assert(ad_utility::isSimilar<R, Id>);
return el;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ auto EffectiveBooleanValueGetter::operator()(
: True;
}
case Datatype::LocalVocabIndex: {
return (context->_localVocab.getWord(id.getLocalVocabIndex()).getContent().empty())
return (context->_localVocab.getWord(id.getLocalVocabIndex())
.getContent()
.empty())
? False
: True;
}
Expand Down
5 changes: 3 additions & 2 deletions src/parser/TripleComponent.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,9 @@ class TripleComponent {
if (isLiteral()) {
return LoI{std::move(getLiteral())};
} else {
return LoI { std::move(getIri())};
}}();
return LoI{std::move(getIri())};
}
}();
// NOTE: There is a `&&` version of `getIndexAndAddIfNotContained`.
// Otherwise, `newWord` would be copied here despite the `std::move`.
id = Id::makeFromLocalVocabIndex(
Expand Down
17 changes: 12 additions & 5 deletions test/GroupByTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,12 @@ TEST_F(GroupByTest, doGroupBy) {

// Create an input result table with a local vocabulary.
auto localVocab = std::make_shared<LocalVocab>();
localVocab->getIndexAndAddIfNotContained("<local1>");
localVocab->getIndexAndAddIfNotContained("<local2>");
localVocab->getIndexAndAddIfNotContained("<local3>");
constexpr auto iriref = [](const std::string& s) {
return ad_utility::triple_component::LiteralOrIri::iriref(s);
};
localVocab->getIndexAndAddIfNotContained(iriref("<local1>"));
localVocab->getIndexAndAddIfNotContained(iriref("<local2>"));
localVocab->getIndexAndAddIfNotContained(iriref("<local3>"));

IdTable inputData(6, makeAllocator());
// The input data types are KB, KB, VERBATIM, TEXT, FLOAT, STRING.
Expand Down Expand Up @@ -1231,7 +1234,9 @@ TEST_F(GroupByOptimizations, hashMapOptimizationGroupConcatIndex) {

auto getId = makeGetId(qec->getIndex());
auto getLocalVocabId = [&result](const std::string& word) {
auto value = result->localVocab().getIndexOrNullopt(word);
auto lit =
ad_utility::triple_component::LiteralOrIri::literalWithoutQuotes(word);
auto value = result->localVocab().getIndexOrNullopt(lit);
if (value.has_value())
return ValueId::makeFromLocalVocabIndex(value.value());
else
Expand Down Expand Up @@ -1278,7 +1283,9 @@ TEST_F(GroupByOptimizations, hashMapOptimizationGroupConcatLocalVocab) {
auto getId = makeGetId(qec->getIndex());
auto d = DoubleId;
auto getLocalVocabId = [&result](const std::string& word) {
auto value = result->localVocab().getIndexOrNullopt(word);
auto lit =
ad_utility::triple_component::LiteralOrIri::literalWithoutQuotes(word);
auto value = result->localVocab().getIndexOrNullopt(lit);
if (value.has_value())
return ValueId::makeFromLocalVocabIndex(value.value());
else
Expand Down
65 changes: 45 additions & 20 deletions test/LocalVocabTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,15 @@

namespace {
// Get test collection of words of a given size. The words are all distinct.
std::vector<std::string> getTestCollectionOfWords(size_t size) {
std::vector<std::string> testCollectionOfWords(size);
for (size_t i = 0; i < testCollectionOfWords.size(); ++i) {
testCollectionOfWords[i] = std::to_string(i * 7635475567ULL);

using TestWords = std::vector<ad_utility::triple_component::LiteralOrIri>;

TestWords getTestCollectionOfWords(size_t size) {
using namespace ad_utility::triple_component;
TestWords testCollectionOfWords;
for (size_t i = 0; i < size; ++i) {
testCollectionOfWords.push_back(
LiteralOrIri::literalWithoutQuotes(std::to_string(i * 7635475567ULL)));
}
return testCollectionOfWords;
}
Expand All @@ -45,7 +50,7 @@ std::vector<std::string> getTestCollectionOfWords(size_t size) {
// _____________________________________________________________________________
TEST(LocalVocab, constructionAndAccess) {
// Test collection of words.
std::vector<std::string> testWords = getTestCollectionOfWords(1000);
TestWords testWords = getTestCollectionOfWords(1000);

// Create empty local vocabulary.
LocalVocab localVocab;
Expand Down Expand Up @@ -80,7 +85,11 @@ TEST(LocalVocab, constructionAndAccess) {
// not contained in the local vocab. This makes use of the fact that the words
// in our test vocabulary only contain digits as letters, see above.
for (size_t i = 0; i < testWords.size(); ++i) {
ASSERT_FALSE(localVocab.getIndexOrNullopt(testWords[i] + "A"));
std::string content{asStringViewUnsafe(testWords[i].getContent())};
auto illegalWord =
ad_utility::triple_component::LiteralOrIri::literalWithoutQuotes(
content + "A");
ASSERT_FALSE(localVocab.getIndexOrNullopt(illegalWord));
}

// Check that a move gives the expected result.
Expand Down Expand Up @@ -132,23 +141,27 @@ TEST(LocalVocab, merge) {
std::vector<LocalVocabIndex> indices;
LocalVocab vocA;
LocalVocab vocB;
indices.push_back(vocA.getIndexAndAddIfNotContained("oneA"));
indices.push_back(vocA.getIndexAndAddIfNotContained("twoA"));
indices.push_back(vocA.getIndexAndAddIfNotContained("oneB"));
indices.push_back(vocA.getIndexAndAddIfNotContained("twoB"));
constexpr auto lit = [](std::string_view s) {
return ad_utility::triple_component::LiteralOrIri::literalWithoutQuotes(s);
};
indices.push_back(vocA.getIndexAndAddIfNotContained(lit("oneA")));
indices.push_back(vocA.getIndexAndAddIfNotContained(lit("twoA")));
indices.push_back(vocA.getIndexAndAddIfNotContained(lit("oneB")));
indices.push_back(vocA.getIndexAndAddIfNotContained(lit("twoB")));

// Clone it and test that the clone contains the same words.
auto vocabs = std::vector{&std::as_const(vocA), &std::as_const(vocB)};
LocalVocab localVocabMerged = LocalVocab::merge(vocabs);
ASSERT_EQ(localVocabMerged.size(), 4u);
ASSERT_THAT(localVocabMerged.getAllWordsForTesting(),
::testing::UnorderedElementsAre("oneA", "twoA", "oneB", "twoB"));
::testing::UnorderedElementsAre(lit("oneA"), lit("twoA"),
lit("oneB"), lit("twoB")));
vocA = LocalVocab{};
vocB = LocalVocab{};
EXPECT_EQ(*indices[0], "oneA");
EXPECT_EQ(*indices[1], "twoA");
EXPECT_EQ(*indices[2], "oneB");
EXPECT_EQ(*indices[3], "twoB");
EXPECT_EQ(*indices[0], lit("oneA"));
EXPECT_EQ(*indices[1], lit("twoA"));
EXPECT_EQ(*indices[2], lit("oneB"));
EXPECT_EQ(*indices[3], lit("twoB"));
}

// _____________________________________________________________________________
Expand All @@ -162,12 +175,24 @@ TEST(LocalVocab, propagation) {
//
// NOTE: This cannot be `const Operation&` because `Operation::getResult()` is
// not `const`.
auto checkLocalVocab = [&](Operation& operation,
std::vector<std::string> expectedWords) -> void {
auto checkLocalVocab =
[&](Operation& operation,
const std::vector<std::string>& expectedWordsAsStrings) -> void {
TestWords expectedWords;
auto toLitOrIri = [](const auto& word) {
using namespace ad_utility::triple_component;
if (word.starts_with('<')) {
return LiteralOrIri::iriref(word);
} else {
return LiteralOrIri::literalWithoutQuotes(word);
}
};
std::ranges::transform(expectedWordsAsStrings,
std::back_inserter(expectedWords), toLitOrIri);
std::shared_ptr<const ResultTable> resultTable = operation.getResult();
ASSERT_TRUE(resultTable)
<< "Operation: " << operation.getDescriptor() << std::endl;
std::vector<std::string> localVocabWords =
TestWords localVocabWords =
resultTable->localVocab().getAllWordsForTesting();
// We currently allow the local vocab to have multiple IDs for the same
// word, so we have to deduplicate first.
Expand Down Expand Up @@ -278,8 +303,8 @@ TEST(LocalVocab, propagation) {
testQec, {Variable{"?x"}},
{Alias{groupConcatExpression("?y", "|"), Variable{"?concat"}}},
qet(values1));
checkLocalVocab(groupBy, std::vector<std::string>{"<xN1>", "<yN1>", "<yN2>",
"\"yN1|yN2\""});
checkLocalVocab(
groupBy, std::vector<std::string>{"<xN1>", "<yN1>", "<yN2>", "yN1|yN2"});

// DISTINCT again, but after something has been added to the local vocabulary
// (to check that the "y1|y2" added by the GROUP BY does not also appear here,
Expand Down
10 changes: 7 additions & 3 deletions test/ServiceTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,13 @@ TEST_F(ServiceTest, computeResult) {
Id idY = getId("<y>");
const auto& localVocab = result->localVocab();
EXPECT_EQ(localVocab.size(), 3);
std::optional<LocalVocabIndex> idxBla = localVocab.getIndexOrNullopt("<bla>");
std::optional<LocalVocabIndex> idxBli = localVocab.getIndexOrNullopt("<bli>");
std::optional<LocalVocabIndex> idxBlu = localVocab.getIndexOrNullopt("<blu>");
auto get = [&localVocab](const std::string& s) {
return localVocab.getIndexOrNullopt(
ad_utility::triple_component::LiteralOrIri::iriref(s));
};
std::optional<LocalVocabIndex> idxBla = get("<bla>");
std::optional<LocalVocabIndex> idxBli = get("<bli>");
std::optional<LocalVocabIndex> idxBlu = get("<blu>");
ASSERT_TRUE(idxBli.has_value());
ASSERT_TRUE(idxBla.has_value());
ASSERT_TRUE(idxBlu.has_value());
Expand Down
19 changes: 14 additions & 5 deletions test/SparqlExpressionTestHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// Author: Johannes Kalmbach <[email protected]>

#include "./util/IdTestHelpers.h"
#include "./util/TripleComponentTestHelpers.h"
#include "engine/sparqlExpressions/SparqlExpression.h"
#include "global/ValueIdComparators.h"
#include "gtest/gtest.h"
Expand Down Expand Up @@ -82,16 +83,24 @@ struct TestContext {
zz = getId("\"zz\"@en");
blank = Id::makeFromBlankNodeIndex(BlankNodeIndex::make(0));

constexpr auto lit = [](std::string_view s) {
return ad_utility::triple_component::LiteralOrIri::literalWithoutQuotes(
s);
};
constexpr auto iri = [](const std::string& s) {
return ad_utility::triple_component::LiteralOrIri::iriref(s);
};

notInVocabA = Id::makeFromLocalVocabIndex(
localVocab.getIndexAndAddIfNotContained("\"notInVocabA\""));
localVocab.getIndexAndAddIfNotContained(lit("notInVocabA")));
notInVocabB = Id::makeFromLocalVocabIndex(
localVocab.getIndexAndAddIfNotContained("\"notInVocabB\""));
localVocab.getIndexAndAddIfNotContained(lit("notInVocabB")));
notInVocabC = Id::makeFromLocalVocabIndex(
localVocab.getIndexAndAddIfNotContained("<notInVocabC>"));
localVocab.getIndexAndAddIfNotContained(iri("<notInVocabC>")));
notInVocabD = Id::makeFromLocalVocabIndex(
localVocab.getIndexAndAddIfNotContained("<notInVocabD>"));
localVocab.getIndexAndAddIfNotContained(iri("<notInVocabD>")));
notInVocabAelpha = Id::makeFromLocalVocabIndex(
localVocab.getIndexAndAddIfNotContained("\"notInVocabÄlpha\""));
localVocab.getIndexAndAddIfNotContained(lit("notInVocabÄlpha")));

// Set up the `table` that represents the previous partial query results. It
// has five columns/variables: ?ints (only integers), ?doubles (only
Expand Down
5 changes: 3 additions & 2 deletions test/ValueIdTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,9 @@ TEST(ValueId, toDebugString) {
test(ValueId::makeFromBool(false), "Bool:false");
test(ValueId::makeFromBool(true), "Bool:true");
test(makeVocabId(15), "VocabIndex:15");
auto str = ad_utility::triple_component::LiteralOrIri::literalWithoutQuotes("SomeValue");
test(ValueId::makeFromLocalVocabIndex(&str), "LocalVocabIndex:SomeValue");
auto str = ad_utility::triple_component::LiteralOrIri::literalWithoutQuotes(
"SomeValue");
test(ValueId::makeFromLocalVocabIndex(&str), "LocalVocabIndex:\"SomeValue\"");
test(makeTextRecordId(37), "TextRecordIndex:37");
test(makeWordVocabId(42), "WordVocabIndex:42");
test(makeBlankNodeId(27), "BlankNodeIndex:27");
Expand Down
3 changes: 2 additions & 1 deletion test/ValuesTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ TEST(Values, computeResult) {
const auto& table = result->idTable();
Id x = ad_utility::testing::makeGetId(testQec->getIndex())("<x>");
auto I = ad_utility::testing::IntId;
auto l = result->localVocab().getIndexOrNullopt("<y>");
auto l = result->localVocab().getIndexOrNullopt(
ad_utility::triple_component::LiteralOrIri::iriref("<y>"));
ASSERT_TRUE(l.has_value());
auto U = Id::makeUndefined();
ASSERT_EQ(table,
Expand Down
3 changes: 2 additions & 1 deletion test/util/IdTestHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ inline auto LocalVocabId = [](std::integral auto v) {
static ad_utility::Synchronized<LocalVocab> localVocab;
using namespace ad_utility::triple_component;
return Id::makeFromLocalVocabIndex(
localVocab.wlock()->getIndexAndAddIfNotContained(LiteralOrIri::literalWithoutQuotes(std::to_string(v))));
localVocab.wlock()->getIndexAndAddIfNotContained(
LiteralOrIri::literalWithoutQuotes(std::to_string(v))));
};

inline auto TextRecordId = [](const auto& t) {
Expand Down

0 comments on commit 3270bcc

Please sign in to comment.