diff --git a/src/global/IdTriple.h b/src/global/IdTriple.h new file mode 100644 index 0000000000..425fec2d10 --- /dev/null +++ b/src/global/IdTriple.h @@ -0,0 +1,12 @@ +// Copyright 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Authors: Hannah Bast + +#pragma once + +#include + +#include "global/Id.h" + +// Should we have an own class for this? We need this at several places. +using IdTriple = std::array; diff --git a/src/global/ValueId.h b/src/global/ValueId.h index c741b6dd87..27b0263716 100644 --- a/src/global/ValueId.h +++ b/src/global/ValueId.h @@ -326,10 +326,15 @@ class ValueId { /// This operator is only for debugging and testing. It returns a /// human-readable representation. friend std::ostream& operator<<(std::ostream& ostr, const ValueId& id) { - ostr << toString(id.getDatatype()) << ':'; + ostr << toString(id.getDatatype())[0] << ':'; + if (id.getDatatype() == Datatype::Undefined) { + return ostr << id.getBits(); + } + auto visitor = [&ostr](T&& value) { if constexpr (ad_utility::isSimilar) { - ostr << "Undefined"; + // already handled above + AD_FAIL(); } else if constexpr (ad_utility::isSimilar || ad_utility::isSimilar) { ostr << std::to_string(value); diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 059b97cf3e..6d23931c0e 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -679,10 +679,12 @@ IndexImpl::createPermutations(size_t numColumns, auto&& sortedTriples, } // ________________________________________________________________________ -size_t IndexImpl::createPermutationPair(size_t numColumns, auto&& sortedTriples, +template +size_t IndexImpl::createPermutationPair(size_t numColumns, + SortedTriplesType&& sortedTriples, const Permutation& p1, const Permutation& p2, - auto&&... perTripleCallbacks) { + CallbackTypes&&... 
perTripleCallbacks) { auto [numDistinctC0, metaData1, metaData2] = createPermutations( numColumns, AD_FWD(sortedTriples), p1, p2, AD_FWD(perTripleCallbacks)...); // Set the name of this newly created pair of `IndexMetaData` objects. diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 46c01e47c6..1d563f21a6 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -490,17 +490,20 @@ class IndexImpl { // OSP-OPS, SPO-SOP). First creates the permutation and then exchanges the // multiplicities and also writes the MetaData to disk. So we end up with // fully functional permutations. + // + // TODO: The rest of this comment looks outdated. + // // performUnique must be set for the first pair created using vec to enforce // RDF standard (no duplicate triples). // createPatternsAfterFirst is only valid when the pair is SPO-SOP because // the SPO permutation is also needed for patterns (see usage in // IndexImpl::createFromFile function) - [[nodiscard]] size_t createPermutationPair(size_t numColumns, - auto&& sortedTriples, - const Permutation& p1, - const Permutation& p2, - auto&&... perTripleCallbacks); + template + [[nodiscard]] size_t createPermutationPair( + size_t numColumns, SortedTriplesType&& sortedTriples, + const Permutation& p1, const Permutation& p2, + CallbackTypes&&... perTripleCallbacks); // wrapper for createPermutation that saves a lot of code duplications // Writes the permutation that is specified by argument permutation diff --git a/src/index/IndexMetaData.h b/src/index/IndexMetaData.h index fca769d644..32c5100f97 100644 --- a/src/index/IndexMetaData.h +++ b/src/index/IndexMetaData.h @@ -1,6 +1,7 @@ // Copyright 2015, University of Freiburg, // Chair of Algorithms and Data Structures. 
// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) + #pragma once #include @@ -13,14 +14,14 @@ #include #include -#include "../global/Id.h" -#include "../util/File.h" -#include "../util/HashMap.h" -#include "../util/MmapVector.h" -#include "../util/ReadableNumberFact.h" -#include "../util/Serializer/Serializer.h" -#include "./MetaDataHandler.h" -#include "CompressedRelation.h" +#include "global/Id.h" +#include "index/CompressedRelation.h" +#include "index/MetaDataHandler.h" +#include "util/File.h" +#include "util/HashMap.h" +#include "util/MmapVector.h" +#include "util/ReadableNumberFact.h" +#include "util/Serializer/Serializer.h" using std::array; using std::pair; @@ -218,7 +219,8 @@ template ad_utility::File& operator<<(ad_utility::File& f, const IndexMetaData& imd); -// aliases for easier use in Index class +// Aliases for easier use in classes that build or query permutations, like +// `IndexImpl`. using MetaWrapperMmap = MetaDataWrapperDense>; using MetaWrapperMmapView = MetaDataWrapperDense< diff --git a/src/index/MetaDataHandler.h b/src/index/MetaDataHandler.h index da84f1158a..6de4b6a856 100644 --- a/src/index/MetaDataHandler.h +++ b/src/index/MetaDataHandler.h @@ -1,24 +1,40 @@ -// Copyright 2018, University of Freiburg, +// Copyright 2018 - 2023, University of Freiburg // Chair of Algorithms and Data Structures -// Author: Johannes Kalmbach (johannes.kalmbach@gmail.com) -// +// Authors: Johannes Kalmbach +// Hannah Bast + #pragma once #include #include -#include "../global/Id.h" -#include "../util/Exception.h" -#include "../util/HashMap.h" -#include "../util/Iterators.h" -#include "../util/Log.h" -#include "../util/Serializer/Serializer.h" -#include "./CompressedRelation.h" - -// _____________________________________________________________________ +#include "global/Id.h" +#include "index/CompressedRelation.h" +#include "util/Exception.h" +#include "util/HashMap.h" +#include "util/Iterators.h" +#include "util/Log.h" +#include 
"util/Serializer/Serializer.h" + +// Wrapper class for access to `CompressedRelationMetadata` objects (one per +// relation) stored in a vector. Specifically, our index uses this with `M = +// MmapVector<CompressedRelationMetadata>`; see `index/IndexMetaData.h` at the +// bottom. +// +// TODO: We needed this at some point because we used to have two implementations +// of `IndexMetaData`, one using mmaps and one using hash maps, and we wanted to +// have a common interface for both. We no longer use the hash map +// implementation and so the wrapper class (and the complexity that goes along +// with it) is probably no longer needed. template class MetaDataWrapperDense { + private: + // A vector of metadata objects. + M _vec; + public: + // An iterator with an additional method `getId()` that gives the relation ID + // of the current metadata object. template struct AddGetIdIterator : BaseIterator { using BaseIterator::BaseIterator; @@ -39,6 +55,7 @@ class MetaDataWrapperDense { // The underlying array is sorted, so all iterators are ordered iterators using ConstOrderedIterator = ConstIterator; + // The type of the stored metadata objects. using value_type = typename M::value_type; // _________________________________________________________ @@ -88,7 +105,7 @@ class MetaDataWrapperDense { // ____________________________________________________________ void set(Id id, const value_type& value) { - // Assert that the ids are ascending. + // Check that the `Id`s are added in strictly ascending order. 
AD_CONTRACT_CHECK(_vec.size() == 0 || _vec.back().col0Id_ < id); _vec.push_back(value); } @@ -116,5 +133,10 @@ class MetaDataWrapperDense { }; return std::lower_bound(_vec.begin(), _vec.end(), id, cmp); } - M _vec; + Iterator lower_bound(Id id) { + auto cmp = [](const auto& metaData, Id id) { + return metaData.col0Id_ < id; + }; + return std::lower_bound(_vec.begin(), _vec.end(), id, cmp); + } }; diff --git a/test/CompressedRelationsTest.cpp b/test/CompressedRelationsTest.cpp index d333702227..c5e79c510b 100644 --- a/test/CompressedRelationsTest.cpp +++ b/test/CompressedRelationsTest.cpp @@ -507,5 +507,5 @@ TEST(CompressedRelationReader, PermutedTripleToString) { auto tr = CompressedBlockMetadata::PermutedTriple{V(12), V(13), V(27)}; std::stringstream str; str << tr; - ASSERT_EQ(str.str(), "Triple: VocabIndex:12 VocabIndex:13 VocabIndex:27\n"); + ASSERT_EQ(str.str(), "Triple: V:12 V:13 V:27\n"); } diff --git a/test/SparqlExpressionTypesTest.cpp b/test/SparqlExpressionTypesTest.cpp index a05d650185..e95ba962f4 100644 --- a/test/SparqlExpressionTypesTest.cpp +++ b/test/SparqlExpressionTypesTest.cpp @@ -34,7 +34,7 @@ TEST(SparqlExpressionTypes, printIdOrString) { IdOrLiteralOrIri idOrString{Id::makeUndefined()}; PrintTo(idOrString, &str); - ASSERT_EQ(str.str(), "Undefined:Undefined"); + ASSERT_EQ(str.str(), "U:0"); idOrString = LiteralOrIri::literalWithoutQuotes("bimm"); // Clear the stringstream. 
str.str({}); diff --git a/test/ValueIdTest.cpp b/test/ValueIdTest.cpp index 58c02c2219..341d988e3a 100644 --- a/test/ValueIdTest.cpp +++ b/test/ValueIdTest.cpp @@ -289,21 +289,27 @@ TEST(ValueId, toDebugString) { stream << id; ASSERT_EQ(stream.str(), expected); }; - test(ValueId::makeUndefined(), "Undefined:Undefined"); - test(ValueId::makeFromInt(-42), "Int:-42"); - test(ValueId::makeFromDouble(42.0), "Double:42.000000"); - test(ValueId::makeFromBool(false), "Bool:false"); - test(ValueId::makeFromBool(true), "Bool:true"); - test(makeVocabId(15), "VocabIndex:15"); + test(ValueId::makeUndefined(), "U:0"); + // Values with type undefined can usually only have one value (all data bits + // zero). Sometimes ValueIds with type undefined but non-zero data bits are + // used. The following test tests one of these internal ValueIds. + ValueId customUndefined = ValueId::fromBits( + ValueId::IntegerType::fromNBit(100) | + (static_cast(Datatype::Undefined) << ValueId::numDataBits)); + test(customUndefined, "U:100"); + test(ValueId::makeFromDouble(42.0), "D:42.000000"); + test(ValueId::makeFromBool(false), "B:false"); + test(ValueId::makeFromBool(true), "B:true"); + test(makeVocabId(15), "V:15"); auto str = ad_utility::triple_component::LiteralOrIri::literalWithoutQuotes( "SomeValue"); - test(ValueId::makeFromLocalVocabIndex(&str), "LocalVocabIndex:\"SomeValue\""); - test(makeTextRecordId(37), "TextRecordIndex:37"); - test(makeWordVocabId(42), "WordVocabIndex:42"); - test(makeBlankNodeId(27), "BlankNodeIndex:27"); + test(ValueId::makeFromLocalVocabIndex(&str), "L:\"SomeValue\""); + test(makeTextRecordId(37), "T:37"); + test(makeWordVocabId(42), "W:42"); + test(makeBlankNodeId(27), "B:27"); test(ValueId::makeFromDate( DateOrLargeYear{123456, DateOrLargeYear::Type::Year}), - "Date:123456"); + "D:123456"); // make an ID with an invalid datatype ASSERT_ANY_THROW(test(ValueId::max(), "blim")); } diff --git a/test/ValuesForTestingTest.cpp b/test/ValuesForTestingTest.cpp index 
b47e5d3b38..95ce76ce46 100644 --- a/test/ValuesForTestingTest.cpp +++ b/test/ValuesForTestingTest.cpp @@ -27,8 +27,7 @@ TEST(ValuesForTesting, valuesForTesting) { ASSERT_THAT( v.getCacheKey(), - ::testing::StartsWith( - "Values for testing with 2 columns. VocabIndex:3 VocabIndex:12")); + ::testing::StartsWith("Values for testing with 2 columns. V:3 V:12")); ASSERT_THAT(v.getCacheKey(), ::testing::EndsWith("Supports limit: 0")); ASSERT_EQ(v.getDescriptor(), "explicit values for testing"); ASSERT_TRUE(v.resultSortedOn().empty());