From d231e678236429385b37eef6795da7835f4cffa6 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Sat, 2 Dec 2023 04:01:51 +0100 Subject: [PATCH] Give each index a unique ID The ID is a string that can be obtained via the API with cmd=get-index-id. It is currently just a concatenation of the index name and several statistics on the number of triples (which is reasonably but not perfectly unique). This feature is useful for applications like https://github.com/ad-freiburg/qlever-petrimaps, which have an internal cache that depends on the index. When the index changes, the application should be able to notice this and clear its cache. With an index ID it can. --- src/engine/Server.cpp | 4 ++++ src/index/Index.cpp | 3 +++ src/index/Index.h | 2 ++ src/index/IndexImpl.cpp | 9 ++++++++- src/index/IndexImpl.h | 3 +++ test/IndexTest.cpp | 13 +++++++++++++ 6 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index e5008fe8ec..4d5c3dabff 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -312,6 +312,10 @@ Awaitable Server::process( } else if (auto cmd = checkParameter("cmd", "get-settings")) { logCommand(cmd, "get server settings"); response = createJsonResponse(RuntimeParameters().toMap(), request); + } else if (auto cmd = checkParameter("cmd", "get-index-id")) { + logCommand(cmd, "get index ID"); + response = createOkResponse(index_.getIndexId(), request, + ad_utility::MediaType::textPlain); } else if (auto cmd = checkParameter("cmd", "dump-active-queries", accessTokenOk)) { logCommand(cmd, "dump active queries"); diff --git a/src/index/Index.cpp b/src/index/Index.cpp index 419320737b..c82852da3b 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -266,6 +266,9 @@ const std::string& Index::getTextName() const { return pimpl_->getTextName(); } // ____________________________________________________________________________ const std::string& Index::getKbName() const { return pimpl_->getKbName(); } 
+// ____________________________________________________________________________ +const std::string& Index::getIndexId() const { return pimpl_->getIndexId(); } + // ____________________________________________________________________________ Index::NumNormalAndInternal Index::numTriples() const { return pimpl_->numTriples(); diff --git a/src/index/Index.h b/src/index/Index.h index bfe56f0cd7..10b9c0e1dc 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -237,6 +237,8 @@ class Index { const std::string& getKbName() const; + const std::string& getIndexId() const; + NumNormalAndInternal numTriples() const; size_t getNofTextRecords() const; diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 6c37326691..09331b323d 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1025,12 +1025,19 @@ void IndexImpl::readConfiguration() { }; loadDataMember("has-all-permutations", loadAllPermutations_, true); - loadDataMember("num-predicates-normal", numPredicatesNormal_); // These might be missing if there are only two permutations. loadDataMember("num-subjects-normal", numSubjectsNormal_, 0); loadDataMember("num-objects-normal", numObjectsNormal_, 0); loadDataMember("num-triples-normal", numTriplesNormal_); + + // Compute unique ID for this index. + // + // TODO: This is a simplistic way. It would be better to incorporate bytes + // from the index files. + indexId_ = absl::StrCat("#", getKbName(), ".", numTriplesNormal_, ".", + numSubjectsNormal_, ".", numPredicatesNormal_, ".", + numObjectsNormal_); } // ___________________________________________________________________________ diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 49d133d548..3fa2e0e959 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -158,6 +158,7 @@ class IndexImpl { size_t numPredicatesNormal_ = 0; size_t numObjectsNormal_ = 0; size_t numTriplesNormal_ = 0; + string indexId_; /** * @brief Maps pattern ids to sets of predicate ids. 
*/ @@ -421,6 +422,8 @@ class IndexImpl { const string& getKbName() const { return pso_.metaData().getName(); } + const string& getIndexId() const { return indexId_; } + size_t getNofTextRecords() const { return textMeta_.getNofTextRecords(); } size_t getNofWordPostings() const { return textMeta_.getNofWordPostings(); } size_t getNofEntityPostings() const { diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index a4dc459999..5edd4c6044 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -215,6 +215,19 @@ TEST(IndexTest, createFromOnDiskIndexTest) { ASSERT_TRUE(index.POS().metaData().getMetaData(b2).isFunctional()); }; +TEST(IndexTest, indexId) { + std::string kb = + " .\n" + " .\n" + " .\n" + " ."; + // Build index with all permutations (arg 2) and no patterns (arg 3). That + // way, we get four triples, three distinct subjects, one distinct predicate + // and two distinct objects. + const Index& index = getQec(kb, true, false)->getIndex(); + ASSERT_EQ(index.getIndexId(), "#.4.3.1.2"); +} + TEST(IndexTest, scanTest) { auto testWithAndWithoutPrefixCompression = [](bool useCompression) { using enum Permutation::Enum;