From 84a4bdf9ee790390ffaea5d8eeebfbb52f61f8c7 Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Fri, 9 Jun 2023 16:30:57 +0200
Subject: [PATCH 1/4] Code for locating triples in an existing index

This is the first part of a series of PRs split off from the large
proof-of-concept PR https://github.com/ad-freiburg/qlever/pull/916,
which realizes SPARQL 1.1 Update.
---
 src/global/IdTriple.h          |  18 ++
 src/index/CMakeLists.txt       |   1 +
 src/index/CompressedRelation.h |   1 +
 src/index/IndexMetaData.h      |  20 +-
 src/index/LocatedTriples.cpp   | 349 +++++++++++++++++++++++++++++++++
 src/index/LocatedTriples.h     | 196 ++++++++++++++++++
 src/index/MetaDataHandler.h    |  51 +++--
 test/CMakeLists.txt            |   2 +
 test/LocatedTriplesTest.cpp    | 173 ++++++++++++++++
 9 files changed, 789 insertions(+), 22 deletions(-)
 create mode 100644 src/global/IdTriple.h
 create mode 100644 src/index/LocatedTriples.cpp
 create mode 100644 src/index/LocatedTriples.h
 create mode 100644 test/LocatedTriplesTest.cpp
diff --git a/src/global/IdTriple.h b/src/global/IdTriple.h
new file mode 100644
index 0000000000..0353b8c747
--- /dev/null
+++ b/src/global/IdTriple.h
@@ -0,0 +1,18 @@
+// Copyright 2023, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Hannah Bast <bast@cs.uni-freiburg.de>
+
+#pragma once
+
+#include <array>
+
+#include "global/Id.h"
+
+// Should we have a dedicated class for this? We need this in several places.
+using IdTriple = std::array<Id, 3>;
+
+// Hash value for an `IdTriple`: combines the hash state `h` with all three `Id`s.
+template <typename H>
+H AbslHashValue(H h, const IdTriple& triple) {
+  return H::combine(std::move(h), triple[0], triple[1], triple[2]);
+}
diff --git a/src/index/CMakeLists.txt b/src/index/CMakeLists.txt
index 4bbf53f647..fd65af2bd4 100644
--- a/src/index/CMakeLists.txt
+++ b/src/index/CMakeLists.txt
@@ -8,6 +8,7 @@ add_library(index
         VocabularyOnDisk.h VocabularyOnDisk.cpp
         IndexMetaData.h IndexMetaDataImpl.h
         MetaDataHandler.h
+        LocatedTriples.h LocatedTriples.cpp
         StxxlSortFunctors.h
         TextMetaData.cpp TextMetaData.h
         DocsDB.cpp DocsDB.h
diff --git a/src/index/CompressedRelation.h b/src/index/CompressedRelation.h
index 3c6c5df80a..63d39a28ba 100644
--- a/src/index/CompressedRelation.h
+++ b/src/index/CompressedRelation.h
@@ -305,6 +305,7 @@ class CompressedRelationReader {
   static void decompressColumn(const std::vector<char>& compressedColumn,
                                size_t numRowsToRead, Iterator iterator);
 
+ public:
   // Read the block that is identified by the `blockMetaData` from the `file`,
   // decompress and return it.
   // If `columnIndices` is `nullopt`, then all columns of the block are read,
diff --git a/src/index/IndexMetaData.h b/src/index/IndexMetaData.h
index 4e3ef4b38f..3039c0ba28 100644
--- a/src/index/IndexMetaData.h
+++ b/src/index/IndexMetaData.h
@@ -1,6 +1,7 @@
 // Copyright 2015, University of Freiburg,
 // Chair of Algorithms and Data Structures.
 // Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de)
+
 #pragma once
 
 #include <stdio.h>
@@ -13,14 +14,14 @@
 #include <utility>
 #include <vector>
 
-#include "../global/Id.h"
-#include "../util/File.h"
-#include "../util/HashMap.h"
-#include "../util/MmapVector.h"
-#include "../util/ReadableNumberFact.h"
-#include "../util/Serializer/Serializer.h"
-#include "./MetaDataHandler.h"
-#include "CompressedRelation.h"
+#include "global/Id.h"
+#include "index/CompressedRelation.h"
+#include "index/MetaDataHandler.h"
+#include "util/File.h"
+#include "util/HashMap.h"
+#include "util/MmapVector.h"
+#include "util/ReadableNumberFact.h"
+#include "util/Serializer/Serializer.h"
 
 using std::array;
 using std::pair;
@@ -86,7 +87,10 @@ class IndexMetaData {
   // name and the variable name are terrible.
 
   // For each relation, its meta data.
+ public:
   MapType _data;
+
+ private:
   // For each compressed block, its meta data.
   BlocksType _blockData;
 
diff --git a/src/index/LocatedTriples.cpp b/src/index/LocatedTriples.cpp
new file mode 100644
index 0000000000..acd6988675
--- /dev/null
+++ b/src/index/LocatedTriples.cpp
@@ -0,0 +1,349 @@
+// Copyright 2023, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Hannah Bast <bast@cs.uni-freiburg.de>
+
+#include "index/LocatedTriples.h"
+
+#include <algorithm>
+
+#include "index/CompressedRelation.h"
+#include "index/IndexMetaData.h"
+#include "index/Permutations.h"
+
+// ____________________________________________________________________________
+LocatedTriple LocatedTriple::locateTripleInPermutation(
+    Id id1, Id id2, Id id3, const Permutation& permutation) {
+  // Get the internal data structures from the permutation.
+  auto& file = permutation._file;
+  const auto& meta = permutation._meta;
+  const auto& reader = permutation._reader;
+
+  // Find the index of the first block where the last triple is not smaller.
+  //
+  // NOTE: Since `col2LastId_` has been added to `CompressedBlockMetadata`, this
+  // can be computed without having to decompress any blocks.
+  const vector<CompressedBlockMetadata>& blocks = meta.blockData();
+  auto matchingBlock = std::lower_bound(
+      blocks.begin(), blocks.end(), std::array<Id, 3>{id1, id2, id3},
+      [&](const CompressedBlockMetadata& block, const auto& triple) -> bool {
+        if (block.col0LastId_ < triple[0]) {
+          return true;
+        } else if (block.col0LastId_ == triple[0]) {
+          if (block.col1LastId_ < triple[1]) {
+            return true;
+          } else if (block.col1LastId_ == triple[1]) {
+            return block.col2LastId_ < triple[2];
+          }
+        }
+        return false;
+      });
+  size_t blockIndex = matchingBlock - blocks.begin();
+
+  // Preliminary `LocatedTriple` object with the correct `blockIndex` and
+  // `Id`s, and a special `rowIndexInBlock` (see below) and `existsInIndex` set
+  // to `false`.
+  LocatedTriple locatedTriple{blockIndex, NO_ROW_INDEX, id1, id2, id3, false};
+
+  // If all `Id`s from all blocks are smaller, we return the index of the last
+  // block plus one (typical "end" semantics) and the special row index
+  // `NO_ROW_INDEX` (see how this is considered in `mergeTriples`).
+  if (matchingBlock == blocks.end()) {
+    AD_CORRECTNESS_CHECK(blockIndex == blocks.size());
+    return locatedTriple;
+  }
+
+  // Read and decompress the block.
+  DecompressedBlock blockTuples =
+      reader.readAndDecompressBlock(*matchingBlock, file, std::nullopt);
+
+  // Find the smallest relation `Id` that is not smaller than `id1` and get its
+  // metadata and the position of the first and last triple with that `Id` in
+  // the block.
+  //
+  // IMPORTANT: If relation `id1` exists in the index, but our triple is larger
+  // than all triples of that relation in the index and the last triple of that
+  // relation ends a block, then our block search above (correctly) landed us at
+  // the next block. We can detect this by checking whether the first relation
+  // `Id` of the block is larger than `id1` and then we should get the metadata
+  // for that `Id` and not for `id1` (which would pertain to a previous block).
+  //
+  // TODO: There is still a bug in `MetaDataWrapperHashMap::lower_bound`,
+  // which is relevant in the rare case where a triple is inserted with an
+  // `Id` for predicate that is not a new `Id`, but has not been used for a
+  // predicate in the original index.
+  //
+  // NOTE: Since we have already handled the case, where all `Id`s in the
+  // permutation are smaller, above, such a relation should exist.
+  Id searchId =
+      matchingBlock->col0FirstId_ > id1 ? matchingBlock->col0FirstId_ : id1;
+  const auto& it = meta._data.lower_bound(searchId);
+  AD_CORRECTNESS_CHECK(it != meta._data.end());
+  Id id = it.getId();
+  const auto& relationMetadata = meta.getMetaData(id);
+  size_t offsetBegin = relationMetadata.offsetInBlock_;
+  size_t offsetEnd = offsetBegin + relationMetadata.numRows_;
+  // Note: The maximal `uint64_t` value is the marker for a relation that spans
+  // multiple blocks; the block found above then contains only that relation.
+  if (offsetBegin == std::numeric_limits<uint64_t>::max()) {
+    offsetBegin = 0;
+    offsetEnd = blockTuples.size();
+  }
+  AD_CORRECTNESS_CHECK(offsetBegin <= blockTuples.size());
+  AD_CORRECTNESS_CHECK(offsetEnd <= blockTuples.size());
+
+  // If we have found `id1`, we can do a binary search in the portion of the
+  // block that pertains to it (note the special case mentioned above, where
+  // we are already at the beginning of the next block).
+  //
+  // Otherwise, `id` is the next larger `Id` and the position of the first
+  // triple of that relation is exactly the position we are looking for.
+  if (id == id1) {
+    locatedTriple.rowIndexInBlock =
+        std::lower_bound(blockTuples.begin() + offsetBegin,
+                         blockTuples.begin() + offsetEnd,
+                         std::array<Id, 2>{id2, id3},
+                         [](const auto& a, const auto& b) {
+                           return a[0] < b[0] || (a[0] == b[0] && a[1] < b[1]);
+                         }) -
+        blockTuples.begin();
+    // Check if the triple at the found position is equal to `id1 id2 id3`.
+    // Note that our default for `existsInIndex` was set to `false` above.
+    const size_t& i = locatedTriple.rowIndexInBlock;
+    AD_CORRECTNESS_CHECK(i < blockTuples.size());
+    if (i < offsetEnd && blockTuples(i, 0) == id2 && blockTuples(i, 1) == id3) {
+      locatedTriple.existsInIndex = true;
+    }
+  } else {
+    AD_CORRECTNESS_CHECK(id1 < id);
+    locatedTriple.rowIndexInBlock = offsetBegin;
+  }
+
+  // Return the result.
+  return locatedTriple;
+}
+
+// ____________________________________________________________________________
+template <LocatedTriplesPerBlock::MatchMode matchMode>
+std::pair<size_t, size_t> LocatedTriplesPerBlock::numTriplesImpl(
+    size_t blockIndex, Id id1, Id id2) const {
+  // Look up the block once (instead of `contains` followed by `at`). If no
+  // located triples exist for `blockIndex`, there is no entry in `map_`.
+  const auto blockIt = map_.find(blockIndex);
+  if (blockIt == map_.end()) {
+    return {0, 0};
+  }
+
+  // Count the matching located triples, separately for those that exist in
+  // the index ("to be deleted") and those that are new ("to be inserted").
+  size_t countExists = 0;
+  size_t countNew = 0;
+  for (const LocatedTriple& locatedTriple : blockIt->second) {
+    // Decide at compile time which `Id`s have to agree for a match.
+    bool matches = true;
+    if constexpr (matchMode == MatchMode::MatchId1) {
+      matches = locatedTriple.id1 == id1;
+    } else if constexpr (matchMode == MatchMode::MatchId1AndId2) {
+      matches = locatedTriple.id1 == id1 && locatedTriple.id2 == id2;
+    }
+    // Increase the right counter.
+    if (matches && locatedTriple.existsInIndex) {
+      ++countExists;
+    } else if (matches) {
+      ++countNew;
+    }
+  }
+
+  // NOTE: The order of the returned pair is (number of new triples, number
+  // of existing triples).
+  return {countNew, countExists};
+}
+
+// ____________________________________________________________________________
+std::pair<size_t, size_t> LocatedTriplesPerBlock::numTriples(
+    size_t blockIndex) const {
+  return numTriplesImpl<MatchMode::MatchAll>(blockIndex);  // No `Id` filter.
+}
+
+// ____________________________________________________________________________
+std::pair<size_t, size_t> LocatedTriplesPerBlock::numTriples(size_t blockIndex,
+                                                             Id id1) const {
+  return numTriplesImpl<MatchMode::MatchId1>(blockIndex, id1);  // Only `id1`.
+}
+
+// ____________________________________________________________________________
+std::pair<size_t, size_t> LocatedTriplesPerBlock::numTriples(size_t blockIndex,
+                                                             Id id1,
+                                                             Id id2) const {
+  return numTriplesImpl<MatchMode::MatchId1AndId2>(blockIndex, id1, id2);
+}
+
+// ____________________________________________________________________________
+template <LocatedTriplesPerBlock::MatchMode matchMode>
+size_t LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
+                                            std::optional<IdTable> block,
+                                            IdTable& result,
+                                            size_t offsetInResult, Id id1,
+                                            Id id2, size_t rowIndexInBlockBegin,
+                                            size_t rowIndexInBlockEnd) const {
+  // This method should only be called if there are located triples in the
+  // specified block.
+  AD_CONTRACT_CHECK(map_.contains(blockIndex));
+
+  // The special case `block == std::nullopt` (write only located triples to
+  // `result`) is only allowed when `id1` or both `id1` and `id2` are specified.
+  AD_CONTRACT_CHECK(block.has_value() || matchMode != MatchMode::MatchAll);
+
+  // If `rowIndexInBlockEnd` has the default value (see `LocatedTriples.h`), the
+  // intended semantics is that we read the whole block (note that we can't have
+  // a default value that depends on the values of previous arguments).
+  if (rowIndexInBlockEnd == LocatedTriple::NO_ROW_INDEX && block.has_value()) {
+    rowIndexInBlockEnd = block.value().size();
+  }
+
+  // Check that `rowIndexInBlockBegin` and `rowIndexInBlockEnd` define a valid
+  // and non-empty range and that it is a subrange of `block` (unless the latter
+  // is `std::nullopt`).
+  if (block.has_value()) {
+    AD_CONTRACT_CHECK(rowIndexInBlockBegin < block.value().size());
+    AD_CONTRACT_CHECK(rowIndexInBlockEnd <= block.value().size());
+  }
+  AD_CONTRACT_CHECK(rowIndexInBlockBegin < rowIndexInBlockEnd);
+
+  // If we restrict `id1` and `id2`, the index block and the result must have
+  // one column (for the `id3`). Otherwise, they must have two columns (for the
+  // `id2` and the `id3`).
+  if constexpr (matchMode == MatchMode::MatchId1AndId2) {
+    AD_CONTRACT_CHECK(!block.has_value() || block.value().numColumns() == 1);
+    AD_CONTRACT_CHECK(result.numColumns() == 1);
+  } else {
+    AD_CONTRACT_CHECK(!block.has_value() || block.value().numColumns() == 2);
+    AD_CONTRACT_CHECK(result.numColumns() == 2);
+  }
+
+  auto resultEntry = result.begin() + offsetInResult;
+  const auto& locatedTriples = map_.at(blockIndex);
+  auto locatedTriple = locatedTriples.begin();
+
+  // Helper lambda that checks whether the given located triple should be
+  // considered, given the `matchMode`.
+  auto locatedTripleMatches = [&]() {
+    if constexpr (matchMode == MatchMode::MatchAll) {
+      return true;
+    } else if constexpr (matchMode == MatchMode::MatchId1) {
+      return locatedTriple->id1 == id1;
+    } else if constexpr (matchMode == MatchMode::MatchId1AndId2) {
+      return locatedTriple->id1 == id1 && locatedTriple->id2 == id2;
+    }
+  };
+
+  // Advance to the first located triple in the specified range.
+  while (locatedTriple != locatedTriples.end() &&
+         locatedTriple->rowIndexInBlock < rowIndexInBlockBegin) {
+    ++locatedTriple;
+  }
+
+  // Iterate over all located triples in the specified range. In the special
+  // case `block == std::nullopt` (only write located triples to `result`), all
+  // relevant located triples have `rowIndexInBlock == NO_ROW_INDEX` (here we
+  // need that `NO_ROW_INDEX` is the maximal `size_t` value minus one).
+  if (!block.has_value()) {
+    rowIndexInBlockBegin = LocatedTriple::NO_ROW_INDEX;
+    rowIndexInBlockEnd = rowIndexInBlockBegin + 1;
+    AD_CORRECTNESS_CHECK(rowIndexInBlockBegin < rowIndexInBlockEnd);
+  }
+  for (size_t rowIndex = rowIndexInBlockBegin; rowIndex < rowIndexInBlockEnd;
+       ++rowIndex) {
+    // Append triples that are marked for insertion at this `rowIndex` to the
+    // result.
+    while (locatedTriple != locatedTriples.end() &&
+           locatedTriple->rowIndexInBlock == rowIndex &&
+           locatedTriple->existsInIndex == false) {
+      if (locatedTripleMatches()) {
+        if constexpr (matchMode == MatchMode::MatchId1AndId2) {
+          (*resultEntry)[0] = locatedTriple->id3;
+        } else {
+          (*resultEntry)[0] = locatedTriple->id2;
+          (*resultEntry)[1] = locatedTriple->id3;
+        }
+        ++resultEntry;
+      }
+      ++locatedTriple;
+    }
+
+    // Append the row at this position to the result unless a matching located
+    // triple marks it for deletion (a non-matching deletion has no effect).
+    bool deleteThisEntry = false;
+    if (locatedTriple != locatedTriples.end() &&
+        locatedTriple->rowIndexInBlock == rowIndex &&
+        locatedTriple->existsInIndex == true) {
+      deleteThisEntry = locatedTripleMatches();
+      ++locatedTriple;
+    }
+    if (block.has_value() && !deleteThisEntry) {
+      *resultEntry++ = block.value()[rowIndex];
+    }
+  };
+
+  // Return the number of rows written to `result`.
+  return resultEntry - (result.begin() + offsetInResult);
+}
+
+// ____________________________________________________________________________
+size_t LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
+                                            std::optional<IdTable> block,
+                                            IdTable& result,
+                                            size_t offsetInResult) const {
+  return mergeTriples<MatchMode::MatchAll>(blockIndex, std::move(block), result,
+                                           offsetInResult);  // No `Id` filter.
+}
+
+// ____________________________________________________________________________
+size_t LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
+                                            std::optional<IdTable> block,
+                                            IdTable& result,
+                                            size_t offsetInResult, Id id1,
+                                            size_t rowIndexInBlockBegin) const {
+  return mergeTriples<MatchMode::MatchId1>(
+      blockIndex, std::move(block), result, offsetInResult, id1,
+      Id::makeUndefined(), rowIndexInBlockBegin);  // `id2` is not used here.
+}
+
+// ____________________________________________________________________________
+size_t LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
+                                            std::optional<IdTable> block,
+                                            IdTable& result,
+                                            size_t offsetInResult, Id id1,
+                                            Id id2, size_t rowIndexInBlockBegin,
+                                            size_t rowIndexInBlockEnd) const {
+  return mergeTriples<MatchMode::MatchId1AndId2>(
+      blockIndex, std::move(block), result, offsetInResult, id1, id2,
+      rowIndexInBlockBegin, rowIndexInBlockEnd);  // Match `id1` and `id2`.
+}
+
+// ____________________________________________________________________________
+std::ostream& operator<<(std::ostream& os, const LocatedTriple& lt) {
+  // Print the special row index by name, all other row indices as numbers.
+  const bool hasRowIndex = lt.rowIndexInBlock != LocatedTriple::NO_ROW_INDEX;
+  const std::string rowIndex =
+      hasRowIndex ? std::to_string(lt.rowIndexInBlock) : "NO_ROW_INDEX";
+  os << "LT(" << lt.blockIndex << " " << rowIndex << " " << lt.id1 << " "
+     << lt.id2 << " " << lt.id3 << " " << lt.existsInIndex << ")";
+  return os;
+}
+
+// ____________________________________________________________________________
+std::ostream& operator<<(std::ostream& os, const LocatedTriples& lts) {
+  // Write all triples to `os` (and not to `std::cout`), each followed by " ".
+  os << "{";
+  std::copy(lts.begin(), lts.end(),
+            std::ostream_iterator<LocatedTriple>(os, " "));
+  return os << "}";
+}
+
+// ____________________________________________________________________________
+std::ostream& operator<<(std::ostream& os, const LocatedTriplesPerBlock& ltpb) {
+  for (const auto& [blockIndex, lts] : ltpb.map_) {  // Bind, don't copy sets.
+    os << "Block #" << blockIndex << ": " << lts << std::endl;
+  }
+  return os;
+}
diff --git a/src/index/LocatedTriples.h b/src/index/LocatedTriples.h
new file mode 100644
index 0000000000..bb967bfe95
--- /dev/null
+++ b/src/index/LocatedTriples.h
@@ -0,0 +1,196 @@
+// Copyright 2023, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Authors: Hannah Bast <bast@cs.uni-freiburg.de>
+
+#pragma once
+
+#include "engine/idTable/IdTable.h"
+#include "global/IdTriple.h"
+#include "util/HashMap.h"
+
+class Permutation;
+
+// A triple and its location in a particular permutation.
+//
+// If a triple is not contained in the permutation, the location is the location
+// of the next larger triple (which may be in the next block or beyond the last
+// block). For a detailed definition of all border cases, see the definition at
+// the end of this file.
+//
+// NOTE: Technically, `blockIndex` and the `existsInIndex` are redundant in this
+// record because they can be derived when the class is used. However, they are
+// useful for testing, and for a small number of delta triples (think millions),
+// space efficiency is not a significant issue for this class.
+struct LocatedTriple {
+  // The index of the block and the location within that block, according to the
+  // definition at the end of this file.
+  size_t blockIndex;
+  size_t rowIndexInBlock;
+  // The `Id`s of the triple in the order of the permutation. For example,
+  // for an object pertaining to the SPO permutation: `id1` is the subject,
+  // `id2` is the predicate, and `id3` is the object.
+  Id id1;
+  Id id2;
+  Id id3;
+  // Flag that is true if and only if the triple exists in the permutation. It
+  // is then equal to the triple at the position given by `blockIndex` and
+  // `rowIndexInBlock`.
+  bool existsInIndex;
+
+  // Locate the given triple in the given permutation.
+  static LocatedTriple locateTripleInPermutation(
+      Id id1, Id id2, Id id3, const Permutation& permutation);
+
+  // Special row index for triples that belong to the previous block (see the
+  // definition for the location of a triple at the end of this file). It is
+  // `constexpr` (and hence implicitly inline), so it can be odr-used safely.
+  // NOTE: It is important that `NO_ROW_INDEX + 1 > NO_ROW_INDEX`, hence it is
+  // defined as `max() - 1` and not as the seemingly more natural `max()`.
+  static constexpr size_t NO_ROW_INDEX = std::numeric_limits<size_t>::max() - 1;
+};
+
+// A sorted set of located triples. In `LocatedTriplesPerBlock` below, we use
+// this to store all located triples with the same `blockIndex`. The order (and
+// hence uniqueness in the set) depends only on `id1`, `id2`, and `id3`.
+// NOTE: We could also specialize `std::less` here, but the explicit
+// specification of the order makes it clearer.
+struct LocatedTripleCompare {
+  bool operator()(const LocatedTriple& x, const LocatedTriple& y) const {
+    return IdTriple{x.id1, x.id2, x.id3} < IdTriple{y.id1, y.id2, y.id3};
+  }
+};
+using LocatedTriples = std::set<LocatedTriple, LocatedTripleCompare>;
+
+// Sorted sets of located triples, grouped by block. We use this to store all
+// located triples for a permutation.
+class LocatedTriplesPerBlock {
+ private:
+  // The total number of `LocatedTriple` objects stored (for all blocks).
+  size_t numTriples_ = 0;
+
+ public:
+  // For each block with a non-empty set of located triples, the located triples
+  // in that block.
+  //
+  // NOTE: This is currently not private because we want access to
+  // `map_.size()`, `map_.clear()`, `map_.contains(...)`, and `map_.at(...)`.
+  // We could also make `LocatedTriplesPerBlock` a subclass of `HashMap<size_t,
+  // LocatedTriples>`, but not sure whether that is good style.
+  ad_utility::HashMap<size_t, LocatedTriples> map_;
+
+ public:
+  // Get the number of located triples for the given block that match `id1` (if
+  // provided) and `id2` (if provided). The return value is a pair of numbers:
+  // first, the number of new triples ("to be inserted") and second, the number
+  // of existing triples ("to be deleted").
+  std::pair<size_t, size_t> numTriples(size_t blockIndex) const;
+  std::pair<size_t, size_t> numTriples(size_t blockIndex, Id id1) const;
+  std::pair<size_t, size_t> numTriples(size_t blockIndex, Id id1, Id id2) const;
+
+  // Merge located triples for `blockIndex` with the given index `block` and
+  // write to `result`, starting from position `offsetInResult`. Consider only
+  // located triples in the range specified by `rowIndexInBlockBegin` and
+  // `rowIndexInBlockEnd`. Consider only triples that match `id1` (if provided)
+  // and `id2` (if provided). Return the number of rows written to `result`.
+  //
+  // PRECONDITIONS:
+  //
+  // 1. The set of located triples for `blockIndex` must be non-empty.
+  // Otherwise, there is no need for merging and this method shouldn't be
+  // called for efficiency reasons.
+  //
+  // 2. It is the responsibility of the caller that there is enough space for
+  // the result of the merge in `result` starting from `offsetInResult`.
+  //
+  // 3. If `block == std::nullopt`, we are adding to `result` the located
+  // triples for block `blockIndex` where the `rowIndexInBlock` is
+  // `NO_ROW_INDEX`. These actually belong to the previous block, but were
+  // larger than all triples there. This requires that `id1` or both `id1` and
+  // `id2` are specified.
+  //
+  size_t mergeTriples(size_t blockIndex, std::optional<IdTable> block,
+                      IdTable& result, size_t offsetInResult) const;
+  size_t mergeTriples(size_t blockIndex, std::optional<IdTable> block,
+                      IdTable& result, size_t offsetInResult, Id id1,
+                      size_t rowIndexInBlockBegin = 0) const;
+  size_t mergeTriples(
+      size_t blockIndex, std::optional<IdTable> block, IdTable& result,
+      size_t offsetInResult, Id id1, Id id2, size_t rowIndexInBlockBegin = 0,
+      size_t rowIndexInBlockEnd = LocatedTriple::NO_ROW_INDEX) const;
+
+  // Add the given `locatedTriple` to this `LocatedTriplesPerBlock`.
+  // Return a handle to where it was added (`LocatedTriples` is a sorted set,
+  // see above). We need this handle so that we can easily remove the
+  // `locatedTriple` again from the set in case we need to.
+  //
+  // The `locatedTriple` must not already exist in `LocatedTriplesPerBlock`.
+  LocatedTriples::iterator add(const LocatedTriple& locatedTriple) {
+    LocatedTriples& locatedTriples = map_[locatedTriple.blockIndex];
+    auto [handle, wasInserted] = locatedTriples.emplace(locatedTriple);
+    AD_CORRECTNESS_CHECK(wasInserted == true);
+    AD_CORRECTNESS_CHECK(handle != locatedTriples.end());
+    ++numTriples_;
+    return handle;
+  };
+
+  // Get the total number of `LocatedTriple` objects (for all blocks).
+  size_t numTriples() const { return numTriples_; }
+
+  // Get the number of blocks with a non-empty set of located triples.
+  size_t numBlocks() const { return map_.size(); }
+
+  // Remove all located triples.
+  void clear() {
+    map_.clear();
+    numTriples_ = 0;
+  }
+
+ private:
+  // Match modes for `numTriplesImpl` and the templated `mergeTriples`.
+  enum struct MatchMode { MatchAll, MatchId1, MatchId1AndId2 };
+
+  // The implementation behind the public `numTriples(blockIndex, ...)` above.
+  template <MatchMode matchMode>
+  std::pair<size_t, size_t> numTriplesImpl(size_t blockIndex,
+                                           Id id1 = Id::makeUndefined(),
+                                           Id id2 = Id::makeUndefined()) const;
+
+  // The implementation behind the public `mergeTriples` overloads above.
+  // The only reason that the arguments `id1` and `id2` come at the end here is
+  // so that we can give them default values.
+  template <MatchMode matchMode>
+  size_t mergeTriples(
+      size_t blockIndex, std::optional<IdTable> block, IdTable& result,
+      size_t offsetInResult, Id id1 = Id::makeUndefined(),
+      Id id2 = Id::makeUndefined(), size_t rowIndexInBlockBegin = 0,
+      size_t rowIndexInBlockEnd = LocatedTriple::NO_ROW_INDEX) const;
+};
+
+// Human-readable representation of `LocatedTriple`, `LocatedTriples`, and
+// `LocatedTriplesPerBlock`, which are very useful for debugging.
+std::ostream& operator<<(std::ostream& os, const LocatedTriple& lt);
+std::ostream& operator<<(std::ostream& os, const LocatedTriples& lts);
+std::ostream& operator<<(std::ostream& os, const LocatedTriplesPerBlock& ltpb);
+
+// DEFINITION OF THE POSITION OF A LOCATED TRIPLE IN A PERMUTATION
+//
+// 1. The position is defined by the index of a block in the permutation and the
+// index of a row within that block.
+//
+// 2. If the triple is contained in the permutation, it is contained exactly
+// once and so there is a well-defined block and position in that block.
+//
+// 3. If there is a block, where the first triple is smaller and the last triple
+// is larger, then that is the block and the position in that block is that of
+// the first triple that is (not smaller and hence) larger.
+//
+// 4. If the triple falls "between two blocks" (the last triple of the previous
+// block is smaller and the first triple of the next block is larger), then the
+// position is the first position in that next block.
+//
+// 5. As a special case of 4, if the triple is smaller than all triples in the
+// permutation, the position is the first position of the first block.
+//
+// 6. If the triple is larger than all triples in the permutation, the block
+// index is one after the largest block index and the row index within that
+// non-existing block is the special value `LocatedTriple::NO_ROW_INDEX`.
diff --git a/src/index/MetaDataHandler.h b/src/index/MetaDataHandler.h
index da84f1158a..e24e33fe5c 100644
--- a/src/index/MetaDataHandler.h
+++ b/src/index/MetaDataHandler.h
@@ -1,29 +1,39 @@
-// Copyright 2018, University of Freiburg,
+// Copyright 2018 - 2023, University of Freiburg
 // Chair of Algorithms and Data Structures
-// Author: Johannes Kalmbach (johannes.kalmbach@gmail.com)
-//
+// Authors: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+//          Hannah Bast <bast@cs.uni-freiburg.de>
+
 #pragma once
 
 #include <cassert>
 #include <stxxl/vector>
 
-#include "../global/Id.h"
-#include "../util/Exception.h"
-#include "../util/HashMap.h"
-#include "../util/Iterators.h"
-#include "../util/Log.h"
-#include "../util/Serializer/Serializer.h"
-#include "./CompressedRelation.h"
-
-// _____________________________________________________________________
+#include "global/Id.h"
+#include "index/CompressedRelation.h"
+#include "util/Exception.h"
+#include "util/HashMap.h"
+#include "util/Iterators.h"
+#include "util/Log.h"
+#include "util/Serializer/Serializer.h"
+
+// Class for access to relation metadata stored in a vector. Specifically, our
+// index uses this with `M = MmapVector<CompressedRelationMetadata>`; see
+// `index/IndexMetaData.h`.
 template <class M>
 class MetaDataWrapperDense {
+ private:
+  // A vector of metadata objects.
+  M _vec;
+
  public:
+  // An iterator with an additional method `getId()` that gives the relation ID
+  // of the current metadata object.
   template <typename BaseIterator>
   struct AddGetIdIterator : BaseIterator {
     using BaseIterator::BaseIterator;
     AddGetIdIterator(BaseIterator base) : BaseIterator{base} {}
     [[nodiscard]] Id getId() const { return getIdFromElement(*(*this)); }
+    [[nodiscard]] const auto& getMetaData() const { return *(*this); }
     static Id getIdFromElement(const typename BaseIterator::value_type& v) {
       return v.col0Id_;
     }
@@ -39,6 +49,7 @@ class MetaDataWrapperDense {
   // The underlying array is sorted, so all iterators are ordered iterators
   using ConstOrderedIterator = ConstIterator;
 
+  // The type of the stored metadata objects.
   using value_type = typename M::value_type;
 
   // _________________________________________________________
@@ -109,12 +120,24 @@ class MetaDataWrapperDense {
   // ___________________________________________________________
   std::string getFilename() const { return _vec.getFilename(); }
 
- private:
+  // The following used to be private (because they were only used as
+  // subroutines in the above), but we now need them in
+  // `DeltaTriples::findTripleResult`.
   ConstIterator lower_bound(Id id) const {
     auto cmp = [](const auto& metaData, Id id) {
       return metaData.col0Id_ < id;
     };
     return std::lower_bound(_vec.begin(), _vec.end(), id, cmp);
   }
-  M _vec;
+  Iterator lower_bound(Id id) {
+    auto cmp = [](const auto& metaData, Id id) {
+      return metaData.col0Id_ < id;
+    };
+    return std::lower_bound(_vec.begin(), _vec.end(), id, cmp);
+  }
 };
+
+// =======
+//   M _vec;
+// };
+// >>>>>>> master
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index f94b54c063..c81ecaa9c5 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -104,6 +104,8 @@ addLinkAndDiscoverTest(IndexMetaDataTest index)
 # TODO<qup42, joka921> fix this
 addLinkAndDiscoverTestSerial(IndexTest index)
 
+addLinkAndDiscoverTestSerial(LocatedTriplesTest index)
+
 addLinkAndDiscoverTest(FTSAlgorithmsTest index)
 
 addLinkAndDiscoverTest(EngineTest engine)
diff --git a/test/LocatedTriplesTest.cpp b/test/LocatedTriplesTest.cpp
new file mode 100644
index 0000000000..ce4d0b909e
--- /dev/null
+++ b/test/LocatedTriplesTest.cpp
@@ -0,0 +1,173 @@
+//  Copyright 2023, University of Freiburg,
+//  Chair of Algorithms and Data Structures.
+//  Author: Hannah Bast <bast@cs.uni-freiburg.de>
+
+#include <gtest/gtest.h>
+
+#include "./util/IdTableHelpers.h"
+#include "./util/IdTestHelpers.h"
+#include "index/CompressedRelation.h"
+#include "index/IndexMetaData.h"
+#include "index/LocatedTriples.h"
+#include "index/Permutations.h"
+
+// TODO: Why the namespace here? (copied from `test/IndexMetaDataTest.cpp`)
+namespace {
+auto V = ad_utility::testing::VocabId;
+}
+
+// Fixture with helper functions.
+class LocatedTriplesTest : public ::testing::Test {
+ protected:
+  // Make `LocatedTriplesPerBlock` from a list of `LocatedTriple` objects (the
+  // order in which the objects are given does not matter).
+  LocatedTriplesPerBlock makeLocatedTriplesPerBlock(
+      std::vector<LocatedTriple> locatedTriples) {
+    LocatedTriplesPerBlock result;
+    for (auto locatedTriple : locatedTriples) {
+      result.add(locatedTriple);
+    }
+    return result;
+  }
+};
+
+// Test the method that counts the number of `LocatedTriple`s in a block.
+TEST_F(LocatedTriplesTest, numTriplesInBlock) {
+  // Set up lists of located triples for three blocks.
+  auto locatedTriplesPerBlock = makeLocatedTriplesPerBlock(
+      {LocatedTriple{1, 0, V(10), V(1), V(0), true},
+       LocatedTriple{1, 0, V(10), V(2), V(1), true},
+       LocatedTriple{1, 0, V(11), V(3), V(0), false},
+       LocatedTriple{2, 0, V(20), V(4), V(0), false},
+       LocatedTriple{2, 0, V(21), V(5), V(0), false},
+       LocatedTriple{3, 0, V(30), V(6), V(0), false},
+       LocatedTriple{3, 0, V(32), V(7), V(0), true}});
+  ASSERT_EQ(locatedTriplesPerBlock.numBlocks(), 3);
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(), 7);
+
+  auto P = [](size_t n1, size_t n2) -> std::pair<size_t, size_t> {
+    return {n1, n2};
+  };
+
+  // Check the total counts per block.
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(1), P(1, 2));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(2), P(2, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(3), P(1, 1));
+
+  // Check the counts per block for a given `id1`.
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(1, V(10)), P(0, 2));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(1, V(11)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(2, V(20)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(2, V(21)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(3, V(30)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(3, V(32)), P(0, 1));
+
+  // Check the counts per block for a given `id1` and `id2`.
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(1, V(10), V(1)), P(0, 1));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(1, V(10), V(2)), P(0, 1));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(1, V(11), V(3)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(2, V(20), V(4)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(2, V(21), V(5)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(3, V(30), V(6)), P(1, 0));
+  ASSERT_EQ(locatedTriplesPerBlock.numTriples(3, V(32), V(7)), P(0, 1));
+}
+
+// Test the method that merges the matching `LocatedTriple`s from a block into a
+// part of an `IdTable`.
+TEST_F(LocatedTriplesTest, mergeTriples) {
+  // A block, as it could come from an index scan.
+  IdTable block = makeIdTableFromVector({{10, 10},    // Row 0
+                                         {15, 20},    // Row 1
+                                         {15, 30},    // Row 2
+                                         {20, 10},    // Row 3
+                                         {30, 20},    // Row 4
+                                         {30, 30}});  // Row 5
+
+  // A set of located triples for that block.
+  auto locatedTriplesPerBlock = makeLocatedTriplesPerBlock(
+      {LocatedTriple{1, 0, V(1), V(10), V(10), true},    // Delete row 0
+       LocatedTriple{1, 1, V(1), V(10), V(11), false},   // Insert before row 1
+       LocatedTriple{1, 1, V(2), V(11), V(10), false},   // Insert before row 1
+       LocatedTriple{1, 4, V(2), V(21), V(11), false},   // Insert before row 4
+       LocatedTriple{1, 4, V(2), V(30), V(10), false},   // Insert before row 4
+       LocatedTriple{1, 4, V(2), V(30), V(20), true},    // Delete row 4
+       LocatedTriple{1, 5, V(3), V(30), V(30), true}});  // Delete row 5
+
+  // Merge all these triples into `block` and check that the result is as
+  // expected (four triples inserted and three triples deleted).
+  {
+    IdTable resultExpected = makeIdTableFromVector({{10, 11},    // Row 0
+                                                    {11, 10},    // Row 1
+                                                    {15, 20},    // Row 2
+                                                    {15, 30},    // Row 3
+                                                    {20, 10},    // Row 4
+                                                    {21, 11},    // Row 5
+                                                    {30, 10}});  // Row 6
+    IdTable result(2, ad_utility::testing::makeAllocator());
+    result.resize(resultExpected.size());
+    locatedTriplesPerBlock.mergeTriples(1, block.clone(), result, 0);
+    ASSERT_EQ(result, resultExpected);
+  }
+
+  // Merge only the triples with `id1 == V(2)` into `block` (three triples
+  // inserted and one triple deleted).
+  {
+    IdTable resultExpected = makeIdTableFromVector({{10, 10},    // Row 0
+                                                    {11, 10},    // Row 1
+                                                    {15, 20},    // Row 2
+                                                    {15, 30},    // Row 3
+                                                    {20, 10},    // Row 4
+                                                    {21, 11},    // Row 5
+                                                    {30, 10},    // Row 6
+                                                    {30, 30}});  // Row 7
+    IdTable result(2, ad_utility::testing::makeAllocator());
+    result.resize(resultExpected.size());
+    locatedTriplesPerBlock.mergeTriples(1, block.clone(), result, 0, V(2));
+    ASSERT_EQ(result, resultExpected);
+  }
+
+  // Repeat but with a partial block that leaves out the first two elements of
+  // `block`.
+  {
+    IdTable resultExpected = makeIdTableFromVector({{15, 30},    // Row 0
+                                                    {20, 10},    // Row 1
+                                                    {21, 11},    // Row 2
+                                                    {30, 10},    // Row 3
+                                                    {30, 30}});  // Row 4
+    IdTable result(2, ad_utility::testing::makeAllocator());
+    result.resize(resultExpected.size());
+    locatedTriplesPerBlock.mergeTriples(1, block.clone(), result, 0, V(2), 2);
+    ASSERT_EQ(result, resultExpected);
+  }
+
+  // Merge only the triples with `id1 == V(2)` and `id2 == V(30)` into the
+  // corresponding partial block (one triple inserted, one triple deleted).
+  {
+    IdTable blockColumnId3(1, ad_utility::testing::makeAllocator());
+    blockColumnId3.resize(block.size());
+    for (size_t i = 0; i < block.size(); ++i) {
+      blockColumnId3(i, 0) = block(i, 1);
+    }
+    IdTable resultExpected = makeIdTableFromVector({{10}, {30}});
+    IdTable result(1, ad_utility::testing::makeAllocator());
+    result.resize(resultExpected.size());
+    locatedTriplesPerBlock.mergeTriples(1, std::move(blockColumnId3), result, 0,
+                                        V(2), V(30), 4, 6);
+    ASSERT_EQ(result, resultExpected);
+  }
+
+  // Merge special triples.
+  {
+    size_t NRI = LocatedTriple::NO_ROW_INDEX;
+    auto locatedTriplesPerBlock = makeLocatedTriplesPerBlock(
+        {LocatedTriple{2, NRI, V(1), V(30), V(40), true},
+         LocatedTriple{2, NRI, V(1), V(30), V(50), true},
+         LocatedTriple{2, NRI, V(1), V(40), V(10), true}});
+    IdTable resultExpected = makeIdTableFromVector({{30, 40},    // Row 0
+                                                    {30, 50},    // Row 1
+                                                    {40, 10}});  // Row 2
+    IdTable result(2, ad_utility::testing::makeAllocator());
+    result.resize(resultExpected.size());
+    locatedTriplesPerBlock.mergeTriples(2, std::nullopt, result, 0, V(1));
+  }
+}

From d8781a4c6f97dfe1a0607b68248339727a475ca0 Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Sat, 10 Jun 2023 18:40:07 +0200
Subject: [PATCH 2/4] Add test for the `locatedTriple` method + address some of
 the comments from Johannes' review

---
 src/global/ValueId.h          |   4 +-
 src/index/IndexMetaData.h     |   3 +-
 src/index/LocatedTriples.cpp  |  40 ++++++--
 src/index/LocatedTriples.h    |   3 +
 src/index/MetaDataHandler.h   |  25 ++---
 test/LocatedTriplesTest.cpp   | 173 ++++++++++++++++++++++++++++++++++
 test/ValueIdTest.cpp          |  14 +--
 test/ValuesForTestingTest.cpp |   4 +-
 8 files changed, 236 insertions(+), 30 deletions(-)

diff --git a/src/global/ValueId.h b/src/global/ValueId.h
index b51d0aef16..26ce023ab8 100644
--- a/src/global/ValueId.h
+++ b/src/global/ValueId.h
@@ -282,10 +282,10 @@ class ValueId {
   /// This operator is only for debugging and testing. It returns a
   /// human-readable representation.
   friend std::ostream& operator<<(std::ostream& ostr, const ValueId& id) {
-    ostr << toString(id.getDatatype()) << ':';
+    ostr << toString(id.getDatatype())[0] << ':';
     auto visitor = [&ostr]<typename T>(T&& value) {
       if constexpr (ad_utility::isSimilar<T, ValueId::UndefinedType>) {
-        ostr << "Undefined";
+        ostr << "xx";
       } else if constexpr (ad_utility::isSimilar<T, double> ||
                            ad_utility::isSimilar<T, int64_t>) {
         ostr << std::to_string(value);
diff --git a/src/index/IndexMetaData.h b/src/index/IndexMetaData.h
index 3039c0ba28..9842faeb69 100644
--- a/src/index/IndexMetaData.h
+++ b/src/index/IndexMetaData.h
@@ -226,7 +226,8 @@ template <class MapType>
 ad_utility::File& operator<<(ad_utility::File& f,
                              const IndexMetaData<MapType>& imd);
 
-// aliases for easier use in Index class
+// Aliases for easier use in classes that build or query permutations, like
+// `IndexImpl`.
 using MetaWrapperMmap =
     MetaDataWrapperDense<ad_utility::MmapVector<CompressedRelationMetadata>>;
 using MetaWrapperMmapView = MetaDataWrapperDense<
diff --git a/src/index/LocatedTriples.cpp b/src/index/LocatedTriples.cpp
index acd6988675..c8aef6c363 100644
--- a/src/index/LocatedTriples.cpp
+++ b/src/index/LocatedTriples.cpp
@@ -6,6 +6,7 @@
 
 #include <algorithm>
 
+#include "absl/strings/str_join.h"
 #include "index/CompressedRelation.h"
 #include "index/IndexMetaData.h"
 #include "index/Permutations.h"
@@ -190,7 +191,8 @@ size_t LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
   AD_CONTRACT_CHECK(map_.contains(blockIndex));
 
   // The special case `block == std::nullopt` (write only located triples to
-  // `result`) is only allowed, when `id1` or `id1` and `id2` are specified.
+  // `result`) is only allowed, when the `matchMode` is `MatchId1` or
+  // `MatchId1AndId2`, but not `MatchAll`.
   AD_CONTRACT_CHECK(block.has_value() || matchMode != MatchMode::MatchAll);
 
   // If `rowIndexInBlockEnd` has the default value (see `LocatedTriples.h`), the
@@ -324,7 +326,7 @@ size_t LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
 std::ostream& operator<<(std::ostream& os, const LocatedTriple& lt) {
   os << "LT(" << lt.blockIndex << " "
      << (lt.rowIndexInBlock == LocatedTriple::NO_ROW_INDEX
-             ? "NO_ROW_INDEX"
+             ? "x"
              : std::to_string(lt.rowIndexInBlock))
      << " " << lt.id1 << " " << lt.id2 << " " << lt.id3 << " "
      << lt.existsInIndex << ")";
@@ -333,17 +335,43 @@ std::ostream& operator<<(std::ostream& os, const LocatedTriple& lt) {
 
 // ____________________________________________________________________________
 std::ostream& operator<<(std::ostream& os, const LocatedTriples& lts) {
-  os << "{";
+  os << "{ ";
   std::copy(lts.begin(), lts.end(),
-            std::ostream_iterator<LocatedTriple>(std::cout, " "));
+            std::ostream_iterator<LocatedTriple>(os, " "));
   os << "}";
   return os;
 }
 
 // ____________________________________________________________________________
 std::ostream& operator<<(std::ostream& os, const LocatedTriplesPerBlock& ltpb) {
-  for (auto [blockIndex, lts] : ltpb.map_) {
-    os << "Block #" << blockIndex << ": " << lts << std::endl;
+  // Get the block indices in sorted order.
+  std::vector<size_t> blockIndices;
+  std::transform(ltpb.map_.begin(), ltpb.map_.end(),
+                 std::back_inserter(blockIndices),
+                 [](const auto& entry) { return entry.first; });
+  std::ranges::sort(blockIndices);
+  for (auto blockIndex : blockIndices) {
+    os << "Block #" << blockIndex << ": " << ltpb.map_.at(blockIndex)
+       << std::endl;
   }
   return os;
 }
+
+// ____________________________________________________________________________
+std::ostream& operator<<(std::ostream& os,
+                         const columnBasedIdTable::Row<Id>& idTableRow) {
+  os << "(";
+  for (size_t i = 0; i < idTableRow.numColumns(); ++i) {
+    os << idTableRow[i] << (i < idTableRow.numColumns() - 1 ? " " : ")");
+  }
+  return os;
+}
+
+// ____________________________________________________________________________
+std::ostream& operator<<(std::ostream& os, const IdTable& idTable) {
+  os << "{ ";
+  std::copy(idTable.begin(), idTable.end(),
+            std::ostream_iterator<columnBasedIdTable::Row<Id>>(os, " "));
+  os << "}";
+  return os;
+}
diff --git a/src/index/LocatedTriples.h b/src/index/LocatedTriples.h
index bb967bfe95..e2a9735dc4 100644
--- a/src/index/LocatedTriples.h
+++ b/src/index/LocatedTriples.h
@@ -171,6 +171,9 @@ class LocatedTriplesPerBlock {
 std::ostream& operator<<(std::ostream& os, const LocatedTriple& lt);
 std::ostream& operator<<(std::ostream& os, const LocatedTriples& lts);
 std::ostream& operator<<(std::ostream& os, const LocatedTriplesPerBlock& ltpb);
+std::ostream& operator<<(std::ostream& os,
+                         const columnBasedIdTable::Row<Id>& idTableRow);
+std::ostream& operator<<(std::ostream& os, const IdTable& idTable);
 
 // DEFINITION OF THE POSITION OF A LOCATED TRIPLE IN A PERMUTATION
 //
diff --git a/src/index/MetaDataHandler.h b/src/index/MetaDataHandler.h
index e24e33fe5c..437f0e3e71 100644
--- a/src/index/MetaDataHandler.h
+++ b/src/index/MetaDataHandler.h
@@ -16,9 +16,16 @@
 #include "util/Log.h"
 #include "util/Serializer/Serializer.h"
 
-// Class for access to relation metadata stored in a vector. Specifically, our
-// index uses this with `M = MmapVector<CompressedRelationMetadata>>`; see
-// `index/IndexMetaData.h`
+// Wrapper class for access to `CompressedRelationMetadata` objects (one per
+// relation) stored in a vector. Specifically, our index uses this with `M =
+// MmapVector<CompressedRelationMetadata>`; see `index/IndexMetaData.h` at the
+// bottom.
+//
+// TODO: We needed this at some point because we used to have two
+// implementations of `IndexMetaData`, one using mmaps and one using hash maps,
+// and we wanted to have a common interface for both. We no longer use the hash
+// map implementation and so the wrapper class (and the complexity that goes
+// along with it) is probably no longer needed.
 template <class M>
 class MetaDataWrapperDense {
  private:
@@ -99,7 +106,7 @@ class MetaDataWrapperDense {
 
   // ____________________________________________________________
   void set(Id id, const value_type& value) {
-    // Assert that the ids are ascending.
+    // Check that the `Id`s are added in strictly ascending order.
     AD_CONTRACT_CHECK(_vec.size() == 0 || _vec.back().col0Id_ < id);
     _vec.push_back(value);
   }
@@ -120,9 +127,8 @@ class MetaDataWrapperDense {
   // ___________________________________________________________
   std::string getFilename() const { return _vec.getFilename(); }
 
-  // The following used to be private (because they were only used as
-  // subroutines in the above), but we now need them in
-  // `DeltaTriples::findTripleResult`.
+  // NOTE: The following used to be private (they were only used as subroutines
+  // in the above), but we now need them in `LocatedTriples::locateTriple`.
   ConstIterator lower_bound(Id id) const {
     auto cmp = [](const auto& metaData, Id id) {
       return metaData.col0Id_ < id;
@@ -136,8 +142,3 @@ class MetaDataWrapperDense {
     return std::lower_bound(_vec.begin(), _vec.end(), id, cmp);
   }
 };
-
-// =======
-//   M _vec;
-// };
-// >>>>>>> master
diff --git a/test/LocatedTriplesTest.cpp b/test/LocatedTriplesTest.cpp
index ce4d0b909e..1fc681170a 100644
--- a/test/LocatedTriplesTest.cpp
+++ b/test/LocatedTriplesTest.cpp
@@ -171,3 +171,176 @@ TEST_F(LocatedTriplesTest, mergeTriples) {
     locatedTriplesPerBlock.mergeTriples(2, std::nullopt, result, 0, V(1));
   }
 }
+
+// Test locating triples via `LocatedTriple::locateTripleInPermutation`.
+TEST_F(LocatedTriplesTest, locatedTriple) {
+  // The actual test, for a given block size.
+  //
+  // TODO: Also make the permutation an argument, right now it's only PSO.
+  auto testWithGivenBlockSize =
+      [](const IdTable& triplesInIndex, const IdTable& triplesToLocate,
+         size_t blockSizeInBytes,
+         const ad_utility::HashMap<size_t, std::string>&
+             expectedLocatedTriplesPerBlock) {
+        std::string basename = "LocatedTriplesTest.scanWithMergeTriples";
+        std::string permutationFilename = basename + ".index.pso";
+
+        // We currently assume that all triples in `triplesInIndex` have the
+        // same `Id` in the first column.
+        std::vector<Id> relationIds = {triplesInIndex(0, 0)};
+        for (size_t i = 1; i < triplesInIndex.size(); ++i) {
+          ASSERT_EQ(triplesInIndex(i, 0), relationIds[0]);
+        }
+
+        // Helper lambda for creating a `BufferedIdTable` from all triples in
+        // the given `IdTable` matching `relationId`.
+        //
+        // This is needed for `CompressedRelationWriter` below, which
+        // expects a `BufferedIdTable` with two columns.
+        //
+        // TODO: Something like this is also used in `CompressedRelationsTest`,
+        // so it should be in a helper class.
+        auto getBufferedIdTable = [](const IdTable& idTable,
+                                     Id relationId) -> BufferedIdTable {
+          // Note that these files are never created because we set the
+          // threshold for writing to disk so large.
+          std::string bufferFilename1 = "compressedRelationWriter.buffer1.dat";
+          std::string bufferFilename2 = "compressedRelationWriter.buffer2.dat";
+          AD_CONTRACT_CHECK(idTable.numColumns() == 3);
+          BufferedIdTable bufferedIdTable{
+              2, std::array{
+                     ad_utility::BufferedVector<Id>{
+                         std::numeric_limits<size_t>::max(), bufferFilename1},
+                     ad_utility::BufferedVector<Id>{
+                         std::numeric_limits<size_t>::max(), bufferFilename2}}};
+          for (size_t i = 0; i < idTable.size(); ++i) {
+            if (idTable(i, 0) == relationId) {
+              bufferedIdTable.push_back({idTable(i, 1), idTable(i, 2)});
+            }
+          }
+          return bufferedIdTable;
+        };
+
+        // Write the permutation to disk (adapted from
+        // `CompressedRelationsTest` and
+        // `IndexImpl::createPermutationPairImpl`).
+        {
+          ad_utility::File permutationFileForWritingRelations{
+              permutationFilename, "w"};
+          IndexMetaDataMmap metadataMmap;
+          metadataMmap.setup(permutationFilename + MMAP_FILE_SUFFIX,
+                             ad_utility::CreateTag{});
+          CompressedRelationWriter writer{
+              std::move(permutationFileForWritingRelations), blockSizeInBytes};
+          for (size_t i = 0; i < relationIds.size(); ++i) {
+            // The third argument is the number of distinct elements. We set it
+            // to 1 here because it is irrelevant for the purposes of this test.
+            Id relationId = relationIds[i];
+            auto relationMetadata = writer.addRelation(
+                relationId, getBufferedIdTable(triplesInIndex, relationId), 1);
+            metadataMmap.add(relationMetadata);
+          }
+          metadataMmap.blockData() = std::move(writer).getFinishedBlocks();
+          ad_utility::File permutationFileForWritingMetadata(
+              permutationFilename, "r+");
+          metadataMmap.appendToFile(&permutationFileForWritingMetadata);
+        }
+
+        // Create a permutation based on this.
+        Permutation permutation{"PSO", ".pso", {1, 0, 2}};
+        permutation.loadFromDisk(basename);
+
+        // Check that the permutation indeed consists of the relations that we
+        // have written to it.
+        {
+          IdTable result(2, ad_utility::testing::makeAllocator());
+          for (Id relationId : relationIds) {
+            permutation.scan(relationId, &result);
+            std::cout << "Relation " << relationId << ": " << result
+                      << std::endl;
+          }
+        }
+
+        // Now locate the triples from `triplesToLocate` in the permutation.
+        LocatedTriplesPerBlock locatedTriplesPerBlock;
+        for (size_t i = 0; i < triplesToLocate.size(); ++i) {
+          locatedTriplesPerBlock.add(LocatedTriple::locateTripleInPermutation(
+              triplesToLocate(i, 0), triplesToLocate(i, 1),
+              triplesToLocate(i, 2), permutation));
+        }
+
+        std::cout << locatedTriplesPerBlock;
+        for (auto [blockIndex, locatedTriplesString] :
+             expectedLocatedTriplesPerBlock) {
+          ASSERT_TRUE(locatedTriplesPerBlock.map_.contains(blockIndex))
+              << "blockIndex = " << blockIndex << " not found";
+          std::ostringstream os;
+          os << locatedTriplesPerBlock.map_.at(blockIndex);
+          ASSERT_EQ(os.str(), locatedTriplesString)
+              << "blockIndex = " << blockIndex;
+        }
+
+        // Delete the permutation files.
+        ad_utility::deleteFile(permutationFilename);
+        ad_utility::deleteFile(permutationFilename + MMAP_FILE_SUFFIX);
+      };
+
+  // Triples in the index.
+  IdTable triplesInIndex = makeIdTableFromVector({{1, 10, 10},    // Row 0
+                                                  {1, 15, 20},    // Row 1
+                                                  {1, 15, 30},    // Row 2
+                                                  {1, 20, 10},    // Row 3
+                                                  {1, 30, 20},    // Row 4
+                                                  {1, 30, 30}});  // Row 5
+
+  // Locate the following triples, some of which exist in the relation and some
+  // of which do not, and which cover a variety of positons, including triples
+  // that are larger than all existing triples.
+  IdTable triplesToLocate =
+      makeIdTableFromVector({{1, 15, 20},    // Exists.
+                             {1, 14, 20},    // Does not exist.
+                             {1, 20, 10},    // Exists.
+                             {1, 30, 20},    // Exists.
+                             {1, 30, 30},    // Exists.
+                             {1, 30, 31},    // Larger than all existing.
+                             {1, 30, 32}});  // Larger than all existing.
+
+  // Now test for multiple block sizes (16 bytes is the minimum).
+  // testing::internal::CaptureStdout();
+  std::cout << "Index triples: " << triplesInIndex << std::endl;
+  std::cout << "Delta triples: " << triplesToLocate << std::endl;
+
+  // With block size 16, we have each triple in its own block.
+  testWithGivenBlockSize(
+      triplesInIndex, triplesToLocate, 16,
+      {{1, "{ LT(1 0 V:1 V:14 V:20 0) LT(1 0 V:1 V:15 V:20 1) }"},
+       {3, "{ LT(3 0 V:1 V:20 V:10 1) }"},
+       {4, "{ LT(4 0 V:1 V:30 V:20 1) }"},
+       {5, "{ LT(5 0 V:1 V:30 V:30 1) }"},
+       {6, "{ LT(6 x V:1 V:30 V:31 0) LT(6 x V:1 V:30 V:32 0) }"}});
+
+  // With block size 32, we have three blocks à two triples each.
+  testWithGivenBlockSize(
+      triplesInIndex, triplesToLocate, 32,
+      {{0, "{ LT(0 1 V:1 V:14 V:20 0) LT(0 1 V:1 V:15 V:20 1) }"},
+       {1, "{ LT(1 1 V:1 V:20 V:10 1) }"},
+       {2, "{ LT(2 0 V:1 V:30 V:20 1) LT(2 1 V:1 V:30 V:30 1) }"},
+       {3, "{ LT(3 x V:1 V:30 V:31 0) LT(3 x V:1 V:30 V:32 0) }"}});
+
+  // With block size 48, we have two blocks à three triples each.
+  testWithGivenBlockSize(
+      triplesInIndex, triplesToLocate, 48,
+      {{0, "{ LT(0 1 V:1 V:14 V:20 0) LT(0 1 V:1 V:15 V:20 1) }"},
+       {1,
+        "{ LT(1 0 V:1 V:20 V:10 1) LT(1 1 V:1 V:30 V:20 1)"
+        " LT(1 2 V:1 V:30 V:30 1) }"},
+       {2, "{ LT(2 x V:1 V:30 V:31 0) LT(2 x V:1 V:30 V:32 0) }"}});
+
+  // With block size 100'000, we have one block.
+  testWithGivenBlockSize(
+      triplesInIndex, triplesToLocate, 100'000,
+      {{0,
+        "{ LT(0 1 V:1 V:14 V:20 0) LT(0 1 V:1 V:15 V:20 1) LT(0 3 V:1 V:20 "
+        "V:10 1) LT(0 4 V:1 V:30 V:20 1) LT(0 5 V:1 V:30 V:30 1) }"},
+       {1, "{ LT(1 x V:1 V:30 V:31 0) LT(1 x V:1 V:30 V:32 0) }"}});
+}
diff --git a/test/ValueIdTest.cpp b/test/ValueIdTest.cpp
index dab815e207..3963e6eca5 100644
--- a/test/ValueIdTest.cpp
+++ b/test/ValueIdTest.cpp
@@ -278,15 +278,15 @@ TEST(ValueId, toDebugString) {
     stream << id;
     ASSERT_EQ(stream.str(), expected);
   };
-  test(ValueId::makeUndefined(), "Undefined:Undefined");
-  test(ValueId::makeFromInt(-42), "Int:-42");
-  test(ValueId::makeFromDouble(42.0), "Double:42.000000");
-  test(makeVocabId(15), "VocabIndex:15");
-  test(makeLocalVocabId(25), "LocalVocabIndex:25");
-  test(makeTextRecordId(37), "TextRecordIndex:37");
+  test(ValueId::makeUndefined(), "U:xx");
+  test(ValueId::makeFromInt(-42), "I:-42");
+  test(ValueId::makeFromDouble(42.0), "D:42.000000");
+  test(makeVocabId(15), "V:15");
+  test(makeLocalVocabId(25), "L:25");
+  test(makeTextRecordId(37), "T:37");
   test(ValueId::makeFromDate(
            DateOrLargeYear{123456, DateOrLargeYear::Type::Year}),
-       "Date:123456");
+       "D:123456");
 }
 
 TEST(ValueId, InvalidDatatypeEnumValue) {
diff --git a/test/ValuesForTestingTest.cpp b/test/ValuesForTestingTest.cpp
index 44d95a3cc6..42e1ec7258 100644
--- a/test/ValuesForTestingTest.cpp
+++ b/test/ValuesForTestingTest.cpp
@@ -27,8 +27,8 @@ TEST(ValuesForTesting, valuesForTesting) {
   ASSERT_EQ(v.getMultiplicity(1), 84.0);
 
   ASSERT_THAT(v.asString(),
-              ::testing::StartsWith("Values for testing with 2 columns and "
-                                    "contents VocabIndex:3 VocabIndex:12"));
+              ::testing::StartsWith(
+                  "Values for testing with 2 columns and contents V:3 V:12"));
   ASSERT_EQ(v.getDescriptor(), "explicit values for testing");
   ASSERT_TRUE(v.resultSortedOn().empty());
   ASSERT_TRUE(v.getChildren().empty());

From 258231d49d55dcfc4ea49e55c563d9c9387ad4ac Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Sat, 10 Jun 2023 21:50:34 +0200
Subject: [PATCH 3/4] Improve the `locateTriple` test (three relations instead
 of just one)

---
 test/LocatedTriplesTest.cpp | 109 +++++++++++++++++++++---------------
 1 file changed, 65 insertions(+), 44 deletions(-)

diff --git a/test/LocatedTriplesTest.cpp b/test/LocatedTriplesTest.cpp
index 1fc681170a..a1d3f2136a 100644
--- a/test/LocatedTriplesTest.cpp
+++ b/test/LocatedTriplesTest.cpp
@@ -185,12 +185,13 @@ TEST_F(LocatedTriplesTest, locatedTriple) {
         std::string basename = "LocatedTriplesTest.scanWithMergeTriples";
         std::string permutationFilename = basename + ".index.pso";
 
-        // We currently assume that all triples in `triplesInIndex` have the
-        // same `Id` in the first column.
-        std::vector<Id> relationIds = {triplesInIndex(0, 0)};
-        for (size_t i = 1; i < triplesInIndex.size(); ++i) {
-          ASSERT_EQ(triplesInIndex(i, 0), relationIds[0]);
+        // Collect the distinct relation `Id`s.
+        std::vector<Id> relationIds;
+        for (size_t i = 0; i < triplesInIndex.numRows(); ++i) {
+          relationIds.push_back(triplesInIndex(i, 0));
+          ASSERT_TRUE(i == 0 || relationIds[i - 1] <= relationIds[i]);
         }
+        relationIds = ad_utility::removeDuplicates(relationIds);
 
         // Helper lambda for creating a `BufferedIdTable` from all triples in
         // the given `IdTable` matching `relationId`.
@@ -256,7 +257,7 @@ TEST_F(LocatedTriplesTest, locatedTriple) {
           IdTable result(2, ad_utility::testing::makeAllocator());
           for (Id relationId : relationIds) {
             permutation.scan(relationId, &result);
-            std::cout << "Relation " << relationId << ": " << result
+            std::cout << "Relation: " << relationId << " -> " << result
                       << std::endl;
           }
         }
@@ -269,16 +270,28 @@ TEST_F(LocatedTriplesTest, locatedTriple) {
               triplesToLocate(i, 2), permutation));
         }
 
+        // Check that the locations are as expected. Process in order of
+        // increasing block index because it's easier to debug.
         std::cout << locatedTriplesPerBlock;
-        for (auto [blockIndex, locatedTriplesString] :
+        std::vector<size_t> blockIndices;
+        for (auto [blockIndex, expectedLocatedTriples] :
              expectedLocatedTriplesPerBlock) {
+          blockIndices.push_back(blockIndex);
+        }
+        std::sort(blockIndices.begin(), blockIndices.end());
+        for (auto blockIndex : blockIndices) {
           ASSERT_TRUE(locatedTriplesPerBlock.map_.contains(blockIndex))
               << "blockIndex = " << blockIndex << " not found";
           std::ostringstream os;
           os << locatedTriplesPerBlock.map_.at(blockIndex);
-          ASSERT_EQ(os.str(), locatedTriplesString)
+          std::string computedLocatedTriples = os.str();
+          std::string expectedLocatedTriples =
+              expectedLocatedTriplesPerBlock.at(blockIndex);
+          ASSERT_EQ(computedLocatedTriples, expectedLocatedTriples)
               << "blockIndex = " << blockIndex;
         }
+        ASSERT_EQ(locatedTriplesPerBlock.map_.size(),
+                  expectedLocatedTriplesPerBlock.size());
 
         // Delete the permutation files.
         ad_utility::deleteFile(permutationFilename);
@@ -287,23 +300,25 @@ TEST_F(LocatedTriplesTest, locatedTriple) {
 
   // Triples in the index.
   IdTable triplesInIndex = makeIdTableFromVector({{1, 10, 10},    // Row 0
-                                                  {1, 15, 20},    // Row 1
-                                                  {1, 15, 30},    // Row 2
-                                                  {1, 20, 10},    // Row 3
-                                                  {1, 30, 20},    // Row 4
-                                                  {1, 30, 30}});  // Row 5
+                                                  {2, 10, 10},    // Row 1
+                                                  {2, 15, 20},    // Row 2
+                                                  {2, 15, 30},    // Row 3
+                                                  {2, 20, 10},    // Row 4
+                                                  {2, 30, 20},    // Row 5
+                                                  {2, 30, 30},    // Row 6
+                                                  {3, 10, 10}});  // Row 7
 
   // Locate the following triples, some of which exist in the relation and some
   // of which do not, and which cover a variety of positons, including triples
   // that are larger than all existing triples.
   IdTable triplesToLocate =
-      makeIdTableFromVector({{1, 15, 20},    // Exists.
-                             {1, 14, 20},    // Does not exist.
-                             {1, 20, 10},    // Exists.
-                             {1, 30, 20},    // Exists.
-                             {1, 30, 30},    // Exists.
-                             {1, 30, 31},    // Larger than all existing.
-                             {1, 30, 32}});  // Larger than all existing.
+      makeIdTableFromVector({{2, 15, 20},    // Equals Row 2
+                             {2, 14, 20},    // Before Row 2
+                             {2, 20, 10},    // Equals Row 4
+                             {2, 30, 20},    // Equals Row 5
+                             {2, 30, 30},    // Equals Row 6
+                             {2, 30, 31},    // Before Row 7
+                             {9, 30, 32}});  // Larger than all.
 
   // Now test for multiple block sizes (16 bytes is the minimum).
   // testing::internal::CaptureStdout();
@@ -313,34 +328,40 @@ TEST_F(LocatedTriplesTest, locatedTriple) {
   // With block size 16, we have each triple in its own block.
   testWithGivenBlockSize(
       triplesInIndex, triplesToLocate, 16,
-      {{1, "{ LT(1 0 V:1 V:14 V:20 0) LT(1 0 V:1 V:15 V:20 1) }"},
-       {3, "{ LT(3 0 V:1 V:20 V:10 1) }"},
-       {4, "{ LT(4 0 V:1 V:30 V:20 1) }"},
-       {5, "{ LT(5 0 V:1 V:30 V:30 1) }"},
-       {6, "{ LT(6 x V:1 V:30 V:31 0) LT(6 x V:1 V:30 V:32 0) }"}});
-
-  // With block size 32, we have three blocks à two triples each.
+      {{2, "{ LT(2 0 V:2 V:14 V:20 0) LT(2 0 V:2 V:15 V:20 1) }"},
+       {4, "{ LT(4 0 V:2 V:20 V:10 1) }"},
+       {5, "{ LT(5 0 V:2 V:30 V:20 1) }"},
+       {6, "{ LT(6 0 V:2 V:30 V:30 1) }"},
+       {7, "{ LT(7 0 V:2 V:30 V:31 0) }"},
+       {8, "{ LT(8 x V:9 V:30 V:32 0) }"}});
+
+  // With block size 32, we have five blocks (Block 0 = Row 0, Block 1 = Row
+  // 1+2, Block 2 = Row 3+4, Block 3 = Row 5+6, Block 4 = Row 7). Note that a
+  // relation that spans multiple blocks has these blocks to itself.
   testWithGivenBlockSize(
       triplesInIndex, triplesToLocate, 32,
-      {{0, "{ LT(0 1 V:1 V:14 V:20 0) LT(0 1 V:1 V:15 V:20 1) }"},
-       {1, "{ LT(1 1 V:1 V:20 V:10 1) }"},
-       {2, "{ LT(2 0 V:1 V:30 V:20 1) LT(2 1 V:1 V:30 V:30 1) }"},
-       {3, "{ LT(3 x V:1 V:30 V:31 0) LT(3 x V:1 V:30 V:32 0) }"}});
-
-  // With block size 48, we have two blocks à three triples each.
+      {{1, "{ LT(1 1 V:2 V:14 V:20 0) LT(1 1 V:2 V:15 V:20 1) }"},
+       {2, "{ LT(2 1 V:2 V:20 V:10 1) }"},
+       {3, "{ LT(3 0 V:2 V:30 V:20 1) LT(3 1 V:2 V:30 V:30 1) }"},
+       {4, "{ LT(4 0 V:2 V:30 V:31 0) }"},
+       {5, "{ LT(5 x V:9 V:30 V:32 0) }"}});
+
+  // With block size 48, we have four blocks (Block 0 = Row 0, Block 1 = Row
+  // 1+2+3, Block 2 = Row 4+5+6, Block 3 = Row 7).
   testWithGivenBlockSize(
       triplesInIndex, triplesToLocate, 48,
-      {{0, "{ LT(0 1 V:1 V:14 V:20 0) LT(0 1 V:1 V:15 V:20 1) }"},
-       {1,
-        "{ LT(1 0 V:1 V:20 V:10 1) LT(1 1 V:1 V:30 V:20 1)"
-        " LT(1 2 V:1 V:30 V:30 1) }"},
-       {2, "{ LT(2 x V:1 V:30 V:31 0) LT(2 x V:1 V:30 V:32 0) }"}});
+      {{1, "{ LT(1 1 V:2 V:14 V:20 0) LT(1 1 V:2 V:15 V:20 1) }"},
+       {2,
+        "{ LT(2 0 V:2 V:20 V:10 1) LT(2 1 V:2 V:30 V:20 1)"
+        " LT(2 2 V:2 V:30 V:30 1) }"},
+       {3, "{ LT(3 0 V:2 V:30 V:31 0) }"},
+       {4, "{ LT(4 x V:9 V:30 V:32 0) }"}});
 
   // With block size 100'000, we have one block.
-  testWithGivenBlockSize(
-      triplesInIndex, triplesToLocate, 100'000,
-      {{0,
-        "{ LT(0 1 V:1 V:14 V:20 0) LT(0 1 V:1 V:15 V:20 1) LT(0 3 V:1 V:20 "
-        "V:10 1) LT(0 4 V:1 V:30 V:20 1) LT(0 5 V:1 V:30 V:30 1) }"},
-       {1, "{ LT(1 x V:1 V:30 V:31 0) LT(1 x V:1 V:30 V:32 0) }"}});
+  testWithGivenBlockSize(triplesInIndex, triplesToLocate, 100'000,
+                         {{0,
+                           "{ LT(0 2 V:2 V:14 V:20 0) LT(0 2 V:2 V:15 V:20 1) "
+                           "LT(0 4 V:2 V:20 V:10 1) LT(0 5 V:2 V:30 V:20 1) "
+                           "LT(0 6 V:2 V:30 V:30 1) LT(0 7 V:2 V:30 V:31 0) }"},
+                          {1, "{ LT(1 x V:9 V:30 V:32 0) }"}});
 }

From 59aae8e983c1cb76c7ff66c0ad1f70715afb4e7a Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Tue, 13 Feb 2024 16:31:07 +0100
Subject: [PATCH 4/4] A few minor improvements

---
 src/global/IdTriple.h        | 8 +-------
 src/index/IndexMetaData.h    | 2 --
 src/index/LocatedTriples.cpp | 4 ++--
 src/index/LocatedTriples.h   | 9 +++++----
 test/LocatedTriplesTest.cpp  | 3 ++-
 5 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/src/global/IdTriple.h b/src/global/IdTriple.h
index 0353b8c747..425fec2d10 100644
--- a/src/global/IdTriple.h
+++ b/src/global/IdTriple.h
@@ -1,4 +1,4 @@
-// Copyright 2023, University of Freiburg
+// Copyright 2024, University of Freiburg
 // Chair of Algorithms and Data Structures
 // Authors: Hannah Bast <bast@cs.uni-freiburg.de>
 
@@ -10,9 +10,3 @@
 
 // Should we have an own class for this? We need this at several places.
 using IdTriple = std::array<Id, 3>;
-
-// Hash value for such triple.
-template <typename H>
-H AbslHashValue(H h, const IdTriple& triple) {
-  return H::combine(std::move(h), triple[0], triple[1], triple[2]);
-}
diff --git a/src/index/IndexMetaData.h b/src/index/IndexMetaData.h
index 9842faeb69..4460cc39ab 100644
--- a/src/index/IndexMetaData.h
+++ b/src/index/IndexMetaData.h
@@ -87,10 +87,8 @@ class IndexMetaData {
   // name and the variable name are terrible.
 
   // For each relation, its meta data.
- public:
   MapType _data;
 
- private:
   // For each compressed block, its meta data.
   BlocksType _blockData;
 
diff --git a/src/index/LocatedTriples.cpp b/src/index/LocatedTriples.cpp
index a4b05aa7db..2349b300b9 100644
--- a/src/index/LocatedTriples.cpp
+++ b/src/index/LocatedTriples.cpp
@@ -79,8 +79,8 @@ LocatedTriple LocatedTriple::locateTripleInPermutation(
   Id searchId = matchingBlock->firstTriple_.col0Id_ > id1
                     ? matchingBlock->firstTriple_.col0Id_
                     : id1;
-  const auto& it = meta._data.lower_bound(searchId);
-  AD_CORRECTNESS_CHECK(it != meta._data.end());
+  const auto& it = meta.data().lower_bound(searchId);
+  AD_CORRECTNESS_CHECK(it != meta.data().end());
   Id id = it.getId();
   const auto& relationMetadata = meta.getMetaData(id);
   size_t offsetBegin = relationMetadata.offsetInBlock_;
diff --git a/src/index/LocatedTriples.h b/src/index/LocatedTriples.h
index e2a9735dc4..920f6d246e 100644
--- a/src/index/LocatedTriples.h
+++ b/src/index/LocatedTriples.h
@@ -121,9 +121,10 @@ class LocatedTriplesPerBlock {
   // Add the given `locatedTriple` to the given `LocatedTriplesPerBlock`.
   // Return a handle to where it was added (`LocatedTriples` is a sorted set,
   // see above). We need this handle so that we can easily remove the
-  // `locatedTriple` again from the set in case we need to.
+  // `locatedTriple` from the set again in case we need to.
   //
-  // The `locatedTriple` must not already exist in `LocatedTriplesPerBlock`.
+  // Precondition: The `locatedTriple` must not already exist in
+  // `LocatedTriplesPerBlock`.
   LocatedTriples::iterator add(const LocatedTriple& locatedTriple) {
     LocatedTriples& locatedTriples = map_[locatedTriple.blockIndex];
     auto [handle, wasInserted] = locatedTriples.emplace(locatedTriple);
@@ -133,7 +134,7 @@ class LocatedTriplesPerBlock {
     return handle;
   };
 
-  // Get the total number of `LocatedTriple` objects (for all blocks).
+  // Get the total number of `LocatedTriple`s (for all blocks).
   size_t numTriples() const { return numTriples_; }
 
   // Get the number of blocks with a non-empty set of located triples.
@@ -180,7 +181,7 @@ std::ostream& operator<<(std::ostream& os, const IdTable& idTable);
 // 1. The position is defined by the index of a block in the permutation and the
 // index of a row within that block.
 //
-// 2. If the triple in contained in the permutation, it is contained exactly
+// 2. If the triple is contained in the permutation, it is contained exactly
 // once and so there is a well defined block and position in that block.
 //
 // 2. If there is a block, where the first triple is smaller and the last triple
diff --git a/test/LocatedTriplesTest.cpp b/test/LocatedTriplesTest.cpp
index 4365b60466..4753104186 100644
--- a/test/LocatedTriplesTest.cpp
+++ b/test/LocatedTriplesTest.cpp
@@ -47,6 +47,7 @@ TEST_F(LocatedTriplesTest, numTriplesInBlock) {
   ASSERT_EQ(locatedTriplesPerBlock.numBlocks(), 3);
   ASSERT_EQ(locatedTriplesPerBlock.numTriples(), 7);
 
+  // Shorthand for creating a pair of counts.
   auto P = [](size_t n1, size_t n2) -> std::pair<size_t, size_t> {
     return {n1, n2};
   };
@@ -201,7 +202,7 @@ TEST_F(LocatedTriplesTest, locatedTriple) {
         IndexImpl indexBuilder(testAllocator);
         indexBuilder.setOnDiskBase(testIndexBasename);
         indexBuilder.blocksizePermutationPerColumn() = blockSize;
-        // The
+        // The function `createPermutationPair` expects a generator.
         IndexImpl::BlocksOfTriples blocksOfTriples =
             [&triplesInIndex]() -> cppcoro::generator<IdTableStatic<0>> {
           co_yield triplesInIndex.clone();