From fd1bdf86eeafd3046ef8dcd63761332e9d9be880 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 30 Jan 2024 13:17:52 +0100 Subject: [PATCH 01/92] Added graphblas dependecies --- CMakeLists.txt | 2 +- Dockerfile | 4 ++-- src/engine/CMakeLists.txt | 2 +- test/CMakeLists.txt | 4 +++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 40b32018d3..24832b41d7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -393,7 +393,7 @@ add_executable(IndexBuilderMain src/index/IndexBuilderMain.cpp) qlever_target_link_libraries(IndexBuilderMain index ${CMAKE_THREAD_LIBS_INIT} Boost::program_options) add_executable(ServerMain src/ServerMain.cpp) -qlever_target_link_libraries (ServerMain engine ${CMAKE_THREAD_LIBS_INIT} Boost::program_options) +qlever_target_link_libraries (ServerMain engine ${CMAKE_THREAD_LIBS_INIT} Boost::program_options graphblas) target_precompile_headers(ServerMain REUSE_FROM engine) add_executable(PrefixHeuristicEvaluatorMain src/PrefixHeuristicEvaluatorMain.cpp) diff --git a/Dockerfile b/Dockerfile index 611c014507..d1815356b3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository -y ppa:mhier/libboost-latest FROM base as builder -RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev +RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev libgraphblas6 libgraphblas-dev COPY . 
/app/ @@ -21,7 +21,7 @@ RUN ctest --rerun-failed --output-on-failure FROM base as runtime WORKDIR /app ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev +RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev libgraphblas6 libgraphblas-dev ARG UID=1000 RUN groupadd -r qlever && useradd --no-log-init -r -u $UID -g qlever qlever && chown qlever:qlever /app diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 239e10dc56..acd89c4e2e 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -11,5 +11,5 @@ add_library(engine Values.cpp Bind.cpp Minus.cpp RuntimeInformation.cpp CheckUsePatternTrick.cpp VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp - idTable/CompressedExternalIdTable.h) + idTable/CompressedExternalIdTable.h GrbMatrix.cpp) qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 65b94250c7..b082b593a4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -7,7 +7,7 @@ add_subdirectory(util) # general test utilities and all libraries that are specified as additional # arguments. 
function (linkTest basename) - qlever_target_link_libraries(${basename} ${ARGN} GTest::gtest GTest::gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) + qlever_target_link_libraries(${basename} ${ARGN} GTest::gtest GTest::gmock_main testUtil graphblas ${CMAKE_THREAD_LIBS_INIT}) endfunction() # Add the exectutable ${basename} that is compiled from the source file @@ -149,6 +149,8 @@ addLinkAndDiscoverTest(MultiColumnJoinTest engine) addLinkAndDiscoverTest(IdTableTest util) +addLinkAndDiscoverTest(GrbMatrixTest engine) + addLinkAndDiscoverTest(TransitivePathTest engine) addLinkAndDiscoverTest(BatchedPipelineTest) From 285528bbf7a46cb489e3e0e55b0d5f2e4d173051 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 30 Jan 2024 13:18:19 +0100 Subject: [PATCH 02/92] Added wrapper for graphblas matrix --- src/engine/GrbMatrix.cpp | 247 +++++++++++++++++++++++++++++++++++++++ src/engine/GrbMatrix.h | 93 +++++++++++++++ test/GrbMatrixTest.cpp | 230 ++++++++++++++++++++++++++++++++++++ 3 files changed, 570 insertions(+) create mode 100644 src/engine/GrbMatrix.cpp create mode 100644 src/engine/GrbMatrix.h create mode 100644 test/GrbMatrixTest.cpp diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp new file mode 100644 index 0000000000..d02936c904 --- /dev/null +++ b/src/engine/GrbMatrix.cpp @@ -0,0 +1,247 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Johannes Herrmann (johannes.roland.herrmann@mars.uni-freiburg.de)
+
+#include "GrbMatrix.h"
+
+#include <GraphBLAS.h>
+
+#include <memory>
+#include <vector>
+
+#include "util/Exception.h"
+
+// _____________________________________________________________________________
+GrbMatrix GrbMatrix::copy() const {
+  // `GrB_Matrix_dup` allocates the duplicate itself; pre-allocating the handle
+  // with `GrB_Matrix_new` (as done previously) leaked that first allocation.
+  GrbMatrix result;
+  handleError(GrB_Matrix_dup(result.matrix_.get(), *matrix_));
+  return result;
+}
+
+// _____________________________________________________________________________
+void GrbMatrix::setElement(size_t row, size_t col, bool value) {
+  handleError(GrB_Matrix_setElement_BOOL(*matrix_, value, row, col));
+}
+
+// _____________________________________________________________________________
+bool GrbMatrix::getElement(size_t row, size_t col) const {
+  bool result = false;
+  auto info = GrB_Matrix_extractElement_BOOL(&result, *matrix_, row, col);
+  // `handleError` lets `GrB_NO_VALUE` pass; a missing entry reads as `false`.
+  handleError(info);
+  return info != GrB_NO_VALUE && result;
+}
+
+// _____________________________________________________________________________
+GrbMatrix GrbMatrix::build(const std::vector<size_t> rowIndices,
+                           const std::vector<size_t> colIndices, size_t numRows,
+                           size_t numCols) {
+  // The two index lists are read in lockstep and must have equal length.
+  if (rowIndices.size() != colIndices.size()) {
+    AD_THROW("GrbMatrix::build: row and column index lists differ in length");
+  }
+  auto matrix = GrbMatrix(numRows, numCols);
+  GrB_Index nvals = rowIndices.size();
+  if (nvals == 0) {
+    return matrix;
+  }
+
+  // Heap storage instead of a variable-length array: VLAs are not standard
+  // C++ and overflow the stack for large inputs.
+  auto values = std::make_unique<bool[]>(nvals);
+  for (size_t i = 0; i < nvals; i++) {
+    values[i] = true;
+  }
+  handleError(GrB_Matrix_build_BOOL(matrix.getMatrix(), rowIndices.data(),
+                                    colIndices.data(), values.get(), nvals,
+                                    GxB_IGNORE_DUP));
+  return matrix;
+}
+
+// _____________________________________________________________________________
+GrbMatrix GrbMatrix::diag(size_t nvals) {
+  auto result = GrbMatrix(nvals, nvals);
+  for (size_t i = 0; i < nvals; i++) {
+    result.setElement(i, i, true);
+  }
+  return result;
+}
+
+// _____________________________________________________________________________
+std::vector<std::pair<size_t, size_t>> GrbMatrix::extractTuples() const {
+  std::vector<std::pair<size_t, size_t>> result;
+  size_t n = nvals();
+  if (n == 0) {
+    return result;
+  }
+
+  // Heap buffers: the entry count is unbounded, so stack VLAs are unsafe.
+  std::vector<size_t> rowIndices(n);
+  std::vector<size_t> colIndices(n);
+  auto values = std::make_unique<bool[]>(n);
+  handleError(GrB_Matrix_extractTuples_BOOL(
+      rowIndices.data(), colIndices.data(), values.get(), &n, *matrix_));
+
+  // Explicitly stored `false` entries are not part of the result.
+  result.reserve(n);
+  for (size_t i = 0; i < n; i++) {
+    if (values[i]) {
+      result.emplace_back(rowIndices[i], colIndices[i]);
+    }
+  }
+  return result;
+}
+
+// _____________________________________________________________________________
+std::vector<size_t> GrbMatrix::extractColumn(size_t colIndex) const {
+  size_t numRows = nrows();
+  GrB_Vector columnVector = nullptr;
+  handleError(GrB_Vector_new(&columnVector, GrB_BOOL, numRows));
+
+  // A column holds at most `numRows` entries; GraphBLAS overwrites
+  // `numEntries` with the number of tuples it actually wrote.
+  std::vector<size_t> indices(numRows);
+  auto values = std::make_unique<bool[]>(numRows);
+  size_t numEntries = numRows;
+  auto info = GrB_Col_extract(columnVector, GrB_NULL, GrB_NULL, *matrix_,
+                              GrB_ALL, numRows, colIndex, GrB_NULL);
+  if (info == GrB_SUCCESS) {
+    info = GrB_Vector_extractTuples_BOOL(indices.data(), values.get(),
+                                         &numEntries, columnVector);
+  }
+  // Free the temporary vector before reporting errors so it cannot leak.
+  auto freeInfo = GrB_Vector_free(&columnVector);
+  handleError(info);
+  handleError(freeInfo);
+
+  // Report only `true` entries, consistent with `extractTuples`.
+  std::vector<size_t> result;
+  for (size_t i = 0; i < numEntries; i++) {
+    if (values[i]) {
+      result.push_back(indices[i]);
+    }
+  }
+  return result;
+}
+
+// _____________________________________________________________________________
+std::vector<size_t> GrbMatrix::extractRow(size_t rowIndex) const {
+  // NOTE(review): this builds a full transpose per call; callers extracting
+  // many rows should transpose once and use `extractColumn` on the result.
+  GrbMatrix transposed = transpose();
+  return transposed.extractColumn(rowIndex);
+}
+
+// _____________________________________________________________________________
+size_t GrbMatrix::nvals() const {
+  size_t nvals = 0;
+  handleError(GrB_Matrix_nvals(&nvals, *matrix_));
+  return nvals;
+}
+
+// _____________________________________________________________________________
+size_t GrbMatrix::nrows() const {
+  size_t nrows = 0;
+  handleError(GrB_Matrix_nrows(&nrows, *matrix_));
+  return nrows;
+}
+
+// _____________________________________________________________________________
+size_t GrbMatrix::ncols() const {
+  size_t ncols = 0;
+  handleError(GrB_Matrix_ncols(&ncols, *matrix_));
+  return ncols;
+}
+
+// _____________________________________________________________________________
+GrbMatrix GrbMatrix::transpose() const {
+  // The wrapper owns the output handle, so it is released if an error throws.
+  GrbMatrix result(ncols(), nrows());
+  handleError(GrB_transpose(result.getMatrix(), GrB_NULL, GrB_NULL, *matrix_,
+                            GrB_NULL));
+  return result;
+}
+
+// _____________________________________________________________________________
+void GrbMatrix::accumulateMultiply(const GrbMatrix& otherMatrix) const {
+  // In place: this = this OR (this * otherMatrix) on the boolean semiring.
+  handleError(GrB_mxm(*matrix_, GrB_NULL, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL,
+                      *matrix_, otherMatrix.getMatrix(), GrB_NULL));
+}
+
+// _____________________________________________________________________________
+GrbMatrix GrbMatrix::multiply(const GrbMatrix& otherMatrix) const {
+  // Multiply straight into the wrapper-owned matrix. Swapping a separately
+  // allocated handle into an already constructed wrapper leaked a matrix.
+  GrbMatrix result(nrows(), otherMatrix.ncols());
+  handleError(GrB_mxm(result.getMatrix(), GrB_NULL, GrB_NULL,
+                      GrB_LOR_LAND_SEMIRING_BOOL, *matrix_,
+                      otherMatrix.getMatrix(), GrB_NULL));
+  return result;
+}
+
+// _____________________________________________________________________________
+void GrbMatrix::handleError(GrB_Info info) {
+  switch (info) {
+    case GrB_SUCCESS:
+    case GrB_NO_VALUE:
+      // `GrB_NO_VALUE` is informational (entry not stored), not a failure.
+      return;
+    // case GxB_EXHAUSTED:
+    // return;
+    case GrB_UNINITIALIZED_OBJECT:
+      AD_THROW("GraphBLAS error: object has not been initialized");
+    case GrB_NULL_POINTER:
+      AD_THROW("GraphBLAS error: input pointer is NULL");
+    case GrB_INVALID_VALUE:
+      AD_THROW("GraphBLAS error: generic error code; some value is bad");
+    case GrB_INVALID_INDEX:
+      AD_THROW("GraphBLAS error: a row or column index is out of bounds");
+    case GrB_DOMAIN_MISMATCH:
+      AD_THROW("GraphBLAS error: object domains are not compatible");
+    case GrB_DIMENSION_MISMATCH:
+      AD_THROW("GraphBLAS error: matrix dimensions do not match");
+    case GrB_OUTPUT_NOT_EMPTY:
+      AD_THROW("GraphBLAS error: output matrix already has values in it");
+    case GrB_NOT_IMPLEMENTED:
+      AD_THROW("GraphBLAS error: not implemented in SuiteSparse:GraphBLAS");
+    case GrB_PANIC:
+      AD_THROW("GraphBLAS error: unrecoverable error");
+    case GrB_OUT_OF_MEMORY:
+      AD_THROW("GraphBLAS error: out of memory");
+    case GrB_INSUFFICIENT_SPACE:
+      AD_THROW("GraphBLAS error: output array not large enough");
+    case GrB_INVALID_OBJECT:
+      AD_THROW("GraphBLAS error: object is corrupted");
+    case GrB_INDEX_OUT_OF_BOUNDS:
+      AD_THROW("GraphBLAS error: a row or column is out of bounds");
+    case GrB_EMPTY_OBJECT:
+      AD_THROW("GraphBLAS error: an input scalar has no entry");
+  }
+  AD_FAIL();
+}
+
+bool GrbMatrix::isInitialized_ = false;
+
+// _____________________________________________________________________________
+void GrbMatrix::initialize() {
+  // The return value is intentionally not passed to `handleError`.
+  // NOTE(review): the GraphBLAS spec appears to allow only one GrB_init per
+  // process, so a call after finalize() may report an error - confirm, rework.
+  if (!GrbMatrix::isInitialized_) {
+    GrB_init(GrB_NONBLOCKING);
+    GrbMatrix::isInitialized_ = true;
+  }
+}
+
+// _____________________________________________________________________________
+void GrbMatrix::finalize() {
+  // NOTE(review): GrbMatrix objects should be destroyed before this call; their
+  // destructor calls `GrB_Matrix_free` - confirm it is valid after finalize.
+  if (GrbMatrix::isInitialized_) {
+    GrB_finalize();
+    GrbMatrix::isInitialized_ = false;
+  }
+}
diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h
new file mode 100644
index 0000000000..50f11599ce
--- /dev/null
+++ b/src/engine/GrbMatrix.h
@@ -0,0 +1,93 @@
+// Copyright 2024, University of Freiburg,
+// Chair of Algorithms and Data Structures.
+// Author: Johannes Herrmann (johannes.roland.herrmann@mars.uni-freiburg.de) + +#pragma once + +#include + +#include +#include + +// This class wraps the functionality of the GraphBLAS object GrB_Matrix. +// Currently only boolean matrices are supported. +class GrbMatrix { + private: + std::unique_ptr matrix_; + static bool isInitialized_; + + public: + GrbMatrix(size_t numRows, size_t numCols) { + matrix_ = std::make_unique(); + auto info = GrB_Matrix_new(matrix_.get(), GrB_BOOL, numRows, numCols); + handleError(info); + } + + GrbMatrix() { matrix_ = std::make_unique(); } + + // Move constructor + GrbMatrix(GrbMatrix&& otherMatrix) { + matrix_ = std::move(otherMatrix.matrix_); + }; + + // Disable copy constructor and assignment operator + GrbMatrix(const GrbMatrix&) = delete; + GrbMatrix& operator=(const GrbMatrix&) = delete; + + ~GrbMatrix() { GrB_Matrix_free(matrix_.get()); } + + GrbMatrix copy() const; + + void setElement(size_t row, size_t col, bool value); + + bool getElement(size_t row, size_t col) const; + + // Create a matrix from the given lists of indices. + // For each given pair of indices, the corresponding entry in the result + // matrix is set to true. All other entries are false (by default). + static GrbMatrix build(const std::vector rowIndices, + const std::vector colIndices, size_t numRows, + size_t numCols); + + // Create a square, diagonal matrix. All entries on the diagonal are set to + // true, all others to false. The resulting matrix will have nvals rows and + // columns. + static GrbMatrix diag(size_t nvals); + + // Extract all true entries from the matrix. The first entry in the pair is + // the row index, the second entry is the column index. + std::vector> extractTuples() const; + + // Extract a column from the matrix. Returns all row indices where this + // column's entries are true. + std::vector extractColumn(size_t colIndex) const; + + // Extract a row from the matrix. 
Returns all column indices where this + // rows's entries are true. + std::vector extractRow(size_t rowIndex) const; + + // Number of "true" values in the matrix. + size_t nvals() const; + + size_t nrows() const; + + size_t ncols() const; + + GrbMatrix transpose() const; + + // Multiply this matrix with the other matrix and accumulate the result in + // this matrix. Logical or is used for accumulation. + void accumulateMultiply(const GrbMatrix& otherMatrix) const; + + // Multiply this matrix with another matrix and write the result to a new + // matrix. + GrbMatrix multiply(const GrbMatrix& otherMatrix) const; + + static void initialize(); + + static void finalize(); + + private: + GrB_Matrix& getMatrix() const { return *matrix_; } + static void handleError(GrB_Info info); +}; diff --git a/test/GrbMatrixTest.cpp b/test/GrbMatrixTest.cpp new file mode 100644 index 0000000000..1d17cae50a --- /dev/null +++ b/test/GrbMatrixTest.cpp @@ -0,0 +1,230 @@ +#include + +#include "engine/GrbMatrix.h" +#include "gmock/gmock.h" + +TEST(GrbMatrixTest, constructor) { + GrbMatrix::initialize(); + + GrbMatrix matrix = GrbMatrix(2, 3); + size_t numRows = matrix.nrows(); + size_t numCols = matrix.ncols(); + size_t nvals = matrix.nvals(); + + GrbMatrix::finalize(); + + EXPECT_EQ(nvals, 0); + EXPECT_EQ(numRows, 2); + EXPECT_EQ(numCols, 3); +} + +TEST(GrbMatrixTest, copy) { + GrbMatrix::initialize(); + + GrbMatrix matrix1 = GrbMatrix(2, 2); + matrix1.setElement(0, 0, true); + + GrbMatrix matrix2 = matrix1.copy(); + + matrix1.setElement(1, 1, true); + + EXPECT_EQ(matrix2.getElement(0, 0), true); + EXPECT_EQ(matrix2.getElement(0, 1), false); + EXPECT_EQ(matrix2.getElement(1, 0), false); + EXPECT_EQ(matrix2.getElement(1, 1), false); + + GrbMatrix::finalize(); +} + +TEST(GrbMatrixTest, getSetElement) { + GrbMatrix::initialize(); + + GrbMatrix matrix = GrbMatrix(3, 3); + matrix.setElement(1, 0, true); + matrix.setElement(0, 2, true); + + bool elemOneZero = matrix.getElement(1, 0); + bool 
elemZeroTwo = matrix.getElement(0, 2); + bool elemOneTwo = matrix.getElement(1, 2); + size_t nvals = matrix.nvals(); + + GrbMatrix::finalize(); + + EXPECT_EQ(nvals, 2); + EXPECT_EQ(elemOneZero, true); + EXPECT_EQ(elemZeroTwo, true); + EXPECT_EQ(elemOneTwo, false); +} + +TEST(GrbMatrixTest, build) { + GrbMatrix::initialize(); + + std::vector rowIndices{0, 0, 1}; + std::vector colIndices{1, 2, 2}; + + GrbMatrix matrix = GrbMatrix::build(rowIndices, colIndices, 3, 3); + + EXPECT_EQ(false, matrix.getElement(0, 0)); + EXPECT_EQ(true, matrix.getElement(0, 1)); + EXPECT_EQ(true, matrix.getElement(0, 2)); + + EXPECT_EQ(false, matrix.getElement(1, 0)); + EXPECT_EQ(false, matrix.getElement(1, 1)); + EXPECT_EQ(true, matrix.getElement(1, 2)); + + EXPECT_EQ(false, matrix.getElement(2, 0)); + EXPECT_EQ(false, matrix.getElement(2, 1)); + EXPECT_EQ(false, matrix.getElement(2, 2)); + + GrbMatrix::finalize(); +} + +TEST(GrbMatrixTest, diag) { + GrbMatrix::initialize(); + + auto matrix = GrbMatrix::diag(3); + + EXPECT_EQ(true, matrix.getElement(0, 0)); + EXPECT_EQ(false, matrix.getElement(0, 1)); + EXPECT_EQ(false, matrix.getElement(0, 2)); + + EXPECT_EQ(false, matrix.getElement(1, 0)); + EXPECT_EQ(true, matrix.getElement(1, 1)); + EXPECT_EQ(false, matrix.getElement(1, 2)); + + EXPECT_EQ(false, matrix.getElement(2, 0)); + EXPECT_EQ(false, matrix.getElement(2, 1)); + EXPECT_EQ(true, matrix.getElement(2, 2)); + + GrbMatrix::finalize(); +} + +TEST(GrbMatrixTest, extractTuples) { + GrbMatrix::initialize(); + + GrbMatrix matrix = GrbMatrix(3, 3); + + matrix.setElement(0, 1, true); + matrix.setElement(0, 2, true); + matrix.setElement(1, 2, true); + + std::vector> tuples = matrix.extractTuples(); + + GrbMatrix::finalize(); + + std::vector> expected; + expected.push_back({0, 1}); + expected.push_back({0, 2}); + expected.push_back({1, 2}); + + EXPECT_THAT(tuples, testing::UnorderedElementsAreArray(expected)); +} + +TEST(GrbMatrixTest, extractColumn) { + GrbMatrix::initialize(); + + GrbMatrix 
matrix = GrbMatrix(3, 3); + + matrix.setElement(0, 1, true); + matrix.setElement(2, 1, true); + + std::vector colIndices = matrix.extractColumn(1); + + GrbMatrix::finalize(); + + std::vector expected{0, 2}; + + EXPECT_THAT(colIndices, testing::UnorderedElementsAreArray(expected)); +} + +TEST(GrbMatrixTest, multiplySquareMatrices) { + GrbMatrix::initialize(); + + GrbMatrix matrix1 = GrbMatrix(2, 2); + matrix1.setElement(0, 0, true); + matrix1.setElement(1, 1, true); + + GrbMatrix matrix2 = GrbMatrix(2, 2); + matrix2.setElement(0, 0, true); + matrix2.setElement(1, 0, true); + + GrbMatrix matrix3 = matrix1.multiply(matrix2); + + EXPECT_EQ(matrix3.getElement(0, 0), true); + EXPECT_EQ(matrix3.getElement(0, 1), false); + EXPECT_EQ(matrix3.getElement(1, 0), true); + EXPECT_EQ(matrix3.getElement(1, 1), false); + + GrbMatrix::finalize(); +} + +TEST(GrbMatrixTest, multiplyShapedMatrices) { + GrbMatrix::initialize(); + + GrbMatrix matrix1 = GrbMatrix(2, 3); + matrix1.setElement(0, 0, true); + matrix1.setElement(1, 1, true); + + GrbMatrix matrix2 = GrbMatrix(3, 2); + matrix2.setElement(0, 0, true); + matrix2.setElement(1, 0, true); + matrix2.setElement(2, 0, true); + + GrbMatrix matrix3 = matrix1.multiply(matrix2); + + EXPECT_EQ(matrix3.nrows(), 2); + EXPECT_EQ(matrix3.ncols(), 2); + EXPECT_EQ(matrix3.getElement(0, 0), true); + EXPECT_EQ(matrix3.getElement(0, 1), false); + EXPECT_EQ(matrix3.getElement(1, 0), true); + EXPECT_EQ(matrix3.getElement(1, 1), false); + + GrbMatrix::finalize(); +} + +TEST(GrbMatrixTest, transpose) { + GrbMatrix::initialize(); + + auto matrix = GrbMatrix(2, 3); + + matrix.setElement(0, 0, true); + matrix.setElement(0, 1, true); + matrix.setElement(0, 2, true); + + GrbMatrix result = matrix.transpose(); + + EXPECT_EQ(3, result.nrows()); + EXPECT_EQ(2, result.ncols()); + + EXPECT_EQ(true, result.getElement(0, 0)); + EXPECT_EQ(false, result.getElement(0, 1)); + + EXPECT_EQ(true, result.getElement(1, 0)); + EXPECT_EQ(false, result.getElement(1, 1)); + + 
EXPECT_EQ(true, result.getElement(2, 0)); + EXPECT_EQ(false, result.getElement(2, 1)); + + GrbMatrix::finalize(); +} + +TEST(GrbMatrixTest, accumulateMultiply) { + GrbMatrix::initialize(); + + GrbMatrix matrix1 = GrbMatrix(2, 2); + matrix1.setElement(0, 0, true); + matrix1.setElement(1, 1, true); + + GrbMatrix matrix2 = GrbMatrix(2, 2); + matrix2.setElement(0, 1, true); + matrix2.setElement(1, 0, true); + + matrix1.accumulateMultiply(matrix2); + + EXPECT_EQ(matrix1.getElement(0, 0), true); + EXPECT_EQ(matrix1.getElement(0, 1), true); + EXPECT_EQ(matrix1.getElement(1, 0), true); + EXPECT_EQ(matrix1.getElement(1, 1), true); + + GrbMatrix::finalize(); +} From 935bc0d8d746381b111969d7a9aacedc698737e1 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 30 Jan 2024 13:18:39 +0100 Subject: [PATCH 03/92] Replaced transitiveHull computation --- src/engine/TransitivePath.cpp | 343 ++++++++++++++++------------------ src/engine/TransitivePath.h | 182 ++++++++++++------ test/TransitivePathTest.cpp | 3 - 3 files changed, 289 insertions(+), 239 deletions(-) diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp index c086561faf..5ff0b85a64 100644 --- a/src/engine/TransitivePath.cpp +++ b/src/engine/TransitivePath.cpp @@ -5,6 +5,9 @@ #include "TransitivePath.h" #include +#include +#include +#include #include "engine/CallFixedSize.h" #include "engine/ExportQueryExecutionTrees.h" @@ -187,19 +190,30 @@ void TransitivePath::computeTransitivePathBound( const TransitivePathSide& targetSide, const IdTable& startSideTable) const { IdTableStatic res = std::move(*dynRes).toStatic(); - auto [edges, nodes] = setupMapAndNodes( - dynSub, startSide, targetSide, startSideTable); + const IdTableView sub = dynSub.asStaticView(); + decltype(auto) startCol = sub.getColumn(startSide.subCol_); + decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); + + GrbMatrix::initialize(); + auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); + + std::span 
startNodes = + startSideTable.getColumn(startSide.treeAndCol_->second); + GrbMatrix startNodeMatrix = + setupStartNodeMatrix(startNodes, graph.nrows(), mapping); - Map hull(allocator()); + auto hull = std::make_unique( + transitiveHull(graph, std::make_optional(std::move(startNodeMatrix)))); if (!targetSide.isVariable()) { - hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); - } else { - hull = transitiveHull(edges, nodes, std::nullopt); + Id target = std::get(targetSide.value_); + size_t targetIndex = mapping.getIndex(target); + hull = std::make_unique(getTargetRow(*hull, targetIndex)); } TransitivePath::fillTableWithHull( - res, hull, nodes, startSide.outputCol_, targetSide.outputCol_, - startSideTable, startSide.treeAndCol_.value().second); + res, *hull, mapping, startSideTable, startNodes, startSide.outputCol_, + targetSide.outputCol_, startSide.treeAndCol_.value().second); + GrbMatrix::finalize(); *dynRes = std::move(res).toDynamic(); } @@ -211,19 +225,41 @@ void TransitivePath::computeTransitivePath( const TransitivePathSide& targetSide) const { IdTableStatic res = std::move(*dynRes).toStatic(); - auto [edges, nodes] = - setupMapAndNodes(dynSub, startSide, targetSide); + const IdTableView sub = dynSub.asStaticView(); + decltype(auto) startCol = sub.getColumn(startSide.subCol_); + decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); + + GrbMatrix::initialize(); + auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); - Map hull{allocator()}; - if (!targetSide.isVariable()) { - hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); + std::unique_ptr hull; + if (!startSide.isVariable()) { + const Id startNode[]{std::get(startSide.value_)}; + GrbMatrix startMatrix = + setupStartNodeMatrix(startNode, graph.nrows(), mapping); + hull = std::make_unique( + transitiveHull(graph, std::make_optional(std::move(startMatrix)))); } else { - hull = transitiveHull(edges, nodes, std::nullopt); + hull = 
std::make_unique(transitiveHull(graph, std::nullopt)); } - TransitivePath::fillTableWithHull(res, hull, startSide.outputCol_, - targetSide.outputCol_); + if (!targetSide.isVariable()) { + Id target = std::get(targetSide.value_); + size_t targetIndex = mapping.getIndex(target); + hull = std::make_unique(getTargetRow(*hull, targetIndex)); + } + + if (!startSide.isVariable()) { + const Id startNode[]{std::get(startSide.value_)}; + TransitivePath::fillTableWithHull(res, *hull, mapping, startNode, + startSide.outputCol_, + targetSide.outputCol_); + } else { + TransitivePath::fillTableWithHull( + res, *hull, mapping, startSide.outputCol_, targetSide.outputCol_); + } + GrbMatrix::finalize(); *dynRes = std::move(res).toDynamic(); } @@ -342,199 +378,154 @@ bool TransitivePath::isBoundOrId() const { } // _____________________________________________________________________________ -TransitivePath::Map TransitivePath::transitiveHull( - const Map& edges, const std::vector& startNodes, - std::optional target) const { - using MapIt = Map::const_iterator; - // For every node do a dfs on the graph - Map hull{allocator()}; - - // Stores nodes we already have a path to. This avoids cycles. - ad_utility::HashSetWithMemoryLimit marks{ - getExecutionContext()->getAllocator()}; - - // The stack used to store the dfs' progress - std::vector positions; - - // Used to store all edges leading away from a node for every level. - // Reduces access to the hashmap, and is safe as the map will not - // be modified after this point. 
- std::vector edgeCache; - - for (Id currentStartNode : startNodes) { - if (hull.contains(currentStartNode)) { - // We have already computed the hull for this node - continue; - } +GrbMatrix TransitivePath::transitiveHull( + const GrbMatrix& graph, std::optional startNodes) const { + size_t pathLength = 0; + std::unique_ptr result; - // Reset for this iteration - marks.clear(); + if (startNodes) { + result = std::make_unique(std::move(startNodes.value())); + } else { + result = std::make_unique(GrbMatrix::diag(graph.nrows())); + } - MapIt rootEdges = edges.find(currentStartNode); - if (rootEdges != edges.end()) { - positions.push_back(rootEdges->second.begin()); - edgeCache.push_back(&rootEdges->second); - } - if (minDist_ == 0 && - (!target.has_value() || currentStartNode == target.value())) { - insertIntoMap(hull, currentStartNode, currentStartNode); - } + if (minDist_ > 0) { + result = std::make_unique(result->multiply(graph)); + pathLength++; + } - // While we have not found the entire transitive hull and have not reached - // the max step limit - while (!positions.empty()) { - checkCancellation(); - size_t stackIndex = positions.size() - 1; - // Process the next child of the node at the top of the stack - Set::const_iterator& pos = positions[stackIndex]; - const Set* nodeEdges = edgeCache.back(); - - if (pos == nodeEdges->end()) { - // We finished processing this node - positions.pop_back(); - edgeCache.pop_back(); - continue; - } - - Id child = *pos; - ++pos; - size_t childDepth = positions.size(); - if (childDepth <= maxDist_ && marks.count(child) == 0) { - // process the child - if (childDepth >= minDist_) { - marks.insert(child); - if (!target.has_value() || child == target.value()) { - insertIntoMap(hull, currentStartNode, child); - } - } - // Add the child to the stack - MapIt it = edges.find(child); - if (it != edges.end()) { - positions.push_back(it->second.begin()); - edgeCache.push_back(&it->second); - } - } - } + size_t previousNvals = 0; + size_t 
nvals = result->nvals(); + while (nvals > previousNvals && pathLength < maxDist_) { + previousNvals = result->nvals(); + result->accumulateMultiply(graph); + nvals = result->nvals(); + pathLength++; } - return hull; + return std::move(*result); } // _____________________________________________________________________________ -template +template void TransitivePath::fillTableWithHull(IdTableStatic& table, - const Map& hull, std::vector& nodes, + const GrbMatrix& hull, + const IdMapping& mapping, size_t startSideCol, - size_t targetSideCol, - const IdTable& startSideTable, - size_t skipCol) { - IdTableView startView = - startSideTable.asStaticView(); + size_t targetSideCol) { + std::vector> pairs = hull.extractTuples(); + for (size_t i = 0; i < pairs.size(); i++) { + table.emplace_back(); + auto [startIndex, targetIndex] = pairs[i]; + Id startId = mapping.getId(startIndex); + Id targetId = mapping.getId(targetIndex); + table(i, startSideCol) = startId; + table(i, targetSideCol) = targetId; + } +} +// _____________________________________________________________________________ +template +void TransitivePath::fillTableWithHull(IdTableStatic& table, + const GrbMatrix& hull, + const IdMapping& mapping, + std::span startNodes, + size_t startSideCol, + size_t targetSideCol) { + size_t resultRowIndex = 0; size_t rowIndex = 0; - for (size_t i = 0; i < nodes.size(); i++) { - Id node = nodes[i]; - auto it = hull.find(node); - if (it == hull.end()) { - continue; - } - for (Id otherNode : it->second) { + for (auto startNode : startNodes) { + std::vector indices = hull.extractRow(rowIndex); + for (size_t index : indices) { + Id targetNode = mapping.getId(index); table.emplace_back(); - table(rowIndex, startSideCol) = node; - table(rowIndex, targetSideCol) = otherNode; - - TransitivePath::copyColumns(startView, table, i, - rowIndex, skipCol); - - rowIndex++; + table(resultRowIndex, startSideCol) = startNode; + table(resultRowIndex, targetSideCol) = targetNode; + 
resultRowIndex++; } + rowIndex++; } } // _____________________________________________________________________________ -template +template void TransitivePath::fillTableWithHull(IdTableStatic& table, - const Map& hull, size_t startSideCol, - size_t targetSideCol) { + const GrbMatrix& hull, + const IdMapping& mapping, + const IdTable& startSideTable, + std::span startNodes, + size_t startSideCol, + size_t targetSideCol, size_t skipCol) { + IdTableView startView = + startSideTable.asStaticView(); + + size_t resultRowIndex = 0; size_t rowIndex = 0; - for (auto const& [node, linkedNodes] : hull) { - for (Id linkedNode : linkedNodes) { + for (auto startNode : startNodes) { + std::vector indices = hull.extractRow(rowIndex); + for (size_t index : indices) { + Id targetNode = mapping.getId(index); table.emplace_back(); - table(rowIndex, startSideCol) = node; - table(rowIndex, targetSideCol) = linkedNode; + table(resultRowIndex, startSideCol) = startNode; + table(resultRowIndex, targetSideCol) = targetNode; - rowIndex++; + TransitivePath::copyColumns( + startView, table, rowIndex, resultRowIndex, skipCol); + resultRowIndex++; } + rowIndex++; } } // _____________________________________________________________________________ -template -std::pair> -TransitivePath::setupMapAndNodes(const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, - const IdTable& startSideTable) const { - std::vector nodes; - Map edges = setupEdgesMap(sub, startSide, targetSide); - - // Bound -> var|id - std::span startNodes = setupNodes( - startSideTable, startSide.treeAndCol_.value().second); - nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); - - return {std::move(edges), std::move(nodes)}; +GrbMatrix TransitivePath::getTargetRow(GrbMatrix& hull, + size_t targetIndex) const { + GrbMatrix transformer = GrbMatrix(hull.ncols(), hull.ncols()); + transformer.setElement(targetIndex, targetIndex, true); + return hull.multiply(transformer); } // 
_____________________________________________________________________________ -template -std::pair> -TransitivePath::setupMapAndNodes(const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const { - std::vector nodes; - Map edges = setupEdgesMap(sub, startSide, targetSide); - - // id -> var|id - if (!startSide.isVariable()) { - nodes.push_back(std::get(startSide.value_)); - // var -> var - } else { - std::span startNodes = - setupNodes(sub, startSide.subCol_); - nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); - if (minDist_ == 0) { - std::span targetNodes = - setupNodes(sub, targetSide.subCol_); - nodes.insert(nodes.end(), targetNodes.begin(), targetNodes.end()); - } +std::tuple TransitivePath::setupMatrix( + std::span startCol, std::span targetCol, + size_t numRows) const { + std::vector rowIndices; + std::vector colIndices; + IdMapping mapping; + + for (size_t i = 0; i < numRows; i++) { + auto startId = startCol[i]; + auto targetId = targetCol[i]; + auto startIndex = mapping.addId(startId); + auto targetIndex = mapping.addId(targetId); + + rowIndices.push_back(startIndex); + colIndices.push_back(targetIndex); } - return {std::move(edges), std::move(nodes)}; + auto matrix = + GrbMatrix::build(rowIndices, colIndices, mapping.size(), mapping.size()); + return {std::move(matrix), std::move(mapping)}; } // _____________________________________________________________________________ -template -TransitivePath::Map TransitivePath::setupEdgesMap( - const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const { - const IdTableView sub = dynSub.asStaticView(); - Map edges{allocator()}; - decltype(auto) startCol = sub.getColumn(startSide.subCol_); - decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); - - for (size_t i = 0; i < sub.size(); i++) { - checkCancellation(); - insertIntoMap(edges, startCol[i], targetCol[i]); +GrbMatrix 
TransitivePath::setupStartNodeMatrix(std::span startIds, + size_t numCols, + IdMapping mapping) const { + // stardIds.size() is the maximum possible number of columns for the + // startMatrix, but if some start node does not have a link in the graph it + // will be skipped, resulting in a zero column at the end of the startMatrix + GrbMatrix startMatrix = GrbMatrix(startIds.size(), numCols); + size_t rowIndex = 0; + for (Id id : startIds) { + if (!mapping.isContained(id)) { + continue; + } + size_t colIndex = mapping.getIndex(id); + startMatrix.setElement(rowIndex, colIndex, true); + rowIndex++; } - return edges; -} - -// _____________________________________________________________________________ -template -std::span TransitivePath::setupNodes(const IdTable& table, - size_t col) { - return table.getColumn(col); + return startMatrix; } // _____________________________________________________________________________ @@ -556,9 +547,3 @@ void TransitivePath::copyColumns(const IdTableView& inputTable, outCol++; } } - -// _____________________________________________________________________________ -void TransitivePath::insertIntoMap(Map& map, Id key, Id value) const { - auto [it, success] = map.try_emplace(key, allocator()); - it->second.insert(value); -} diff --git a/src/engine/TransitivePath.h b/src/engine/TransitivePath.h index ebae62d3fa..ab3c4414f8 100644 --- a/src/engine/TransitivePath.h +++ b/src/engine/TransitivePath.h @@ -4,8 +4,10 @@ #pragma once +#include #include +#include "engine/GrbMatrix.h" #include "engine/Operation.h" #include "engine/QueryExecutionTree.h" #include "engine/idTable/IdTable.h" @@ -56,6 +58,36 @@ struct TransitivePathSide { } }; +// This struct keeps track of the mapping between Ids and matrix indices +struct IdMapping { + constexpr static auto hash = [](Id id) { + return std::hash{}(id.getBits()); + }; + std::unordered_map> idMap_{}; + + std::vector indexMap_; + + size_t nextIndex_ = 0; + + bool isContained(Id id) { return 
idMap_.contains(id); } + + size_t addId(Id id) { + if (!idMap_.contains(id)) { + idMap_.insert({id, nextIndex_}); + indexMap_.push_back(id); + nextIndex_++; + return nextIndex_ - 1; + } + return idMap_[id]; + } + + Id getId(size_t index) const { return indexMap_.at(index); } + + size_t getIndex(const Id& id) const { return idMap_.at(id); } + + size_t size() const { return indexMap_.size(); } +}; + class TransitivePath : public Operation { // We deliberately use the `std::` variants of a hash set and hash map because // `absl`s types are not exception safe. @@ -206,20 +238,18 @@ class TransitivePath : public Operation { bool isLeft) const; /** - * @brief Compute the transitive hull starting at the given nodes, - * using the given Map. + * @brief Compute the transitive hull of the graph. If given startNodes, + * compute the transitive hull starting at the startNodes. * - * @param edges Adjacency lists, mapping Ids (nodes) to their connected - * Ids. - * @param nodes A list of Ids. These Ids are used as starting points for the - * transitive hull. Thus, this parameter guides the performance of this - * algorithm. - * @param target Optional target Id. If supplied, only paths which end - * in this Id are added to the hull. - * @return Map Maps each Id to its connected Ids in the transitive hull + * @param graph Boolean, square, sparse, adjacency matrix. Row i, column j is + * true, iff. there is an edge going from i to j in the graph. + * @param startNodes Boolean, sparse, adjacency matrix, marking the start + * nodes. There is one row for each start node. The number of columns has to + * be equal to the number of columns of the graph matrix. 
+ * @return An adjacency matrix containing the transitive hull */ - Map transitiveHull(const Map& edges, const std::vector& startNodes, - std::optional target) const; + GrbMatrix transitiveHull(const GrbMatrix& graph, + std::optional startNodes) const; /** * @brief Fill the given table with the transitive hull and use the @@ -241,11 +271,6 @@ class TransitivePath : public Operation { * @param skipCol This column contains the Ids of the start side in the * startSideTable and will be skipped. */ - template - static void fillTableWithHull(IdTableStatic& table, const Map& hull, - std::vector& nodes, size_t startSideCol, - size_t targetSideCol, - const IdTable& startSideTable, size_t skipCol); /** * @brief Fill the given table with the transitive hull. @@ -259,62 +284,105 @@ class TransitivePath : public Operation { * @param targetSideCol The column of the result table for the targetSide of * the hull */ + + /** + * @brief Fill the IdTable with the given transitive hull. + * + * @tparam WIDTH The number of columns of the result table. + * @param table The result table which will be filled. + * @param hull The transitive hull. Represented by a sparse, boolean adjacency + * matrix + * @param mapping IdMapping, which maps Ids to matrix indices and vice versa. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + */ template - static void fillTableWithHull(IdTableStatic& table, const Map& hull, + static void fillTableWithHull(IdTableStatic& table, + const GrbMatrix& hull, const IdMapping& mapping, size_t startSideCol, size_t targetSideCol); /** - * @brief Prepare a Map and a nodes vector for the transitive hull - * computation. + * @brief Fill the IdTable with the given transitive hull. This function is + * used in case the hull computation has one (or more) Ids as start nodes. 
* - * @tparam SUB_WIDTH Number of columns of the sub table - * @tparam SIDE_WIDTH Number of columns of the startSideTable - * @param sub The sub table result - * @param startSide The TransitivePathSide where the edges start - * @param targetSide The TransitivePathSide where the edges end - * @param startSideTable An IdTable containing the Ids for the startSide - * @return std::pair> A Map and Id vector (nodes) for the - * transitive hull computation + * @tparam WIDTH The number of columns of the result table. + * @param table The result table which will be filled. + * @param hull The transitive hull. Represented by a sparse, boolean adjacency + * matrix + * @param mapping IdMapping, which maps Ids to matrix indices and vice versa. + * @param startNodes Ids of the start nodes. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull */ - template - std::pair> setupMapAndNodes( - const IdTable& sub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, - const IdTable& startSideTable) const; + template + static void fillTableWithHull(IdTableStatic& table, + const GrbMatrix& hull, const IdMapping& mapping, + std::span startNodes, + size_t startSideCol, size_t targetSideCol); /** - * @brief Prepare a Map and a nodes vector for the transitive hull - * computation. + * @brief Fill the IdTable with the given transitive hull. This function is + * used if the start side was already bound and there is an IdTable from which + * data has to be copied to the result table. 
* - * @tparam SUB_WIDTH Number of columns of the sub table - * @param sub The sub table result - * @param startSide The TransitivePathSide where the edges start - * @param targetSide The TransitivePathSide where the edges end - * @return std::pair> A Map and Id vector (nodes) for the - * transitive hull computation + * @tparam WIDTH The number of columns of the result table. + * @tparam START_WIDTH The number of columns of the start table. + * @param table The result table which will be filled. + * @param hull The transitive hull. Represented by a sparse, boolean adjacency + * matrix + * @param mapping IdMapping, which maps Ids to matrix indices and vice versa. + * @param startNodes Ids of the start nodes. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + * @param skipCol This column contains the Ids of the start side in the + * startSideTable and will be skipped. */ - template - std::pair> setupMapAndNodes( - const IdTable& sub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const; + template + static void fillTableWithHull(IdTableStatic& table, + const GrbMatrix& hull, const IdMapping& mapping, + const IdTable& startSideTable, + std::span startNodes, + size_t startSideCol, size_t targetSideCol, + size_t skipCol); - // initialize the map from the subresult - template - Map setupEdgesMap(const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const; + GrbMatrix getTargetRow(GrbMatrix& hull, size_t targetIndex) const; - // initialize a vector for the starting nodes (Ids) - template - static std::span setupNodes(const IdTable& table, size_t col); + /** + * @brief Create a boolean, sparse adjacency matrix from the given edges. 
The + * edges are given as lists, where one list contains the start node of the + * edge and the other list contains the target node of the edge. + * Also create an IdMapping, which maps the given Ids to matrix indices. + * + * @param startCol Column from the IdTable, which contains edge start nodes + * @param targetCol Column from the IdTable, which contains edge target nodes + * @param numRows Number of rows in the IdTable + */ + std::tuple setupMatrix(std::span startCol, + std::span targetCol, + size_t numRows) const; + + /** + * @brief Create a boolean, sparse, adjacency matrix which holds the starting + * nodes for the transitive hull computation. + * + * @param startIds List of Ids where the transitive hull computation should + * start + * @param numRows Number of rows in the IdTable where startIds comes from + * @param mapping An IdMapping between Ids and matrix indices + * @return Matrix with one row for each start node + */ + GrbMatrix setupStartNodeMatrix(std::span startIds, size_t numRows, + IdMapping mapping) const; // Copy the columns from the input table to the output table template static void copyColumns(const IdTableView& inputTable, IdTableStatic& outputTable, size_t inputRow, size_t outputRow, size_t skipCol); - - // A small helper function: Insert the `value` to the set at `map[key]`. - // As the sets all have an allocator with memory limit, this construction is a - // little bit more involved, so this can be a separate helper function. 
- void insertIntoMap(Map& map, Id key, Id value) const; }; diff --git a/test/TransitivePathTest.cpp b/test/TransitivePathTest.cpp index 8cd803755d..e1a631c110 100644 --- a/test/TransitivePathTest.cpp +++ b/test/TransitivePathTest.cpp @@ -4,13 +4,10 @@ #include -#include - #include "./IndexTestHelpers.h" #include "./util/AllocatorTestHelpers.h" #include "./util/IdTestHelpers.h" #include "engine/TransitivePath.h" -#include "global/Id.h" using ad_utility::testing::getQec; using ad_utility::testing::makeAllocator; From 069819fff247ed817fb30313bcb3daaabd4b9770 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 31 Jan 2024 14:52:46 +0100 Subject: [PATCH 04/92] Added extern keyword around include --- Dockerfile | 4 ++-- src/engine/GrbMatrix.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index d1815356b3..0af15c76f8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository -y ppa:mhier/libboost-latest FROM base as builder -RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev libgraphblas6 libgraphblas-dev +RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev libgraphblas-dev COPY . 
/app/ @@ -21,7 +21,7 @@ RUN ctest --rerun-failed --output-on-failure FROM base as runtime WORKDIR /app ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev libgraphblas6 libgraphblas-dev +RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev libgraphblas-dev ARG UID=1000 RUN groupadd -r qlever && useradd --no-log-init -r -u $UID -g qlever qlever && chown qlever:qlever /app diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h index 50f11599ce..102194ec24 100644 --- a/src/engine/GrbMatrix.h +++ b/src/engine/GrbMatrix.h @@ -4,7 +4,9 @@ #pragma once +extern "C" { #include +} #include #include From 4b257fe487dab155eb3f1963cb867e29afc43590 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 31 Jan 2024 14:57:48 +0100 Subject: [PATCH 05/92] Replaced std map with abseil map --- src/engine/TransitivePath.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/engine/TransitivePath.h b/src/engine/TransitivePath.h index ab3c4414f8..d50abd47e3 100644 --- a/src/engine/TransitivePath.h +++ b/src/engine/TransitivePath.h @@ -11,6 +11,7 @@ #include "engine/Operation.h" #include "engine/QueryExecutionTree.h" #include "engine/idTable/IdTable.h" +#include "util/HashMap.h" using TreeAndCol = std::pair, size_t>; struct TransitivePathSide { @@ -60,10 +61,7 @@ struct TransitivePathSide { // This struct keeps track of the mapping between Ids and matrix indices struct IdMapping { - constexpr static auto hash = [](Id id) { - return std::hash{}(id.getBits()); - }; - 
std::unordered_map> idMap_{}; + ad_utility::HashMap idMap_{}; std::vector indexMap_; From d4ba6f6b3d1004e6252e63c4d5f7288b259ca2cf Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 31 Jan 2024 15:28:08 +0100 Subject: [PATCH 06/92] Added graphblas dependency for GH action --- .github/workflows/install-dependencies-ubuntu/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/install-dependencies-ubuntu/action.yml b/.github/workflows/install-dependencies-ubuntu/action.yml index b5c248dfed..204c8f0448 100644 --- a/.github/workflows/install-dependencies-ubuntu/action.yml +++ b/.github/workflows/install-dependencies-ubuntu/action.yml @@ -19,7 +19,7 @@ runs: - name: Install third-party libraries if: inputs.install-third-party-libraries == 'true' run: | - sudo apt-get install -y libicu-dev tzdata libzstd-dev libjemalloc-dev + sudo apt-get install -y libicu-dev tzdata libzstd-dev libjemalloc-dev libgraphblas-dev shell: bash - name: Install boost from PPA From 1b20a1e176cd0afdc1f75a2d9b3f4dcda47027cb Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 31 Jan 2024 15:34:39 +0100 Subject: [PATCH 07/92] Added library for mac build --- .github/workflows/macos.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index eb9de07034..2d33bc8a89 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -41,6 +41,7 @@ jobs: run: | brew install llvm@16 brew install conan@2 + brew install suite-sparse echo 'export PATH="/usr/local/opt/llvm@16/bin:$PATH"' >> ~/.bash_profile echo PATH="/usr/local/opt/llvm@16/bin:$PATH" >> $GITHUB_ENV echo 'export LDFLAGS="-L/usr/local/opt/llvm@16/lib -L/usr/local/opt/llvm@16/lib/c++ -Wl,-rpath,/usr/local/opt/llvm@16/lib/c++"' >> ~/.bash_profile From 92621d086d3d45db15b933b6f7a5156f9c4a31b6 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 31 Jan 2024 15:44:59 +0100 Subject: [PATCH 08/92] Removed finalize() --- 
src/engine/TransitivePath.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp index 5ff0b85a64..275ddbf0fc 100644 --- a/src/engine/TransitivePath.cpp +++ b/src/engine/TransitivePath.cpp @@ -213,7 +213,6 @@ void TransitivePath::computeTransitivePathBound( TransitivePath::fillTableWithHull( res, *hull, mapping, startSideTable, startNodes, startSide.outputCol_, targetSide.outputCol_, startSide.treeAndCol_.value().second); - GrbMatrix::finalize(); *dynRes = std::move(res).toDynamic(); } @@ -259,7 +258,6 @@ void TransitivePath::computeTransitivePath( res, *hull, mapping, startSide.outputCol_, targetSide.outputCol_); } - GrbMatrix::finalize(); *dynRes = std::move(res).toDynamic(); } @@ -398,7 +396,9 @@ GrbMatrix TransitivePath::transitiveHull( size_t nvals = result->nvals(); while (nvals > previousNvals && pathLength < maxDist_) { previousNvals = result->nvals(); + // Row major, Column major result->accumulateMultiply(graph); + // Add check cancellation nvals = result->nvals(); pathLength++; } From b3d71bd1cf5174ff4a092225438b20ea933f9a15 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 31 Jan 2024 21:07:46 +0100 Subject: [PATCH 09/92] Added fallback for GraphBLAS --- src/engine/TransitivePath.cpp | 292 ++++++++++++++++++++++++++++++++-- src/engine/TransitivePath.h | 118 ++++++++++++++ 2 files changed, 399 insertions(+), 11 deletions(-) diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp index 275ddbf0fc..f1391450cf 100644 --- a/src/engine/TransitivePath.cpp +++ b/src/engine/TransitivePath.cpp @@ -217,6 +217,30 @@ void TransitivePath::computeTransitivePathBound( *dynRes = std::move(res).toDynamic(); } +// _____________________________________________________________________________ +template +void TransitivePath::computeTransitivePathBoundFallback( + IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& 
targetSide, const IdTable& startSideTable) const { + IdTableStatic res = std::move(*dynRes).toStatic(); + + auto [edges, nodes] = setupMapAndNodes( + dynSub, startSide, targetSide, startSideTable); + + Map hull(allocator()); + if (!targetSide.isVariable()) { + hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); + } else { + hull = transitiveHull(edges, nodes, std::nullopt); + } + + TransitivePath::fillTableWithHull( + res, hull, nodes, startSide.outputCol_, targetSide.outputCol_, + startSideTable, startSide.treeAndCol_.value().second); + + *dynRes = std::move(res).toDynamic(); +} + // _____________________________________________________________________________ template void TransitivePath::computeTransitivePath( @@ -261,6 +285,29 @@ void TransitivePath::computeTransitivePath( *dynRes = std::move(res).toDynamic(); } +// _____________________________________________________________________________ +template +void TransitivePath::computeTransitivePathFallback( + IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + IdTableStatic res = std::move(*dynRes).toStatic(); + + auto [edges, nodes] = + setupMapAndNodes(dynSub, startSide, targetSide); + + Map hull{allocator()}; + if (!targetSide.isVariable()) { + hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); + } else { + hull = transitiveHull(edges, nodes, std::nullopt); + } + + TransitivePath::fillTableWithHull(res, hull, startSide.outputCol_, + targetSide.outputCol_); + + *dynRes = std::move(res).toDynamic(); +} + // _____________________________________________________________________________ ResultTable TransitivePath::computeResult() { if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && @@ -277,17 +324,26 @@ ResultTable TransitivePath::computeResult() { size_t subWidth = subRes->idTable().numColumns(); - auto computeForOneSide = [this, &idTable, subRes, subWidth]( + bool useFallback = false; + + auto 
computeForOneSide = [this, &idTable, subRes, subWidth, useFallback]( auto& boundSide, auto& otherSide) -> ResultTable { shared_ptr sideRes = boundSide.treeAndCol_.value().first->getResult(); size_t sideWidth = sideRes->idTable().numColumns(); - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), - &TransitivePath::computeTransitivePathBound, this, &idTable, - subRes->idTable(), boundSide, otherSide, - sideRes->idTable()); + if (useFallback) { + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), + &TransitivePath::computeTransitivePathBoundFallback, this, + &idTable, subRes->idTable(), boundSide, otherSide, + sideRes->idTable()); + } else { + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), + &TransitivePath::computeTransitivePathBound, this, + &idTable, subRes->idTable(), boundSide, otherSide, + sideRes->idTable()); + } return {std::move(idTable), resultSortedOn(), ResultTable::getSharedLocalVocabFromNonEmptyOf(*sideRes, *subRes)}; @@ -299,15 +355,27 @@ ResultTable TransitivePath::computeResult() { return computeForOneSide(rhs_, lhs_); // Right side is an Id } else if (!rhs_.isVariable()) { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePath::computeTransitivePath, this, &idTable, - subRes->idTable(), rhs_, lhs_); + if (useFallback) { + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePath::computeTransitivePathFallback, this, + &idTable, subRes->idTable(), rhs_, lhs_); + } else { + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePath::computeTransitivePath, this, &idTable, + subRes->idTable(), rhs_, lhs_); + } // No side is a bound variable, the right side is an unbound variable // and the left side is either an unbound Variable or an ID. 
} else { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePath::computeTransitivePath, this, &idTable, - subRes->idTable(), lhs_, rhs_); + if (useFallback) { + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePath::computeTransitivePathFallback, this, + &idTable, subRes->idTable(), lhs_, rhs_); + } else { + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePath::computeTransitivePath, this, &idTable, + subRes->idTable(), lhs_, rhs_); + } } // NOTE: The only place, where the input to a transitive path operation is not @@ -405,6 +473,84 @@ GrbMatrix TransitivePath::transitiveHull( return std::move(*result); } +// _____________________________________________________________________________ +TransitivePath::Map TransitivePath::transitiveHull( + const Map& edges, const std::vector& startNodes, + std::optional target) const { + using MapIt = Map::const_iterator; + // For every node do a dfs on the graph + Map hull{allocator()}; + + // Stores nodes we already have a path to. This avoids cycles. + ad_utility::HashSetWithMemoryLimit marks{ + getExecutionContext()->getAllocator()}; + + // The stack used to store the dfs' progress + std::vector positions; + + // Used to store all edges leading away from a node for every level. + // Reduces access to the hashmap, and is safe as the map will not + // be modified after this point. 
+ std::vector edgeCache; + + for (Id currentStartNode : startNodes) { + if (hull.contains(currentStartNode)) { + // We have already computed the hull for this node + continue; + } + + // Reset for this iteration + marks.clear(); + + MapIt rootEdges = edges.find(currentStartNode); + if (rootEdges != edges.end()) { + positions.push_back(rootEdges->second.begin()); + edgeCache.push_back(&rootEdges->second); + } + if (minDist_ == 0 && + (!target.has_value() || currentStartNode == target.value())) { + insertIntoMap(hull, currentStartNode, currentStartNode); + } + + // While we have not found the entire transitive hull and have not reached + // the max step limit + while (!positions.empty()) { + checkCancellation(); + size_t stackIndex = positions.size() - 1; + // Process the next child of the node at the top of the stack + Set::const_iterator& pos = positions[stackIndex]; + const Set* nodeEdges = edgeCache.back(); + + if (pos == nodeEdges->end()) { + // We finished processing this node + positions.pop_back(); + edgeCache.pop_back(); + continue; + } + + Id child = *pos; + ++pos; + size_t childDepth = positions.size(); + if (childDepth <= maxDist_ && marks.count(child) == 0) { + // process the child + if (childDepth >= minDist_) { + marks.insert(child); + if (!target.has_value() || child == target.value()) { + insertIntoMap(hull, currentStartNode, child); + } + } + // Add the child to the stack + MapIt it = edges.find(child); + if (it != edges.end()) { + positions.push_back(it->second.begin()); + edgeCache.push_back(&it->second); + } + } + } + } + return hull; +} + // _____________________________________________________________________________ template void TransitivePath::fillTableWithHull(IdTableStatic& table, @@ -477,6 +623,124 @@ void TransitivePath::fillTableWithHull(IdTableStatic& table, } } +// _____________________________________________________________________________ +template +void TransitivePath::fillTableWithHull(IdTableStatic& table, + const Map& hull, 
std::vector& nodes, + size_t startSideCol, + size_t targetSideCol, + const IdTable& startSideTable, + size_t skipCol) { + IdTableView startView = + startSideTable.asStaticView(); + + size_t rowIndex = 0; + for (size_t i = 0; i < nodes.size(); i++) { + Id node = nodes[i]; + auto it = hull.find(node); + if (it == hull.end()) { + continue; + } + + for (Id otherNode : it->second) { + table.emplace_back(); + table(rowIndex, startSideCol) = node; + table(rowIndex, targetSideCol) = otherNode; + + TransitivePath::copyColumns(startView, table, i, + rowIndex, skipCol); + + rowIndex++; + } + } +} + +// _____________________________________________________________________________ +template +void TransitivePath::fillTableWithHull(IdTableStatic& table, + const Map& hull, size_t startSideCol, + size_t targetSideCol) { + size_t rowIndex = 0; + for (auto const& [node, linkedNodes] : hull) { + for (Id linkedNode : linkedNodes) { + table.emplace_back(); + table(rowIndex, startSideCol) = node; + table(rowIndex, targetSideCol) = linkedNode; + + rowIndex++; + } + } +} + +// _____________________________________________________________________________ +template +std::pair> +TransitivePath::setupMapAndNodes(const IdTable& sub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const { + std::vector nodes; + Map edges = setupEdgesMap(sub, startSide, targetSide); + + // Bound -> var|id + std::span startNodes = setupNodes( + startSideTable, startSide.treeAndCol_.value().second); + nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); + + return {std::move(edges), std::move(nodes)}; +} + +// _____________________________________________________________________________ +template +std::pair> +TransitivePath::setupMapAndNodes(const IdTable& sub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + std::vector nodes; + Map edges = setupEdgesMap(sub, startSide, targetSide); + + // id -> 
var|id + if (!startSide.isVariable()) { + nodes.push_back(std::get(startSide.value_)); + // var -> var + } else { + std::span startNodes = + setupNodes(sub, startSide.subCol_); + nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); + if (minDist_ == 0) { + std::span targetNodes = + setupNodes(sub, targetSide.subCol_); + nodes.insert(nodes.end(), targetNodes.begin(), targetNodes.end()); + } + } + + return {std::move(edges), std::move(nodes)}; +} + +// _____________________________________________________________________________ +template +TransitivePath::Map TransitivePath::setupEdgesMap( + const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + const IdTableView sub = dynSub.asStaticView(); + Map edges{allocator()}; + decltype(auto) startCol = sub.getColumn(startSide.subCol_); + decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); + + for (size_t i = 0; i < sub.size(); i++) { + checkCancellation(); + insertIntoMap(edges, startCol[i], targetCol[i]); + } + return edges; +} + +// _____________________________________________________________________________ +template +std::span TransitivePath::setupNodes(const IdTable& table, + size_t col) { + return table.getColumn(col); +} + // _____________________________________________________________________________ GrbMatrix TransitivePath::getTargetRow(GrbMatrix& hull, size_t targetIndex) const { @@ -547,3 +811,9 @@ void TransitivePath::copyColumns(const IdTableView& inputTable, outCol++; } } + +// _____________________________________________________________________________ +void TransitivePath::insertIntoMap(Map& map, Id key, Id value) const { + auto [it, success] = map.try_emplace(key, allocator()); + it->second.insert(value); +} diff --git a/src/engine/TransitivePath.h b/src/engine/TransitivePath.h index d50abd47e3..cf3947822d 100644 --- a/src/engine/TransitivePath.h +++ b/src/engine/TransitivePath.h @@ -199,6 +199,12 @@ class TransitivePath : 
public Operation { const TransitivePathSide& targetSide, const IdTable& startSideTable) const; + template + void computeTransitivePathBoundFallback(IdTable* res, const IdTable& sub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const; + /** * @brief Compute the transitive hull. * This function is called when no side is bound (or an id). @@ -215,6 +221,11 @@ class TransitivePath : public Operation { const TransitivePathSide& startSide, const TransitivePathSide& targetSide) const; + template + void computeTransitivePathFallback( + IdTable* res, const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const; + private: /** * @brief Compute the result for this TransitivePath operation @@ -249,6 +260,22 @@ class TransitivePath : public Operation { GrbMatrix transitiveHull(const GrbMatrix& graph, std::optional startNodes) const; + /** + * @brief Compute the transitive hull starting at the given nodes, + * using the given Map. + * + * @param edges Adjacency lists, mapping Ids (nodes) to their connected + * Ids. + * @param nodes A list of Ids. These Ids are used as starting points for the + * transitive hull. Thus, this parameter guides the performance of this + * algorithm. + * @param target Optional target Id. If supplied, only paths which end + * in this Id are added to the hull. + * @return Map Maps each Id to its connected Ids in the transitive hull + */ + Map transitiveHull(const Map& edges, const std::vector& startNodes, + std::optional target) const; + /** * @brief Fill the given table with the transitive hull and use the * startSideTable to fill in the rest of the columns. @@ -349,6 +376,92 @@ class TransitivePath : public Operation { size_t startSideCol, size_t targetSideCol, size_t skipCol); + /** + * @brief Fill the given table with the transitive hull and use the + * startSideTable to fill in the rest of the columns. 
+ * This function is called if the start side is bound and a variable. + * + * @tparam WIDTH The number of columns of the result table. + * @tparam START_WIDTH The number of columns of the start table. + * @param table The result table which will be filled. + * @param hull The transitive hull. + * @param nodes The start nodes of the transitive hull. These need to be in + * the same order and amount as the starting side nodes in the startTable. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + * @param startSideTable An IdTable that holds other results. The other + * results will be transferred to the new result table. + * @param skipCol This column contains the Ids of the start side in the + * startSideTable and will be skipped. + */ + template + static void fillTableWithHull(IdTableStatic& table, const Map& hull, + std::vector& nodes, size_t startSideCol, + size_t targetSideCol, + const IdTable& startSideTable, size_t skipCol); + + /** + * @brief Fill the given table with the transitive hull. + * This function is called if the sides are unbound or ids. + * + * @tparam WIDTH The number of columns of the result table. + * @param table The result table which will be filled. + * @param hull The transitive hull. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + */ + template + static void fillTableWithHull(IdTableStatic& table, const Map& hull, + size_t startSideCol, size_t targetSideCol); + + /** + * @brief Prepare a Map and a nodes vector for the transitive hull + * computation. 
+ * + * @tparam SUB_WIDTH Number of columns of the sub table + * @tparam SIDE_WIDTH Number of columns of the startSideTable + * @param sub The sub table result + * @param startSide The TransitivePathSide where the edges start + * @param targetSide The TransitivePathSide where the edges end + * @param startSideTable An IdTable containing the Ids for the startSide + * @return std::pair> A Map and Id vector (nodes) for the + * transitive hull computation + */ + template + std::pair> setupMapAndNodes( + const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const; + + /** + * @brief Prepare a Map and a nodes vector for the transitive hull + * computation. + * + * @tparam SUB_WIDTH Number of columns of the sub table + * @param sub The sub table result + * @param startSide The TransitivePathSide where the edges start + * @param targetSide The TransitivePathSide where the edges end + * @return std::pair> A Map and Id vector (nodes) for the + * transitive hull computation + */ + template + std::pair> setupMapAndNodes( + const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const; + + // initialize the map from the subresult + template + Map setupEdgesMap(const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const; + + // initialize a vector for the starting nodes (Ids) + template + static std::span setupNodes(const IdTable& table, size_t col); + GrbMatrix getTargetRow(GrbMatrix& hull, size_t targetIndex) const; /** @@ -383,4 +496,9 @@ class TransitivePath : public Operation { static void copyColumns(const IdTableView& inputTable, IdTableStatic& outputTable, size_t inputRow, size_t outputRow, size_t skipCol); + + // A small helper function: Insert the `value` to the set at `map[key]`. 
+ // As the sets all have an allocator with memory limit, this construction is a + // little bit more involved, so this can be a separate helper function. + void insertIntoMap(Map& map, Id key, Id value) const; }; From d1de4841e4c30f8bc1eee240edcae08d4ec906ec Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Thu, 1 Feb 2024 14:21:22 +0100 Subject: [PATCH 10/92] Reworks GrBMatrix - Renamed nrows, ncols and nvals - Moved code to cpp file - Nullptr is now valid for GrbMatrix::matrix_ --- src/engine/GrbMatrix.cpp | 44 ++++++++++++++++++++++------------- src/engine/GrbMatrix.h | 21 +++++++---------- src/engine/TransitivePath.cpp | 14 +++++------ test/GrbMatrixTest.cpp | 16 ++++++------- 4 files changed, 51 insertions(+), 44 deletions(-) diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index d02936c904..68c349c27e 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -11,10 +11,16 @@ #include "util/Exception.h" +// _____________________________________________________________________________ +GrbMatrix::GrbMatrix(size_t numRows, size_t numCols) { + auto info = GrB_Matrix_new(rawMatrix(), GrB_BOOL, numRows, numCols); + handleError(info); +} + // _____________________________________________________________________________ GrbMatrix GrbMatrix::copy() const { GrB_Matrix matrixCopy; - auto info = GrB_Matrix_new(&matrixCopy, GrB_BOOL, nrows(), ncols()); + auto info = GrB_Matrix_new(&matrixCopy, GrB_BOOL, numRows(), numCols()); handleError(info); info = GrB_Matrix_dup(&matrixCopy, *matrix_); handleError(info); @@ -76,7 +82,7 @@ GrbMatrix GrbMatrix::diag(size_t nvals) { // _____________________________________________________________________________ std::vector> GrbMatrix::extractTuples() const { - size_t n = nvals(); + size_t n = numNonZero(); size_t rowIndices[n]; size_t colIndices[n]; bool values[n]; @@ -96,17 +102,17 @@ std::vector> GrbMatrix::extractTuples() const { // 
_____________________________________________________________________________ std::vector GrbMatrix::extractColumn(size_t colIndex) const { std::unique_ptr columnVector = std::make_unique(); - size_t numRows = nrows(); - auto info = GrB_Vector_new(columnVector.get(), GrB_BOOL, numRows); + size_t rows = numRows(); + auto info = GrB_Vector_new(columnVector.get(), GrB_BOOL, rows); handleError(info); info = GrB_Col_extract(*columnVector, GrB_NULL, GrB_NULL, *matrix_, GrB_ALL, - numRows, colIndex, GrB_NULL); + rows, colIndex, GrB_NULL); handleError(info); - size_t indices[numRows]; - bool values[numRows]; - std::unique_ptr nvals = std::make_unique(numRows); + size_t indices[rows]; + bool values[rows]; + std::unique_ptr nvals = std::make_unique(rows); info = GrB_Vector_extractTuples_BOOL(indices, values, nvals.get(), *columnVector); handleError(info); @@ -126,7 +132,7 @@ std::vector GrbMatrix::extractRow(size_t rowIndex) const { } // _____________________________________________________________________________ -size_t GrbMatrix::nvals() const { +size_t GrbMatrix::numNonZero() const { size_t nvals; auto info = GrB_Matrix_nvals(&nvals, *matrix_); GrbMatrix::handleError(info); @@ -134,7 +140,7 @@ size_t GrbMatrix::nvals() const { } // _____________________________________________________________________________ -size_t GrbMatrix::nrows() const { +size_t GrbMatrix::numRows() const { size_t nrows; auto info = GrB_Matrix_nrows(&nrows, *matrix_); GrbMatrix::handleError(info); @@ -142,7 +148,7 @@ size_t GrbMatrix::nrows() const { } // _____________________________________________________________________________ -size_t GrbMatrix::ncols() const { +size_t GrbMatrix::numCols() const { size_t ncols; auto info = GrB_Matrix_ncols(&ncols, *matrix_); GrbMatrix::handleError(info); @@ -152,7 +158,7 @@ size_t GrbMatrix::ncols() const { // _____________________________________________________________________________ GrbMatrix GrbMatrix::transpose() const { GrB_Matrix transposed; - auto info 
= GrB_Matrix_new(&transposed, GrB_BOOL, ncols(), nrows()); + auto info = GrB_Matrix_new(&transposed, GrB_BOOL, numCols(), numRows()); handleError(info); info = GrB_transpose(transposed, GrB_NULL, GrB_NULL, *matrix_, GrB_NULL); handleError(info); @@ -172,8 +178,8 @@ void GrbMatrix::accumulateMultiply(const GrbMatrix& otherMatrix) const { // _____________________________________________________________________________ GrbMatrix GrbMatrix::multiply(const GrbMatrix& otherMatrix) const { - size_t resultNumRows = nrows(); - size_t resultNumCols = otherMatrix.ncols(); + size_t resultNumRows = numRows(); + size_t resultNumCols = otherMatrix.numCols(); GrB_Matrix resultMatrix; auto info = GrB_Matrix_new(&resultMatrix, GrB_BOOL, resultNumRows, resultNumCols); @@ -187,6 +193,14 @@ GrbMatrix GrbMatrix::multiply(const GrbMatrix& otherMatrix) const { return result; } +// _____________________________________________________________________________ +GrB_Matrix* GrbMatrix::rawMatrix() const { + if (matrix_.get() != nullptr) { + return matrix_.get(); + } + AD_THROW("GrbMatrix error: internal GrB_Matrix is null"); +} + // _____________________________________________________________________________ void GrbMatrix::handleError(GrB_Info info) { switch (info) { @@ -194,8 +208,6 @@ void GrbMatrix::handleError(GrB_Info info) { return; case GrB_NO_VALUE: return; - // case GxB_EXHAUSTED: - // return; case GrB_UNINITIALIZED_OBJECT: AD_THROW("GraphBLAS error: object has not been initialized"); case GrB_NULL_POINTER: diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h index 102194ec24..1c386fef11 100644 --- a/src/engine/GrbMatrix.h +++ b/src/engine/GrbMatrix.h @@ -15,22 +15,16 @@ extern "C" { // Currently only boolean matrices are supported. 
class GrbMatrix { private: - std::unique_ptr matrix_; + std::unique_ptr matrix_ = std::make_unique(); static bool isInitialized_; public: - GrbMatrix(size_t numRows, size_t numCols) { - matrix_ = std::make_unique(); - auto info = GrB_Matrix_new(matrix_.get(), GrB_BOOL, numRows, numCols); - handleError(info); - } + GrbMatrix(size_t numRows, size_t numCols); - GrbMatrix() { matrix_ = std::make_unique(); } + GrbMatrix() = default; // Move constructor - GrbMatrix(GrbMatrix&& otherMatrix) { - matrix_ = std::move(otherMatrix.matrix_); - }; + GrbMatrix(GrbMatrix&& otherMatrix) = default; // Disable copy constructor and assignment operator GrbMatrix(const GrbMatrix&) = delete; @@ -69,11 +63,11 @@ class GrbMatrix { std::vector extractRow(size_t rowIndex) const; // Number of "true" values in the matrix. - size_t nvals() const; + size_t numNonZero() const; - size_t nrows() const; + size_t numRows() const; - size_t ncols() const; + size_t numCols() const; GrbMatrix transpose() const; @@ -91,5 +85,6 @@ class GrbMatrix { private: GrB_Matrix& getMatrix() const { return *matrix_; } + GrB_Matrix* rawMatrix() const; static void handleError(GrB_Info info); }; diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp index f1391450cf..7031137de3 100644 --- a/src/engine/TransitivePath.cpp +++ b/src/engine/TransitivePath.cpp @@ -200,7 +200,7 @@ void TransitivePath::computeTransitivePathBound( std::span startNodes = startSideTable.getColumn(startSide.treeAndCol_->second); GrbMatrix startNodeMatrix = - setupStartNodeMatrix(startNodes, graph.nrows(), mapping); + setupStartNodeMatrix(startNodes, graph.numRows(), mapping); auto hull = std::make_unique( transitiveHull(graph, std::make_optional(std::move(startNodeMatrix)))); @@ -259,7 +259,7 @@ void TransitivePath::computeTransitivePath( if (!startSide.isVariable()) { const Id startNode[]{std::get(startSide.value_)}; GrbMatrix startMatrix = - setupStartNodeMatrix(startNode, graph.nrows(), mapping); + 
setupStartNodeMatrix(startNode, graph.numRows(), mapping); hull = std::make_unique( transitiveHull(graph, std::make_optional(std::move(startMatrix)))); } else { @@ -452,7 +452,7 @@ GrbMatrix TransitivePath::transitiveHull( if (startNodes) { result = std::make_unique(std::move(startNodes.value())); } else { - result = std::make_unique(GrbMatrix::diag(graph.nrows())); + result = std::make_unique(GrbMatrix::diag(graph.numRows())); } if (minDist_ > 0) { @@ -461,13 +461,13 @@ GrbMatrix TransitivePath::transitiveHull( } size_t previousNvals = 0; - size_t nvals = result->nvals(); + size_t nvals = result->numNonZero(); while (nvals > previousNvals && pathLength < maxDist_) { - previousNvals = result->nvals(); + previousNvals = result->numNonZero(); // Row major, Column major result->accumulateMultiply(graph); // Add check cancellation - nvals = result->nvals(); + nvals = result->numNonZero(); pathLength++; } return std::move(*result); @@ -744,7 +744,7 @@ std::span TransitivePath::setupNodes(const IdTable& table, // _____________________________________________________________________________ GrbMatrix TransitivePath::getTargetRow(GrbMatrix& hull, size_t targetIndex) const { - GrbMatrix transformer = GrbMatrix(hull.ncols(), hull.ncols()); + GrbMatrix transformer = GrbMatrix(hull.numCols(), hull.numCols()); transformer.setElement(targetIndex, targetIndex, true); return hull.multiply(transformer); } diff --git a/test/GrbMatrixTest.cpp b/test/GrbMatrixTest.cpp index 1d17cae50a..aa811db000 100644 --- a/test/GrbMatrixTest.cpp +++ b/test/GrbMatrixTest.cpp @@ -7,9 +7,9 @@ TEST(GrbMatrixTest, constructor) { GrbMatrix::initialize(); GrbMatrix matrix = GrbMatrix(2, 3); - size_t numRows = matrix.nrows(); - size_t numCols = matrix.ncols(); - size_t nvals = matrix.nvals(); + size_t numRows = matrix.numRows(); + size_t numCols = matrix.numCols(); + size_t nvals = matrix.numNonZero(); GrbMatrix::finalize(); @@ -46,7 +46,7 @@ TEST(GrbMatrixTest, getSetElement) { bool elemOneZero = 
matrix.getElement(1, 0); bool elemZeroTwo = matrix.getElement(0, 2); bool elemOneTwo = matrix.getElement(1, 2); - size_t nvals = matrix.nvals(); + size_t nvals = matrix.numNonZero(); GrbMatrix::finalize(); @@ -172,8 +172,8 @@ TEST(GrbMatrixTest, multiplyShapedMatrices) { GrbMatrix matrix3 = matrix1.multiply(matrix2); - EXPECT_EQ(matrix3.nrows(), 2); - EXPECT_EQ(matrix3.ncols(), 2); + EXPECT_EQ(matrix3.numRows(), 2); + EXPECT_EQ(matrix3.numCols(), 2); EXPECT_EQ(matrix3.getElement(0, 0), true); EXPECT_EQ(matrix3.getElement(0, 1), false); EXPECT_EQ(matrix3.getElement(1, 0), true); @@ -193,8 +193,8 @@ TEST(GrbMatrixTest, transpose) { GrbMatrix result = matrix.transpose(); - EXPECT_EQ(3, result.nrows()); - EXPECT_EQ(2, result.ncols()); + EXPECT_EQ(3, result.numRows()); + EXPECT_EQ(2, result.numCols()); EXPECT_EQ(true, result.getElement(0, 0)); EXPECT_EQ(false, result.getElement(0, 1)); From 303ff2fd4e88c2acf7530b6e768e650e247e9fc6 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Thu, 1 Feb 2024 15:31:32 +0100 Subject: [PATCH 11/92] More reworks on GrbMatrix - Renamed copy() -> clone() - Functions that create a GrbMatrix now use the unique_ptr directly --- src/engine/GrbMatrix.cpp | 40 ++++++++++++++++++---------------------- src/engine/GrbMatrix.h | 2 +- test/GrbMatrixTest.cpp | 2 +- 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index 68c349c27e..ac1572f0f2 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -18,17 +18,15 @@ GrbMatrix::GrbMatrix(size_t numRows, size_t numCols) { } // _____________________________________________________________________________ -GrbMatrix GrbMatrix::copy() const { - GrB_Matrix matrixCopy; - auto info = GrB_Matrix_new(&matrixCopy, GrB_BOOL, numRows(), numCols()); +GrbMatrix GrbMatrix::clone() const { + GrbMatrix matrixCopy = GrbMatrix(); + auto info = + GrB_Matrix_new(matrixCopy.rawMatrix(), GrB_BOOL, numRows(), numCols()); handleError(info); - 
info = GrB_Matrix_dup(&matrixCopy, *matrix_); + info = GrB_Matrix_dup(matrixCopy.rawMatrix(), *matrix_); handleError(info); - auto returnMatrix = GrbMatrix(); - returnMatrix.matrix_ = std::make_unique(matrixCopy); - - return returnMatrix; + return matrixCopy; } // _____________________________________________________________________________ @@ -157,16 +155,15 @@ size_t GrbMatrix::numCols() const { // _____________________________________________________________________________ GrbMatrix GrbMatrix::transpose() const { - GrB_Matrix transposed; - auto info = GrB_Matrix_new(&transposed, GrB_BOOL, numCols(), numRows()); + GrbMatrix transposed; + auto info = + GrB_Matrix_new(transposed.rawMatrix(), GrB_BOOL, numCols(), numRows()); handleError(info); - info = GrB_transpose(transposed, GrB_NULL, GrB_NULL, *matrix_, GrB_NULL); + info = GrB_transpose(transposed.getMatrix(), GrB_NULL, GrB_NULL, *matrix_, + GrB_NULL); handleError(info); - GrbMatrix result = GrbMatrix(); - result.matrix_ = std::make_unique(transposed); - - return result; + return transposed; } // _____________________________________________________________________________ @@ -180,16 +177,15 @@ void GrbMatrix::accumulateMultiply(const GrbMatrix& otherMatrix) const { GrbMatrix GrbMatrix::multiply(const GrbMatrix& otherMatrix) const { size_t resultNumRows = numRows(); size_t resultNumCols = otherMatrix.numCols(); - GrB_Matrix resultMatrix; - auto info = - GrB_Matrix_new(&resultMatrix, GrB_BOOL, resultNumRows, resultNumCols); + GrbMatrix result; + auto info = GrB_Matrix_new(result.rawMatrix(), GrB_BOOL, resultNumRows, + resultNumCols); - info = GrB_mxm(resultMatrix, GrB_NULL, GrB_NULL, GrB_LOR_LAND_SEMIRING_BOOL, - *matrix_, otherMatrix.getMatrix(), GrB_NULL); + info = GrB_mxm(result.getMatrix(), GrB_NULL, GrB_NULL, + GrB_LOR_LAND_SEMIRING_BOOL, *matrix_, otherMatrix.getMatrix(), + GrB_NULL); handleError(info); - GrbMatrix result = GrbMatrix(resultNumRows, resultNumCols); - result.matrix_ = 
std::make_unique(resultMatrix); return result; } diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h index 1c386fef11..8f03fe3144 100644 --- a/src/engine/GrbMatrix.h +++ b/src/engine/GrbMatrix.h @@ -32,7 +32,7 @@ class GrbMatrix { ~GrbMatrix() { GrB_Matrix_free(matrix_.get()); } - GrbMatrix copy() const; + GrbMatrix clone() const; void setElement(size_t row, size_t col, bool value); diff --git a/test/GrbMatrixTest.cpp b/test/GrbMatrixTest.cpp index aa811db000..df923d7482 100644 --- a/test/GrbMatrixTest.cpp +++ b/test/GrbMatrixTest.cpp @@ -24,7 +24,7 @@ TEST(GrbMatrixTest, copy) { GrbMatrix matrix1 = GrbMatrix(2, 2); matrix1.setElement(0, 0, true); - GrbMatrix matrix2 = matrix1.copy(); + GrbMatrix matrix2 = matrix1.clone(); matrix1.setElement(1, 1, true); From 958b4f7c92fc0d3e2f67b9602860651c6b54bef9 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Thu, 1 Feb 2024 16:27:40 +0100 Subject: [PATCH 12/92] Renamed getMatrix -> matrix --- src/engine/GrbMatrix.cpp | 30 +++++++++++++++--------------- src/engine/GrbMatrix.h | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index ac1572f0f2..2a811e434c 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -23,7 +23,7 @@ GrbMatrix GrbMatrix::clone() const { auto info = GrB_Matrix_new(matrixCopy.rawMatrix(), GrB_BOOL, numRows(), numCols()); handleError(info); - info = GrB_Matrix_dup(matrixCopy.rawMatrix(), *matrix_); + info = GrB_Matrix_dup(matrixCopy.rawMatrix(), matrix()); handleError(info); return matrixCopy; @@ -31,14 +31,14 @@ GrbMatrix GrbMatrix::clone() const { // _____________________________________________________________________________ void GrbMatrix::setElement(size_t row, size_t col, bool value) { - auto info = GrB_Matrix_setElement_BOOL(*matrix_, value, row, col); + auto info = GrB_Matrix_setElement_BOOL(matrix(), value, row, col); handleError(info); } // 
_____________________________________________________________________________ bool GrbMatrix::getElement(size_t row, size_t col) const { bool result; - auto info = GrB_Matrix_extractElement_BOOL(&result, *matrix_, row, col); + auto info = GrB_Matrix_extractElement_BOOL(&result, matrix(), row, col); handleError(info); if (info == GrB_NO_VALUE) { return false; @@ -61,7 +61,7 @@ GrbMatrix GrbMatrix::build(const std::vector rowIndices, values[i] = true; } auto info = - GrB_Matrix_build_BOOL(matrix.getMatrix(), &rowIndices[0], &colIndices[0], + GrB_Matrix_build_BOOL(matrix.matrix(), &rowIndices[0], &colIndices[0], values, nvals, GxB_IGNORE_DUP); GrbMatrix::handleError(info); return matrix; @@ -85,7 +85,7 @@ std::vector> GrbMatrix::extractTuples() const { size_t colIndices[n]; bool values[n]; auto info = GrB_Matrix_extractTuples_BOOL(rowIndices, colIndices, values, &n, - *matrix_); + matrix()); GrbMatrix::handleError(info); std::vector> result; @@ -104,7 +104,7 @@ std::vector GrbMatrix::extractColumn(size_t colIndex) const { auto info = GrB_Vector_new(columnVector.get(), GrB_BOOL, rows); handleError(info); - info = GrB_Col_extract(*columnVector, GrB_NULL, GrB_NULL, *matrix_, GrB_ALL, + info = GrB_Col_extract(*columnVector, GrB_NULL, GrB_NULL, matrix(), GrB_ALL, rows, colIndex, GrB_NULL); handleError(info); @@ -132,7 +132,7 @@ std::vector GrbMatrix::extractRow(size_t rowIndex) const { // _____________________________________________________________________________ size_t GrbMatrix::numNonZero() const { size_t nvals; - auto info = GrB_Matrix_nvals(&nvals, *matrix_); + auto info = GrB_Matrix_nvals(&nvals, matrix()); GrbMatrix::handleError(info); return nvals; } @@ -140,7 +140,7 @@ size_t GrbMatrix::numNonZero() const { // _____________________________________________________________________________ size_t GrbMatrix::numRows() const { size_t nrows; - auto info = GrB_Matrix_nrows(&nrows, *matrix_); + auto info = GrB_Matrix_nrows(&nrows, matrix()); 
GrbMatrix::handleError(info); return nrows; } @@ -148,7 +148,7 @@ size_t GrbMatrix::numRows() const { // _____________________________________________________________________________ size_t GrbMatrix::numCols() const { size_t ncols; - auto info = GrB_Matrix_ncols(&ncols, *matrix_); + auto info = GrB_Matrix_ncols(&ncols, matrix()); GrbMatrix::handleError(info); return ncols; } @@ -159,7 +159,7 @@ GrbMatrix GrbMatrix::transpose() const { auto info = GrB_Matrix_new(transposed.rawMatrix(), GrB_BOOL, numCols(), numRows()); handleError(info); - info = GrB_transpose(transposed.getMatrix(), GrB_NULL, GrB_NULL, *matrix_, + info = GrB_transpose(transposed.matrix(), GrB_NULL, GrB_NULL, matrix(), GrB_NULL); handleError(info); @@ -168,8 +168,8 @@ GrbMatrix GrbMatrix::transpose() const { // _____________________________________________________________________________ void GrbMatrix::accumulateMultiply(const GrbMatrix& otherMatrix) const { - auto info = GrB_mxm(*matrix_, GrB_NULL, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, - *matrix_, otherMatrix.getMatrix(), GrB_NULL); + auto info = GrB_mxm(matrix(), GrB_NULL, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, + matrix(), otherMatrix.matrix(), GrB_NULL); handleError(info); } @@ -181,9 +181,9 @@ GrbMatrix GrbMatrix::multiply(const GrbMatrix& otherMatrix) const { auto info = GrB_Matrix_new(result.rawMatrix(), GrB_BOOL, resultNumRows, resultNumCols); - info = GrB_mxm(result.getMatrix(), GrB_NULL, GrB_NULL, - GrB_LOR_LAND_SEMIRING_BOOL, *matrix_, otherMatrix.getMatrix(), - GrB_NULL); + info = + GrB_mxm(result.matrix(), GrB_NULL, GrB_NULL, GrB_LOR_LAND_SEMIRING_BOOL, + matrix(), otherMatrix.matrix(), GrB_NULL); handleError(info); return result; diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h index 8f03fe3144..f15a026c19 100644 --- a/src/engine/GrbMatrix.h +++ b/src/engine/GrbMatrix.h @@ -84,7 +84,7 @@ class GrbMatrix { static void finalize(); private: - GrB_Matrix& getMatrix() const { return *matrix_; } + GrB_Matrix& matrix() const { return 
*matrix_; } GrB_Matrix* rawMatrix() const; static void handleError(GrB_Info info); }; From 87bc650cef99796503222c8fb678e5aa175c45d5 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 2 Feb 2024 12:04:31 +0100 Subject: [PATCH 13/92] Added documentation to GrbMatrix --- src/engine/GrbMatrix.h | 143 +++++++++++++++++++++++++++++++++++------ 1 file changed, 123 insertions(+), 20 deletions(-) diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h index f15a026c19..d1953346d0 100644 --- a/src/engine/GrbMatrix.h +++ b/src/engine/GrbMatrix.h @@ -11,14 +11,23 @@ extern "C" { #include #include -// This class wraps the functionality of the GraphBLAS object GrB_Matrix. -// Currently only boolean matrices are supported. +/** + * @class GrbMatrix + * @brief This class wraps the functionality of the GraphBLAS object GrB_Matrix. + * Currently only boolean matrices are supported. + */ class GrbMatrix { private: std::unique_ptr matrix_ = std::make_unique(); static bool isInitialized_; public: + /** + * @brief Construct a matrix with the given dimensions + * + * @param numRows + * @param numCols + */ GrbMatrix(size_t numRows, size_t numCols); GrbMatrix() = default; @@ -32,59 +41,153 @@ class GrbMatrix { ~GrbMatrix() { GrB_Matrix_free(matrix_.get()); } + /** + * @brief Create a matrix and fill it with the data of this matrix. + * + * @return GrbMatrix duplicate matrix + */ GrbMatrix clone() const; + /** + * @brief Set an element in the matrix to a specified value. + * + * @param row Row index, must be smaller than numRows() + * @param col Column index, must be smaller than numCols() + * @param value Boolean, which value to set + */ void setElement(size_t row, size_t col, bool value); + /** + * @brief Get an element from the matrix. 
+ * + * @param row Row index, must be smaller than numRows() + * @param col Column index, must be smaller than numCols() + * @return Boolean value + */ bool getElement(size_t row, size_t col) const; - // Create a matrix from the given lists of indices. - // For each given pair of indices, the corresponding entry in the result - // matrix is set to true. All other entries are false (by default). + /** + * @brief Create a matrix from the given lists of indices. For each given pair + * of indices, the corresponding entry in the result matrix is set to true. + * All other entries are false (by default). + * The vectors rowIndices and colIndices have to be the same length. Their + * entries have to be smaller than numRows and numCols respectively. + * + * @param rowIndices Vector of row indices, entries must be smaller than + * numRows + * @param colIndices Vector of column indices, entries must be smaller than + * numCols + * @param numRows Number of rows of the result matrix + * @param numCols Number of columns of the result matrix + * @return New matrix with given entries set to true + */ static GrbMatrix build(const std::vector rowIndices, const std::vector colIndices, size_t numRows, size_t numCols); - // Create a square, diagonal matrix. All entries on the diagonal are set to - // true, all others to false. The resulting matrix will have nvals rows and - // columns. + /** + * @brief Create a square, diagonal matrix. All entries on the diagonal are + * set to true, all others to false. The resulting matrix will have nvals rows + * and columns. + * + * @param nvals + * @return + */ static GrbMatrix diag(size_t nvals); - // Extract all true entries from the matrix. The first entry in the pair is - // the row index, the second entry is the column index. + /** + * @brief Extract all true entries from the matrix. The first entry in the + * pair is the row index, the second entry is the column index. 
+ */ std::vector> extractTuples() const; - // Extract a column from the matrix. Returns all row indices where this - // column's entries are true. + /** + * @brief Extract a column from the matrix. Returns all row indices where this + * column's entries are true. + * + * @param colIndex + */ std::vector extractColumn(size_t colIndex) const; - // Extract a row from the matrix. Returns all column indices where this - // rows's entries are true. + /** + * @brief Extract a row from the matrix. Returns all column indices where this + * rows's entries are true. + * + * @param rowIndex + */ std::vector extractRow(size_t rowIndex) const; - // Number of "true" values in the matrix. + /** + * @brief Number of "true" values in the matrix. + * + * @return + */ size_t numNonZero() const; + /** + * @brief Number of rows of the matrix. + * + * @return + */ size_t numRows() const; + /** + * @brief Number of columns of the matrix. + * + * @return + */ size_t numCols() const; + /** + * @brief Create a new matrix, which is the transpose of this matrix. + * + * @return + */ GrbMatrix transpose() const; - // Multiply this matrix with the other matrix and accumulate the result in - // this matrix. Logical or is used for accumulation. + /** + * @brief Multiply this matrix with the other matrix and accumulate the result + * in this matrix. Logical or is used for accumulation. + * + * @param otherMatrix + */ void accumulateMultiply(const GrbMatrix& otherMatrix) const; - // Multiply this matrix with another matrix and write the result to a new - // matrix. + /** + * @brief Multiply this matrix with another matrix and write the result to a + * new matrix. + * + * @param otherMatrix + * @return + */ GrbMatrix multiply(const GrbMatrix& otherMatrix) const; + // TODO: Move to singleton class static void initialize(); - static void finalize(); private: + /** + * @brief Get a reference to the internal matrix. 
+ * + * @return + */ GrB_Matrix& matrix() const { return *matrix_; } + + /** + * @brief Get a raw pointer to the internal matrix. If this pointer is the + * nullptr, an Exception is thrown. + * + * @return + */ GrB_Matrix* rawMatrix() const; + + /** + * @brief Handle the GrB_Info object. GrB_SUCCESS is ignored, all other return + * valus cause an Exception. + * See also GraphBLAS userguide, section 5.5 + * + * @param info + */ static void handleError(GrB_Info info); }; From 370ad436e182cda670a9f52ffcd095b7c4baaff4 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 2 Feb 2024 12:07:38 +0100 Subject: [PATCH 14/92] Reworked build function --- src/engine/GrbMatrix.cpp | 10 +++++----- src/engine/GrbMatrix.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index 2a811e434c..c41253d71f 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -47,9 +47,9 @@ bool GrbMatrix::getElement(size_t row, size_t col) const { } // _____________________________________________________________________________ -GrbMatrix GrbMatrix::build(const std::vector rowIndices, - const std::vector colIndices, size_t numRows, - size_t numCols) { +GrbMatrix GrbMatrix::build(const std::vector& rowIndices, + const std::vector& colIndices, + size_t numRows, size_t numCols) { auto matrix = GrbMatrix(numRows, numCols); GrB_Index nvals = rowIndices.size(); if (nvals == 0) { @@ -61,8 +61,8 @@ GrbMatrix GrbMatrix::build(const std::vector rowIndices, values[i] = true; } auto info = - GrB_Matrix_build_BOOL(matrix.matrix(), &rowIndices[0], &colIndices[0], - values, nvals, GxB_IGNORE_DUP); + GrB_Matrix_build_BOOL(matrix.matrix(), rowIndices.data(), + colIndices.data(), values, nvals, GxB_IGNORE_DUP); GrbMatrix::handleError(info); return matrix; } diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h index d1953346d0..d951f14315 100644 --- a/src/engine/GrbMatrix.h +++ b/src/engine/GrbMatrix.h @@ -81,8 +81,8 
@@ class GrbMatrix { * @param numCols Number of columns of the result matrix * @return New matrix with given entries set to true */ - static GrbMatrix build(const std::vector rowIndices, - const std::vector colIndices, size_t numRows, + static GrbMatrix build(const std::vector& rowIndices, + const std::vector& colIndices, size_t numRows, size_t numCols); /** From 0018c844fdf68e08f917c837b39efd20f8c11fa2 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 2 Feb 2024 16:29:57 +0100 Subject: [PATCH 15/92] Reworked extractRow and extractCol --- src/engine/GrbMatrix.cpp | 61 ++++++++++++++++++++++++---------------- src/engine/GrbMatrix.h | 4 ++- test/GrbMatrixTest.cpp | 21 ++++++++++++++ 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index c41253d71f..02abc2b9a9 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -99,34 +99,13 @@ std::vector> GrbMatrix::extractTuples() const { // _____________________________________________________________________________ std::vector GrbMatrix::extractColumn(size_t colIndex) const { - std::unique_ptr columnVector = std::make_unique(); - size_t rows = numRows(); - auto info = GrB_Vector_new(columnVector.get(), GrB_BOOL, rows); - handleError(info); - - info = GrB_Col_extract(*columnVector, GrB_NULL, GrB_NULL, matrix(), GrB_ALL, - rows, colIndex, GrB_NULL); - handleError(info); - - size_t indices[rows]; - bool values[rows]; - std::unique_ptr nvals = std::make_unique(rows); - info = GrB_Vector_extractTuples_BOOL(indices, values, nvals.get(), - *columnVector); - handleError(info); - - info = GrB_Vector_free(columnVector.get()); - handleError(info); - - std::vector vec; - vec.insert(vec.begin(), indices, indices + *nvals); - return vec; + return extract(colIndex, GrB_NULL); } // _____________________________________________________________________________ std::vector GrbMatrix::extractRow(size_t rowIndex) const { - GrbMatrix transposed = 
transpose(); - return transposed.extractColumn(rowIndex); + // The descriptor GrB_DESC_T0 transposes the second input, which is the matrix + return extract(rowIndex, GrB_DESC_T0); } // _____________________________________________________________________________ @@ -197,6 +176,40 @@ GrB_Matrix* GrbMatrix::rawMatrix() const { AD_THROW("GrbMatrix error: internal GrB_Matrix is null"); } +// _____________________________________________________________________________ +std::vector GrbMatrix::extract(size_t index, + GrB_Descriptor desc) const { + GrB_Vector vector; + size_t vectorSize; + if (desc == GrB_NULL) { + vectorSize = numRows(); + } else { + vectorSize = numCols(); + } + auto info = GrB_Vector_new(&vector, GrB_BOOL, vectorSize); + handleError(info); + + info = GrB_Col_extract(vector, GrB_NULL, GrB_NULL, matrix(), GrB_ALL, + vectorSize, index, desc); + handleError(info); + + size_t vectorNvals; + info = GrB_Vector_nvals(&vectorNvals, vector); + handleError(info); + + std::vector indices; + indices.resize(vectorNvals); + bool vals[vectorNvals]; + info = + GrB_Vector_extractTuples_BOOL(indices.data(), vals, &vectorNvals, vector); + handleError(info); + + info = GrB_Vector_free(&vector); + handleError(info); + + return indices; +} + // _____________________________________________________________________________ void GrbMatrix::handleError(GrB_Info info) { switch (info) { diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h index d951f14315..413bb7d4f9 100644 --- a/src/engine/GrbMatrix.h +++ b/src/engine/GrbMatrix.h @@ -166,7 +166,6 @@ class GrbMatrix { static void initialize(); static void finalize(); - private: /** * @brief Get a reference to the internal matrix. * @@ -174,6 +173,7 @@ class GrbMatrix { */ GrB_Matrix& matrix() const { return *matrix_; } + private: /** * @brief Get a raw pointer to the internal matrix. If this pointer is the * nullptr, an Exception is thrown. 
@@ -182,6 +182,8 @@ class GrbMatrix { */ GrB_Matrix* rawMatrix() const; + std::vector extract(size_t index, GrB_Descriptor desc) const; + /** * @brief Handle the GrB_Info object. GrB_SUCCESS is ignored, all other return * valus cause an Exception. diff --git a/test/GrbMatrixTest.cpp b/test/GrbMatrixTest.cpp index df923d7482..efc32a4e3f 100644 --- a/test/GrbMatrixTest.cpp +++ b/test/GrbMatrixTest.cpp @@ -1,5 +1,9 @@ +#include #include +#include +#include + #include "engine/GrbMatrix.h" #include "gmock/gmock.h" @@ -137,6 +141,23 @@ TEST(GrbMatrixTest, extractColumn) { EXPECT_THAT(colIndices, testing::UnorderedElementsAreArray(expected)); } +TEST(GrbMatrixTest, extractRow) { + GrbMatrix::initialize(); + + GrbMatrix matrix = GrbMatrix(3, 3); + + matrix.setElement(1, 0, true); + matrix.setElement(1, 2, true); + + std::vector rowIndices = matrix.extractRow(1); + + GrbMatrix::finalize(); + + std::vector expected{0, 2}; + + EXPECT_THAT(rowIndices, testing::UnorderedElementsAreArray(expected)); +} + TEST(GrbMatrixTest, multiplySquareMatrices) { GrbMatrix::initialize(); From 622dddd3e6cd725bf8477aedf6823f060edd5504 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 2 Feb 2024 17:11:32 +0100 Subject: [PATCH 16/92] Reworked use of C arrays in GrbMatrix --- src/engine/GrbMatrix.cpp | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index 02abc2b9a9..f440fc354a 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -56,13 +56,13 @@ GrbMatrix GrbMatrix::build(const std::vector& rowIndices, return matrix; } - bool values[nvals]; + std::unique_ptr values{new bool[nvals]()}; for (size_t i = 0; i < nvals; i++) { values[i] = true; } - auto info = - GrB_Matrix_build_BOOL(matrix.matrix(), rowIndices.data(), - colIndices.data(), values, nvals, GxB_IGNORE_DUP); + auto info = GrB_Matrix_build_BOOL(matrix.matrix(), rowIndices.data(), + colIndices.data(), values.get(), 
nvals, + GxB_IGNORE_DUP); GrbMatrix::handleError(info); return matrix; } @@ -80,16 +80,16 @@ GrbMatrix GrbMatrix::diag(size_t nvals) { // _____________________________________________________________________________ std::vector> GrbMatrix::extractTuples() const { - size_t n = numNonZero(); - size_t rowIndices[n]; - size_t colIndices[n]; - bool values[n]; - auto info = GrB_Matrix_extractTuples_BOOL(rowIndices, colIndices, values, &n, - matrix()); + size_t nvals = numNonZero(); + size_t rowIndices[nvals]; + size_t colIndices[nvals]; + std::unique_ptr values{new bool[nvals]()}; + auto info = GrB_Matrix_extractTuples_BOOL(rowIndices, colIndices, + values.get(), &nvals, matrix()); GrbMatrix::handleError(info); std::vector> result; - for (size_t i = 0; i < n; i++) { + for (size_t i = 0; i < nvals; i++) { if (values[i]) { result.push_back(std::make_pair(rowIndices[i], colIndices[i])); } @@ -199,9 +199,8 @@ std::vector GrbMatrix::extract(size_t index, std::vector indices; indices.resize(vectorNvals); - bool vals[vectorNvals]; - info = - GrB_Vector_extractTuples_BOOL(indices.data(), vals, &vectorNvals, vector); + info = GrB_Vector_extractTuples_BOOL(indices.data(), nullptr, &vectorNvals, + vector); handleError(info); info = GrB_Vector_free(&vector); From 7c7d5b6a0e83bdcde4ac62204ab46cd3ffe3556f Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 2 Feb 2024 17:38:16 +0100 Subject: [PATCH 17/92] Additional reworks for GrbMatrix --- src/engine/GrbMatrix.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index f440fc354a..eb512d44e5 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -86,7 +86,7 @@ std::vector> GrbMatrix::extractTuples() const { std::unique_ptr values{new bool[nvals]()}; auto info = GrB_Matrix_extractTuples_BOOL(rowIndices, colIndices, values.get(), &nvals, matrix()); - GrbMatrix::handleError(info); + handleError(info); std::vector> result; for (size_t i = 
0; i < nvals; i++) { @@ -112,7 +112,7 @@ std::vector GrbMatrix::extractRow(size_t rowIndex) const { size_t GrbMatrix::numNonZero() const { size_t nvals; auto info = GrB_Matrix_nvals(&nvals, matrix()); - GrbMatrix::handleError(info); + handleError(info); return nvals; } @@ -120,7 +120,7 @@ size_t GrbMatrix::numNonZero() const { size_t GrbMatrix::numRows() const { size_t nrows; auto info = GrB_Matrix_nrows(&nrows, matrix()); - GrbMatrix::handleError(info); + handleError(info); return nrows; } @@ -128,7 +128,7 @@ size_t GrbMatrix::numRows() const { size_t GrbMatrix::numCols() const { size_t ncols; auto info = GrB_Matrix_ncols(&ncols, matrix()); - GrbMatrix::handleError(info); + handleError(info); return ncols; } From 21a5994c442cff03af35c09987c112f785779145 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 5 Feb 2024 14:50:23 +0100 Subject: [PATCH 18/92] Reworked extractTuples --- src/engine/GrbMatrix.cpp | 23 ++++++++++------------- src/engine/GrbMatrix.h | 2 +- src/engine/TransitivePath.cpp | 8 +++++--- test/GrbMatrixTest.cpp | 13 ++++++------- 4 files changed, 22 insertions(+), 24 deletions(-) diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index eb512d44e5..629aaa1782 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -19,7 +19,7 @@ GrbMatrix::GrbMatrix(size_t numRows, size_t numCols) { // _____________________________________________________________________________ GrbMatrix GrbMatrix::clone() const { - GrbMatrix matrixCopy = GrbMatrix(); + GrbMatrix matrixCopy; auto info = GrB_Matrix_new(matrixCopy.rawMatrix(), GrB_BOOL, numRows(), numCols()); handleError(info); @@ -79,22 +79,19 @@ GrbMatrix GrbMatrix::diag(size_t nvals) { } // _____________________________________________________________________________ -std::vector> GrbMatrix::extractTuples() const { +std::pair, std::vector> GrbMatrix::extractTuples() + const { size_t nvals = numNonZero(); - size_t rowIndices[nvals]; - size_t colIndices[nvals]; + std::vector 
rowIndices; + rowIndices.resize(nvals); + std::vector colIndices; + colIndices.resize(nvals); std::unique_ptr values{new bool[nvals]()}; - auto info = GrB_Matrix_extractTuples_BOOL(rowIndices, colIndices, - values.get(), &nvals, matrix()); + auto info = GrB_Matrix_extractTuples_BOOL( + rowIndices.data(), colIndices.data(), values.get(), &nvals, matrix()); handleError(info); - std::vector> result; - for (size_t i = 0; i < nvals; i++) { - if (values[i]) { - result.push_back(std::make_pair(rowIndices[i], colIndices[i])); - } - } - return result; + return {rowIndices, colIndices}; } // _____________________________________________________________________________ diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h index 413bb7d4f9..60f60d32fe 100644 --- a/src/engine/GrbMatrix.h +++ b/src/engine/GrbMatrix.h @@ -99,7 +99,7 @@ class GrbMatrix { * @brief Extract all true entries from the matrix. The first entry in the * pair is the row index, the second entry is the column index. */ - std::vector> extractTuples() const; + std::pair, std::vector> extractTuples() const; /** * @brief Extract a column from the matrix. 
Returns all row indices where this diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp index 7031137de3..9ba0b57f91 100644 --- a/src/engine/TransitivePath.cpp +++ b/src/engine/TransitivePath.cpp @@ -558,10 +558,12 @@ void TransitivePath::fillTableWithHull(IdTableStatic& table, const IdMapping& mapping, size_t startSideCol, size_t targetSideCol) { - std::vector> pairs = hull.extractTuples(); - for (size_t i = 0; i < pairs.size(); i++) { + auto [rowIndices, colIndices] = hull.extractTuples(); + + for (size_t i = 0; i < rowIndices.size(); i++) { table.emplace_back(); - auto [startIndex, targetIndex] = pairs[i]; + auto startIndex = rowIndices[i]; + auto targetIndex = colIndices[i]; Id startId = mapping.getId(startIndex); Id targetId = mapping.getId(targetIndex); table(i, startSideCol) = startId; diff --git a/test/GrbMatrixTest.cpp b/test/GrbMatrixTest.cpp index efc32a4e3f..efd92f6e81 100644 --- a/test/GrbMatrixTest.cpp +++ b/test/GrbMatrixTest.cpp @@ -2,7 +2,6 @@ #include #include -#include #include "engine/GrbMatrix.h" #include "gmock/gmock.h" @@ -112,16 +111,16 @@ TEST(GrbMatrixTest, extractTuples) { matrix.setElement(0, 2, true); matrix.setElement(1, 2, true); - std::vector> tuples = matrix.extractTuples(); + auto [rowIndices, colIndices] = matrix.extractTuples(); GrbMatrix::finalize(); - std::vector> expected; - expected.push_back({0, 1}); - expected.push_back({0, 2}); - expected.push_back({1, 2}); + std::vector expectedRowIndices{0, 0, 1}; + std::vector expectedColIndices{1, 2, 2}; + auto expected = {expectedRowIndices, expectedColIndices}; + auto got = {rowIndices, colIndices}; - EXPECT_THAT(tuples, testing::UnorderedElementsAreArray(expected)); + EXPECT_THAT(got, testing::UnorderedElementsAreArray(expected)); } TEST(GrbMatrixTest, extractColumn) { From 6f0f8cdb39e9428bee04bdb1712444909df17f4c Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 5 Feb 2024 16:03:27 +0100 Subject: [PATCH 19/92] Added a quick fix for GrB_init issue --- 
src/engine/GrbMatrix.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index 629aaa1782..52fd233aa4 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -245,7 +245,10 @@ void GrbMatrix::handleError(GrB_Info info) { AD_FAIL(); } -bool GrbMatrix::isInitialized_ = false; +bool GrbMatrix::isInitialized_ = []() { + GrB_init(GrB_NONBLOCKING); + return true; +}(); // _____________________________________________________________________________ void GrbMatrix::initialize() { From 7269e4c03db34f4e11a93c29162085c3fdfd5870 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 5 Feb 2024 17:27:46 +0100 Subject: [PATCH 20/92] Reworked IdMapping - Remove nextIndex_ - Made internal datastructures private - isContained -> contains - const Id& -> Id --- src/engine/TransitivePath.cpp | 2 +- src/engine/TransitivePath.h | 19 ++++++++----------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp index 9ba0b57f91..33a0695cd8 100644 --- a/src/engine/TransitivePath.cpp +++ b/src/engine/TransitivePath.cpp @@ -784,7 +784,7 @@ GrbMatrix TransitivePath::setupStartNodeMatrix(std::span startIds, GrbMatrix startMatrix = GrbMatrix(startIds.size(), numCols); size_t rowIndex = 0; for (Id id : startIds) { - if (!mapping.isContained(id)) { + if (!mapping.contains(id)) { continue; } size_t colIndex = mapping.getIndex(id); diff --git a/src/engine/TransitivePath.h b/src/engine/TransitivePath.h index cf3947822d..6c21aa40bd 100644 --- a/src/engine/TransitivePath.h +++ b/src/engine/TransitivePath.h @@ -61,29 +61,26 @@ struct TransitivePathSide { // This struct keeps track of the mapping between Ids and matrix indices struct IdMapping { - ad_utility::HashMap idMap_{}; - - std::vector indexMap_; - - size_t nextIndex_ = 0; - - bool isContained(Id id) { return idMap_.contains(id); } + bool contains(Id id) { return idMap_.contains(id); } 
size_t addId(Id id) { if (!idMap_.contains(id)) { - idMap_.insert({id, nextIndex_}); indexMap_.push_back(id); - nextIndex_++; - return nextIndex_ - 1; } + idMap_.try_emplace(id, indexMap_.size() - 1); return idMap_[id]; } Id getId(size_t index) const { return indexMap_.at(index); } - size_t getIndex(const Id& id) const { return idMap_.at(id); } + size_t getIndex(Id id) const { return idMap_.at(id); } size_t size() const { return indexMap_.size(); } + + private: + ad_utility::HashMap idMap_; + + std::vector indexMap_; }; class TransitivePath : public Operation { From 97d766ad5898eb1d3ecfa8b5c67f91a63a2f32a2 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 6 Feb 2024 11:16:28 +0100 Subject: [PATCH 21/92] Reworks on TransitivePath - Remove C style arrays - Add checkCancellation --- src/engine/TransitivePath.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp index 33a0695cd8..74aa4745be 100644 --- a/src/engine/TransitivePath.cpp +++ b/src/engine/TransitivePath.cpp @@ -257,7 +257,7 @@ void TransitivePath::computeTransitivePath( std::unique_ptr hull; if (!startSide.isVariable()) { - const Id startNode[]{std::get(startSide.value_)}; + std::vector startNode{std::get(startSide.value_)}; GrbMatrix startMatrix = setupStartNodeMatrix(startNode, graph.numRows(), mapping); hull = std::make_unique( @@ -273,7 +273,7 @@ void TransitivePath::computeTransitivePath( } if (!startSide.isVariable()) { - const Id startNode[]{std::get(startSide.value_)}; + std::vector startNode{std::get(startSide.value_)}; TransitivePath::fillTableWithHull(res, *hull, mapping, startNode, startSide.outputCol_, targetSide.outputCol_); @@ -464,9 +464,10 @@ GrbMatrix TransitivePath::transitiveHull( size_t nvals = result->numNonZero(); while (nvals > previousNvals && pathLength < maxDist_) { previousNvals = result->numNonZero(); - // Row major, Column major + // TODO: Check effect of matrix orientation (Row 
major, Column major) on + // performance. result->accumulateMultiply(graph); - // Add check cancellation + checkCancellation(); nvals = result->numNonZero(); pathLength++; } From a06ed718f59914652ca3ebd61a243d90b36bf5ed Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Wed, 7 Feb 2024 11:08:41 +0100 Subject: [PATCH 22/92] A tiny bugfix and make the stuff configurable. --- src/engine/TransitivePath.cpp | 5 +++-- src/global/Constants.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp index 74aa4745be..0748b3f174 100644 --- a/src/engine/TransitivePath.cpp +++ b/src/engine/TransitivePath.cpp @@ -324,7 +324,7 @@ ResultTable TransitivePath::computeResult() { size_t subWidth = subRes->idTable().numColumns(); - bool useFallback = false; + bool useFallback = !RuntimeParameters().get<"use-graphblas">(); auto computeForOneSide = [this, &idTable, subRes, subWidth, useFallback]( auto& boundSide, @@ -432,8 +432,9 @@ std::shared_ptr TransitivePath::bindLeftOrRightSide( columnIndexWithType.columnIndex_ += columnIndex > inputCol ? 
1 : 2; p->variableColumns_[variable] = columnIndexWithType; - p->resultWidth_++; + //p->resultWidth_++; } + p->resultWidth_ += leftOrRightOp->getResultWidth() - 1; return p; } diff --git a/src/global/Constants.h b/src/global/Constants.h index f887a6200d..284c13f8f9 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -239,6 +239,7 @@ inline auto& RuntimeParameters() { DurationParameter{ 30s}), SizeT<"lazy-index-scan-max-size-materialization">{1'000'000}, + Bool<"use-graphblas">{false}, Bool<"use-group-by-hash-map-optimization">{false}}; }(); return params; From 6e84e8a5f6d63166d97edb2409eff82ab23f6c4e Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 7 Feb 2024 17:33:29 +0100 Subject: [PATCH 23/92] Fix build error, add move assignment to GrbMatrix --- CMakeLists.txt | 2 +- src/engine/CMakeLists.txt | 2 +- src/engine/GrbMatrix.h | 1 + test/CMakeLists.txt | 4 ++-- test/GrbMatrixTest.cpp | 3 --- 5 files changed, 5 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 24832b41d7..40b32018d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -393,7 +393,7 @@ add_executable(IndexBuilderMain src/index/IndexBuilderMain.cpp) qlever_target_link_libraries(IndexBuilderMain index ${CMAKE_THREAD_LIBS_INIT} Boost::program_options) add_executable(ServerMain src/ServerMain.cpp) -qlever_target_link_libraries (ServerMain engine ${CMAKE_THREAD_LIBS_INIT} Boost::program_options graphblas) +qlever_target_link_libraries (ServerMain engine ${CMAKE_THREAD_LIBS_INIT} Boost::program_options) target_precompile_headers(ServerMain REUSE_FROM engine) add_executable(PrefixHeuristicEvaluatorMain src/PrefixHeuristicEvaluatorMain.cpp) diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index acd89c4e2e..e9cec801bf 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -12,4 +12,4 @@ add_library(engine VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp CartesianProductJoin.cpp TextIndexScanForWord.cpp 
TextIndexScanForEntity.cpp idTable/CompressedExternalIdTable.h GrbMatrix.cpp) -qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams) +qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams graphblas) diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h index 60f60d32fe..7285e9ec1e 100644 --- a/src/engine/GrbMatrix.h +++ b/src/engine/GrbMatrix.h @@ -34,6 +34,7 @@ class GrbMatrix { // Move constructor GrbMatrix(GrbMatrix&& otherMatrix) = default; + GrbMatrix& operator=(GrbMatrix&&) = default; // Disable copy constructor and assignment operator GrbMatrix(const GrbMatrix&) = delete; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fb0086459a..2ffb6ea267 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -7,7 +7,7 @@ add_subdirectory(util) # general test utilities and all libraries that are specified as additional # arguments. function (linkTest basename) - qlever_target_link_libraries(${basename} ${ARGN} GTest::gtest GTest::gmock_main testUtil graphblas ${CMAKE_THREAD_LIBS_INIT}) + qlever_target_link_libraries(${basename} ${ARGN} GTest::gtest GTest::gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) endfunction() # Add the exectutable ${basename} that is compiled from the source file @@ -149,7 +149,7 @@ addLinkAndDiscoverTest(MultiColumnJoinTest engine) addLinkAndDiscoverTest(IdTableTest util) -addLinkAndDiscoverTest(GrbMatrixTest engine) +addLinkAndDiscoverTest(GrbMatrixTest engine graphblas) addLinkAndDiscoverTest(TransitivePathTest engine) diff --git a/test/GrbMatrixTest.cpp b/test/GrbMatrixTest.cpp index efd92f6e81..8be29eef8a 100644 --- a/test/GrbMatrixTest.cpp +++ b/test/GrbMatrixTest.cpp @@ -1,8 +1,5 @@ -#include #include -#include - #include "engine/GrbMatrix.h" #include "gmock/gmock.h" From 005dc9cc59afe45844431d3c1380adf6ca2ba005 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Thu, 8 Feb 2024 
16:54:41 +0100 Subject: [PATCH 24/92] Simplifications Removed unnecessary unique_ptr and make_optionals --- src/engine/TransitivePath.cpp | 40 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp index 0748b3f174..39c3a915d5 100644 --- a/src/engine/TransitivePath.cpp +++ b/src/engine/TransitivePath.cpp @@ -202,16 +202,15 @@ void TransitivePath::computeTransitivePathBound( GrbMatrix startNodeMatrix = setupStartNodeMatrix(startNodes, graph.numRows(), mapping); - auto hull = std::make_unique( - transitiveHull(graph, std::make_optional(std::move(startNodeMatrix)))); + auto hull = transitiveHull(graph, std::move(startNodeMatrix)); if (!targetSide.isVariable()) { Id target = std::get(targetSide.value_); size_t targetIndex = mapping.getIndex(target); - hull = std::make_unique(getTargetRow(*hull, targetIndex)); + hull = getTargetRow(hull, targetIndex); } TransitivePath::fillTableWithHull( - res, *hull, mapping, startSideTable, startNodes, startSide.outputCol_, + res, hull, mapping, startSideTable, startNodes, startSide.outputCol_, targetSide.outputCol_, startSide.treeAndCol_.value().second); *dynRes = std::move(res).toDynamic(); @@ -255,31 +254,30 @@ void TransitivePath::computeTransitivePath( GrbMatrix::initialize(); auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); - std::unique_ptr hull; + GrbMatrix hull; if (!startSide.isVariable()) { std::vector startNode{std::get(startSide.value_)}; GrbMatrix startMatrix = setupStartNodeMatrix(startNode, graph.numRows(), mapping); - hull = std::make_unique( - transitiveHull(graph, std::make_optional(std::move(startMatrix)))); + hull = transitiveHull(graph, std::move(startMatrix)); } else { - hull = std::make_unique(transitiveHull(graph, std::nullopt)); + hull = transitiveHull(graph, std::nullopt); } if (!targetSide.isVariable()) { Id target = std::get(targetSide.value_); size_t targetIndex = 
mapping.getIndex(target); - hull = std::make_unique(getTargetRow(*hull, targetIndex)); + hull = getTargetRow(hull, targetIndex); } if (!startSide.isVariable()) { std::vector startNode{std::get(startSide.value_)}; - TransitivePath::fillTableWithHull(res, *hull, mapping, startNode, + TransitivePath::fillTableWithHull(res, hull, mapping, startNode, startSide.outputCol_, targetSide.outputCol_); } else { TransitivePath::fillTableWithHull( - res, *hull, mapping, startSide.outputCol_, targetSide.outputCol_); + res, hull, mapping, startSide.outputCol_, targetSide.outputCol_); } *dynRes = std::move(res).toDynamic(); @@ -432,7 +430,7 @@ std::shared_ptr TransitivePath::bindLeftOrRightSide( columnIndexWithType.columnIndex_ += columnIndex > inputCol ? 1 : 2; p->variableColumns_[variable] = columnIndexWithType; - //p->resultWidth_++; + // p->resultWidth_++; } p->resultWidth_ += leftOrRightOp->getResultWidth() - 1; return p; @@ -448,31 +446,31 @@ bool TransitivePath::isBoundOrId() const { GrbMatrix TransitivePath::transitiveHull( const GrbMatrix& graph, std::optional startNodes) const { size_t pathLength = 0; - std::unique_ptr result; + GrbMatrix result; if (startNodes) { - result = std::make_unique(std::move(startNodes.value())); + result = std::move(startNodes.value()); } else { - result = std::make_unique(GrbMatrix::diag(graph.numRows())); + result = GrbMatrix::diag(graph.numRows()); } if (minDist_ > 0) { - result = std::make_unique(result->multiply(graph)); + result = result.multiply(graph); pathLength++; } size_t previousNvals = 0; - size_t nvals = result->numNonZero(); + size_t nvals = result.numNonZero(); while (nvals > previousNvals && pathLength < maxDist_) { - previousNvals = result->numNonZero(); + previousNvals = result.numNonZero(); // TODO: Check effect of matrix orientation (Row major, Column major) on // performance. 
- result->accumulateMultiply(graph); + result.accumulateMultiply(graph); checkCancellation(); - nvals = result->numNonZero(); + nvals = result.numNonZero(); pathLength++; } - return std::move(*result); + return result; } // _____________________________________________________________________________ From e7dbf4dd22f04c182a5117872ce1349b2cb41c01 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 9 Feb 2024 11:12:07 +0100 Subject: [PATCH 25/92] Reworks - Throw exception on GrB_NO_VALUE - unique_ptr in GrbMatrix uses custom delter - Simplified GrbMatrixTest --- src/engine/GrbMatrix.cpp | 4 +- src/engine/GrbMatrix.h | 7 ++- src/engine/TransitivePath.cpp | 1 + test/GrbMatrixTest.cpp | 100 +++++++++++----------------------- 4 files changed, 41 insertions(+), 71 deletions(-) diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index 52fd233aa4..ae6735a3aa 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -39,10 +39,10 @@ void GrbMatrix::setElement(size_t row, size_t col, bool value) { bool GrbMatrix::getElement(size_t row, size_t col) const { bool result; auto info = GrB_Matrix_extractElement_BOOL(&result, matrix(), row, col); - handleError(info); if (info == GrB_NO_VALUE) { return false; } + handleError(info); return result; } @@ -212,7 +212,7 @@ void GrbMatrix::handleError(GrB_Info info) { case GrB_SUCCESS: return; case GrB_NO_VALUE: - return; + AD_THROW("GraphBLAS error: entry does not appear in the matrix"); case GrB_UNINITIALIZED_OBJECT: AD_THROW("GraphBLAS error: object has not been initialized"); case GrB_NULL_POINTER: diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h index 7285e9ec1e..13505b4162 100644 --- a/src/engine/GrbMatrix.h +++ b/src/engine/GrbMatrix.h @@ -4,6 +4,7 @@ #pragma once +#include extern "C" { #include } @@ -18,7 +19,11 @@ extern "C" { */ class GrbMatrix { private: - std::unique_ptr matrix_ = std::make_unique(); + using MatrixDeleter = + decltype([](GrB_Matrix* matrix) { 
GrB_Matrix_free(matrix); }); + using MatrixPtr = std::unique_ptr; + MatrixPtr matrix_ = + std::unique_ptr(new GrB_Matrix()); static bool isInitialized_; public: diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp index 39c3a915d5..6262d100ad 100644 --- a/src/engine/TransitivePath.cpp +++ b/src/engine/TransitivePath.cpp @@ -4,6 +4,7 @@ #include "TransitivePath.h" +#include #include #include #include diff --git a/test/GrbMatrixTest.cpp b/test/GrbMatrixTest.cpp index 8be29eef8a..88caaad84c 100644 --- a/test/GrbMatrixTest.cpp +++ b/test/GrbMatrixTest.cpp @@ -1,24 +1,38 @@ +// Copyright 2018, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Herrmann (johannes.roland.herrmann@mars.uni-freiburg.de) + #include #include "engine/GrbMatrix.h" #include "gmock/gmock.h" +// This helper function checks all important proprties of a matrix. +// One matrix consists of row index, column index and value in this order. +// Entries which do not appear in the entries vector are ignored. 
+using Entries = std::vector>; +void checkMatrix(GrbMatrix& matrix, size_t numRows, size_t numCols, + size_t numNonZero, Entries entries) { + EXPECT_THAT(matrix.numNonZero(), numNonZero); + EXPECT_THAT(matrix.numRows(), numRows); + EXPECT_THAT(matrix.numCols(), numCols); + + for (auto [rowIndex, colIndex, value] : entries) { + EXPECT_THAT(matrix.getElement(rowIndex, colIndex), value); + } +} + TEST(GrbMatrixTest, constructor) { GrbMatrix::initialize(); GrbMatrix matrix = GrbMatrix(2, 3); - size_t numRows = matrix.numRows(); - size_t numCols = matrix.numCols(); - size_t nvals = matrix.numNonZero(); - GrbMatrix::finalize(); + checkMatrix(matrix, 2, 3, 0, {}); - EXPECT_EQ(nvals, 0); - EXPECT_EQ(numRows, 2); - EXPECT_EQ(numCols, 3); + GrbMatrix::finalize(); } -TEST(GrbMatrixTest, copy) { +TEST(GrbMatrixTest, clone) { GrbMatrix::initialize(); GrbMatrix matrix1 = GrbMatrix(2, 2); @@ -28,10 +42,8 @@ TEST(GrbMatrixTest, copy) { matrix1.setElement(1, 1, true); - EXPECT_EQ(matrix2.getElement(0, 0), true); - EXPECT_EQ(matrix2.getElement(0, 1), false); - EXPECT_EQ(matrix2.getElement(1, 0), false); - EXPECT_EQ(matrix2.getElement(1, 1), false); + checkMatrix(matrix2, 2, 2, 1, + {{0, 0, true}, {0, 1, false}, {1, 0, false}, {1, 1, false}}); GrbMatrix::finalize(); } @@ -43,17 +55,9 @@ TEST(GrbMatrixTest, getSetElement) { matrix.setElement(1, 0, true); matrix.setElement(0, 2, true); - bool elemOneZero = matrix.getElement(1, 0); - bool elemZeroTwo = matrix.getElement(0, 2); - bool elemOneTwo = matrix.getElement(1, 2); - size_t nvals = matrix.numNonZero(); + checkMatrix(matrix, 3, 3, 2, {{1, 0, true}, {0, 2, true}}); GrbMatrix::finalize(); - - EXPECT_EQ(nvals, 2); - EXPECT_EQ(elemOneZero, true); - EXPECT_EQ(elemZeroTwo, true); - EXPECT_EQ(elemOneTwo, false); } TEST(GrbMatrixTest, build) { @@ -64,17 +68,7 @@ TEST(GrbMatrixTest, build) { GrbMatrix matrix = GrbMatrix::build(rowIndices, colIndices, 3, 3); - EXPECT_EQ(false, matrix.getElement(0, 0)); - EXPECT_EQ(true, matrix.getElement(0, 
1)); - EXPECT_EQ(true, matrix.getElement(0, 2)); - - EXPECT_EQ(false, matrix.getElement(1, 0)); - EXPECT_EQ(false, matrix.getElement(1, 1)); - EXPECT_EQ(true, matrix.getElement(1, 2)); - - EXPECT_EQ(false, matrix.getElement(2, 0)); - EXPECT_EQ(false, matrix.getElement(2, 1)); - EXPECT_EQ(false, matrix.getElement(2, 2)); + checkMatrix(matrix, 3, 3, 3, {{0, 1, true}, {0, 2, true}, {1, 2, true}}); GrbMatrix::finalize(); } @@ -84,17 +78,7 @@ TEST(GrbMatrixTest, diag) { auto matrix = GrbMatrix::diag(3); - EXPECT_EQ(true, matrix.getElement(0, 0)); - EXPECT_EQ(false, matrix.getElement(0, 1)); - EXPECT_EQ(false, matrix.getElement(0, 2)); - - EXPECT_EQ(false, matrix.getElement(1, 0)); - EXPECT_EQ(true, matrix.getElement(1, 1)); - EXPECT_EQ(false, matrix.getElement(1, 2)); - - EXPECT_EQ(false, matrix.getElement(2, 0)); - EXPECT_EQ(false, matrix.getElement(2, 1)); - EXPECT_EQ(true, matrix.getElement(2, 2)); + checkMatrix(matrix, 3, 3, 3, {{0, 0, true}, {1, 1, true}, {2, 2, true}}); GrbMatrix::finalize(); } @@ -167,10 +151,7 @@ TEST(GrbMatrixTest, multiplySquareMatrices) { GrbMatrix matrix3 = matrix1.multiply(matrix2); - EXPECT_EQ(matrix3.getElement(0, 0), true); - EXPECT_EQ(matrix3.getElement(0, 1), false); - EXPECT_EQ(matrix3.getElement(1, 0), true); - EXPECT_EQ(matrix3.getElement(1, 1), false); + checkMatrix(matrix3, 2, 2, 2, {{0, 0, true}, {1, 0, true}}); GrbMatrix::finalize(); } @@ -189,12 +170,7 @@ TEST(GrbMatrixTest, multiplyShapedMatrices) { GrbMatrix matrix3 = matrix1.multiply(matrix2); - EXPECT_EQ(matrix3.numRows(), 2); - EXPECT_EQ(matrix3.numCols(), 2); - EXPECT_EQ(matrix3.getElement(0, 0), true); - EXPECT_EQ(matrix3.getElement(0, 1), false); - EXPECT_EQ(matrix3.getElement(1, 0), true); - EXPECT_EQ(matrix3.getElement(1, 1), false); + checkMatrix(matrix3, 2, 2, 2, {{0, 0, true}, {1, 0, true}}); GrbMatrix::finalize(); } @@ -210,17 +186,7 @@ TEST(GrbMatrixTest, transpose) { GrbMatrix result = matrix.transpose(); - EXPECT_EQ(3, result.numRows()); - EXPECT_EQ(2, 
result.numCols()); - - EXPECT_EQ(true, result.getElement(0, 0)); - EXPECT_EQ(false, result.getElement(0, 1)); - - EXPECT_EQ(true, result.getElement(1, 0)); - EXPECT_EQ(false, result.getElement(1, 1)); - - EXPECT_EQ(true, result.getElement(2, 0)); - EXPECT_EQ(false, result.getElement(2, 1)); + checkMatrix(result, 3, 2, 3, {{0, 0, true}, {1, 0, true}, {2, 0, true}}); GrbMatrix::finalize(); } @@ -238,10 +204,8 @@ TEST(GrbMatrixTest, accumulateMultiply) { matrix1.accumulateMultiply(matrix2); - EXPECT_EQ(matrix1.getElement(0, 0), true); - EXPECT_EQ(matrix1.getElement(0, 1), true); - EXPECT_EQ(matrix1.getElement(1, 0), true); - EXPECT_EQ(matrix1.getElement(1, 1), true); + checkMatrix(matrix1, 2, 2, 4, + {{0, 0, true}, {0, 1, true}, {1, 0, true}, {1, 1, true}}); GrbMatrix::finalize(); } From 714744a57cfef81e011e19fb105725919e7c922a Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 9 Feb 2024 11:59:02 +0100 Subject: [PATCH 26/92] Added timer to transitive path computation --- src/engine/TransitivePath.cpp | 107 +++++++++++++++++++++++++++++----- 1 file changed, 92 insertions(+), 15 deletions(-) diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp index 6262d100ad..ec25ba78d2 100644 --- a/src/engine/TransitivePath.cpp +++ b/src/engine/TransitivePath.cpp @@ -14,6 +14,7 @@ #include "engine/ExportQueryExecutionTrees.h" #include "engine/IndexScan.h" #include "util/Exception.h" +#include "util/Timer.h" // _____________________________________________________________________________ TransitivePath::TransitivePath(QueryExecutionContext* qec, @@ -195,6 +196,9 @@ void TransitivePath::computeTransitivePathBound( decltype(auto) startCol = sub.getColumn(startSide.subCol_); decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + GrbMatrix::initialize(); auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); @@ -203,6 +207,10 @@ void 
TransitivePath::computeTransitivePathBound( GrbMatrix startNodeMatrix = setupStartNodeMatrix(startNodes, graph.numRows(), mapping); + timer.stop(); + auto initTime = timer.msecs(); + timer.start(); + auto hull = transitiveHull(graph, std::move(startNodeMatrix)); if (!targetSide.isVariable()) { Id target = std::get(targetSide.value_); @@ -210,10 +218,22 @@ void TransitivePath::computeTransitivePathBound( hull = getTargetRow(hull, targetIndex); } + timer.stop(); + auto hullTime = timer.msecs(); + timer.start(); + TransitivePath::fillTableWithHull( res, hull, mapping, startSideTable, startNodes, startSide.outputCol_, targetSide.outputCol_, startSide.treeAndCol_.value().second); + timer.stop(); + auto fillTime = timer.msecs(); + + LOG(DEBUG) << "GraphBLAS Timing measurements:" << std::endl; + LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; + LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; + LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; + *dynRes = std::move(res).toDynamic(); } @@ -224,9 +244,16 @@ void TransitivePath::computeTransitivePathBoundFallback( const TransitivePathSide& targetSide, const IdTable& startSideTable) const { IdTableStatic res = std::move(*dynRes).toStatic(); + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + auto [edges, nodes] = setupMapAndNodes( dynSub, startSide, targetSide, startSideTable); + timer.stop(); + auto initTime = timer.msecs(); + timer.start(); + Map hull(allocator()); if (!targetSide.isVariable()) { hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); @@ -234,10 +261,22 @@ void TransitivePath::computeTransitivePathBoundFallback( hull = transitiveHull(edges, nodes, std::nullopt); } + timer.stop(); + auto hullTime = timer.msecs(); + timer.start(); + TransitivePath::fillTableWithHull( res, hull, nodes, startSide.outputCol_, targetSide.outputCol_, startSideTable, startSide.treeAndCol_.value().second); + timer.stop(); + 
auto fillTime = timer.msecs(); + + LOG(DEBUG) << "Fallback Timing measurements:" << std::endl; + LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; + LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; + LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; + *dynRes = std::move(res).toDynamic(); } @@ -252,9 +291,16 @@ void TransitivePath::computeTransitivePath( decltype(auto) startCol = sub.getColumn(startSide.subCol_); decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + GrbMatrix::initialize(); auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); + timer.stop(); + auto initTime = timer.msecs(); + timer.start(); + GrbMatrix hull; if (!startSide.isVariable()) { std::vector startNode{std::get(startSide.value_)}; @@ -265,6 +311,10 @@ void TransitivePath::computeTransitivePath( hull = transitiveHull(graph, std::nullopt); } + timer.stop(); + auto hullTime = timer.msecs(); + timer.start(); + if (!targetSide.isVariable()) { Id target = std::get(targetSide.value_); size_t targetIndex = mapping.getIndex(target); @@ -281,6 +331,14 @@ void TransitivePath::computeTransitivePath( res, hull, mapping, startSide.outputCol_, targetSide.outputCol_); } + timer.stop(); + auto fillTime = timer.msecs(); + + LOG(DEBUG) << "GraphBLAS Timing measurements:" << std::endl; + LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; + LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; + LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; + *dynRes = std::move(res).toDynamic(); } @@ -291,9 +349,16 @@ void TransitivePath::computeTransitivePathFallback( const TransitivePathSide& targetSide) const { IdTableStatic res = std::move(*dynRes).toStatic(); + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + auto [edges, nodes] = setupMapAndNodes(dynSub, 
startSide, targetSide); + timer.stop(); + auto initTime = timer.msecs(); + timer.start(); + Map hull{allocator()}; if (!targetSide.isVariable()) { hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); @@ -301,9 +366,21 @@ void TransitivePath::computeTransitivePathFallback( hull = transitiveHull(edges, nodes, std::nullopt); } + timer.stop(); + auto hullTime = timer.msecs(); + timer.start(); + TransitivePath::fillTableWithHull(res, hull, startSide.outputCol_, targetSide.outputCol_); + timer.stop(); + auto fillTime = timer.msecs(); + + LOG(DEBUG) << "Fallback Timing measurements:" << std::endl; + LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; + LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; + LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; + *dynRes = std::move(res).toDynamic(); } @@ -323,23 +400,23 @@ ResultTable TransitivePath::computeResult() { size_t subWidth = subRes->idTable().numColumns(); - bool useFallback = !RuntimeParameters().get<"use-graphblas">(); + bool useGraphblas = !RuntimeParameters().get<"use-graphblas">(); - auto computeForOneSide = [this, &idTable, subRes, subWidth, useFallback]( + auto computeForOneSide = [this, &idTable, subRes, subWidth, useGraphblas]( auto& boundSide, auto& otherSide) -> ResultTable { shared_ptr sideRes = boundSide.treeAndCol_.value().first->getResult(); size_t sideWidth = sideRes->idTable().numColumns(); - if (useFallback) { + if (useGraphblas) { CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), - &TransitivePath::computeTransitivePathBoundFallback, this, + &TransitivePath::computeTransitivePathBound, this, &idTable, subRes->idTable(), boundSide, otherSide, sideRes->idTable()); } else { CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), - &TransitivePath::computeTransitivePathBound, this, + &TransitivePath::computeTransitivePathBoundFallback, this, &idTable, subRes->idTable(), boundSide, otherSide, 
sideRes->idTable()); } @@ -354,26 +431,26 @@ ResultTable TransitivePath::computeResult() { return computeForOneSide(rhs_, lhs_); // Right side is an Id } else if (!rhs_.isVariable()) { - if (useFallback) { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePath::computeTransitivePathFallback, this, - &idTable, subRes->idTable(), rhs_, lhs_); - } else { + if (useGraphblas) { CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), &TransitivePath::computeTransitivePath, this, &idTable, subRes->idTable(), rhs_, lhs_); + } else { + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePath::computeTransitivePathFallback, this, + &idTable, subRes->idTable(), rhs_, lhs_); } // No side is a bound variable, the right side is an unbound variable // and the left side is either an unbound Variable or an ID. } else { - if (useFallback) { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePath::computeTransitivePathFallback, this, - &idTable, subRes->idTable(), lhs_, rhs_); - } else { + if (useGraphblas) { CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), &TransitivePath::computeTransitivePath, this, &idTable, subRes->idTable(), lhs_, rhs_); + } else { + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePath::computeTransitivePathFallback, this, + &idTable, subRes->idTable(), lhs_, rhs_); } } From 0fd9f92280e25a104338b12df7201dc6259e053f Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 12 Feb 2024 13:53:39 +0100 Subject: [PATCH 27/92] **WIP** Refactor of TransitivePath into Fallback and Graphblas --- src/engine/CMakeLists.txt | 3 +- src/engine/QueryExecutionTree.cpp | 7 +- src/engine/QueryPlanner.cpp | 9 +- src/engine/TransitivePathBase.cpp | 269 +++++++++++++++++ src/engine/TransitivePathBase.h | 164 +++++++++++ src/engine/TransitivePathFallback.cpp | 383 +++++++++++++++++++++++++ src/engine/TransitivePathFallback.h | 201 +++++++++++++ src/engine/TransitivePathGraphblas.cpp | 364 
+++++++++++++++++++++++ src/engine/TransitivePathGraphblas.h | 215 ++++++++++++++ test/LocalVocabTest.cpp | 7 +- test/QueryPlannerTestHelpers.h | 22 +- test/TransitivePathTest.cpp | 40 +-- 12 files changed, 1646 insertions(+), 38 deletions(-) create mode 100644 src/engine/TransitivePathBase.cpp create mode 100644 src/engine/TransitivePathBase.h create mode 100644 src/engine/TransitivePathFallback.cpp create mode 100644 src/engine/TransitivePathFallback.h create mode 100644 src/engine/TransitivePathGraphblas.cpp create mode 100644 src/engine/TransitivePathGraphblas.h diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index e9cec801bf..1b5c4b016a 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -7,7 +7,8 @@ add_library(engine Distinct.cpp OrderBy.cpp Filter.cpp Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan.cpp - Union.cpp MultiColumnJoin.cpp TransitivePath.cpp Service.cpp + Union.cpp MultiColumnJoin.cpp TransitivePathBase.cpp TransitivePathFallback.cpp + TransitivePathGraphblas.cpp Service.cpp Values.cpp Bind.cpp Minus.cpp RuntimeInformation.cpp CheckUsePatternTrick.cpp VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index 67b6b05e0e..831193d386 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -30,7 +30,7 @@ #include "engine/Sort.h" #include "engine/TextIndexScanForEntity.h" #include "engine/TextIndexScanForWord.h" -#include "engine/TransitivePath.h" +#include "engine/TransitivePathBase.h" #include "engine/Union.h" #include "engine/Values.h" #include "engine/ValuesForTesting.h" @@ -158,7 +158,7 @@ void QueryExecutionTree::setOperation(std::shared_ptr operation) { type_ = VALUES; } else if constexpr (std::is_same_v) { type_ = SERVICE; - } 
else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { type_ = TRANSITIVE_PATH; } else if constexpr (std::is_same_v) { type_ = ORDER_BY; @@ -204,7 +204,8 @@ template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); +template void QueryExecutionTree::setOperation( + std::shared_ptr); template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation(std::shared_ptr); template void QueryExecutionTree::setOperation( diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index e08cd70e5c..5a7860f6a1 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -28,7 +28,7 @@ #include "engine/Sort.h" #include "engine/TextIndexScanForEntity.h" #include "engine/TextIndexScanForWord.h" -#include "engine/TransitivePath.h" +#include "engine/TransitivePathBase.h" #include "engine/Union.h" #include "engine/Values.h" #include "parser/Alias.h" @@ -438,8 +438,9 @@ std::vector QueryPlanner::optimize( right.value_ = getSideValue(arg._right); size_t min = arg._min; size_t max = arg._max; - auto plan = makeSubtreePlan(_qec, sub._qet, left, - right, min, max); + auto transitivePath = TransitivePathBase::makeTransitivePath( + _qec, sub._qet, left, right, min, max); + auto plan = makeSubtreePlan(transitivePath); candidatesOut.push_back(std::move(plan)); } joinCandidates(std::move(candidatesOut)); @@ -1801,7 +1802,7 @@ auto QueryPlanner::createJoinWithTransitivePath( std::shared_ptr otherTree = aIsTransPath ? b._qet : a._qet; auto& transPathTree = aIsTransPath ? 
a._qet : b._qet; - auto transPathOperation = std::dynamic_pointer_cast( + auto transPathOperation = std::dynamic_pointer_cast( transPathTree->getRootOperation()); // TODO: Handle the case of two or more common variables diff --git a/src/engine/TransitivePathBase.cpp b/src/engine/TransitivePathBase.cpp new file mode 100644 index 0000000000..1cd3475b98 --- /dev/null +++ b/src/engine/TransitivePathBase.cpp @@ -0,0 +1,269 @@ +// Copyright 2019, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) + +#include "TransitivePathBase.h" + +#include +#include +#include +#include + +#include "engine/ExportQueryExecutionTrees.h" +#include "engine/IndexScan.h" +#include "engine/TransitivePathFallback.h" +#include "engine/TransitivePathGraphblas.h" +#include "util/Exception.h" + +// _____________________________________________________________________________ +TransitivePathBase::TransitivePathBase( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist) + : Operation(qec), + subtree_(child + ? 
QueryExecutionTree::createSortedTree(std::move(child), {0}) + : nullptr), + lhs_(std::move(leftSide)), + rhs_(std::move(rightSide)), + minDist_(minDist), + maxDist_(maxDist) { + AD_CORRECTNESS_CHECK(qec != nullptr); + if (lhs_.isVariable()) { + variableColumns_[std::get(lhs_.value_)] = + makeAlwaysDefinedColumn(0); + } + if (rhs_.isVariable()) { + variableColumns_[std::get(rhs_.value_)] = + makeAlwaysDefinedColumn(1); + } + + lhs_.outputCol_ = 0; + rhs_.outputCol_ = 1; +} + +// _____________________________________________________________________________ +std::string TransitivePathBase::getCacheKeyImpl() const { + std::ostringstream os; + os << " minDist " << minDist_ << " maxDist " << maxDist_ << "\n"; + + os << "Left side:\n"; + os << lhs_.getCacheKey(); + + os << "Right side:\n"; + os << rhs_.getCacheKey(); + + AD_CORRECTNESS_CHECK(subtree_); + os << "Subtree:\n" << subtree_->getCacheKey() << '\n'; + + return std::move(os).str(); +} + +// _____________________________________________________________________________ +std::string TransitivePathBase::getDescriptor() const { + std::ostringstream os; + os << "TransitivePath "; + // If not full transitive hull, show interval as [min, max]. + if (minDist_ > 1 || maxDist_ < std::numeric_limits::max()) { + os << "[" << minDist_ << ", " << maxDist_ << "] "; + } + auto getName = [this](ValueId id) { + auto optStringAndType = + ExportQueryExecutionTrees::idToStringAndType(getIndex(), id, {}); + if (optStringAndType.has_value()) { + return optStringAndType.value().first; + } else { + return absl::StrCat("#", id.getBits()); + } + }; + // Left variable or entity name. + if (lhs_.isVariable()) { + os << std::get(lhs_.value_).name(); + } else { + os << getName(std::get(lhs_.value_)); + } + // The predicate. 
+ auto scanOperation = + std::dynamic_pointer_cast(subtree_->getRootOperation()); + if (scanOperation != nullptr) { + os << " " << scanOperation->getPredicate() << " "; + } else { + // Escaped the question marks to avoid a warning about ignored trigraphs. + os << R"( )"; + } + // Right variable or entity name. + if (rhs_.isVariable()) { + os << std::get(rhs_.value_).name(); + } else { + os << getName(std::get(rhs_.value_)); + } + return std::move(os).str(); +} + +// _____________________________________________________________________________ +size_t TransitivePathBase::getResultWidth() const { return resultWidth_; } + +// _____________________________________________________________________________ +vector TransitivePathBase::resultSortedOn() const { + if (lhs_.isSortedOnInputCol()) { + return {0}; + } + if (rhs_.isSortedOnInputCol()) { + return {1}; + } + + return {}; +} + +// _____________________________________________________________________________ +VariableToColumnMap TransitivePathBase::computeVariableToColumnMap() const { + return variableColumns_; +} + +// _____________________________________________________________________________ +void TransitivePathBase::setTextLimit(size_t limit) { + for (auto child : getChildren()) { + child->setTextLimit(limit); + } +} + +// _____________________________________________________________________________ +bool TransitivePathBase::knownEmptyResult() { + return subtree_->knownEmptyResult(); +} + +// _____________________________________________________________________________ +float TransitivePathBase::getMultiplicity(size_t col) { + (void)col; + // The multiplicities are not known. + return 1; +} + +// _____________________________________________________________________________ +uint64_t TransitivePathBase::getSizeEstimateBeforeLimit() { + if (std::holds_alternative(lhs_.value_) || + std::holds_alternative(rhs_.value_)) { + // If the subject or object is fixed, assume that the number of matching + // triples is 1000. 
This will usually be an overestimate, but it will do the + // job of avoiding query plans that first generate large intermediate + // results and only then merge them with a triple such as this. In the + // lhs_.isVar && rhs_.isVar case below, we assume a worst-case blowup of + // 10000; see the comment there. + return 1000; + } + if (lhs_.treeAndCol_.has_value()) { + return lhs_.treeAndCol_.value().first->getSizeEstimate(); + } + if (rhs_.treeAndCol_.has_value()) { + return rhs_.treeAndCol_.value().first->getSizeEstimate(); + } + // Set costs to something very large, so that we never compute the complete + // transitive hull (unless the variables on both sides are not bound in any + // other way, so that the only possible query plan is to compute the complete + // transitive hull). + // + // NOTE: _subtree->getSizeEstimateBeforeLimit() is the number of triples of + // the predicate, for which the transitive hull operator (+) is specified. On + // Wikidata, the predicate with the largest blowup when taking the + // transitive hull is wdt:P2789 (connects with). The blowup is then from 90K + // (without +) to 110M (with +), so about 1000 times larger. + AD_CORRECTNESS_CHECK(lhs_.isVariable() && rhs_.isVariable()); + return subtree_->getSizeEstimate() * 10000; +} + +// _____________________________________________________________________________ +size_t TransitivePathBase::getCostEstimate() { + // We assume that the cost of computing the transitive path is proportional to + // the result size. + auto costEstimate = getSizeEstimateBeforeLimit(); + // Add the cost for the index scan of the predicate involved. 
+ for (auto* ptr : getChildren()) { + if (ptr) { + costEstimate += ptr->getCostEstimate(); + } + } + return costEstimate; +} + +// _____________________________________________________________________________ +std::shared_ptr TransitivePathBase::makeTransitivePath( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist) { + bool useGraphblas = !RuntimeParameters().get<"use-graphblas">(); + return makeTransitivePath(qec, child, leftSide, rightSide, minDist, maxDist, + useGraphblas); +} + +// _____________________________________________________________________________ +std::shared_ptr TransitivePathBase::makeTransitivePath( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist, bool useGraphblas) { + if (useGraphblas) { + return std::make_shared(TransitivePathGraphblas( + qec, child, leftSide, rightSide, minDist, maxDist)); + } else { + return std::make_shared(TransitivePathFallback( + qec, child, leftSide, rightSide, minDist, maxDist)); + } +} + +// _____________________________________________________________________________ +std::shared_ptr TransitivePathBase::bindLeftSide( + std::shared_ptr leftop, size_t inputCol) const { + return bindLeftOrRightSide(std::move(leftop), inputCol, true); +} + +// _____________________________________________________________________________ +std::shared_ptr TransitivePathBase::bindRightSide( + std::shared_ptr rightop, size_t inputCol) const { + return bindLeftOrRightSide(std::move(rightop), inputCol, false); +} + +// _____________________________________________________________________________ +std::shared_ptr TransitivePathBase::bindLeftOrRightSide( + std::shared_ptr leftOrRightOp, size_t inputCol, + bool isLeft) const { + // Enforce required sorting of `leftOrRightOp`. 
+ leftOrRightOp = QueryExecutionTree::createSortedTree(std::move(leftOrRightOp), + {inputCol}); + // Create a copy of this. + // + // NOTE: The RHS used to be `std::make_shared()`, which is + // wrong because it first calls the copy constructor of the base class + // `Operation`, which would then ignore the changes in `variableColumnMap_` + // made below (see `Operation::getInternallyVisibleVariableColumns` and + // `Operation::getExternallyVariableColumns`). + std::shared_ptr p = + TransitivePathBase::makeTransitivePath(getExecutionContext(), subtree_, + lhs_, rhs_, minDist_, maxDist_); + if (isLeft) { + p->lhs_.treeAndCol_ = {leftOrRightOp, inputCol}; + } else { + p->rhs_.treeAndCol_ = {leftOrRightOp, inputCol}; + } + + // Note: The `variable` in the following structured binding is `const`, even + // if we bind by value. We deliberately make one unnecessary copy of the + // `variable` to keep the code simpler. + for (auto [variable, columnIndexWithType] : + leftOrRightOp->getVariableColumns()) { + ColumnIndex columnIndex = columnIndexWithType.columnIndex_; + if (columnIndex == inputCol) { + continue; + } + + columnIndexWithType.columnIndex_ += columnIndex > inputCol ? 1 : 2; + + p->variableColumns_[variable] = columnIndexWithType; + // p->resultWidth_++; + } + return p; +} + +// _____________________________________________________________________________ +bool TransitivePathBase::isBoundOrId() const { + return lhs_.isBoundVariable() || rhs_.isBoundVariable() || + !lhs_.isVariable() || !rhs_.isVariable(); +} diff --git a/src/engine/TransitivePathBase.h b/src/engine/TransitivePathBase.h new file mode 100644 index 0000000000..d9658a95b5 --- /dev/null +++ b/src/engine/TransitivePathBase.h @@ -0,0 +1,164 @@ +// Copyright 2019, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) + +#pragma once + +#include +#include + +#include "engine/Operation.h" +#include "engine/QueryExecutionTree.h" + +using TreeAndCol = std::pair, size_t>; +struct TransitivePathSide { + // treeAndCol contains the QueryExecutionTree of this side and the column + // where the Ids of this side are located. This member only has a value if + // this side was bound. + std::optional treeAndCol_; + // Column of the sub table where the Ids of this side are located + size_t subCol_; + std::variant value_; + // The column in the ouput table where this side Ids are written to. + // This member is set by the TransitivePath class + size_t outputCol_ = 0; + + bool isVariable() const { return std::holds_alternative(value_); }; + + bool isBoundVariable() const { return treeAndCol_.has_value(); }; + + std::string getCacheKey() const { + std::ostringstream os; + if (!isVariable()) { + os << "Id: " << std::get(value_); + } + + os << ", subColumn: " << subCol_ << "to " << outputCol_; + + if (treeAndCol_.has_value()) { + const auto& [tree, col] = treeAndCol_.value(); + os << ", Subtree:\n"; + os << tree->getCacheKey() << "with join column " << col << "\n"; + } + return std::move(os).str(); + } + + bool isSortedOnInputCol() const { + if (!treeAndCol_.has_value()) { + return false; + } + + auto [tree, col] = treeAndCol_.value(); + const std::vector& sortedOn = + tree->getRootOperation()->getResultSortedOn(); + // TODO use std::ranges::starts_with + return (!sortedOn.empty() && sortedOn[0] == col); + } +}; + +class TransitivePathBase : public Operation { + protected: + std::shared_ptr subtree_; + TransitivePathSide lhs_; + TransitivePathSide rhs_; + size_t resultWidth_ = 2; + size_t minDist_; + size_t maxDist_; + VariableToColumnMap variableColumns_; + + public: + TransitivePathBase(QueryExecutionContext* qec, + std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, + size_t minDist, size_t 
maxDist); + + virtual ~TransitivePathBase() {} + + /** + * Returns a new TransitivePath operation that uses the fact that leftop + * generates all possible values for the left side of the paths. If the + * results of leftop is smaller than all possible values this will result in a + * faster transitive path operation (as the transitive paths has to be + * computed for fewer elements). + */ + std::shared_ptr bindLeftSide( + std::shared_ptr leftop, size_t inputCol) const; + + /** + * Returns a new TransitivePath operation that uses the fact that rightop + * generates all possible values for the right side of the paths. If the + * results of rightop is smaller than all possible values this will result in + * a faster transitive path operation (as the transitive paths has to be + * computed for fewer elements). + */ + std::shared_ptr bindRightSide( + std::shared_ptr rightop, size_t inputCol) const; + + bool isBoundOrId() const; + + /** + * Getters, mainly necessary for testing + */ + size_t getMinDist() const { return minDist_; } + size_t getMaxDist() const { return maxDist_; } + const TransitivePathSide& getLeft() const { return lhs_; } + const TransitivePathSide& getRight() const { return rhs_; } + + protected: + virtual std::string getCacheKeyImpl() const override; + + public: + // virtual void computeTransitivePath( + // IdTable* res, const IdTable& sub, const TransitivePathSide& startSide, + // const TransitivePathSide& targetSide) const = 0; + + virtual std::string getDescriptor() const override; + + virtual size_t getResultWidth() const override; + + virtual vector resultSortedOn() const override; + + virtual void setTextLimit(size_t limit) override; + + virtual bool knownEmptyResult() override; + + virtual float getMultiplicity(size_t col) override; + + private: + uint64_t getSizeEstimateBeforeLimit() override; + + public: + virtual size_t getCostEstimate() override; + + static std::shared_ptr makeTransitivePath( + QueryExecutionContext* qec, std::shared_ptr 
child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist, bool useGraphblas); + + static std::shared_ptr makeTransitivePath( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist); + + vector getChildren() override { + std::vector res; + auto addChildren = [](std::vector& res, + TransitivePathSide side) { + if (side.treeAndCol_.has_value()) { + res.push_back(side.treeAndCol_.value().first.get()); + } + }; + addChildren(res, lhs_); + addChildren(res, rhs_); + res.push_back(subtree_.get()); + return res; + } + + VariableToColumnMap computeVariableToColumnMap() const override; + + // The internal implementation of `bindLeftSide` and `bindRightSide` which + // share a lot of code. + std::shared_ptr bindLeftOrRightSide( + std::shared_ptr leftOrRightOp, size_t inputCol, + bool isLeft) const; +}; diff --git a/src/engine/TransitivePathFallback.cpp b/src/engine/TransitivePathFallback.cpp new file mode 100644 index 0000000000..d9220b57b7 --- /dev/null +++ b/src/engine/TransitivePathFallback.cpp @@ -0,0 +1,383 @@ +// Copyright 2019, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) + +#include "TransitivePathFallback.h" + +#include +#include +#include + +#include "engine/CallFixedSize.h" +#include "engine/TransitivePathBase.h" +#include "util/Exception.h" +#include "util/Timer.h" + +// _____________________________________________________________________________ +TransitivePathFallback::TransitivePathFallback( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist) + : TransitivePathBase(qec, child, leftSide, rightSide, minDist, maxDist) {} + +// _____________________________________________________________________________ +template +void TransitivePathFallback::computeTransitivePathBound( + IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, const IdTable& startSideTable) const { + IdTableStatic res = std::move(*dynRes).toStatic(); + + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + + auto [edges, nodes] = setupMapAndNodes( + dynSub, startSide, targetSide, startSideTable); + + timer.stop(); + auto initTime = timer.msecs(); + timer.start(); + + Map hull(allocator()); + if (!targetSide.isVariable()) { + hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); + } else { + hull = transitiveHull(edges, nodes, std::nullopt); + } + + timer.stop(); + auto hullTime = timer.msecs(); + timer.start(); + + TransitivePathFallback::fillTableWithHull( + res, hull, nodes, startSide.outputCol_, targetSide.outputCol_, + startSideTable, startSide.treeAndCol_.value().second); + + timer.stop(); + auto fillTime = timer.msecs(); + + LOG(DEBUG) << "Fallback Timing measurements:" << std::endl; + LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; + LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; + LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << 
std::endl; + + *dynRes = std::move(res).toDynamic(); +} + +// _____________________________________________________________________________ +template +void TransitivePathFallback::computeTransitivePath( + IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + IdTableStatic res = std::move(*dynRes).toStatic(); + + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + + auto [edges, nodes] = + setupMapAndNodes(dynSub, startSide, targetSide); + + timer.stop(); + auto initTime = timer.msecs(); + timer.start(); + + Map hull{allocator()}; + if (!targetSide.isVariable()) { + hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); + } else { + hull = transitiveHull(edges, nodes, std::nullopt); + } + + timer.stop(); + auto hullTime = timer.msecs(); + timer.start(); + + TransitivePathFallback::fillTableWithHull( + res, hull, startSide.outputCol_, targetSide.outputCol_); + + timer.stop(); + auto fillTime = timer.msecs(); + + LOG(DEBUG) << "Fallback Timing measurements:" << std::endl; + LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; + LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; + LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; + + *dynRes = std::move(res).toDynamic(); +} + +// _____________________________________________________________________________ +ResultTable TransitivePathFallback::computeResult() { + if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && + rhs_.isVariable()) { + AD_THROW( + "This query might have to evalute the empty path, which is currently " + "not supported"); + } + shared_ptr subRes = subtree_->getResult(); + + IdTable idTable{allocator()}; + + idTable.setNumColumns(getResultWidth()); + + size_t subWidth = subRes->idTable().numColumns(); + + auto computeForOneSide = [this, &idTable, subRes, subWidth]( + auto& boundSide, + auto& otherSide) -> ResultTable { + 
shared_ptr sideRes = + boundSide.treeAndCol_.value().first->getResult(); + size_t sideWidth = sideRes->idTable().numColumns(); + + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), + &TransitivePathFallback::computeTransitivePathBound, this, + &idTable, subRes->idTable(), boundSide, otherSide, + sideRes->idTable()); + + return {std::move(idTable), resultSortedOn(), + ResultTable::getSharedLocalVocabFromNonEmptyOf(*sideRes, *subRes)}; + }; + + if (lhs_.isBoundVariable()) { + return computeForOneSide(lhs_, rhs_); + } else if (rhs_.isBoundVariable()) { + return computeForOneSide(rhs_, lhs_); + // Right side is an Id + } else if (!rhs_.isVariable()) { + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePathFallback::computeTransitivePath, this, + &idTable, subRes->idTable(), rhs_, lhs_); + // No side is a bound variable, the right side is an unbound variable + // and the left side is either an unbound Variable or an ID. + } else { + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePathFallback::computeTransitivePath, this, + &idTable, subRes->idTable(), lhs_, rhs_); + } + + // NOTE: The only place, where the input to a transitive path operation is not + // an index scan (which has an empty local vocabulary by default) is the + // `LocalVocabTest`. But it doesn't harm to propagate the local vocab here + // either. + return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; +} + +// _____________________________________________________________________________ +TransitivePathFallback::Map TransitivePathFallback::transitiveHull( + const Map& edges, const std::vector& startNodes, + std::optional target) const { + using MapIt = Map::const_iterator; + // For every node do a dfs on the graph + Map hull{allocator()}; + + // Stores nodes we already have a path to. This avoids cycles. 
+ ad_utility::HashSetWithMemoryLimit marks{ + getExecutionContext()->getAllocator()}; + + // The stack used to store the dfs' progress + std::vector positions; + + // Used to store all edges leading away from a node for every level. + // Reduces access to the hashmap, and is safe as the map will not + // be modified after this point. + std::vector edgeCache; + + for (Id currentStartNode : startNodes) { + if (hull.contains(currentStartNode)) { + // We have already computed the hull for this node + continue; + } + + // Reset for this iteration + marks.clear(); + + MapIt rootEdges = edges.find(currentStartNode); + if (rootEdges != edges.end()) { + positions.push_back(rootEdges->second.begin()); + edgeCache.push_back(&rootEdges->second); + } + if (minDist_ == 0 && + (!target.has_value() || currentStartNode == target.value())) { + insertIntoMap(hull, currentStartNode, currentStartNode); + } + + // While we have not found the entire transitive hull and have not reached + // the max step limit + while (!positions.empty()) { + checkCancellation(); + size_t stackIndex = positions.size() - 1; + // Process the next child of the node at the top of the stack + Set::const_iterator& pos = positions[stackIndex]; + const Set* nodeEdges = edgeCache.back(); + + if (pos == nodeEdges->end()) { + // We finished processing this node + positions.pop_back(); + edgeCache.pop_back(); + continue; + } + + Id child = *pos; + ++pos; + size_t childDepth = positions.size(); + if (childDepth <= maxDist_ && marks.count(child) == 0) { + // process the child + if (childDepth >= minDist_) { + marks.insert(child); + if (!target.has_value() || child == target.value()) { + insertIntoMap(hull, currentStartNode, child); + } + } + // Add the child to the stack + MapIt it = edges.find(child); + if (it != edges.end()) { + positions.push_back(it->second.begin()); + edgeCache.push_back(&it->second); + } + } + } + } + return hull; +} + +// 
_____________________________________________________________________________ +template +void TransitivePathFallback::fillTableWithHull( + IdTableStatic& table, const Map& hull, std::vector& nodes, + size_t startSideCol, size_t targetSideCol, const IdTable& startSideTable, + size_t skipCol) { + IdTableView startView = + startSideTable.asStaticView(); + + size_t rowIndex = 0; + for (size_t i = 0; i < nodes.size(); i++) { + Id node = nodes[i]; + auto it = hull.find(node); + if (it == hull.end()) { + continue; + } + + for (Id otherNode : it->second) { + table.emplace_back(); + table(rowIndex, startSideCol) = node; + table(rowIndex, targetSideCol) = otherNode; + + TransitivePathFallback::copyColumns( + startView, table, i, rowIndex, skipCol); + + rowIndex++; + } + } +} + +// _____________________________________________________________________________ +template +void TransitivePathFallback::fillTableWithHull(IdTableStatic& table, + const Map& hull, + size_t startSideCol, + size_t targetSideCol) { + size_t rowIndex = 0; + for (auto const& [node, linkedNodes] : hull) { + for (Id linkedNode : linkedNodes) { + table.emplace_back(); + table(rowIndex, startSideCol) = node; + table(rowIndex, targetSideCol) = linkedNode; + + rowIndex++; + } + } +} + +// _____________________________________________________________________________ +template +std::pair> +TransitivePathFallback::setupMapAndNodes(const IdTable& sub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const { + std::vector nodes; + Map edges = setupEdgesMap(sub, startSide, targetSide); + + // Bound -> var|id + std::span startNodes = setupNodes( + startSideTable, startSide.treeAndCol_.value().second); + nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); + + return {std::move(edges), std::move(nodes)}; +} + +// _____________________________________________________________________________ +template +std::pair> 
+TransitivePathFallback::setupMapAndNodes( + const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + std::vector nodes; + Map edges = setupEdgesMap(sub, startSide, targetSide); + + // id -> var|id + if (!startSide.isVariable()) { + nodes.push_back(std::get(startSide.value_)); + // var -> var + } else { + std::span startNodes = + setupNodes(sub, startSide.subCol_); + nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); + if (minDist_ == 0) { + std::span targetNodes = + setupNodes(sub, targetSide.subCol_); + nodes.insert(nodes.end(), targetNodes.begin(), targetNodes.end()); + } + } + + return {std::move(edges), std::move(nodes)}; +} + +// _____________________________________________________________________________ +template +TransitivePathFallback::Map TransitivePathFallback::setupEdgesMap( + const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + const IdTableView sub = dynSub.asStaticView(); + Map edges{allocator()}; + decltype(auto) startCol = sub.getColumn(startSide.subCol_); + decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); + + for (size_t i = 0; i < sub.size(); i++) { + checkCancellation(); + insertIntoMap(edges, startCol[i], targetCol[i]); + } + return edges; +} + +// _____________________________________________________________________________ +template +std::span TransitivePathFallback::setupNodes(const IdTable& table, + size_t col) { + return table.getColumn(col); +} + +// _____________________________________________________________________________ +template +void TransitivePathFallback::copyColumns( + const IdTableView& inputTable, + IdTableStatic& outputTable, size_t inputRow, size_t outputRow, + size_t skipCol) { + size_t inCol = 0; + size_t outCol = 2; + while (inCol < inputTable.numColumns() && outCol < outputTable.numColumns()) { + if (skipCol == inCol) { + inCol++; + continue; + } + + outputTable(outputRow, outCol) = 
inputTable(inputRow, inCol); + inCol++; + outCol++; + } +} + +// _____________________________________________________________________________ +void TransitivePathFallback::insertIntoMap(Map& map, Id key, Id value) const { + auto [it, success] = map.try_emplace(key, allocator()); + it->second.insert(value); +} diff --git a/src/engine/TransitivePathFallback.h b/src/engine/TransitivePathFallback.h new file mode 100644 index 0000000000..b2a4429e6d --- /dev/null +++ b/src/engine/TransitivePathFallback.h @@ -0,0 +1,201 @@ +// Copyright 2019, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) + +#pragma once + +#include +#include + +#include "TransitivePathBase.h" +#include "engine/Operation.h" +#include "engine/QueryExecutionTree.h" +#include "engine/idTable/IdTable.h" + +class TransitivePathFallback : public TransitivePathBase { + // We deliberately use the `std::` variants of a hash set and hash map because + // `absl`s types are not exception safe. + constexpr static auto hash = [](Id id) { + return std::hash{}(id.getBits()); + }; + using Set = std::unordered_set, + ad_utility::AllocatorWithLimit>; + using Map = std::unordered_map< + Id, Set, decltype(hash), std::equal_to, + ad_utility::AllocatorWithLimit>>; + + public: + TransitivePathFallback(QueryExecutionContext* qec, + std::shared_ptr child, + TransitivePathSide leftSide, + TransitivePathSide rightSide, size_t minDist, + size_t maxDist); + + /** + * @brief Compute the transitive hull with a bound side. + * This function is called when the startSide is bound and + * it is a variable. The other IdTable contains the result + * of the start side and will be used to get the start nodes. 
+ * + * @tparam RES_WIDTH Number of columns of the result table + * @tparam SUB_WIDTH Number of columns of the sub table + * @tparam SIDE_WIDTH Number of columns of the + * @param res The result table which will be filled in-place + * @param sub The IdTable for the sub result + * @param startSide The start side for the transitive hull + * @param targetSide The target side for the transitive hull + * @param startSideTable The IdTable of the startSide + */ + + template + void computeTransitivePathBound(IdTable* res, const IdTable& sub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const; + + /** + * @brief Compute the transitive hull. + * This function is called when no side is bound (or an id). + * + * @tparam RES_WIDTH Number of columns of the result table + * @tparam SUB_WIDTH Number of columns of the sub table + * @param res The result table which will be filled in-place + * @param sub The IdTable for the sub result + * @param startSide The start side for the transitive hull + * @param targetSide The target side for the transitive hull + */ + + template + void computeTransitivePath(IdTable* res, const IdTable& sub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const; + + // void computeTransitivePath( + // IdTable* res, const IdTable& sub, const TransitivePathSide& startSide, + // const TransitivePathSide& targetSide) const override; + + private: + /** + * @brief Compute the result for this TransitivePath operation + * This function chooses the start and target side for the transitive + * hull computation. This choice of the start side has a large impact + * on the time it takes to compute the hull. The set of nodes on the + * start side should be as small as possible. 
+ * + * @return ResultTable The result of the TransitivePath operation + */ + ResultTable computeResult() override; + + /** + * @brief Compute the transitive hull starting at the given nodes, + * using the given Map. + * + * @param edges Adjacency lists, mapping Ids (nodes) to their connected + * Ids. + * @param nodes A list of Ids. These Ids are used as starting points for the + * transitive hull. Thus, this parameter guides the performance of this + * algorithm. + * @param target Optional target Id. If supplied, only paths which end + * in this Id are added to the hull. + * @return Map Maps each Id to its connected Ids in the transitive hull + */ + Map transitiveHull(const Map& edges, const std::vector& startNodes, + std::optional target) const; + + /** + * @brief Fill the given table with the transitive hull and use the + * startSideTable to fill in the rest of the columns. + * This function is called if the start side is bound and a variable. + * + * @tparam WIDTH The number of columns of the result table. + * @tparam START_WIDTH The number of columns of the start table. + * @param table The result table which will be filled. + * @param hull The transitive hull. + * @param nodes The start nodes of the transitive hull. These need to be in + * the same order and amount as the starting side nodes in the startTable. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + * @param startSideTable An IdTable that holds other results. The other + * results will be transferred to the new result table. + * @param skipCol This column contains the Ids of the start side in the + * startSideTable and will be skipped. 
+ */ + template + static void fillTableWithHull(IdTableStatic& table, const Map& hull, + std::vector& nodes, size_t startSideCol, + size_t targetSideCol, + const IdTable& startSideTable, size_t skipCol); + + /** + * @brief Fill the given table with the transitive hull. + * This function is called if the sides are unbound or ids. + * + * @tparam WIDTH The number of columns of the result table. + * @param table The result table which will be filled. + * @param hull The transitive hull. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + */ + template + static void fillTableWithHull(IdTableStatic& table, const Map& hull, + size_t startSideCol, size_t targetSideCol); + + /** + * @brief Prepare a Map and a nodes vector for the transitive hull + * computation. + * + * @tparam SUB_WIDTH Number of columns of the sub table + * @tparam SIDE_WIDTH Number of columns of the startSideTable + * @param sub The sub table result + * @param startSide The TransitivePathSide where the edges start + * @param targetSide The TransitivePathSide where the edges end + * @param startSideTable An IdTable containing the Ids for the startSide + * @return std::pair> A Map and Id vector (nodes) for the + * transitive hull computation + */ + template + std::pair> setupMapAndNodes( + const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const; + + /** + * @brief Prepare a Map and a nodes vector for the transitive hull + * computation. 
+ * + * @tparam SUB_WIDTH Number of columns of the sub table + * @param sub The sub table result + * @param startSide The TransitivePathSide where the edges start + * @param targetSide The TransitivePathSide where the edges end + * @return std::pair> A Map and Id vector (nodes) for the + * transitive hull computation + */ + template + std::pair> setupMapAndNodes( + const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const; + + // initialize the map from the subresult + template + Map setupEdgesMap(const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const; + + // initialize a vector for the starting nodes (Ids) + template + static std::span setupNodes(const IdTable& table, size_t col); + + // Copy the columns from the input table to the output table + template + static void copyColumns(const IdTableView& inputTable, + IdTableStatic& outputTable, + size_t inputRow, size_t outputRow, size_t skipCol); + + // A small helper function: Insert the `value` to the set at `map[key]`. + // As the sets all have an allocator with memory limit, this construction is a + // little bit more involved, so this can be a separate helper function. + void insertIntoMap(Map& map, Id key, Id value) const; +}; diff --git a/src/engine/TransitivePathGraphblas.cpp b/src/engine/TransitivePathGraphblas.cpp new file mode 100644 index 0000000000..d3f421d8e2 --- /dev/null +++ b/src/engine/TransitivePathGraphblas.cpp @@ -0,0 +1,364 @@ +// Copyright 2019, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) + +#include "TransitivePathGraphblas.h" + +#include +#include +#include + +#include "engine/CallFixedSize.h" +#include "engine/TransitivePathBase.h" +#include "util/Exception.h" +#include "util/Timer.h" + +// _____________________________________________________________________________ +TransitivePathGraphblas::TransitivePathGraphblas( + QueryExecutionContext* qec, std::shared_ptr child, + TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, + size_t maxDist) + : TransitivePathBase(qec, child, leftSide, rightSide, minDist, maxDist) {} + +// _____________________________________________________________________________ +template +void TransitivePathGraphblas::computeTransitivePathBound( + IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, const IdTable& startSideTable) const { + IdTableStatic res = std::move(*dynRes).toStatic(); + + const IdTableView sub = dynSub.asStaticView(); + decltype(auto) startCol = sub.getColumn(startSide.subCol_); + decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); + + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + + GrbMatrix::initialize(); + auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); + + std::span startNodes = + startSideTable.getColumn(startSide.treeAndCol_->second); + GrbMatrix startNodeMatrix = + setupStartNodeMatrix(startNodes, graph.numRows(), mapping); + + timer.stop(); + auto initTime = timer.msecs(); + timer.start(); + + auto hull = transitiveHull(graph, std::move(startNodeMatrix)); + if (!targetSide.isVariable()) { + Id target = std::get(targetSide.value_); + size_t targetIndex = mapping.getIndex(target); + hull = getTargetRow(hull, targetIndex); + } + + timer.stop(); + auto hullTime = timer.msecs(); + timer.start(); + + TransitivePathGraphblas::fillTableWithHull( + res, hull, mapping, startSideTable, 
startNodes, startSide.outputCol_, + targetSide.outputCol_, startSide.treeAndCol_.value().second); + + timer.stop(); + auto fillTime = timer.msecs(); + + LOG(DEBUG) << "GraphBLAS Timing measurements:" << std::endl; + LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; + LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; + LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; + + *dynRes = std::move(res).toDynamic(); +} + +// _____________________________________________________________________________ +template +void TransitivePathGraphblas::computeTransitivePath( + IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + IdTableStatic res = std::move(*dynRes).toStatic(); + + const IdTableView sub = dynSub.asStaticView(); + decltype(auto) startCol = sub.getColumn(startSide.subCol_); + decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); + + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + + GrbMatrix::initialize(); + auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); + + timer.stop(); + auto initTime = timer.msecs(); + timer.start(); + + GrbMatrix hull; + if (!startSide.isVariable()) { + std::vector startNode{std::get(startSide.value_)}; + GrbMatrix startMatrix = + setupStartNodeMatrix(startNode, graph.numRows(), mapping); + hull = transitiveHull(graph, std::move(startMatrix)); + } else { + hull = transitiveHull(graph, std::nullopt); + } + + timer.stop(); + auto hullTime = timer.msecs(); + timer.start(); + + if (!targetSide.isVariable()) { + Id target = std::get(targetSide.value_); + size_t targetIndex = mapping.getIndex(target); + hull = getTargetRow(hull, targetIndex); + } + + if (!startSide.isVariable()) { + std::vector startNode{std::get(startSide.value_)}; + TransitivePathGraphblas::fillTableWithHull( + res, hull, mapping, startNode, startSide.outputCol_, + 
targetSide.outputCol_); + } else { + TransitivePathGraphblas::fillTableWithHull( + res, hull, mapping, startSide.outputCol_, targetSide.outputCol_); + } + + timer.stop(); + auto fillTime = timer.msecs(); + + LOG(DEBUG) << "GraphBLAS Timing measurements:" << std::endl; + LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; + LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; + LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; + + *dynRes = std::move(res).toDynamic(); +} + +// _____________________________________________________________________________ +ResultTable TransitivePathGraphblas::computeResult() { + if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && + rhs_.isVariable()) { + AD_THROW( + "This query might have to evalute the empty path, which is currently " + "not supported"); + } + shared_ptr subRes = subtree_->getResult(); + + IdTable idTable{allocator()}; + + idTable.setNumColumns(getResultWidth()); + + size_t subWidth = subRes->idTable().numColumns(); + + auto computeForOneSide = [this, &idTable, subRes, subWidth]( + auto& boundSide, + auto& otherSide) -> ResultTable { + shared_ptr sideRes = + boundSide.treeAndCol_.value().first->getResult(); + size_t sideWidth = sideRes->idTable().numColumns(); + + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), + &TransitivePathGraphblas::computeTransitivePathBound, this, + &idTable, subRes->idTable(), boundSide, otherSide, + sideRes->idTable()); + + return {std::move(idTable), resultSortedOn(), + ResultTable::getSharedLocalVocabFromNonEmptyOf(*sideRes, *subRes)}; + }; + + if (lhs_.isBoundVariable()) { + return computeForOneSide(lhs_, rhs_); + } else if (rhs_.isBoundVariable()) { + return computeForOneSide(rhs_, lhs_); + // Right side is an Id + } else if (!rhs_.isVariable()) { + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePathGraphblas::computeTransitivePath, this, + &idTable, subRes->idTable(), rhs_, lhs_); 
+ // No side is a bound variable, the right side is an unbound variable + // and the left side is either an unbound Variable or an ID. + } else { + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePathGraphblas::computeTransitivePath, this, + &idTable, subRes->idTable(), lhs_, rhs_); + } + + // NOTE: The only place, where the input to a transitive path operation is not + // an index scan (which has an empty local vocabulary by default) is the + // `LocalVocabTest`. But it doesn't harm to propagate the local vocab here + // either. + return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; +} + +// _____________________________________________________________________________ +GrbMatrix TransitivePathGraphblas::transitiveHull( + const GrbMatrix& graph, std::optional startNodes) const { + size_t pathLength = 0; + GrbMatrix result; + + if (startNodes) { + result = std::move(startNodes.value()); + } else { + result = GrbMatrix::diag(graph.numRows()); + } + + if (minDist_ > 0) { + result = result.multiply(graph); + pathLength++; + } + + size_t previousNvals = 0; + size_t nvals = result.numNonZero(); + while (nvals > previousNvals && pathLength < maxDist_) { + previousNvals = result.numNonZero(); + // TODO: Check effect of matrix orientation (Row major, Column major) on + // performance. 
+ result.accumulateMultiply(graph); + checkCancellation(); + nvals = result.numNonZero(); + pathLength++; + } + return result; +} + +// _____________________________________________________________________________ +template +void TransitivePathGraphblas::fillTableWithHull(IdTableStatic& table, + const GrbMatrix& hull, + const IdMapping& mapping, + size_t startSideCol, + size_t targetSideCol) { + auto [rowIndices, colIndices] = hull.extractTuples(); + + for (size_t i = 0; i < rowIndices.size(); i++) { + table.emplace_back(); + auto startIndex = rowIndices[i]; + auto targetIndex = colIndices[i]; + Id startId = mapping.getId(startIndex); + Id targetId = mapping.getId(targetIndex); + table(i, startSideCol) = startId; + table(i, targetSideCol) = targetId; + } +} + +// _____________________________________________________________________________ +template +void TransitivePathGraphblas::fillTableWithHull(IdTableStatic& table, + const GrbMatrix& hull, + const IdMapping& mapping, + std::span startNodes, + size_t startSideCol, + size_t targetSideCol) { + size_t resultRowIndex = 0; + size_t rowIndex = 0; + + for (auto startNode : startNodes) { + std::vector indices = hull.extractRow(rowIndex); + for (size_t index : indices) { + Id targetNode = mapping.getId(index); + table.emplace_back(); + table(resultRowIndex, startSideCol) = startNode; + table(resultRowIndex, targetSideCol) = targetNode; + resultRowIndex++; + } + rowIndex++; + } +} + +// _____________________________________________________________________________ +template +void TransitivePathGraphblas::fillTableWithHull( + IdTableStatic& table, const GrbMatrix& hull, + const IdMapping& mapping, const IdTable& startSideTable, + std::span startNodes, size_t startSideCol, size_t targetSideCol, + size_t skipCol) { + IdTableView startView = + startSideTable.asStaticView(); + + size_t resultRowIndex = 0; + size_t rowIndex = 0; + for (auto startNode : startNodes) { + std::vector indices = hull.extractRow(rowIndex); + for 
(size_t index : indices) { + Id targetNode = mapping.getId(index); + table.emplace_back(); + table(resultRowIndex, startSideCol) = startNode; + table(resultRowIndex, targetSideCol) = targetNode; + + TransitivePathGraphblas::copyColumns( + startView, table, rowIndex, resultRowIndex, skipCol); + resultRowIndex++; + } + rowIndex++; + } +} + +// _____________________________________________________________________________ +GrbMatrix TransitivePathGraphblas::getTargetRow(GrbMatrix& hull, + size_t targetIndex) const { + GrbMatrix transformer = GrbMatrix(hull.numCols(), hull.numCols()); + transformer.setElement(targetIndex, targetIndex, true); + return hull.multiply(transformer); +} + +// _____________________________________________________________________________ +std::tuple TransitivePathGraphblas::setupMatrix( + std::span startCol, std::span targetCol, + size_t numRows) const { + std::vector rowIndices; + std::vector colIndices; + IdMapping mapping; + + for (size_t i = 0; i < numRows; i++) { + auto startId = startCol[i]; + auto targetId = targetCol[i]; + auto startIndex = mapping.addId(startId); + auto targetIndex = mapping.addId(targetId); + + rowIndices.push_back(startIndex); + colIndices.push_back(targetIndex); + } + + auto matrix = + GrbMatrix::build(rowIndices, colIndices, mapping.size(), mapping.size()); + return {std::move(matrix), std::move(mapping)}; +} + +// _____________________________________________________________________________ +GrbMatrix TransitivePathGraphblas::setupStartNodeMatrix( + std::span startIds, size_t numCols, IdMapping mapping) const { + // stardIds.size() is the maximum possible number of columns for the + // startMatrix, but if some start node does not have a link in the graph it + // will be skipped, resulting in a zero column at the end of the startMatrix + GrbMatrix startMatrix = GrbMatrix(startIds.size(), numCols); + size_t rowIndex = 0; + for (Id id : startIds) { + if (!mapping.contains(id)) { + continue; + } + size_t colIndex = 
mapping.getIndex(id); + startMatrix.setElement(rowIndex, colIndex, true); + rowIndex++; + } + return startMatrix; +} + +// _____________________________________________________________________________ +template +void TransitivePathGraphblas::copyColumns( + const IdTableView& inputTable, + IdTableStatic& outputTable, size_t inputRow, size_t outputRow, + size_t skipCol) { + size_t inCol = 0; + size_t outCol = 2; + while (inCol < inputTable.numColumns() && outCol < outputTable.numColumns()) { + if (skipCol == inCol) { + inCol++; + continue; + } + + outputTable(outputRow, outCol) = inputTable(inputRow, inCol); + inCol++; + outCol++; + } +} diff --git a/src/engine/TransitivePathGraphblas.h b/src/engine/TransitivePathGraphblas.h new file mode 100644 index 0000000000..5bfba6b30f --- /dev/null +++ b/src/engine/TransitivePathGraphblas.h @@ -0,0 +1,215 @@ +// Copyright 2019, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) + +#pragma once + +#include + +#include "TransitivePathBase.h" +#include "engine/GrbMatrix.h" +#include "engine/Operation.h" +#include "engine/QueryExecutionTree.h" +#include "engine/idTable/IdTable.h" +#include "util/HashMap.h" + +// This struct keeps track of the mapping between Ids and matrix indices +struct IdMapping { + bool contains(Id id) { return idMap_.contains(id); } + + size_t addId(Id id) { + if (!idMap_.contains(id)) { + indexMap_.push_back(id); + } + idMap_.try_emplace(id, indexMap_.size() - 1); + return idMap_[id]; + } + + Id getId(size_t index) const { return indexMap_.at(index); } + + size_t getIndex(Id id) const { return idMap_.at(id); } + + size_t size() const { return indexMap_.size(); } + + private: + ad_utility::HashMap idMap_; + + std::vector indexMap_; +}; + +class TransitivePathGraphblas : public TransitivePathBase { + public: + TransitivePathGraphblas(QueryExecutionContext* qec, + std::shared_ptr child, + TransitivePathSide leftSide, + 
TransitivePathSide rightSide, size_t minDist, + size_t maxDist); + + /** + * @brief Compute the transitive hull with a bound side. + * This function is called when the startSide is bound and + * it is a variable. The other IdTable contains the result + * of the start side and will be used to get the start nodes. + * + * @tparam RES_WIDTH Number of columns of the result table + * @tparam SUB_WIDTH Number of columns of the sub table + * @tparam SIDE_WIDTH Number of columns of the + * @param res The result table which will be filled in-place + * @param sub The IdTable for the sub result + * @param startSide The start side for the transitive hull + * @param targetSide The target side for the transitive hull + * @param startSideTable The IdTable of the startSide + */ + template + void computeTransitivePathBound(IdTable* res, const IdTable& sub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const; + + /** + * @brief Compute the transitive hull. + * This function is called when no side is bound (or an id). + * + * @tparam RES_WIDTH Number of columns of the result table + * @tparam SUB_WIDTH Number of columns of the sub table + * @param res The result table which will be filled in-place + * @param sub The IdTable for the sub result + * @param startSide The start side for the transitive hull + * @param targetSide The target side for the transitive hull + */ + template + void computeTransitivePath(IdTable* res, const IdTable& sub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const; + + // void computeTransitivePath( + // IdTable* res, const IdTable& sub, const TransitivePathSide& startSide, + // const TransitivePathSide& targetSide) const override; + + private: + /** + * @brief Compute the result for this TransitivePath operation + * This function chooses the start and target side for the transitive + * hull computation. 
This choice of the start side has a large impact + * on the time it takes to compute the hull. The set of nodes on the + * start side should be as small as possible. + * + * @return ResultTable The result of the TransitivePath operation + */ + ResultTable computeResult() override; + + /** + * @brief Compute the transitive hull of the graph. If given startNodes, + * compute the transitive hull starting at the startNodes. + * + * @param graph Boolean, square, sparse, adjacency matrix. Row i, column j is + * true, iff. there is an edge going from i to j in the graph. + * @param startNodes Boolean, sparse, adjacency matrix, marking the start + * nodes. There is one row for each start node. The number of columns has to + * be equal to the number of columns of the graph matrix. + * @return An adjacency matrix containing the transitive hull + */ + GrbMatrix transitiveHull(const GrbMatrix& graph, + std::optional startNodes) const; + + /** + * @brief Fill the IdTable with the given transitive hull. + * + * @tparam WIDTH The number of columns of the result table. + * @param table The result table which will be filled. + * @param hull The transitive hull. Represented by a sparse, boolean adjacency + * matrix + * @param mapping IdMapping, which maps Ids to matrix indices and vice versa. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + */ + template + static void fillTableWithHull(IdTableStatic& table, + const GrbMatrix& hull, const IdMapping& mapping, + size_t startSideCol, size_t targetSideCol); + + /** + * @brief Fill the IdTable with the given transitive hull. This function is + * used in case the hull computation has one (or more) Ids as start nodes. + * + * @tparam WIDTH The number of columns of the result table. + * @param table The result table which will be filled. + * @param hull The transitive hull. 
Represented by a sparse, boolean adjacency + * matrix + * @param mapping IdMapping, which maps Ids to matrix indices and vice versa. + * @param startNodes Ids of the start nodes. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + */ + template + static void fillTableWithHull(IdTableStatic& table, + const GrbMatrix& hull, const IdMapping& mapping, + std::span startNodes, + size_t startSideCol, size_t targetSideCol); + + /** + * @brief Fill the IdTable with the given transitive hull. This function is + * used if the start side was already bound and there is an IdTable from which + * data has to be copied to the result table. + * + * @tparam WIDTH The number of columns of the result table. + * @tparam START_WIDTH The number of columns of the start table. + * @param table The result table which will be filled. + * @param hull The transitive hull. Represented by a sparse, boolean adjacency + * matrix + * @param mapping IdMapping, which maps Ids to matrix indices and vice versa. + * @param startNodes Ids of the start nodes. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + * @param skipCol This column contains the Ids of the start side in the + * startSideTable and will be skipped. + */ + template + static void fillTableWithHull(IdTableStatic& table, + const GrbMatrix& hull, const IdMapping& mapping, + const IdTable& startSideTable, + std::span startNodes, + size_t startSideCol, size_t targetSideCol, + size_t skipCol); + + GrbMatrix getTargetRow(GrbMatrix& hull, size_t targetIndex) const; + + /** + * @brief Create a boolean, sparse adjacency matrix from the given edges. The + * edges are given as lists, where one list contains the start node of the + * edge and the other list contains the target node of the edge. 
+ * Also create an IdMapping, which maps the given Ids to matrix indices. + * + * @param startCol Column from the IdTable, which contains edge start nodes + * @param targetCol Column from the IdTable, which contains edge target nodes + * @param numRows Number of rows in the IdTable + */ + std::tuple setupMatrix(std::span startCol, + std::span targetCol, + size_t numRows) const; + + /** + * @brief Create a boolean, sparse, adjacency matrix which holds the starting + * nodes for the transitive hull computation. + * + * @param startIds List of Ids where the transitive hull computation should + * start + * @param numRows Number of rows in the IdTable where startIds comes from + * @param mapping An IdMapping between Ids and matrix indices + * @return Matrix with one row for each start node + */ + GrbMatrix setupStartNodeMatrix(std::span startIds, size_t numRows, + IdMapping mapping) const; + + // Copy the columns from the input table to the output table + template + static void copyColumns(const IdTableView& inputTable, + IdTableStatic& outputTable, + size_t inputRow, size_t outputRow, size_t skipCol); +}; diff --git a/test/LocalVocabTest.cpp b/test/LocalVocabTest.cpp index ab8cac143f..19f0039af6 100644 --- a/test/LocalVocabTest.cpp +++ b/test/LocalVocabTest.cpp @@ -23,7 +23,7 @@ #include "engine/QueryExecutionTree.h" #include "engine/ResultTable.h" #include "engine/Sort.h" -#include "engine/TransitivePath.h" +#include "engine/TransitivePathBase.h" #include "engine/Union.h" #include "engine/Values.h" #include "engine/sparqlExpressions/GroupConcatExpression.h" @@ -293,8 +293,9 @@ TEST(LocalVocab, propagation) { // local-vocabulary. Still, it doesn't harm to test this. 
TransitivePathSide left(std::nullopt, 0, Variable{"?x"}); TransitivePathSide right(std::nullopt, 1, Variable{"?y"}); - TransitivePath transitivePath(testQec, qet(values1), left, right, 1, 1); - checkLocalVocab(transitivePath, std::vector{"x", "y1", "y2"}); + auto transitivePath = TransitivePathBase::makeTransitivePath( + testQec, qet(values1), left, right, 1, 1); + checkLocalVocab(*transitivePath, std::vector{"x", "y1", "y2"}); // PATTERN TRICK operations. HasPredicateScan hasPredicateScan(testQec, qet(values1), 0, Variable{"?z"}); diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index fee3fed1fa..0c1a46e38a 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -18,7 +18,7 @@ #include "engine/Sort.h" #include "engine/TextIndexScanForEntity.h" #include "engine/TextIndexScanForWord.h" -#include "engine/TransitivePath.h" +#include "engine/TransitivePathBase.h" #include "gmock/gmock-matchers.h" #include "gmock/gmock.h" #include "parser/SparqlParser.h" @@ -148,7 +148,8 @@ inline auto CountAvailablePredicates = [](size_t subjectColumnIdx, const Variable& predicateVar, const Variable& countVar, const std::same_as auto&... childMatchers) - requires(sizeof...(childMatchers) <= 1) { + requires(sizeof...(childMatchers) <= 1) +{ return RootOperation<::CountAvailablePredicates>(AllOf( AD_PROPERTY(::CountAvailablePredicates, subjectColumnIndex, Eq(subjectColumnIdx)), @@ -225,14 +226,15 @@ inline auto TransitivePathSideMatcher = [](TransitivePathSide side) { inline auto TransitivePath = [](TransitivePathSide left, TransitivePathSide right, size_t minDist, size_t maxDist, const std::same_as auto&... 
childMatchers) { - return RootOperation<::TransitivePath>(AllOf( - Property("getChildren", &Operation::getChildren, - ElementsAre(Pointee(childMatchers)...)), - AD_PROPERTY(TransitivePath, getMinDist, Eq(minDist)), - AD_PROPERTY(TransitivePath, getMaxDist, Eq(maxDist)), - AD_PROPERTY(TransitivePath, getLeft, TransitivePathSideMatcher(left)), - AD_PROPERTY(TransitivePath, getRight, - TransitivePathSideMatcher(right)))); + return RootOperation<::TransitivePathBase>( + AllOf(Property("getChildren", &Operation::getChildren, + ElementsAre(Pointee(childMatchers)...)), + AD_PROPERTY(TransitivePathBase, getMinDist, Eq(minDist)), + AD_PROPERTY(TransitivePathBase, getMaxDist, Eq(maxDist)), + AD_PROPERTY(TransitivePathBase, getLeft, + TransitivePathSideMatcher(left)), + AD_PROPERTY(TransitivePathBase, getRight, + TransitivePathSideMatcher(right)))); }; /// Parse the given SPARQL `query`, pass it to a `QueryPlanner` with empty diff --git a/test/TransitivePathTest.cpp b/test/TransitivePathTest.cpp index e1a631c110..ced9ab1379 100644 --- a/test/TransitivePathTest.cpp +++ b/test/TransitivePathTest.cpp @@ -7,7 +7,7 @@ #include "./IndexTestHelpers.h" #include "./util/AllocatorTestHelpers.h" #include "./util/IdTestHelpers.h" -#include "engine/TransitivePath.h" +#include "engine/TransitivePathBase.h" using ad_utility::testing::getQec; using ad_utility::testing::makeAllocator; @@ -50,10 +50,11 @@ TEST(TransitivePathTest, idToId) { TransitivePathSide left(std::nullopt, 0, V(0), 0); TransitivePathSide right(std::nullopt, 1, V(3), 1); - TransitivePath T(getQec(), nullptr, left, right, 1, - std::numeric_limits::max()); + auto T = TransitivePathBase::makeTransitivePath( + getQec(), nullptr, left, right, 1, std::numeric_limits::max()); - T.computeTransitivePath<2, 2>(&result, sub, left, right); + // T->computeTransitivePath<2, 2>(&result, sub, left, right); + // T->computeTransitivePath(&result, sub, left, right); assertSameUnorderedContent(expected, result); } @@ -73,10 +74,11 @@ 
TEST(TransitivePathTest, idToVar) { TransitivePathSide left(std::nullopt, 0, V(0), 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - TransitivePath T(getQec(), nullptr, left, right, 1, - std::numeric_limits::max()); + auto T = TransitivePathBase::makeTransitivePath( + getQec(), nullptr, left, right, 1, std::numeric_limits::max()); - T.computeTransitivePath<2, 2>(&result, sub, left, right); + // T.computeTransitivePath<2, 2>(&result, sub, left, right); + // T->computeTransitivePath(&result, sub, left, right); assertSameUnorderedContent(expected, result); } @@ -99,10 +101,10 @@ TEST(TransitivePathTest, varTovar) { TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - TransitivePath T(getQec(), nullptr, right, left, 1, - std::numeric_limits::max()); + auto T = TransitivePathBase::makeTransitivePath( + getQec(), nullptr, right, left, 1, std::numeric_limits::max()); - T.computeTransitivePath<2, 2>(&result, sub, left, right); + // T->computeTransitivePath(&result, sub, left, right); assertSameUnorderedContent(expected, result); } @@ -141,10 +143,10 @@ TEST(TransitivePathTest, unlimitedMaxLength) { TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - TransitivePath T(getQec(), nullptr, left, right, 1, - std::numeric_limits::max()); + auto T = TransitivePathBase::makeTransitivePath( + getQec(), nullptr, left, right, 1, std::numeric_limits::max()); - T.computeTransitivePath<2, 2>(&result, sub, left, right); + // T->computeTransitivePath(&result, sub, left, right); assertSameUnorderedContent(expected, result); } @@ -179,8 +181,10 @@ TEST(TransitivePathTest, maxLength2) { TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - TransitivePath T(getQec(), nullptr, left, right, 1, 2); - T.computeTransitivePath<2, 
2>(&result, sub, left, right); + auto T = TransitivePathBase::makeTransitivePath(getQec(), nullptr, left, + right, 1, 2); + // T.computeTransitivePath<2, 2>(&result, sub, left, right); + // T->computeTransitivePath(&result, sub, left, right); assertSameUnorderedContent(expected, result); result.clear(); @@ -191,7 +195,8 @@ TEST(TransitivePathTest, maxLength2) { left.value_ = V(7); right.value_ = Variable{"?target"}; - T.computeTransitivePath<2, 2>(&result, sub, left, right); + // T.computeTransitivePath<2, 2>(&result, sub, left, right); + // T->computeTransitivePath(&result, sub, left, right); assertSameUnorderedContent(expected, result); result.clear(); @@ -201,6 +206,7 @@ TEST(TransitivePathTest, maxLength2) { left.value_ = Variable{"?start"}; right.value_ = V(2); - T.computeTransitivePath<2, 2>(&result, sub, right, left); + // T.computeTransitivePath<2, 2>(&result, sub, right, left); + // T->computeTransitivePath(&result, sub, right, left); assertSameUnorderedContent(expected, result); } From 60a37b604b6c822023b0d9a773707358dcfda830 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Thu, 15 Feb 2024 12:45:53 +0100 Subject: [PATCH 28/92] Fixed timing conversion --- src/engine/TransitivePathGraphblas.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/engine/TransitivePathGraphblas.cpp b/src/engine/TransitivePathGraphblas.cpp index d3f421d8e2..7684cd7e74 100644 --- a/src/engine/TransitivePathGraphblas.cpp +++ b/src/engine/TransitivePathGraphblas.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include "engine/CallFixedSize.h" @@ -65,9 +66,12 @@ void TransitivePathGraphblas::computeTransitivePathBound( auto fillTime = timer.msecs(); LOG(DEBUG) << "GraphBLAS Timing measurements:" << std::endl; - LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; - LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; - LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; 
+ LOG(DEBUG) << "Initialization time: " << std::to_string(initTime.count()) + << "ms" << std::endl; + LOG(DEBUG) << "Hull computation time: " << std::to_string(hullTime.count()) + << "ms" << std::endl; + LOG(DEBUG) << "IdTable fill time: " << std::to_string(fillTime.count()) + << "ms" << std::endl; *dynRes = std::move(res).toDynamic(); } @@ -127,9 +131,12 @@ void TransitivePathGraphblas::computeTransitivePath( auto fillTime = timer.msecs(); LOG(DEBUG) << "GraphBLAS Timing measurements:" << std::endl; - LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; - LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; - LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; + LOG(DEBUG) << "Initialization time: " << std::to_string(initTime.count()) + << "ms" << std::endl; + LOG(DEBUG) << "Hull computation time: " << std::to_string(hullTime.count()) + << "ms" << std::endl; + LOG(DEBUG) << "IdTable fill time: " << std::to_string(fillTime.count()) + << "ms" << std::endl; *dynRes = std::move(res).toDynamic(); } From d07401e39aaddb67d44b910c0e74274e7f689f44 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Thu, 15 Feb 2024 13:13:38 +0100 Subject: [PATCH 29/92] Added singleton class for GraphBLAS global context --- src/engine/GrbGlobalContext.h | 29 +++++++++++++++ src/engine/GrbMatrix.cpp | 21 ----------- src/engine/GrbMatrix.h | 5 --- src/engine/TransitivePathGraphblas.cpp | 5 +-- test/GrbMatrixTest.cpp | 49 +++++++------------------- 5 files changed, 45 insertions(+), 64 deletions(-) create mode 100644 src/engine/GrbGlobalContext.h diff --git a/src/engine/GrbGlobalContext.h b/src/engine/GrbGlobalContext.h new file mode 100644 index 0000000000..c063d2a4fc --- /dev/null +++ b/src/engine/GrbGlobalContext.h @@ -0,0 +1,29 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Johannes Herrmann (johannes.roland.herrmann@mars.uni-freiburg.de) + +#include + +/** + * @class GrbGlobalContext + * @brief This Singleton class is based on the design by Scott Meyers. The basic + * idea is that the singleton object exists in a 'magic' state within the + * getContext() function. This is threadsafe. + * + * Reference: + * https://laristra.github.io/flecsi/src/developer-guide/patterns/meyers_singleton.html + * + */ +class GrbGlobalContext { + GrbGlobalContext() { GrB_init(GrB_NONBLOCKING); } + ~GrbGlobalContext() { GrB_finalize(); } + + public: + static GrbGlobalContext& getContext() { + static GrbGlobalContext context; + return context; + } + + GrbGlobalContext(const GrbGlobalContext&) = delete; + GrbGlobalContext& operator=(const GrbGlobalContext&) = delete; +}; diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index ae6735a3aa..1555e0430a 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -244,24 +244,3 @@ void GrbMatrix::handleError(GrB_Info info) { } AD_FAIL(); } - -bool GrbMatrix::isInitialized_ = []() { - GrB_init(GrB_NONBLOCKING); - return true; -}(); - -// _____________________________________________________________________________ -void GrbMatrix::initialize() { - if (!GrbMatrix::isInitialized_) { - GrB_init(GrB_NONBLOCKING); - GrbMatrix::isInitialized_ = true; - } -} - -// _____________________________________________________________________________ -void GrbMatrix::finalize() { - if (GrbMatrix::isInitialized_) { - GrB_finalize(); - GrbMatrix::isInitialized_ = false; - } -} diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h index 13505b4162..4a463105b0 100644 --- a/src/engine/GrbMatrix.h +++ b/src/engine/GrbMatrix.h @@ -24,7 +24,6 @@ class GrbMatrix { using MatrixPtr = std::unique_ptr; MatrixPtr matrix_ = std::unique_ptr(new GrB_Matrix()); - static bool isInitialized_; public: /** @@ -168,10 +167,6 @@ class GrbMatrix { */ GrbMatrix multiply(const GrbMatrix& otherMatrix) const; - // 
TODO: Move to singleton class - static void initialize(); - static void finalize(); - /** * @brief Get a reference to the internal matrix. * diff --git a/src/engine/TransitivePathGraphblas.cpp b/src/engine/TransitivePathGraphblas.cpp index 7684cd7e74..99f188fed6 100644 --- a/src/engine/TransitivePathGraphblas.cpp +++ b/src/engine/TransitivePathGraphblas.cpp @@ -10,6 +10,7 @@ #include #include "engine/CallFixedSize.h" +#include "engine/GrbGlobalContext.h" #include "engine/TransitivePathBase.h" #include "util/Exception.h" #include "util/Timer.h" @@ -35,7 +36,7 @@ void TransitivePathGraphblas::computeTransitivePathBound( auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); timer.start(); - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); std::span startNodes = @@ -90,7 +91,7 @@ void TransitivePathGraphblas::computeTransitivePath( auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); timer.start(); - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); timer.stop(); diff --git a/test/GrbMatrixTest.cpp b/test/GrbMatrixTest.cpp index 88caaad84c..585652fd6e 100644 --- a/test/GrbMatrixTest.cpp +++ b/test/GrbMatrixTest.cpp @@ -4,6 +4,7 @@ #include +#include "engine/GrbGlobalContext.h" #include "engine/GrbMatrix.h" #include "gmock/gmock.h" @@ -23,17 +24,15 @@ void checkMatrix(GrbMatrix& matrix, size_t numRows, size_t numCols, } TEST(GrbMatrixTest, constructor) { - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); GrbMatrix matrix = GrbMatrix(2, 3); checkMatrix(matrix, 2, 3, 0, {}); - - GrbMatrix::finalize(); } TEST(GrbMatrixTest, clone) { - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); GrbMatrix matrix1 = GrbMatrix(2, 2); matrix1.setElement(0, 0, true); @@ -44,24 +43,20 @@ TEST(GrbMatrixTest, clone) { checkMatrix(matrix2, 2, 2, 1, {{0, 0, true}, {0, 1, false}, {1, 0, false}, {1, 1, 
false}}); - - GrbMatrix::finalize(); } TEST(GrbMatrixTest, getSetElement) { - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); GrbMatrix matrix = GrbMatrix(3, 3); matrix.setElement(1, 0, true); matrix.setElement(0, 2, true); checkMatrix(matrix, 3, 3, 2, {{1, 0, true}, {0, 2, true}}); - - GrbMatrix::finalize(); } TEST(GrbMatrixTest, build) { - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); std::vector rowIndices{0, 0, 1}; std::vector colIndices{1, 2, 2}; @@ -69,22 +64,18 @@ TEST(GrbMatrixTest, build) { GrbMatrix matrix = GrbMatrix::build(rowIndices, colIndices, 3, 3); checkMatrix(matrix, 3, 3, 3, {{0, 1, true}, {0, 2, true}, {1, 2, true}}); - - GrbMatrix::finalize(); } TEST(GrbMatrixTest, diag) { - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); auto matrix = GrbMatrix::diag(3); checkMatrix(matrix, 3, 3, 3, {{0, 0, true}, {1, 1, true}, {2, 2, true}}); - - GrbMatrix::finalize(); } TEST(GrbMatrixTest, extractTuples) { - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); GrbMatrix matrix = GrbMatrix(3, 3); @@ -94,8 +85,6 @@ TEST(GrbMatrixTest, extractTuples) { auto [rowIndices, colIndices] = matrix.extractTuples(); - GrbMatrix::finalize(); - std::vector expectedRowIndices{0, 0, 1}; std::vector expectedColIndices{1, 2, 2}; auto expected = {expectedRowIndices, expectedColIndices}; @@ -105,7 +94,7 @@ TEST(GrbMatrixTest, extractTuples) { } TEST(GrbMatrixTest, extractColumn) { - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); GrbMatrix matrix = GrbMatrix(3, 3); @@ -114,15 +103,13 @@ TEST(GrbMatrixTest, extractColumn) { std::vector colIndices = matrix.extractColumn(1); - GrbMatrix::finalize(); - std::vector expected{0, 2}; EXPECT_THAT(colIndices, testing::UnorderedElementsAreArray(expected)); } TEST(GrbMatrixTest, extractRow) { - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); GrbMatrix matrix = GrbMatrix(3, 3); @@ -131,15 +118,13 @@ TEST(GrbMatrixTest, extractRow) { std::vector rowIndices = matrix.extractRow(1); 
- GrbMatrix::finalize(); - std::vector expected{0, 2}; EXPECT_THAT(rowIndices, testing::UnorderedElementsAreArray(expected)); } TEST(GrbMatrixTest, multiplySquareMatrices) { - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); GrbMatrix matrix1 = GrbMatrix(2, 2); matrix1.setElement(0, 0, true); @@ -152,12 +137,10 @@ TEST(GrbMatrixTest, multiplySquareMatrices) { GrbMatrix matrix3 = matrix1.multiply(matrix2); checkMatrix(matrix3, 2, 2, 2, {{0, 0, true}, {1, 0, true}}); - - GrbMatrix::finalize(); } TEST(GrbMatrixTest, multiplyShapedMatrices) { - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); GrbMatrix matrix1 = GrbMatrix(2, 3); matrix1.setElement(0, 0, true); @@ -171,12 +154,10 @@ TEST(GrbMatrixTest, multiplyShapedMatrices) { GrbMatrix matrix3 = matrix1.multiply(matrix2); checkMatrix(matrix3, 2, 2, 2, {{0, 0, true}, {1, 0, true}}); - - GrbMatrix::finalize(); } TEST(GrbMatrixTest, transpose) { - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); auto matrix = GrbMatrix(2, 3); @@ -187,12 +168,10 @@ TEST(GrbMatrixTest, transpose) { GrbMatrix result = matrix.transpose(); checkMatrix(result, 3, 2, 3, {{0, 0, true}, {1, 0, true}, {2, 0, true}}); - - GrbMatrix::finalize(); } TEST(GrbMatrixTest, accumulateMultiply) { - GrbMatrix::initialize(); + GrbGlobalContext::getContext(); GrbMatrix matrix1 = GrbMatrix(2, 2); matrix1.setElement(0, 0, true); @@ -206,6 +185,4 @@ TEST(GrbMatrixTest, accumulateMultiply) { checkMatrix(matrix1, 2, 2, 4, {{0, 0, true}, {0, 1, true}, {1, 0, true}, {1, 1, true}}); - - GrbMatrix::finalize(); } From 1f424de8557619b007c6863d6322cafafa516466 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Thu, 15 Feb 2024 13:46:15 +0100 Subject: [PATCH 30/92] Added some checkCancellation --- src/engine/TransitivePathGraphblas.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/engine/TransitivePathGraphblas.cpp b/src/engine/TransitivePathGraphblas.cpp index 99f188fed6..b96cf9ded6 100644 --- 
a/src/engine/TransitivePathGraphblas.cpp +++ b/src/engine/TransitivePathGraphblas.cpp @@ -39,6 +39,8 @@ void TransitivePathGraphblas::computeTransitivePathBound( GrbGlobalContext::getContext(); auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); + checkCancellation(); + std::span startNodes = startSideTable.getColumn(startSide.treeAndCol_->second); GrbMatrix startNodeMatrix = @@ -325,6 +327,7 @@ std::tuple TransitivePathGraphblas::setupMatrix( rowIndices.push_back(startIndex); colIndices.push_back(targetIndex); + checkCancellation(); } auto matrix = @@ -347,6 +350,7 @@ GrbMatrix TransitivePathGraphblas::setupStartNodeMatrix( size_t colIndex = mapping.getIndex(id); startMatrix.setElement(rowIndex, colIndex, true); rowIndex++; + checkCancellation(); } return startMatrix; } From d64f4b67f40af50cbc46955b6f0fb99f0bde21e3 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 20 Feb 2024 12:59:56 +0100 Subject: [PATCH 31/92] Build fix --- src/engine/GrbGlobalContext.h | 2 ++ src/engine/GrbMatrix.cpp | 2 ++ src/engine/TransitivePathFallback.cpp | 19 +++++++++++++------ test/GrbMatrixTest.cpp | 2 +- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/engine/GrbGlobalContext.h b/src/engine/GrbGlobalContext.h index c063d2a4fc..b979c7b117 100644 --- a/src/engine/GrbGlobalContext.h +++ b/src/engine/GrbGlobalContext.h @@ -2,7 +2,9 @@ // Chair of Algorithms and Data Structures. 
// Author: Johannes Herrmann (johannes.roland.herrmann@mars.uni-freiburg.de) +extern "C" { #include +} /** * @class GrbGlobalContext diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index 1555e0430a..119df4afcc 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -4,7 +4,9 @@ #include "GrbMatrix.h" +extern "C" { #include +} #include #include diff --git a/src/engine/TransitivePathFallback.cpp b/src/engine/TransitivePathFallback.cpp index d9220b57b7..d68654867d 100644 --- a/src/engine/TransitivePathFallback.cpp +++ b/src/engine/TransitivePathFallback.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include "engine/CallFixedSize.h" @@ -56,9 +57,12 @@ void TransitivePathFallback::computeTransitivePathBound( auto fillTime = timer.msecs(); LOG(DEBUG) << "Fallback Timing measurements:" << std::endl; - LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; - LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; - LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; + LOG(DEBUG) << "Initialization time: " << std::to_string(initTime.count()) + << "ms" << std::endl; + LOG(DEBUG) << "Hull computation time: " << std::to_string(hullTime.count()) + << "ms" << std::endl; + LOG(DEBUG) << "IdTable fill time: " << std::to_string(fillTime.count()) + << "ms" << std::endl; *dynRes = std::move(res).toDynamic(); } @@ -98,9 +102,12 @@ void TransitivePathFallback::computeTransitivePath( auto fillTime = timer.msecs(); LOG(DEBUG) << "Fallback Timing measurements:" << std::endl; - LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; - LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; - LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; + LOG(DEBUG) << "Initialization time: " << std::to_string(initTime.count()) + << "ms" << std::endl; + LOG(DEBUG) << "Hull computation time: " << std::to_string(hullTime.count()) + << "ms" << std::endl; + 
LOG(DEBUG) << "IdTable fill time: " << std::to_string(fillTime.count()) + << "ms" << std::endl; *dynRes = std::move(res).toDynamic(); } diff --git a/test/GrbMatrixTest.cpp b/test/GrbMatrixTest.cpp index 585652fd6e..b3eba03f44 100644 --- a/test/GrbMatrixTest.cpp +++ b/test/GrbMatrixTest.cpp @@ -2,11 +2,11 @@ // Chair of Algorithms and Data Structures. // Author: Johannes Herrmann (johannes.roland.herrmann@mars.uni-freiburg.de) +#include #include #include "engine/GrbGlobalContext.h" #include "engine/GrbMatrix.h" -#include "gmock/gmock.h" // This helper function checks all important proprties of a matrix. // One matrix consists of row index, column index and value in this order. From 816ba9a3d6d53ba74da99f87e5e7d36741a43f2f Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 20 Feb 2024 13:44:15 +0100 Subject: [PATCH 32/92] Removed dead code --- src/engine/TransitivePath.cpp | 899 ---------------------------------- src/engine/TransitivePath.h | 501 ------------------- 2 files changed, 1400 deletions(-) delete mode 100644 src/engine/TransitivePath.cpp delete mode 100644 src/engine/TransitivePath.h diff --git a/src/engine/TransitivePath.cpp b/src/engine/TransitivePath.cpp deleted file mode 100644 index ec25ba78d2..0000000000 --- a/src/engine/TransitivePath.cpp +++ /dev/null @@ -1,899 +0,0 @@ -// Copyright 2019, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) - -#include "TransitivePath.h" - -#include -#include -#include -#include -#include - -#include "engine/CallFixedSize.h" -#include "engine/ExportQueryExecutionTrees.h" -#include "engine/IndexScan.h" -#include "util/Exception.h" -#include "util/Timer.h" - -// _____________________________________________________________________________ -TransitivePath::TransitivePath(QueryExecutionContext* qec, - std::shared_ptr child, - TransitivePathSide leftSide, - TransitivePathSide rightSide, size_t minDist, - size_t maxDist) - : Operation(qec), - subtree_(child - ? QueryExecutionTree::createSortedTree(std::move(child), {0}) - : nullptr), - lhs_(std::move(leftSide)), - rhs_(std::move(rightSide)), - minDist_(minDist), - maxDist_(maxDist) { - AD_CORRECTNESS_CHECK(qec != nullptr); - if (lhs_.isVariable()) { - variableColumns_[std::get(lhs_.value_)] = - makeAlwaysDefinedColumn(0); - } - if (rhs_.isVariable()) { - variableColumns_[std::get(rhs_.value_)] = - makeAlwaysDefinedColumn(1); - } - - lhs_.outputCol_ = 0; - rhs_.outputCol_ = 1; -} - -// _____________________________________________________________________________ -std::string TransitivePath::getCacheKeyImpl() const { - std::ostringstream os; - os << " minDist " << minDist_ << " maxDist " << maxDist_ << "\n"; - - os << "Left side:\n"; - os << lhs_.getCacheKey(); - - os << "Right side:\n"; - os << rhs_.getCacheKey(); - - AD_CORRECTNESS_CHECK(subtree_); - os << "Subtree:\n" << subtree_->getCacheKey() << '\n'; - - return std::move(os).str(); -} - -// _____________________________________________________________________________ -std::string TransitivePath::getDescriptor() const { - std::ostringstream os; - os << "TransitivePath "; - // If not full transitive hull, show interval as [min, max]. 
- if (minDist_ > 1 || maxDist_ < std::numeric_limits::max()) { - os << "[" << minDist_ << ", " << maxDist_ << "] "; - } - auto getName = [this](ValueId id) { - auto optStringAndType = - ExportQueryExecutionTrees::idToStringAndType(getIndex(), id, {}); - if (optStringAndType.has_value()) { - return optStringAndType.value().first; - } else { - return absl::StrCat("#", id.getBits()); - } - }; - // Left variable or entity name. - if (lhs_.isVariable()) { - os << std::get(lhs_.value_).name(); - } else { - os << getName(std::get(lhs_.value_)); - } - // The predicate. - auto scanOperation = - std::dynamic_pointer_cast(subtree_->getRootOperation()); - if (scanOperation != nullptr) { - os << " " << scanOperation->getPredicate() << " "; - } else { - // Escaped the question marks to avoid a warning about ignored trigraphs. - os << R"( )"; - } - // Right variable or entity name. - if (rhs_.isVariable()) { - os << std::get(rhs_.value_).name(); - } else { - os << getName(std::get(rhs_.value_)); - } - return std::move(os).str(); -} - -// _____________________________________________________________________________ -size_t TransitivePath::getResultWidth() const { return resultWidth_; } - -// _____________________________________________________________________________ -vector TransitivePath::resultSortedOn() const { - if (lhs_.isSortedOnInputCol()) { - return {0}; - } - if (rhs_.isSortedOnInputCol()) { - return {1}; - } - - return {}; -} - -// _____________________________________________________________________________ -VariableToColumnMap TransitivePath::computeVariableToColumnMap() const { - return variableColumns_; -} - -// _____________________________________________________________________________ -void TransitivePath::setTextLimit(size_t limit) { - for (auto child : getChildren()) { - child->setTextLimit(limit); - } -} - -// _____________________________________________________________________________ -bool TransitivePath::knownEmptyResult() { return 
subtree_->knownEmptyResult(); } - -// _____________________________________________________________________________ -float TransitivePath::getMultiplicity(size_t col) { - (void)col; - // The multiplicities are not known. - return 1; -} - -// _____________________________________________________________________________ -uint64_t TransitivePath::getSizeEstimateBeforeLimit() { - if (std::holds_alternative(lhs_.value_) || - std::holds_alternative(rhs_.value_)) { - // If the subject or object is fixed, assume that the number of matching - // triples is 1000. This will usually be an overestimate, but it will do the - // job of avoiding query plans that first generate large intermediate - // results and only then merge them with a triple such as this. In the - // lhs_.isVar && rhs_.isVar case below, we assume a worst-case blowup of - // 10000; see the comment there. - return 1000; - } - if (lhs_.treeAndCol_.has_value()) { - return lhs_.treeAndCol_.value().first->getSizeEstimate(); - } - if (rhs_.treeAndCol_.has_value()) { - return rhs_.treeAndCol_.value().first->getSizeEstimate(); - } - // Set costs to something very large, so that we never compute the complete - // transitive hull (unless the variables on both sides are not bound in any - // other way, so that the only possible query plan is to compute the complete - // transitive hull). - // - // NOTE: _subtree->getSizeEstimateBeforeLimit() is the number of triples of - // the predicate, for which the transitive hull operator (+) is specified. On - // Wikidata, the predicate with the largest blowup when taking the - // transitive hull is wdt:P2789 (connects with). The blowup is then from 90K - // (without +) to 110M (with +), so about 1000 times larger. 
- AD_CORRECTNESS_CHECK(lhs_.isVariable() && rhs_.isVariable()); - return subtree_->getSizeEstimate() * 10000; -} - -// _____________________________________________________________________________ -size_t TransitivePath::getCostEstimate() { - // We assume that the cost of computing the transitive path is proportional to - // the result size. - auto costEstimate = getSizeEstimateBeforeLimit(); - // Add the cost for the index scan of the predicate involved. - for (auto* ptr : getChildren()) { - if (ptr) { - costEstimate += ptr->getCostEstimate(); - } - } - return costEstimate; -} - -// _____________________________________________________________________________ -template -void TransitivePath::computeTransitivePathBound( - IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, const IdTable& startSideTable) const { - IdTableStatic res = std::move(*dynRes).toStatic(); - - const IdTableView sub = dynSub.asStaticView(); - decltype(auto) startCol = sub.getColumn(startSide.subCol_); - decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); - - auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); - timer.start(); - - GrbMatrix::initialize(); - auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); - - std::span startNodes = - startSideTable.getColumn(startSide.treeAndCol_->second); - GrbMatrix startNodeMatrix = - setupStartNodeMatrix(startNodes, graph.numRows(), mapping); - - timer.stop(); - auto initTime = timer.msecs(); - timer.start(); - - auto hull = transitiveHull(graph, std::move(startNodeMatrix)); - if (!targetSide.isVariable()) { - Id target = std::get(targetSide.value_); - size_t targetIndex = mapping.getIndex(target); - hull = getTargetRow(hull, targetIndex); - } - - timer.stop(); - auto hullTime = timer.msecs(); - timer.start(); - - TransitivePath::fillTableWithHull( - res, hull, mapping, startSideTable, startNodes, startSide.outputCol_, - targetSide.outputCol_, 
startSide.treeAndCol_.value().second); - - timer.stop(); - auto fillTime = timer.msecs(); - - LOG(DEBUG) << "GraphBLAS Timing measurements:" << std::endl; - LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; - LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; - LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; - - *dynRes = std::move(res).toDynamic(); -} - -// _____________________________________________________________________________ -template -void TransitivePath::computeTransitivePathBoundFallback( - IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, const IdTable& startSideTable) const { - IdTableStatic res = std::move(*dynRes).toStatic(); - - auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); - timer.start(); - - auto [edges, nodes] = setupMapAndNodes( - dynSub, startSide, targetSide, startSideTable); - - timer.stop(); - auto initTime = timer.msecs(); - timer.start(); - - Map hull(allocator()); - if (!targetSide.isVariable()) { - hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); - } else { - hull = transitiveHull(edges, nodes, std::nullopt); - } - - timer.stop(); - auto hullTime = timer.msecs(); - timer.start(); - - TransitivePath::fillTableWithHull( - res, hull, nodes, startSide.outputCol_, targetSide.outputCol_, - startSideTable, startSide.treeAndCol_.value().second); - - timer.stop(); - auto fillTime = timer.msecs(); - - LOG(DEBUG) << "Fallback Timing measurements:" << std::endl; - LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; - LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; - LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; - - *dynRes = std::move(res).toDynamic(); -} - -// _____________________________________________________________________________ -template -void TransitivePath::computeTransitivePath( - IdTable* dynRes, const 
IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const { - IdTableStatic res = std::move(*dynRes).toStatic(); - - const IdTableView sub = dynSub.asStaticView(); - decltype(auto) startCol = sub.getColumn(startSide.subCol_); - decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); - - auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); - timer.start(); - - GrbMatrix::initialize(); - auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); - - timer.stop(); - auto initTime = timer.msecs(); - timer.start(); - - GrbMatrix hull; - if (!startSide.isVariable()) { - std::vector startNode{std::get(startSide.value_)}; - GrbMatrix startMatrix = - setupStartNodeMatrix(startNode, graph.numRows(), mapping); - hull = transitiveHull(graph, std::move(startMatrix)); - } else { - hull = transitiveHull(graph, std::nullopt); - } - - timer.stop(); - auto hullTime = timer.msecs(); - timer.start(); - - if (!targetSide.isVariable()) { - Id target = std::get(targetSide.value_); - size_t targetIndex = mapping.getIndex(target); - hull = getTargetRow(hull, targetIndex); - } - - if (!startSide.isVariable()) { - std::vector startNode{std::get(startSide.value_)}; - TransitivePath::fillTableWithHull(res, hull, mapping, startNode, - startSide.outputCol_, - targetSide.outputCol_); - } else { - TransitivePath::fillTableWithHull( - res, hull, mapping, startSide.outputCol_, targetSide.outputCol_); - } - - timer.stop(); - auto fillTime = timer.msecs(); - - LOG(DEBUG) << "GraphBLAS Timing measurements:" << std::endl; - LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; - LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; - LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; - - *dynRes = std::move(res).toDynamic(); -} - -// _____________________________________________________________________________ -template -void TransitivePath::computeTransitivePathFallback( - IdTable* 
dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const { - IdTableStatic res = std::move(*dynRes).toStatic(); - - auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); - timer.start(); - - auto [edges, nodes] = - setupMapAndNodes(dynSub, startSide, targetSide); - - timer.stop(); - auto initTime = timer.msecs(); - timer.start(); - - Map hull{allocator()}; - if (!targetSide.isVariable()) { - hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); - } else { - hull = transitiveHull(edges, nodes, std::nullopt); - } - - timer.stop(); - auto hullTime = timer.msecs(); - timer.start(); - - TransitivePath::fillTableWithHull(res, hull, startSide.outputCol_, - targetSide.outputCol_); - - timer.stop(); - auto fillTime = timer.msecs(); - - LOG(DEBUG) << "Fallback Timing measurements:" << std::endl; - LOG(DEBUG) << "Initialization time: " << initTime << "ms" << std::endl; - LOG(DEBUG) << "Hull computation time: " << hullTime << "ms" << std::endl; - LOG(DEBUG) << "IdTable fill time: " << fillTime << "ms" << std::endl; - - *dynRes = std::move(res).toDynamic(); -} - -// _____________________________________________________________________________ -ResultTable TransitivePath::computeResult() { - if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && - rhs_.isVariable()) { - AD_THROW( - "This query might have to evalute the empty path, which is currently " - "not supported"); - } - shared_ptr subRes = subtree_->getResult(); - - IdTable idTable{allocator()}; - - idTable.setNumColumns(getResultWidth()); - - size_t subWidth = subRes->idTable().numColumns(); - - bool useGraphblas = !RuntimeParameters().get<"use-graphblas">(); - - auto computeForOneSide = [this, &idTable, subRes, subWidth, useGraphblas]( - auto& boundSide, - auto& otherSide) -> ResultTable { - shared_ptr sideRes = - boundSide.treeAndCol_.value().first->getResult(); - size_t sideWidth = sideRes->idTable().numColumns(); - - if (useGraphblas) { - 
CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), - &TransitivePath::computeTransitivePathBound, this, - &idTable, subRes->idTable(), boundSide, otherSide, - sideRes->idTable()); - } else { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), - &TransitivePath::computeTransitivePathBoundFallback, this, - &idTable, subRes->idTable(), boundSide, otherSide, - sideRes->idTable()); - } - - return {std::move(idTable), resultSortedOn(), - ResultTable::getSharedLocalVocabFromNonEmptyOf(*sideRes, *subRes)}; - }; - - if (lhs_.isBoundVariable()) { - return computeForOneSide(lhs_, rhs_); - } else if (rhs_.isBoundVariable()) { - return computeForOneSide(rhs_, lhs_); - // Right side is an Id - } else if (!rhs_.isVariable()) { - if (useGraphblas) { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePath::computeTransitivePath, this, &idTable, - subRes->idTable(), rhs_, lhs_); - } else { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePath::computeTransitivePathFallback, this, - &idTable, subRes->idTable(), rhs_, lhs_); - } - // No side is a bound variable, the right side is an unbound variable - // and the left side is either an unbound Variable or an ID. - } else { - if (useGraphblas) { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePath::computeTransitivePath, this, &idTable, - subRes->idTable(), lhs_, rhs_); - } else { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePath::computeTransitivePathFallback, this, - &idTable, subRes->idTable(), lhs_, rhs_); - } - } - - // NOTE: The only place, where the input to a transitive path operation is not - // an index scan (which has an empty local vocabulary by default) is the - // `LocalVocabTest`. But it doesn't harm to propagate the local vocab here - // either. 
- return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; -} - -// _____________________________________________________________________________ -std::shared_ptr TransitivePath::bindLeftSide( - std::shared_ptr leftop, size_t inputCol) const { - return bindLeftOrRightSide(std::move(leftop), inputCol, true); -} - -// _____________________________________________________________________________ -std::shared_ptr TransitivePath::bindRightSide( - std::shared_ptr rightop, size_t inputCol) const { - return bindLeftOrRightSide(std::move(rightop), inputCol, false); -} - -// _____________________________________________________________________________ -std::shared_ptr TransitivePath::bindLeftOrRightSide( - std::shared_ptr leftOrRightOp, size_t inputCol, - bool isLeft) const { - // Enforce required sorting of `leftOrRightOp`. - leftOrRightOp = QueryExecutionTree::createSortedTree(std::move(leftOrRightOp), - {inputCol}); - // Create a copy of this. - // - // NOTE: The RHS used to be `std::make_shared()`, which is - // wrong because it first calls the copy constructor of the base class - // `Operation`, which would then ignore the changes in `variableColumnMap_` - // made below (see `Operation::getInternallyVisibleVariableColumns` and - // `Operation::getExternallyVariableColumns`). - std::shared_ptr p = std::make_shared( - getExecutionContext(), subtree_, lhs_, rhs_, minDist_, maxDist_); - if (isLeft) { - p->lhs_.treeAndCol_ = {leftOrRightOp, inputCol}; - } else { - p->rhs_.treeAndCol_ = {leftOrRightOp, inputCol}; - } - - // Note: The `variable` in the following structured binding is `const`, even - // if we bind by value. We deliberately make one unnecessary copy of the - // `variable` to keep the code simpler. 
- for (auto [variable, columnIndexWithType] : - leftOrRightOp->getVariableColumns()) { - ColumnIndex columnIndex = columnIndexWithType.columnIndex_; - if (columnIndex == inputCol) { - continue; - } - - columnIndexWithType.columnIndex_ += columnIndex > inputCol ? 1 : 2; - - p->variableColumns_[variable] = columnIndexWithType; - // p->resultWidth_++; - } - p->resultWidth_ += leftOrRightOp->getResultWidth() - 1; - return p; -} - -// _____________________________________________________________________________ -bool TransitivePath::isBoundOrId() const { - return lhs_.isBoundVariable() || rhs_.isBoundVariable() || - !lhs_.isVariable() || !rhs_.isVariable(); -} - -// _____________________________________________________________________________ -GrbMatrix TransitivePath::transitiveHull( - const GrbMatrix& graph, std::optional startNodes) const { - size_t pathLength = 0; - GrbMatrix result; - - if (startNodes) { - result = std::move(startNodes.value()); - } else { - result = GrbMatrix::diag(graph.numRows()); - } - - if (minDist_ > 0) { - result = result.multiply(graph); - pathLength++; - } - - size_t previousNvals = 0; - size_t nvals = result.numNonZero(); - while (nvals > previousNvals && pathLength < maxDist_) { - previousNvals = result.numNonZero(); - // TODO: Check effect of matrix orientation (Row major, Column major) on - // performance. - result.accumulateMultiply(graph); - checkCancellation(); - nvals = result.numNonZero(); - pathLength++; - } - return result; -} - -// _____________________________________________________________________________ -TransitivePath::Map TransitivePath::transitiveHull( - const Map& edges, const std::vector& startNodes, - std::optional target) const { - using MapIt = Map::const_iterator; - // For every node do a dfs on the graph - Map hull{allocator()}; - - // Stores nodes we already have a path to. This avoids cycles. 
- ad_utility::HashSetWithMemoryLimit marks{ - getExecutionContext()->getAllocator()}; - - // The stack used to store the dfs' progress - std::vector positions; - - // Used to store all edges leading away from a node for every level. - // Reduces access to the hashmap, and is safe as the map will not - // be modified after this point. - std::vector edgeCache; - - for (Id currentStartNode : startNodes) { - if (hull.contains(currentStartNode)) { - // We have already computed the hull for this node - continue; - } - - // Reset for this iteration - marks.clear(); - - MapIt rootEdges = edges.find(currentStartNode); - if (rootEdges != edges.end()) { - positions.push_back(rootEdges->second.begin()); - edgeCache.push_back(&rootEdges->second); - } - if (minDist_ == 0 && - (!target.has_value() || currentStartNode == target.value())) { - insertIntoMap(hull, currentStartNode, currentStartNode); - } - - // While we have not found the entire transitive hull and have not reached - // the max step limit - while (!positions.empty()) { - checkCancellation(); - size_t stackIndex = positions.size() - 1; - // Process the next child of the node at the top of the stack - Set::const_iterator& pos = positions[stackIndex]; - const Set* nodeEdges = edgeCache.back(); - - if (pos == nodeEdges->end()) { - // We finished processing this node - positions.pop_back(); - edgeCache.pop_back(); - continue; - } - - Id child = *pos; - ++pos; - size_t childDepth = positions.size(); - if (childDepth <= maxDist_ && marks.count(child) == 0) { - // process the child - if (childDepth >= minDist_) { - marks.insert(child); - if (!target.has_value() || child == target.value()) { - insertIntoMap(hull, currentStartNode, child); - } - } - // Add the child to the stack - MapIt it = edges.find(child); - if (it != edges.end()) { - positions.push_back(it->second.begin()); - edgeCache.push_back(&it->second); - } - } - } - } - return hull; -} - -// 
_____________________________________________________________________________ -template -void TransitivePath::fillTableWithHull(IdTableStatic& table, - const GrbMatrix& hull, - const IdMapping& mapping, - size_t startSideCol, - size_t targetSideCol) { - auto [rowIndices, colIndices] = hull.extractTuples(); - - for (size_t i = 0; i < rowIndices.size(); i++) { - table.emplace_back(); - auto startIndex = rowIndices[i]; - auto targetIndex = colIndices[i]; - Id startId = mapping.getId(startIndex); - Id targetId = mapping.getId(targetIndex); - table(i, startSideCol) = startId; - table(i, targetSideCol) = targetId; - } -} - -// _____________________________________________________________________________ -template -void TransitivePath::fillTableWithHull(IdTableStatic& table, - const GrbMatrix& hull, - const IdMapping& mapping, - std::span startNodes, - size_t startSideCol, - size_t targetSideCol) { - size_t resultRowIndex = 0; - size_t rowIndex = 0; - - for (auto startNode : startNodes) { - std::vector indices = hull.extractRow(rowIndex); - for (size_t index : indices) { - Id targetNode = mapping.getId(index); - table.emplace_back(); - table(resultRowIndex, startSideCol) = startNode; - table(resultRowIndex, targetSideCol) = targetNode; - resultRowIndex++; - } - rowIndex++; - } -} - -// _____________________________________________________________________________ -template -void TransitivePath::fillTableWithHull(IdTableStatic& table, - const GrbMatrix& hull, - const IdMapping& mapping, - const IdTable& startSideTable, - std::span startNodes, - size_t startSideCol, - size_t targetSideCol, size_t skipCol) { - IdTableView startView = - startSideTable.asStaticView(); - - size_t resultRowIndex = 0; - size_t rowIndex = 0; - for (auto startNode : startNodes) { - std::vector indices = hull.extractRow(rowIndex); - for (size_t index : indices) { - Id targetNode = mapping.getId(index); - table.emplace_back(); - table(resultRowIndex, startSideCol) = startNode; - table(resultRowIndex, 
targetSideCol) = targetNode; - - TransitivePath::copyColumns( - startView, table, rowIndex, resultRowIndex, skipCol); - resultRowIndex++; - } - rowIndex++; - } -} - -// _____________________________________________________________________________ -template -void TransitivePath::fillTableWithHull(IdTableStatic& table, - const Map& hull, std::vector& nodes, - size_t startSideCol, - size_t targetSideCol, - const IdTable& startSideTable, - size_t skipCol) { - IdTableView startView = - startSideTable.asStaticView(); - - size_t rowIndex = 0; - for (size_t i = 0; i < nodes.size(); i++) { - Id node = nodes[i]; - auto it = hull.find(node); - if (it == hull.end()) { - continue; - } - - for (Id otherNode : it->second) { - table.emplace_back(); - table(rowIndex, startSideCol) = node; - table(rowIndex, targetSideCol) = otherNode; - - TransitivePath::copyColumns(startView, table, i, - rowIndex, skipCol); - - rowIndex++; - } - } -} - -// _____________________________________________________________________________ -template -void TransitivePath::fillTableWithHull(IdTableStatic& table, - const Map& hull, size_t startSideCol, - size_t targetSideCol) { - size_t rowIndex = 0; - for (auto const& [node, linkedNodes] : hull) { - for (Id linkedNode : linkedNodes) { - table.emplace_back(); - table(rowIndex, startSideCol) = node; - table(rowIndex, targetSideCol) = linkedNode; - - rowIndex++; - } - } -} - -// _____________________________________________________________________________ -template -std::pair> -TransitivePath::setupMapAndNodes(const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, - const IdTable& startSideTable) const { - std::vector nodes; - Map edges = setupEdgesMap(sub, startSide, targetSide); - - // Bound -> var|id - std::span startNodes = setupNodes( - startSideTable, startSide.treeAndCol_.value().second); - nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); - - return {std::move(edges), std::move(nodes)}; -} - 
-// _____________________________________________________________________________ -template -std::pair> -TransitivePath::setupMapAndNodes(const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const { - std::vector nodes; - Map edges = setupEdgesMap(sub, startSide, targetSide); - - // id -> var|id - if (!startSide.isVariable()) { - nodes.push_back(std::get(startSide.value_)); - // var -> var - } else { - std::span startNodes = - setupNodes(sub, startSide.subCol_); - nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); - if (minDist_ == 0) { - std::span targetNodes = - setupNodes(sub, targetSide.subCol_); - nodes.insert(nodes.end(), targetNodes.begin(), targetNodes.end()); - } - } - - return {std::move(edges), std::move(nodes)}; -} - -// _____________________________________________________________________________ -template -TransitivePath::Map TransitivePath::setupEdgesMap( - const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const { - const IdTableView sub = dynSub.asStaticView(); - Map edges{allocator()}; - decltype(auto) startCol = sub.getColumn(startSide.subCol_); - decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); - - for (size_t i = 0; i < sub.size(); i++) { - checkCancellation(); - insertIntoMap(edges, startCol[i], targetCol[i]); - } - return edges; -} - -// _____________________________________________________________________________ -template -std::span TransitivePath::setupNodes(const IdTable& table, - size_t col) { - return table.getColumn(col); -} - -// _____________________________________________________________________________ -GrbMatrix TransitivePath::getTargetRow(GrbMatrix& hull, - size_t targetIndex) const { - GrbMatrix transformer = GrbMatrix(hull.numCols(), hull.numCols()); - transformer.setElement(targetIndex, targetIndex, true); - return hull.multiply(transformer); -} - -// 
_____________________________________________________________________________ -std::tuple TransitivePath::setupMatrix( - std::span startCol, std::span targetCol, - size_t numRows) const { - std::vector rowIndices; - std::vector colIndices; - IdMapping mapping; - - for (size_t i = 0; i < numRows; i++) { - auto startId = startCol[i]; - auto targetId = targetCol[i]; - auto startIndex = mapping.addId(startId); - auto targetIndex = mapping.addId(targetId); - - rowIndices.push_back(startIndex); - colIndices.push_back(targetIndex); - } - - auto matrix = - GrbMatrix::build(rowIndices, colIndices, mapping.size(), mapping.size()); - return {std::move(matrix), std::move(mapping)}; -} - -// _____________________________________________________________________________ -GrbMatrix TransitivePath::setupStartNodeMatrix(std::span startIds, - size_t numCols, - IdMapping mapping) const { - // stardIds.size() is the maximum possible number of columns for the - // startMatrix, but if some start node does not have a link in the graph it - // will be skipped, resulting in a zero column at the end of the startMatrix - GrbMatrix startMatrix = GrbMatrix(startIds.size(), numCols); - size_t rowIndex = 0; - for (Id id : startIds) { - if (!mapping.contains(id)) { - continue; - } - size_t colIndex = mapping.getIndex(id); - startMatrix.setElement(rowIndex, colIndex, true); - rowIndex++; - } - return startMatrix; -} - -// _____________________________________________________________________________ -template -void TransitivePath::copyColumns(const IdTableView& inputTable, - IdTableStatic& outputTable, - size_t inputRow, size_t outputRow, - size_t skipCol) { - size_t inCol = 0; - size_t outCol = 2; - while (inCol < inputTable.numColumns() && outCol < outputTable.numColumns()) { - if (skipCol == inCol) { - inCol++; - continue; - } - - outputTable(outputRow, outCol) = inputTable(inputRow, inCol); - inCol++; - outCol++; - } -} - -// 
_____________________________________________________________________________ -void TransitivePath::insertIntoMap(Map& map, Id key, Id value) const { - auto [it, success] = map.try_emplace(key, allocator()); - it->second.insert(value); -} diff --git a/src/engine/TransitivePath.h b/src/engine/TransitivePath.h deleted file mode 100644 index 6c21aa40bd..0000000000 --- a/src/engine/TransitivePath.h +++ /dev/null @@ -1,501 +0,0 @@ -// Copyright 2019, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) - -#pragma once - -#include -#include - -#include "engine/GrbMatrix.h" -#include "engine/Operation.h" -#include "engine/QueryExecutionTree.h" -#include "engine/idTable/IdTable.h" -#include "util/HashMap.h" - -using TreeAndCol = std::pair, size_t>; -struct TransitivePathSide { - // treeAndCol contains the QueryExecutionTree of this side and the column - // where the Ids of this side are located. This member only has a value if - // this side was bound. - std::optional treeAndCol_; - // Column of the sub table where the Ids of this side are located - size_t subCol_; - std::variant value_; - // The column in the ouput table where this side Ids are written to. 
- // This member is set by the TransitivePath class - size_t outputCol_ = 0; - - bool isVariable() const { return std::holds_alternative(value_); }; - - bool isBoundVariable() const { return treeAndCol_.has_value(); }; - - std::string getCacheKey() const { - std::ostringstream os; - if (!isVariable()) { - os << "Id: " << std::get(value_); - } - - os << ", subColumn: " << subCol_ << "to " << outputCol_; - - if (treeAndCol_.has_value()) { - const auto& [tree, col] = treeAndCol_.value(); - os << ", Subtree:\n"; - os << tree->getCacheKey() << "with join column " << col << "\n"; - } - return std::move(os).str(); - } - - bool isSortedOnInputCol() const { - if (!treeAndCol_.has_value()) { - return false; - } - - auto [tree, col] = treeAndCol_.value(); - const std::vector& sortedOn = - tree->getRootOperation()->getResultSortedOn(); - // TODO use std::ranges::starts_with - return (!sortedOn.empty() && sortedOn[0] == col); - } -}; - -// This struct keeps track of the mapping between Ids and matrix indices -struct IdMapping { - bool contains(Id id) { return idMap_.contains(id); } - - size_t addId(Id id) { - if (!idMap_.contains(id)) { - indexMap_.push_back(id); - } - idMap_.try_emplace(id, indexMap_.size() - 1); - return idMap_[id]; - } - - Id getId(size_t index) const { return indexMap_.at(index); } - - size_t getIndex(Id id) const { return idMap_.at(id); } - - size_t size() const { return indexMap_.size(); } - - private: - ad_utility::HashMap idMap_; - - std::vector indexMap_; -}; - -class TransitivePath : public Operation { - // We deliberately use the `std::` variants of a hash set and hash map because - // `absl`s types are not exception safe. 
- constexpr static auto hash = [](Id id) { - return std::hash{}(id.getBits()); - }; - using Set = std::unordered_set, - ad_utility::AllocatorWithLimit>; - using Map = std::unordered_map< - Id, Set, decltype(hash), std::equal_to, - ad_utility::AllocatorWithLimit>>; - - std::shared_ptr subtree_; - TransitivePathSide lhs_; - TransitivePathSide rhs_; - size_t resultWidth_ = 2; - size_t minDist_; - size_t maxDist_; - VariableToColumnMap variableColumns_; - - public: - TransitivePath(QueryExecutionContext* qec, - std::shared_ptr child, - TransitivePathSide leftSide, TransitivePathSide rightSide, - size_t minDist, size_t maxDist); - - /** - * Returns a new TransitivePath operation that uses the fact that leftop - * generates all possible values for the left side of the paths. If the - * results of leftop is smaller than all possible values this will result in a - * faster transitive path operation (as the transitive paths has to be - * computed for fewer elements). - */ - std::shared_ptr bindLeftSide( - std::shared_ptr leftop, size_t inputCol) const; - - /** - * Returns a new TransitivePath operation that uses the fact that rightop - * generates all possible values for the right side of the paths. If the - * results of rightop is smaller than all possible values this will result in - * a faster transitive path operation (as the transitive paths has to be - * computed for fewer elements). 
- */ - std::shared_ptr bindRightSide( - std::shared_ptr rightop, size_t inputCol) const; - - bool isBoundOrId() const; - - /** - * Getters, mainly necessary for testing - */ - size_t getMinDist() const { return minDist_; } - size_t getMaxDist() const { return maxDist_; } - const TransitivePathSide& getLeft() const { return lhs_; } - const TransitivePathSide& getRight() const { return rhs_; } - - protected: - virtual std::string getCacheKeyImpl() const override; - - public: - virtual std::string getDescriptor() const override; - - virtual size_t getResultWidth() const override; - - virtual vector resultSortedOn() const override; - - virtual void setTextLimit(size_t limit) override; - - virtual bool knownEmptyResult() override; - - virtual float getMultiplicity(size_t col) override; - - private: - uint64_t getSizeEstimateBeforeLimit() override; - - public: - virtual size_t getCostEstimate() override; - - vector getChildren() override { - std::vector res; - auto addChildren = [](std::vector& res, - TransitivePathSide side) { - if (side.treeAndCol_.has_value()) { - res.push_back(side.treeAndCol_.value().first.get()); - } - }; - addChildren(res, lhs_); - addChildren(res, rhs_); - res.push_back(subtree_.get()); - return res; - } - - /** - * @brief Compute the transitive hull with a bound side. - * This function is called when the startSide is bound and - * it is a variable. The other IdTable contains the result - * of the start side and will be used to get the start nodes. 
- * - * @tparam RES_WIDTH Number of columns of the result table - * @tparam SUB_WIDTH Number of columns of the sub table - * @tparam SIDE_WIDTH Number of columns of the - * @param res The result table which will be filled in-place - * @param sub The IdTable for the sub result - * @param startSide The start side for the transitive hull - * @param targetSide The target side for the transitive hull - * @param startSideTable The IdTable of the startSide - */ - template - void computeTransitivePathBound(IdTable* res, const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, - const IdTable& startSideTable) const; - - template - void computeTransitivePathBoundFallback(IdTable* res, const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, - const IdTable& startSideTable) const; - - /** - * @brief Compute the transitive hull. - * This function is called when no side is bound (or an id). - * - * @tparam RES_WIDTH Number of columns of the result table - * @tparam SUB_WIDTH Number of columns of the sub table - * @param res The result table which will be filled in-place - * @param sub The IdTable for the sub result - * @param startSide The start side for the transitive hull - * @param targetSide The target side for the transitive hull - */ - template - void computeTransitivePath(IdTable* res, const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const; - - template - void computeTransitivePathFallback( - IdTable* res, const IdTable& sub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const; - - private: - /** - * @brief Compute the result for this TransitivePath operation - * This function chooses the start and target side for the transitive - * hull computation. This choice of the start side has a large impact - * on the time it takes to compute the hull. 
The set of nodes on the - * start side should be as small as possible. - * - * @return ResultTable The result of the TransitivePath operation - */ - virtual ResultTable computeResult() override; - - VariableToColumnMap computeVariableToColumnMap() const override; - - // The internal implementation of `bindLeftSide` and `bindRightSide` which - // share a lot of code. - std::shared_ptr bindLeftOrRightSide( - std::shared_ptr leftOrRightOp, size_t inputCol, - bool isLeft) const; - - /** - * @brief Compute the transitive hull of the graph. If given startNodes, - * compute the transitive hull starting at the startNodes. - * - * @param graph Boolean, square, sparse, adjacency matrix. Row i, column j is - * true, iff. there is an edge going from i to j in the graph. - * @param startNodes Boolean, sparse, adjacency matrix, marking the start - * nodes. There is one row for each start node. The number of columns has to - * be equal to the number of columns of the graph matrix. - * @return An adjacency matrix containing the transitive hull - */ - GrbMatrix transitiveHull(const GrbMatrix& graph, - std::optional startNodes) const; - - /** - * @brief Compute the transitive hull starting at the given nodes, - * using the given Map. - * - * @param edges Adjacency lists, mapping Ids (nodes) to their connected - * Ids. - * @param nodes A list of Ids. These Ids are used as starting points for the - * transitive hull. Thus, this parameter guides the performance of this - * algorithm. - * @param target Optional target Id. If supplied, only paths which end - * in this Id are added to the hull. - * @return Map Maps each Id to its connected Ids in the transitive hull - */ - Map transitiveHull(const Map& edges, const std::vector& startNodes, - std::optional target) const; - - /** - * @brief Fill the given table with the transitive hull and use the - * startSideTable to fill in the rest of the columns. - * This function is called if the start side is bound and a variable. 
- * - * @tparam WIDTH The number of columns of the result table. - * @tparam START_WIDTH The number of columns of the start table. - * @param table The result table which will be filled. - * @param hull The transitive hull. - * @param nodes The start nodes of the transitive hull. These need to be in - * the same order and amount as the starting side nodes in the startTable. - * @param startSideCol The column of the result table for the startSide of the - * hull - * @param targetSideCol The column of the result table for the targetSide of - * the hull - * @param startSideTable An IdTable that holds other results. The other - * results will be transferred to the new result table. - * @param skipCol This column contains the Ids of the start side in the - * startSideTable and will be skipped. - */ - - /** - * @brief Fill the given table with the transitive hull. - * This function is called if the sides are unbound or ids. - * - * @tparam WIDTH The number of columns of the result table. - * @param table The result table which will be filled. - * @param hull The transitive hull. - * @param startSideCol The column of the result table for the startSide of the - * hull - * @param targetSideCol The column of the result table for the targetSide of - * the hull - */ - - /** - * @brief Fill the IdTable with the given transitive hull. - * - * @tparam WIDTH The number of columns of the result table. - * @param table The result table which will be filled. - * @param hull The transitive hull. Represented by a sparse, boolean adjacency - * matrix - * @param mapping IdMapping, which maps Ids to matrix indices and vice versa. 
- * @param startSideCol The column of the result table for the startSide of the - * hull - * @param targetSideCol The column of the result table for the targetSide of - * the hull - */ - template - static void fillTableWithHull(IdTableStatic& table, - const GrbMatrix& hull, const IdMapping& mapping, - size_t startSideCol, size_t targetSideCol); - - /** - * @brief Fill the IdTable with the given transitive hull. This function is - * used in case the hull computation has one (or more) Ids as start nodes. - * - * @tparam WIDTH The number of columns of the result table. - * @param table The result table which will be filled. - * @param hull The transitive hull. Represented by a sparse, boolean adjacency - * matrix - * @param mapping IdMapping, which maps Ids to matrix indices and vice versa. - * @param startNodes Ids of the start nodes. - * @param startSideCol The column of the result table for the startSide of the - * hull - * @param targetSideCol The column of the result table for the targetSide of - * the hull - */ - template - static void fillTableWithHull(IdTableStatic& table, - const GrbMatrix& hull, const IdMapping& mapping, - std::span startNodes, - size_t startSideCol, size_t targetSideCol); - - /** - * @brief Fill the IdTable with the given transitive hull. This function is - * used if the start side was already bound and there is an IdTable from which - * data has to be copied to the result table. - * - * @tparam WIDTH The number of columns of the result table. - * @tparam START_WIDTH The number of columns of the start table. - * @param table The result table which will be filled. - * @param hull The transitive hull. Represented by a sparse, boolean adjacency - * matrix - * @param mapping IdMapping, which maps Ids to matrix indices and vice versa. - * @param startNodes Ids of the start nodes. 
- * @param startSideCol The column of the result table for the startSide of the - * hull - * @param targetSideCol The column of the result table for the targetSide of - * the hull - * @param skipCol This column contains the Ids of the start side in the - * startSideTable and will be skipped. - */ - template - static void fillTableWithHull(IdTableStatic& table, - const GrbMatrix& hull, const IdMapping& mapping, - const IdTable& startSideTable, - std::span startNodes, - size_t startSideCol, size_t targetSideCol, - size_t skipCol); - - /** - * @brief Fill the given table with the transitive hull and use the - * startSideTable to fill in the rest of the columns. - * This function is called if the start side is bound and a variable. - * - * @tparam WIDTH The number of columns of the result table. - * @tparam START_WIDTH The number of columns of the start table. - * @param table The result table which will be filled. - * @param hull The transitive hull. - * @param nodes The start nodes of the transitive hull. These need to be in - * the same order and amount as the starting side nodes in the startTable. - * @param startSideCol The column of the result table for the startSide of the - * hull - * @param targetSideCol The column of the result table for the targetSide of - * the hull - * @param startSideTable An IdTable that holds other results. The other - * results will be transferred to the new result table. - * @param skipCol This column contains the Ids of the start side in the - * startSideTable and will be skipped. - */ - template - static void fillTableWithHull(IdTableStatic& table, const Map& hull, - std::vector& nodes, size_t startSideCol, - size_t targetSideCol, - const IdTable& startSideTable, size_t skipCol); - - /** - * @brief Fill the given table with the transitive hull. - * This function is called if the sides are unbound or ids. - * - * @tparam WIDTH The number of columns of the result table. - * @param table The result table which will be filled. 
- * @param hull The transitive hull. - * @param startSideCol The column of the result table for the startSide of the - * hull - * @param targetSideCol The column of the result table for the targetSide of - * the hull - */ - template - static void fillTableWithHull(IdTableStatic& table, const Map& hull, - size_t startSideCol, size_t targetSideCol); - - /** - * @brief Prepare a Map and a nodes vector for the transitive hull - * computation. - * - * @tparam SUB_WIDTH Number of columns of the sub table - * @tparam SIDE_WIDTH Number of columns of the startSideTable - * @param sub The sub table result - * @param startSide The TransitivePathSide where the edges start - * @param targetSide The TransitivePathSide where the edges end - * @param startSideTable An IdTable containing the Ids for the startSide - * @return std::pair> A Map and Id vector (nodes) for the - * transitive hull computation - */ - template - std::pair> setupMapAndNodes( - const IdTable& sub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, - const IdTable& startSideTable) const; - - /** - * @brief Prepare a Map and a nodes vector for the transitive hull - * computation. 
- * - * @tparam SUB_WIDTH Number of columns of the sub table - * @param sub The sub table result - * @param startSide The TransitivePathSide where the edges start - * @param targetSide The TransitivePathSide where the edges end - * @return std::pair> A Map and Id vector (nodes) for the - * transitive hull computation - */ - template - std::pair> setupMapAndNodes( - const IdTable& sub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const; - - // initialize the map from the subresult - template - Map setupEdgesMap(const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const; - - // initialize a vector for the starting nodes (Ids) - template - static std::span setupNodes(const IdTable& table, size_t col); - - GrbMatrix getTargetRow(GrbMatrix& hull, size_t targetIndex) const; - - /** - * @brief Create a boolean, sparse adjacency matrix from the given edges. The - * edges are given as lists, where one list contains the start node of the - * edge and the other list contains the target node of the edge. - * Also create an IdMapping, which maps the given Ids to matrix indices. - * - * @param startCol Column from the IdTable, which contains edge start nodes - * @param targetCol Column from the IdTable, which contains edge target nodes - * @param numRows Number of rows in the IdTable - */ - std::tuple setupMatrix(std::span startCol, - std::span targetCol, - size_t numRows) const; - - /** - * @brief Create a boolean, sparse, adjacency matrix which holds the starting - * nodes for the transitive hull computation. 
- * - * @param startIds List of Ids where the transitive hull computation should - * start - * @param numRows Number of rows in the IdTable where startIds comes from - * @param mapping An IdMapping between Ids and matrix indices - * @return Matrix with one row for each start node - */ - GrbMatrix setupStartNodeMatrix(std::span startIds, size_t numRows, - IdMapping mapping) const; - - // Copy the columns from the input table to the output table - template - static void copyColumns(const IdTableView& inputTable, - IdTableStatic& outputTable, - size_t inputRow, size_t outputRow, size_t skipCol); - - // A small helper function: Insert the `value` to the set at `map[key]`. - // As the sets all have an allocator with memory limit, this construction is a - // little bit more involved, so this can be a separate helper function. - void insertIntoMap(Map& map, Id key, Id value) const; -}; From ecdf2a705cad91013a6d381c893f890573b77dc0 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 21 Feb 2024 08:46:06 +0100 Subject: [PATCH 33/92] Sonar fixes --- src/engine/GrbMatrix.cpp | 1 + src/engine/GrbMatrix.h | 2 - src/engine/TransitivePathBase.cpp | 17 ++++---- src/engine/TransitivePathBase.h | 54 ++++++++++++++++++++------ src/engine/TransitivePathFallback.cpp | 4 +- src/engine/TransitivePathFallback.h | 4 +- src/engine/TransitivePathGraphblas.cpp | 8 ++-- src/engine/TransitivePathGraphblas.h | 4 +- 8 files changed, 60 insertions(+), 34 deletions(-) diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp index 119df4afcc..1b6c1b19cf 100644 --- a/src/engine/GrbMatrix.cpp +++ b/src/engine/GrbMatrix.cpp @@ -158,6 +158,7 @@ GrbMatrix GrbMatrix::multiply(const GrbMatrix& otherMatrix) const { GrbMatrix result; auto info = GrB_Matrix_new(result.rawMatrix(), GrB_BOOL, resultNumRows, resultNumCols); + handleError(info); info = GrB_mxm(result.matrix(), GrB_NULL, GrB_NULL, GrB_LOR_LAND_SEMIRING_BOOL, diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h index 
4a463105b0..82268e612f 100644 --- a/src/engine/GrbMatrix.h +++ b/src/engine/GrbMatrix.h @@ -44,8 +44,6 @@ class GrbMatrix { GrbMatrix(const GrbMatrix&) = delete; GrbMatrix& operator=(const GrbMatrix&) = delete; - ~GrbMatrix() { GrB_Matrix_free(matrix_.get()); } - /** * @brief Create a matrix and fill it with the data of this matrix. * diff --git a/src/engine/TransitivePathBase.cpp b/src/engine/TransitivePathBase.cpp index 1cd3475b98..3c482d8316 100644 --- a/src/engine/TransitivePathBase.cpp +++ b/src/engine/TransitivePathBase.cpp @@ -188,8 +188,8 @@ size_t TransitivePathBase::getCostEstimate() { // _____________________________________________________________________________ std::shared_ptr TransitivePathBase::makeTransitivePath( QueryExecutionContext* qec, std::shared_ptr child, - TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, - size_t maxDist) { + const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, + size_t minDist, size_t maxDist) { bool useGraphblas = !RuntimeParameters().get<"use-graphblas">(); return makeTransitivePath(qec, child, leftSide, rightSide, minDist, maxDist, useGraphblas); @@ -198,14 +198,14 @@ std::shared_ptr TransitivePathBase::makeTransitivePath( // _____________________________________________________________________________ std::shared_ptr TransitivePathBase::makeTransitivePath( QueryExecutionContext* qec, std::shared_ptr child, - TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, - size_t maxDist, bool useGraphblas) { + const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, + size_t minDist, size_t maxDist, bool useGraphblas) { if (useGraphblas) { - return std::make_shared(TransitivePathGraphblas( - qec, child, leftSide, rightSide, minDist, maxDist)); + return std::make_shared( + qec, child, leftSide, rightSide, minDist, maxDist); } else { - return std::make_shared(TransitivePathFallback( - qec, child, leftSide, rightSide, minDist, maxDist)); + return 
std::make_shared( + qec, child, leftSide, rightSide, minDist, maxDist); } } @@ -257,7 +257,6 @@ std::shared_ptr TransitivePathBase::bindLeftOrRightSide( columnIndexWithType.columnIndex_ += columnIndex > inputCol ? 1 : 2; p->variableColumns_[variable] = columnIndexWithType; - // p->resultWidth_++; } return p; } diff --git a/src/engine/TransitivePathBase.h b/src/engine/TransitivePathBase.h index d9658a95b5..cab94233a2 100644 --- a/src/engine/TransitivePathBase.h +++ b/src/engine/TransitivePathBase.h @@ -105,40 +105,70 @@ class TransitivePathBase : public Operation { const TransitivePathSide& getRight() const { return rhs_; } protected: - virtual std::string getCacheKeyImpl() const override; + std::string getCacheKeyImpl() const override; public: // virtual void computeTransitivePath( // IdTable* res, const IdTable& sub, const TransitivePathSide& startSide, // const TransitivePathSide& targetSide) const = 0; - virtual std::string getDescriptor() const override; + std::string getDescriptor() const override; - virtual size_t getResultWidth() const override; + size_t getResultWidth() const override; - virtual vector resultSortedOn() const override; + vector resultSortedOn() const override; - virtual void setTextLimit(size_t limit) override; + void setTextLimit(size_t limit) override; - virtual bool knownEmptyResult() override; + bool knownEmptyResult() override; - virtual float getMultiplicity(size_t col) override; + float getMultiplicity(size_t col) override; private: uint64_t getSizeEstimateBeforeLimit() override; public: - virtual size_t getCostEstimate() override; + size_t getCostEstimate() override; + /** + * @brief Make a concrete TransitivePath object using the given parameters. + * The concrete object will either be TransitivePathBase or + * TransitivePathGraphblas, depending on the useGraphblas flag. 
+ * + * @param qec QueryExecutionContext for the TransitivePath Operation + * @param child QueryExecutionTree for the subquery of the TransitivePath + * @param leftSide Settings for the left side of the TransitivePath + * @param rightSide Settings for the right side of the TransitivePath + * @param minDist Minimum distance a resulting path may have (distance = + * number of nodes) + * @param maxDist Maximum distance a resulting path may have (distance = + * number of nodes) + * @param useGraphblas If true, the returned object will be a + * TransitivePathGraphblas. Else it will be a TransitivePathFallback + */ static std::shared_ptr makeTransitivePath( QueryExecutionContext* qec, std::shared_ptr child, - TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, - size_t maxDist, bool useGraphblas); + const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, + size_t minDist, size_t maxDist, bool useGraphblas); + /** + * @brief Make a concrete TransitivePath object using the given parameters. + * The concrete object will either be TransitivePathBase or + * TransitivePathGraphblas, depending on the runtime constant "use-graphblas". 
+ * + * @param qec QueryExecutionContext for the TransitivePath Operation + * @param child QueryExecutionTree for the subquery of the TransitivePath + * @param leftSide Settings for the left side of the TransitivePath + * @param rightSide Settings for the right side of the TransitivePath + * @param minDist Minimum distance a resulting path may have (distance = + * number of nodes) + * @param maxDist Maximum distance a resulting path may have (distance = + * number of nodes) + */ static std::shared_ptr makeTransitivePath( QueryExecutionContext* qec, std::shared_ptr child, - TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, - size_t maxDist); + const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, + size_t minDist, size_t maxDist); vector getChildren() override { std::vector res; diff --git a/src/engine/TransitivePathFallback.cpp b/src/engine/TransitivePathFallback.cpp index d68654867d..74de68705a 100644 --- a/src/engine/TransitivePathFallback.cpp +++ b/src/engine/TransitivePathFallback.cpp @@ -17,8 +17,8 @@ // _____________________________________________________________________________ TransitivePathFallback::TransitivePathFallback( QueryExecutionContext* qec, std::shared_ptr child, - TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, - size_t maxDist) + const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, + size_t minDist, size_t maxDist) : TransitivePathBase(qec, child, leftSide, rightSide, minDist, maxDist) {} // _____________________________________________________________________________ diff --git a/src/engine/TransitivePathFallback.h b/src/engine/TransitivePathFallback.h index b2a4429e6d..9c2e1c9e11 100644 --- a/src/engine/TransitivePathFallback.h +++ b/src/engine/TransitivePathFallback.h @@ -27,8 +27,8 @@ class TransitivePathFallback : public TransitivePathBase { public: TransitivePathFallback(QueryExecutionContext* qec, std::shared_ptr child, - TransitivePathSide 
leftSide, - TransitivePathSide rightSide, size_t minDist, + const TransitivePathSide& leftSide, + const TransitivePathSide& rightSide, size_t minDist, size_t maxDist); /** diff --git a/src/engine/TransitivePathGraphblas.cpp b/src/engine/TransitivePathGraphblas.cpp index b96cf9ded6..8bdea13c6a 100644 --- a/src/engine/TransitivePathGraphblas.cpp +++ b/src/engine/TransitivePathGraphblas.cpp @@ -221,8 +221,6 @@ GrbMatrix TransitivePathGraphblas::transitiveHull( size_t nvals = result.numNonZero(); while (nvals > previousNvals && pathLength < maxDist_) { previousNvals = result.numNonZero(); - // TODO: Check effect of matrix orientation (Row major, Column major) on - // performance. result.accumulateMultiply(graph); checkCancellation(); nvals = result.numNonZero(); @@ -304,9 +302,9 @@ void TransitivePathGraphblas::fillTableWithHull( } // _____________________________________________________________________________ -GrbMatrix TransitivePathGraphblas::getTargetRow(GrbMatrix& hull, +GrbMatrix TransitivePathGraphblas::getTargetRow(const GrbMatrix& hull, size_t targetIndex) const { - GrbMatrix transformer = GrbMatrix(hull.numCols(), hull.numCols()); + auto transformer = GrbMatrix(hull.numCols(), hull.numCols()); transformer.setElement(targetIndex, targetIndex, true); return hull.multiply(transformer); } @@ -341,7 +339,7 @@ GrbMatrix TransitivePathGraphblas::setupStartNodeMatrix( // stardIds.size() is the maximum possible number of columns for the // startMatrix, but if some start node does not have a link in the graph it // will be skipped, resulting in a zero column at the end of the startMatrix - GrbMatrix startMatrix = GrbMatrix(startIds.size(), numCols); + auto startMatrix = GrbMatrix(startIds.size(), numCols); size_t rowIndex = 0; for (Id id : startIds) { if (!mapping.contains(id)) { diff --git a/src/engine/TransitivePathGraphblas.h b/src/engine/TransitivePathGraphblas.h index 5bfba6b30f..65c1b60800 100644 --- a/src/engine/TransitivePathGraphblas.h +++ 
b/src/engine/TransitivePathGraphblas.h @@ -15,7 +15,7 @@ // This struct keeps track of the mapping between Ids and matrix indices struct IdMapping { - bool contains(Id id) { return idMap_.contains(id); } + bool contains(Id id) const { return idMap_.contains(id); } size_t addId(Id id) { if (!idMap_.contains(id)) { @@ -178,7 +178,7 @@ class TransitivePathGraphblas : public TransitivePathBase { size_t startSideCol, size_t targetSideCol, size_t skipCol); - GrbMatrix getTargetRow(GrbMatrix& hull, size_t targetIndex) const; + GrbMatrix getTargetRow(const GrbMatrix& hull, size_t targetIndex) const; /** * @brief Create a boolean, sparse adjacency matrix from the given edges. The From 12f0e3dd4284748424cfe57137b2a9fd8a0ea38e Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 21 Feb 2024 09:10:52 +0100 Subject: [PATCH 34/92] Add timings to runtime info --- src/engine/TransitivePathFallback.cpp | 22 ++++++++-------------- src/engine/TransitivePathGraphblas.cpp | 22 ++++++++-------------- 2 files changed, 16 insertions(+), 28 deletions(-) diff --git a/src/engine/TransitivePathFallback.cpp b/src/engine/TransitivePathFallback.cpp index 74de68705a..d5d67714eb 100644 --- a/src/engine/TransitivePathFallback.cpp +++ b/src/engine/TransitivePathFallback.cpp @@ -56,13 +56,10 @@ void TransitivePathFallback::computeTransitivePathBound( timer.stop(); auto fillTime = timer.msecs(); - LOG(DEBUG) << "Fallback Timing measurements:" << std::endl; - LOG(DEBUG) << "Initialization time: " << std::to_string(initTime.count()) - << "ms" << std::endl; - LOG(DEBUG) << "Hull computation time: " << std::to_string(hullTime.count()) - << "ms" << std::endl; - LOG(DEBUG) << "IdTable fill time: " << std::to_string(fillTime.count()) - << "ms" << std::endl; + auto& info = runtimeInfo(); + info.addDetail("Initialization time", initTime.count()); + info.addDetail("Hull time", hullTime.count()); + info.addDetail("IdTable fill time", fillTime.count()); *dynRes = std::move(res).toDynamic(); } @@ -101,13 
+98,10 @@ void TransitivePathFallback::computeTransitivePath( timer.stop(); auto fillTime = timer.msecs(); - LOG(DEBUG) << "Fallback Timing measurements:" << std::endl; - LOG(DEBUG) << "Initialization time: " << std::to_string(initTime.count()) - << "ms" << std::endl; - LOG(DEBUG) << "Hull computation time: " << std::to_string(hullTime.count()) - << "ms" << std::endl; - LOG(DEBUG) << "IdTable fill time: " << std::to_string(fillTime.count()) - << "ms" << std::endl; + auto& info = runtimeInfo(); + info.addDetail("Initialization time", initTime.count()); + info.addDetail("Hull time", hullTime.count()); + info.addDetail("IdTable fill time", fillTime.count()); *dynRes = std::move(res).toDynamic(); } diff --git a/src/engine/TransitivePathGraphblas.cpp b/src/engine/TransitivePathGraphblas.cpp index 8bdea13c6a..8eb2b93e10 100644 --- a/src/engine/TransitivePathGraphblas.cpp +++ b/src/engine/TransitivePathGraphblas.cpp @@ -68,13 +68,10 @@ void TransitivePathGraphblas::computeTransitivePathBound( timer.stop(); auto fillTime = timer.msecs(); - LOG(DEBUG) << "GraphBLAS Timing measurements:" << std::endl; - LOG(DEBUG) << "Initialization time: " << std::to_string(initTime.count()) - << "ms" << std::endl; - LOG(DEBUG) << "Hull computation time: " << std::to_string(hullTime.count()) - << "ms" << std::endl; - LOG(DEBUG) << "IdTable fill time: " << std::to_string(fillTime.count()) - << "ms" << std::endl; + auto& info = runtimeInfo(); + info.addDetail("Initialization time", initTime.count()); + info.addDetail("Hull time", hullTime.count()); + info.addDetail("IdTable fill time", fillTime.count()); *dynRes = std::move(res).toDynamic(); } @@ -133,13 +130,10 @@ void TransitivePathGraphblas::computeTransitivePath( timer.stop(); auto fillTime = timer.msecs(); - LOG(DEBUG) << "GraphBLAS Timing measurements:" << std::endl; - LOG(DEBUG) << "Initialization time: " << std::to_string(initTime.count()) - << "ms" << std::endl; - LOG(DEBUG) << "Hull computation time: " << 
std::to_string(hullTime.count()) - << "ms" << std::endl; - LOG(DEBUG) << "IdTable fill time: " << std::to_string(fillTime.count()) - << "ms" << std::endl; + auto& info = runtimeInfo(); + info.addDetail("Initialization time", initTime.count()); + info.addDetail("Hull time", hullTime.count()); + info.addDetail("IdTable fill time", fillTime.count()); *dynRes = std::move(res).toDynamic(); } From ed6dc70ce13e92e534f15f429cb3645e7702260a Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Wed, 21 Feb 2024 10:29:58 +0100 Subject: [PATCH 35/92] Some fixes. --- src/engine/ExportQueryExecutionTrees.cpp | 5 +++++ src/engine/TransitivePathBase.cpp | 4 +++- src/engine/TransitivePathGraphblas.cpp | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 71a5bf407d..164637e0aa 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -117,6 +117,11 @@ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( continue; } const auto& currentId = data(rowIndex, opt->columnIndex_); + auto& local = resultTable->localVocab(); + auto sz = local.size(); + LOG(INFO) << "Local size " << sz << std::endl; + auto* qec = qet.getQec(); + LOG(INFO) << qec->getIndex().getVocab().size() << "end of bla size" << std::endl; const auto& optionalStringAndXsdType = idToStringAndType( qet.getQec()->getIndex(), currentId, resultTable->localVocab()); if (!optionalStringAndXsdType.has_value()) { diff --git a/src/engine/TransitivePathBase.cpp b/src/engine/TransitivePathBase.cpp index 3c482d8316..bb056df236 100644 --- a/src/engine/TransitivePathBase.cpp +++ b/src/engine/TransitivePathBase.cpp @@ -190,7 +190,7 @@ std::shared_ptr TransitivePathBase::makeTransitivePath( QueryExecutionContext* qec, std::shared_ptr child, const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, size_t minDist, size_t maxDist) { - bool useGraphblas = 
!RuntimeParameters().get<"use-graphblas">(); + bool useGraphblas = RuntimeParameters().get<"use-graphblas">(); return makeTransitivePath(qec, child, leftSide, rightSide, minDist, maxDist, useGraphblas); } @@ -256,7 +256,9 @@ std::shared_ptr TransitivePathBase::bindLeftOrRightSide( columnIndexWithType.columnIndex_ += columnIndex > inputCol ? 1 : 2; + AD_CORRECTNESS_CHECK(!p->variableColumns_.contains(variable)); p->variableColumns_[variable] = columnIndexWithType; + p->resultWidth_++; } return p; } diff --git a/src/engine/TransitivePathGraphblas.cpp b/src/engine/TransitivePathGraphblas.cpp index 8eb2b93e10..5f726a23ae 100644 --- a/src/engine/TransitivePathGraphblas.cpp +++ b/src/engine/TransitivePathGraphblas.cpp @@ -322,8 +322,10 @@ std::tuple TransitivePathGraphblas::setupMatrix( checkCancellation(); } + ad_utility::Timer t{ad_utility::Timer::Started}; auto matrix = GrbMatrix::build(rowIndices, colIndices, mapping.size(), mapping.size()); + runtimeInfo().addDetail("matrix-build-time-graphblas", t.msecs().count()); return {std::move(matrix), std::move(mapping)}; } From dd8c8416a8524cbef6107f7848b7e7dd2a858ca8 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Fri, 8 Mar 2024 13:31:58 +0100 Subject: [PATCH 36/92] Added binary search, updated tests --- src/engine/CMakeLists.txt | 2 +- src/engine/TransitivePathBase.cpp | 10 +- src/engine/TransitivePathBase.h | 6 +- src/engine/TransitivePathBinSearch.cpp | 353 +++++++++++++++++++++++++ src/engine/TransitivePathBinSearch.h | 239 +++++++++++++++++ src/engine/TransitivePathFallback.h | 4 - src/engine/TransitivePathGraphblas.h | 4 - src/global/Constants.h | 1 + test/TransitivePathTest.cpp | 148 +++++++---- 9 files changed, 702 insertions(+), 65 deletions(-) create mode 100644 src/engine/TransitivePathBinSearch.cpp create mode 100644 src/engine/TransitivePathBinSearch.h diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 1b5c4b016a..e16e00a210 100644 --- a/src/engine/CMakeLists.txt +++ 
b/src/engine/CMakeLists.txt @@ -8,7 +8,7 @@ add_library(engine Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan.cpp Union.cpp MultiColumnJoin.cpp TransitivePathBase.cpp TransitivePathFallback.cpp - TransitivePathGraphblas.cpp Service.cpp + TransitivePathGraphblas.cpp TransitivePathBinSearch.cpp Service.cpp Values.cpp Bind.cpp Minus.cpp RuntimeInformation.cpp CheckUsePatternTrick.cpp VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp diff --git a/src/engine/TransitivePathBase.cpp b/src/engine/TransitivePathBase.cpp index bb056df236..6c2e9297c7 100644 --- a/src/engine/TransitivePathBase.cpp +++ b/src/engine/TransitivePathBase.cpp @@ -11,8 +11,10 @@ #include "engine/ExportQueryExecutionTrees.h" #include "engine/IndexScan.h" +#include "engine/TransitivePathBinSearch.h" #include "engine/TransitivePathFallback.h" #include "engine/TransitivePathGraphblas.h" +#include "global/Constants.h" #include "util/Exception.h" // _____________________________________________________________________________ @@ -191,18 +193,22 @@ std::shared_ptr TransitivePathBase::makeTransitivePath( const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, size_t minDist, size_t maxDist) { bool useGraphblas = RuntimeParameters().get<"use-graphblas">(); + bool useBinSearch = RuntimeParameters().get<"use-binsearch">(); return makeTransitivePath(qec, child, leftSide, rightSide, minDist, maxDist, - useGraphblas); + useGraphblas, useBinSearch); } // _____________________________________________________________________________ std::shared_ptr TransitivePathBase::makeTransitivePath( QueryExecutionContext* qec, std::shared_ptr child, const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, - size_t minDist, size_t maxDist, bool useGraphblas) { + size_t minDist, size_t maxDist, bool useGraphblas, bool useBinSearch) { 
if (useGraphblas) { return std::make_shared( qec, child, leftSide, rightSide, minDist, maxDist); + } else if (useBinSearch) { + return std::make_shared( + qec, child, leftSide, rightSide, minDist, maxDist); } else { return std::make_shared( qec, child, leftSide, rightSide, minDist, maxDist); diff --git a/src/engine/TransitivePathBase.h b/src/engine/TransitivePathBase.h index cab94233a2..a55ab724e3 100644 --- a/src/engine/TransitivePathBase.h +++ b/src/engine/TransitivePathBase.h @@ -108,10 +108,6 @@ class TransitivePathBase : public Operation { std::string getCacheKeyImpl() const override; public: - // virtual void computeTransitivePath( - // IdTable* res, const IdTable& sub, const TransitivePathSide& startSide, - // const TransitivePathSide& targetSide) const = 0; - std::string getDescriptor() const override; size_t getResultWidth() const override; @@ -149,7 +145,7 @@ class TransitivePathBase : public Operation { static std::shared_ptr makeTransitivePath( QueryExecutionContext* qec, std::shared_ptr child, const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, - size_t minDist, size_t maxDist, bool useGraphblas); + size_t minDist, size_t maxDist, bool useGraphblas, bool useBinSearch); /** * @brief Make a concrete TransitivePath object using the given parameters. diff --git a/src/engine/TransitivePathBinSearch.cpp b/src/engine/TransitivePathBinSearch.cpp new file mode 100644 index 0000000000..a64899a5fd --- /dev/null +++ b/src/engine/TransitivePathBinSearch.cpp @@ -0,0 +1,353 @@ +// Copyright 2019, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) + +#include "TransitivePathBinSearch.h" + +#include +#include +#include +#include +#include + +#include "engine/CallFixedSize.h" +#include "engine/TransitivePathBase.h" +#include "util/Exception.h" +#include "util/Timer.h" + +// _____________________________________________________________________________ +TransitivePathBinSearch::TransitivePathBinSearch( + QueryExecutionContext* qec, std::shared_ptr child, + const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, + size_t minDist, size_t maxDist) + : TransitivePathBase(qec, child, leftSide, rightSide, minDist, maxDist) {} + +// _____________________________________________________________________________ +template +void TransitivePathBinSearch::computeTransitivePathBound( + IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, const IdTable& startSideTable) const { + IdTableStatic res = std::move(*dynRes).toStatic(); + + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + + auto [edges, nodes] = setupMapAndNodes( + dynSub, startSide, targetSide, startSideTable); + + timer.stop(); + auto initTime = timer.msecs(); + timer.start(); + + Map hull(allocator()); + if (!targetSide.isVariable()) { + hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); + } else { + hull = transitiveHull(edges, nodes, std::nullopt); + } + + timer.stop(); + auto hullTime = timer.msecs(); + timer.start(); + + TransitivePathBinSearch::fillTableWithHull( + res, hull, nodes, startSide.outputCol_, targetSide.outputCol_, + startSideTable, startSide.treeAndCol_.value().second); + + timer.stop(); + auto fillTime = timer.msecs(); + + auto& info = runtimeInfo(); + info.addDetail("Initialization time", initTime.count()); + info.addDetail("Hull time", hullTime.count()); + info.addDetail("IdTable fill time", fillTime.count()); + + *dynRes = std::move(res).toDynamic(); +} 
+ +// _____________________________________________________________________________ +template +void TransitivePathBinSearch::computeTransitivePath( + IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + IdTableStatic res = std::move(*dynRes).toStatic(); + + auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); + timer.start(); + + auto [edges, nodes] = + setupMapAndNodes(dynSub, startSide, targetSide); + + timer.stop(); + auto initTime = timer.msecs(); + timer.start(); + + Map hull{allocator()}; + if (!targetSide.isVariable()) { + hull = transitiveHull(edges, nodes, std::get(targetSide.value_)); + } else { + hull = transitiveHull(edges, nodes, std::nullopt); + } + + timer.stop(); + auto hullTime = timer.msecs(); + timer.start(); + + TransitivePathBinSearch::fillTableWithHull( + res, hull, startSide.outputCol_, targetSide.outputCol_); + + timer.stop(); + auto fillTime = timer.msecs(); + + auto& info = runtimeInfo(); + info.addDetail("Initialization time", initTime.count()); + info.addDetail("Hull time", hullTime.count()); + info.addDetail("IdTable fill time", fillTime.count()); + + *dynRes = std::move(res).toDynamic(); +} + +// _____________________________________________________________________________ +std::pair +TransitivePathBinSearch::decideDirection() { + if (lhs_.isBoundVariable()) { + LOG(DEBUG) << "Computing TransitivePath left to right" << std::endl; + return {lhs_, rhs_}; + } else if (rhs_.isBoundVariable() || !rhs_.isVariable()) { + LOG(DEBUG) << "Computing TransitivePath right to left" << std::endl; + return {rhs_, lhs_}; + } + LOG(DEBUG) << "Computing TransitivePath left to right" << std::endl; + return {lhs_, rhs_}; +} + +// _____________________________________________________________________________ +ResultTable TransitivePathBinSearch::computeResult() { + if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && + rhs_.isVariable()) { + AD_THROW( + "This query might 
have to evalute the empty path, which is currently " + "not supported"); + } + auto [startSide, targetSide] = decideDirection(); + subtree_ = QueryExecutionTree::createSortedTree( + subtree_, {startSide.subCol_, targetSide.subCol_}); + shared_ptr subRes = subtree_->getResult(); + + IdTable idTable{allocator()}; + + idTable.setNumColumns(getResultWidth()); + + size_t subWidth = subRes->idTable().numColumns(); + + if (startSide.isBoundVariable()) { + shared_ptr sideRes = + startSide.treeAndCol_.value().first->getResult(); + size_t sideWidth = sideRes->idTable().numColumns(); + + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), + &TransitivePathBinSearch::computeTransitivePathBound, this, + &idTable, subRes->idTable(), startSide, targetSide, + sideRes->idTable()); + + return {std::move(idTable), resultSortedOn(), + ResultTable::getSharedLocalVocabFromNonEmptyOf(*sideRes, *subRes)}; + } + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePathBinSearch::computeTransitivePath, this, + &idTable, subRes->idTable(), startSide, targetSide); + + // NOTE: The only place, where the input to a transitive path operation is not + // an index scan (which has an empty local vocabulary by default) is the + // `LocalVocabTest`. But it doesn't harm to propagate the local vocab here + // either. 
+ return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; +} + +// _____________________________________________________________________________ +TransitivePathBinSearch::Map TransitivePathBinSearch::transitiveHull( + const BinSearchMap& edges, const std::vector& startNodes, + std::optional target) const { + // For every node do a dfs on the graph + Map hull{allocator()}; + + std::vector> stack; + ad_utility::HashSetWithMemoryLimit marks{ + getExecutionContext()->getAllocator()}; + for (auto startNode : startNodes) { + if (hull.contains(startNode)) { + // We have already computed the hull for this node + continue; + } + + marks.clear(); + stack.clear(); + stack.push_back({startNode, 0}); + + if (minDist_ == 0 && (!target.has_value() || startNode == target.value())) { + insertIntoMap(hull, startNode, startNode); + } + + while (stack.size() > 0) { + // auto [node, steps] = stack.back(); + auto pair = stack.back(); + auto node = pair.first; + auto steps = pair.second; + stack.pop_back(); + + if (steps <= maxDist_ && marks.count(node) == 0) { + if (steps >= minDist_) { + marks.insert(node); + if (!target.has_value() || node == target.value()) { + insertIntoMap(hull, startNode, node); + } + } + + auto successors = edges.successors(node); + for (auto successor : successors) { + stack.push_back({successor, steps + 1}); + } + } + } + } + return hull; +} + +// _____________________________________________________________________________ +template +void TransitivePathBinSearch::fillTableWithHull( + IdTableStatic& table, const Map& hull, std::vector& nodes, + size_t startSideCol, size_t targetSideCol, const IdTable& startSideTable, + size_t skipCol) { + IdTableView startView = + startSideTable.asStaticView(); + + size_t rowIndex = 0; + for (size_t i = 0; i < nodes.size(); i++) { + Id node = nodes[i]; + auto it = hull.find(node); + if (it == hull.end()) { + continue; + } + + for (Id otherNode : it->second) { + table.emplace_back(); + table(rowIndex, 
startSideCol) = node; + table(rowIndex, targetSideCol) = otherNode; + + TransitivePathBinSearch::copyColumns( + startView, table, i, rowIndex, skipCol); + + rowIndex++; + } + } +} + +// _____________________________________________________________________________ +template +void TransitivePathBinSearch::fillTableWithHull(IdTableStatic& table, + const Map& hull, + size_t startSideCol, + size_t targetSideCol) { + size_t rowIndex = 0; + for (auto const& [node, linkedNodes] : hull) { + for (Id linkedNode : linkedNodes) { + table.emplace_back(); + table(rowIndex, startSideCol) = node; + table(rowIndex, targetSideCol) = linkedNode; + + rowIndex++; + } + } +} + +// _____________________________________________________________________________ +template +std::pair> +TransitivePathBinSearch::setupMapAndNodes(const IdTable& sub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const { + std::vector nodes; + auto edges = setupEdgesMap(sub, startSide, targetSide); + + // Bound -> var|id + std::span startNodes = setupNodes( + startSideTable, startSide.treeAndCol_.value().second); + nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); + + return {std::move(edges), std::move(nodes)}; +} + +// _____________________________________________________________________________ +template +std::pair> +TransitivePathBinSearch::setupMapAndNodes( + const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + std::vector nodes; + auto edges = setupEdgesMap(sub, startSide, targetSide); + + // id -> var|id + if (!startSide.isVariable()) { + nodes.push_back(std::get(startSide.value_)); + // var -> var + } else { + std::span startNodes = + setupNodes(sub, startSide.subCol_); + nodes.insert(nodes.end(), startNodes.begin(), startNodes.end()); + if (minDist_ == 0) { + std::span targetNodes = + setupNodes(sub, targetSide.subCol_); + nodes.insert(nodes.end(), targetNodes.begin(), 
targetNodes.end()); + } + } + + return {std::move(edges), std::move(nodes)}; +} + +// _____________________________________________________________________________ +template +BinSearchMap TransitivePathBinSearch::setupEdgesMap( + const IdTable& dynSub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const { + const IdTableView sub = dynSub.asStaticView(); + decltype(auto) startCol = sub.getColumn(startSide.subCol_); + decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); + BinSearchMap edges{startCol, targetCol}; + + return edges; +} + +// _____________________________________________________________________________ +template +std::span TransitivePathBinSearch::setupNodes(const IdTable& table, + size_t col) { + return table.getColumn(col); +} + +// _____________________________________________________________________________ +template +void TransitivePathBinSearch::copyColumns( + const IdTableView& inputTable, + IdTableStatic& outputTable, size_t inputRow, size_t outputRow, + size_t skipCol) { + size_t inCol = 0; + size_t outCol = 2; + while (inCol < inputTable.numColumns() && outCol < outputTable.numColumns()) { + if (skipCol == inCol) { + inCol++; + continue; + } + + outputTable(outputRow, outCol) = inputTable(inputRow, inCol); + inCol++; + outCol++; + } +} + +// _____________________________________________________________________________ +void TransitivePathBinSearch::insertIntoMap(Map& map, Id key, Id value) const { + auto [it, success] = map.try_emplace(key, allocator()); + it->second.insert(value); +} diff --git a/src/engine/TransitivePathBinSearch.h b/src/engine/TransitivePathBinSearch.h new file mode 100644 index 0000000000..4a7ef33928 --- /dev/null +++ b/src/engine/TransitivePathBinSearch.h @@ -0,0 +1,239 @@ +// Copyright 2019, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) + +#pragma once + +#include + +#include +#include +#include + +#include "TransitivePathBase.h" +#include "engine/Operation.h" +#include "engine/QueryExecutionTree.h" +#include "engine/idTable/IdTable.h" + +struct BinSearchMap { + std::span startIds_; + std::span targetIds_; + + std::span successors(const Id node) const { + // auto startIt = std::lower_bound(startIds_.begin(), startIds_.end(), + // node); auto endIt = std::upper_bound(startIds_.begin(), startIds_.end(), + // node); + // + // auto startIndex = std::distance(startIds_.begin(), startIt); + // auto endIndex = std::distance(startIds_.begin(), endIt); + // + // return targetIds_.subspan(startIndex, endIndex - startIndex); + + auto range = std::ranges::equal_range(startIds_, node); + + auto startIndex = std::distance(startIds_.begin(), range.begin()); + + return targetIds_.subspan(startIndex, range.size()); + } +}; + +class TransitivePathBinSearch : public TransitivePathBase { + // We deliberately use the `std::` variants of a hash set and hash map because + // `absl`s types are not exception safe. + constexpr static auto hash = [](Id id) { + return std::hash{}(id.getBits()); + }; + using Set = std::unordered_set, + ad_utility::AllocatorWithLimit>; + using Map = std::unordered_map< + Id, Set, decltype(hash), std::equal_to, + ad_utility::AllocatorWithLimit>>; + + public: + TransitivePathBinSearch(QueryExecutionContext* qec, + std::shared_ptr child, + const TransitivePathSide& leftSide, + const TransitivePathSide& rightSide, size_t minDist, + size_t maxDist); + + /** + * @brief Compute the transitive hull with a bound side. + * This function is called when the startSide is bound and + * it is a variable. The other IdTable contains the result + * of the start side and will be used to get the start nodes. 
+ * + * @tparam RES_WIDTH Number of columns of the result table + * @tparam SUB_WIDTH Number of columns of the sub table + * @tparam SIDE_WIDTH Number of columns of the + * @param res The result table which will be filled in-place + * @param sub The IdTable for the sub result + * @param startSide The start side for the transitive hull + * @param targetSide The target side for the transitive hull + * @param startSideTable The IdTable of the startSide + */ + + template + void computeTransitivePathBound(IdTable* res, const IdTable& sub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const; + + /** + * @brief Compute the transitive hull. + * This function is called when no side is bound (or an id). + * + * @tparam RES_WIDTH Number of columns of the result table + * @tparam SUB_WIDTH Number of columns of the sub table + * @param res The result table which will be filled in-place + * @param sub The IdTable for the sub result + * @param startSide The start side for the transitive hull + * @param targetSide The target side for the transitive hull + */ + + template + void computeTransitivePath(IdTable* res, const IdTable& sub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const; + + private: + /** + * @brief Decide on which transitive path side the hull computation should + * start and where it should end. The start and target side are chosen by + * the following criteria: + * + * 1. If a side is bound, then this side will be the start side. + * 2. If a side is an id, then this side will be the start side. + * 3. If both sides are variables, the left side is chosen as start + * (arbitrarily). + * + * @return std::pair The first entry + * of the pair is the start side, the second entry is the target side. 
+ */ + std::pair decideDirection(); + + /** + * @brief Compute the result for this TransitivePath operation + * This function chooses the start and target side for the transitive + * hull computation. This choice of the start side has a large impact + * on the time it takes to compute the hull. The set of nodes on the + * start side should be as small as possible. + * + * @return ResultTable The result of the TransitivePath operation + */ + ResultTable computeResult() override; + + /** + * @brief Compute the transitive hull starting at the given nodes, + * using the given Map. + * + * @param edges Adjacency lists, mapping Ids (nodes) to their connected + * Ids. + * @param nodes A list of Ids. These Ids are used as starting points for the + * transitive hull. Thus, this parameter guides the performance of this + * algorithm. + * @param target Optional target Id. If supplied, only paths which end + * in this Id are added to the hull. + * @return Map Maps each Id to its connected Ids in the transitive hull + */ + Map transitiveHull(const BinSearchMap& edges, + const std::vector& startNodes, + std::optional target) const; + + /** + * @brief Fill the given table with the transitive hull and use the + * startSideTable to fill in the rest of the columns. + * This function is called if the start side is bound and a variable. + * + * @tparam WIDTH The number of columns of the result table. + * @tparam START_WIDTH The number of columns of the start table. + * @param table The result table which will be filled. + * @param hull The transitive hull. + * @param nodes The start nodes of the transitive hull. These need to be in + * the same order and amount as the starting side nodes in the startTable. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + * @param startSideTable An IdTable that holds other results. 
The other + * results will be transferred to the new result table. + * @param skipCol This column contains the Ids of the start side in the + * startSideTable and will be skipped. + */ + template + static void fillTableWithHull(IdTableStatic& table, const Map& hull, + std::vector& nodes, size_t startSideCol, + size_t targetSideCol, + const IdTable& startSideTable, size_t skipCol); + + /** + * @brief Fill the given table with the transitive hull. + * This function is called if the sides are unbound or ids. + * + * @tparam WIDTH The number of columns of the result table. + * @param table The result table which will be filled. + * @param hull The transitive hull. + * @param startSideCol The column of the result table for the startSide of the + * hull + * @param targetSideCol The column of the result table for the targetSide of + * the hull + */ + template + static void fillTableWithHull(IdTableStatic& table, const Map& hull, + size_t startSideCol, size_t targetSideCol); + + /** + * @brief Prepare a Map and a nodes vector for the transitive hull + * computation. + * + * @tparam SUB_WIDTH Number of columns of the sub table + * @tparam SIDE_WIDTH Number of columns of the startSideTable + * @param sub The sub table result + * @param startSide The TransitivePathSide where the edges start + * @param targetSide The TransitivePathSide where the edges end + * @param startSideTable An IdTable containing the Ids for the startSide + * @return std::pair> A Map and Id vector (nodes) for the + * transitive hull computation + */ + template + std::pair> setupMapAndNodes( + const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide, + const IdTable& startSideTable) const; + + /** + * @brief Prepare a Map and a nodes vector for the transitive hull + * computation. 
+ * + * @tparam SUB_WIDTH Number of columns of the sub table + * @param sub The sub table result + * @param startSide The TransitivePathSide where the edges start + * @param targetSide The TransitivePathSide where the edges end + * @return std::pair> A Map and Id vector (nodes) for the + * transitive hull computation + */ + template + std::pair> setupMapAndNodes( + const IdTable& sub, const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const; + + // initialize the map from the subresult + template + BinSearchMap setupEdgesMap(const IdTable& dynSub, + const TransitivePathSide& startSide, + const TransitivePathSide& targetSide) const; + + // initialize a vector for the starting nodes (Ids) + template + static std::span setupNodes(const IdTable& table, size_t col); + + // Copy the columns from the input table to the output table + template + static void copyColumns(const IdTableView& inputTable, + IdTableStatic& outputTable, + size_t inputRow, size_t outputRow, size_t skipCol); + + // A small helper function: Insert the `value` to the set at `map[key]`. + // As the sets all have an allocator with memory limit, this construction is a + // little bit more involved, so this can be a separate helper function. 
+ void insertIntoMap(Map& map, Id key, Id value) const; +}; diff --git a/src/engine/TransitivePathFallback.h b/src/engine/TransitivePathFallback.h index 9c2e1c9e11..75680ead86 100644 --- a/src/engine/TransitivePathFallback.h +++ b/src/engine/TransitivePathFallback.h @@ -70,10 +70,6 @@ class TransitivePathFallback : public TransitivePathBase { const TransitivePathSide& startSide, const TransitivePathSide& targetSide) const; - // void computeTransitivePath( - // IdTable* res, const IdTable& sub, const TransitivePathSide& startSide, - // const TransitivePathSide& targetSide) const override; - private: /** * @brief Compute the result for this TransitivePath operation diff --git a/src/engine/TransitivePathGraphblas.h b/src/engine/TransitivePathGraphblas.h index 65c1b60800..2a37107439 100644 --- a/src/engine/TransitivePathGraphblas.h +++ b/src/engine/TransitivePathGraphblas.h @@ -82,10 +82,6 @@ class TransitivePathGraphblas : public TransitivePathBase { const TransitivePathSide& startSide, const TransitivePathSide& targetSide) const; - // void computeTransitivePath( - // IdTable* res, const IdTable& sub, const TransitivePathSide& startSide, - // const TransitivePathSide& targetSide) const override; - private: /** * @brief Compute the result for this TransitivePath operation diff --git a/src/global/Constants.h b/src/global/Constants.h index 284c13f8f9..8122fc7624 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -240,6 +240,7 @@ inline auto& RuntimeParameters() { 30s}), SizeT<"lazy-index-scan-max-size-materialization">{1'000'000}, Bool<"use-graphblas">{false}, + Bool<"use-binsearch">{true}, Bool<"use-group-by-hash-map-optimization">{false}}; }(); return params; diff --git a/test/TransitivePathTest.cpp b/test/TransitivePathTest.cpp index ced9ab1379..ef69e51b07 100644 --- a/test/TransitivePathTest.cpp +++ b/test/TransitivePathTest.cpp @@ -4,15 +4,20 @@ #include +#include + #include "./IndexTestHelpers.h" #include "./util/AllocatorTestHelpers.h" #include 
"./util/IdTestHelpers.h" +#include "engine/QueryExecutionTree.h" #include "engine/TransitivePathBase.h" +#include "engine/ValuesForTesting.h" using ad_utility::testing::getQec; using ad_utility::testing::makeAllocator; namespace { auto V = ad_utility::testing::VocabId; +using Vars = std::vector>; // First sort both of the inputs and then ASSERT their equality. Needed for // results of the TransitivePath operations which have a non-deterministic order @@ -36,6 +41,17 @@ void assertSameUnorderedContent(const IdTable& a, const IdTable& b) { } } // namespace +std::shared_ptr makePath(IdTable input, Vars vars, + TransitivePathSide& left, + TransitivePathSide& right, + size_t minDist, size_t maxDist) { + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(input), vars); + return TransitivePathBase::makeTransitivePath(qec, subtree, left, right, + minDist, maxDist); +} + TEST(TransitivePathTest, idToId) { IdTable sub(2, makeAllocator()); sub.push_back({V(0), V(1)}); @@ -43,19 +59,16 @@ TEST(TransitivePathTest, idToId) { sub.push_back({V(1), V(3)}); sub.push_back({V(2), V(3)}); - IdTable result(2, makeAllocator()); - IdTable expected(2, makeAllocator()); expected.push_back({V(0), V(3)}); TransitivePathSide left(std::nullopt, 0, V(0), 0); TransitivePathSide right(std::nullopt, 1, V(3), 1); - auto T = TransitivePathBase::makeTransitivePath( - getQec(), nullptr, left, right, 1, std::numeric_limits::max()); + auto T = makePath(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, std::numeric_limits::max()); - // T->computeTransitivePath<2, 2>(&result, sub, left, right); - // T->computeTransitivePath(&result, sub, left, right); - assertSameUnorderedContent(expected, result); + auto resultTable = T->computeResultOnlyForTesting(); + assertSameUnorderedContent(expected, resultTable.idTable()); } TEST(TransitivePathTest, idToVar) { @@ -65,8 +78,6 @@ TEST(TransitivePathTest, idToVar) { sub.push_back({V(1), V(3)}); 
sub.push_back({V(2), V(3)}); - IdTable result(2, makeAllocator()); - IdTable expected(2, makeAllocator()); expected.push_back({V(0), V(1)}); expected.push_back({V(0), V(2)}); @@ -74,22 +85,40 @@ TEST(TransitivePathTest, idToVar) { TransitivePathSide left(std::nullopt, 0, V(0), 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - auto T = TransitivePathBase::makeTransitivePath( - getQec(), nullptr, left, right, 1, std::numeric_limits::max()); + auto T = makePath(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, std::numeric_limits::max()); - // T.computeTransitivePath<2, 2>(&result, sub, left, right); - // T->computeTransitivePath(&result, sub, left, right); - assertSameUnorderedContent(expected, result); + auto resultTable = T->computeResultOnlyForTesting(); + assertSameUnorderedContent(expected, resultTable.idTable()); } -TEST(TransitivePathTest, varTovar) { +TEST(TransitivePathTest, varToId) { IdTable sub(2, makeAllocator()); sub.push_back({V(0), V(1)}); sub.push_back({V(1), V(2)}); sub.push_back({V(1), V(3)}); sub.push_back({V(2), V(3)}); - IdTable result(2, makeAllocator()); + IdTable expected(2, makeAllocator()); + expected.push_back({V(2), V(3)}); + expected.push_back({V(1), V(3)}); + expected.push_back({V(0), V(3)}); + + TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); + TransitivePathSide right(std::nullopt, 1, V(3), 1); + auto T = makePath(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, std::numeric_limits::max()); + + auto resultTable = T->computeResultOnlyForTesting(); + assertSameUnorderedContent(expected, resultTable.idTable()); +} + +TEST(TransitivePathTest, varTovar) { + IdTable sub(2, makeAllocator()); + sub.push_back({V(0), V(1)}); + sub.push_back({V(1), V(2)}); + sub.push_back({V(1), V(3)}); + sub.push_back({V(2), V(3)}); IdTable expected(2, makeAllocator()); expected.push_back({V(0), V(1)}); @@ -101,11 +130,11 @@ TEST(TransitivePathTest, varTovar) { 
TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - auto T = TransitivePathBase::makeTransitivePath( - getQec(), nullptr, right, left, 1, std::numeric_limits::max()); + auto T = makePath(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, std::numeric_limits::max()); - // T->computeTransitivePath(&result, sub, left, right); - assertSameUnorderedContent(expected, result); + auto resultTable = T->computeResultOnlyForTesting(); + assertSameUnorderedContent(expected, resultTable.idTable()); } TEST(TransitivePathTest, unlimitedMaxLength) { @@ -119,8 +148,6 @@ TEST(TransitivePathTest, unlimitedMaxLength) { // Disconnected component. sub.push_back({V(10), V(11)}); - IdTable result(2, makeAllocator()); - IdTable expected(2, makeAllocator()); expected.push_back({V(0), V(2)}); expected.push_back({V(0), V(4)}); @@ -143,14 +170,14 @@ TEST(TransitivePathTest, unlimitedMaxLength) { TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - auto T = TransitivePathBase::makeTransitivePath( - getQec(), nullptr, left, right, 1, std::numeric_limits::max()); + auto T = makePath(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, std::numeric_limits::max()); - // T->computeTransitivePath(&result, sub, left, right); - assertSameUnorderedContent(expected, result); + auto resultTable = T->computeResultOnlyForTesting(); + assertSameUnorderedContent(expected, resultTable.idTable()); } -TEST(TransitivePathTest, maxLength2) { +TEST(TransitivePathTest, maxLength2FromVariable) { IdTable sub(2, makeAllocator()); sub.push_back({V(0), V(2)}); sub.push_back({V(2), V(4)}); @@ -161,8 +188,6 @@ TEST(TransitivePathTest, maxLength2) { // Disconnected component. 
sub.push_back({V(10), V(11)}); - IdTable result(2, makeAllocator()); - IdTable expected(2, makeAllocator()); expected.push_back({V(0), V(2)}); @@ -181,32 +206,57 @@ TEST(TransitivePathTest, maxLength2) { TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); - auto T = TransitivePathBase::makeTransitivePath(getQec(), nullptr, left, - right, 1, 2); - // T.computeTransitivePath<2, 2>(&result, sub, left, right); - // T->computeTransitivePath(&result, sub, left, right); - assertSameUnorderedContent(expected, result); - - result.clear(); - expected.clear(); + auto T = makePath(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, 2); + auto resultTable = T->computeResultOnlyForTesting(); + assertSameUnorderedContent(expected, resultTable.idTable()); +} + +TEST(TransitivePathTest, maxLength2FromId) { + IdTable sub(2, makeAllocator()); + sub.push_back({V(0), V(2)}); + sub.push_back({V(2), V(4)}); + sub.push_back({V(4), V(7)}); + sub.push_back({V(0), V(7)}); + sub.push_back({V(3), V(3)}); + sub.push_back({V(7), V(0)}); + // Disconnected component. 
+ sub.push_back({V(10), V(11)}); + + IdTable expected(2, makeAllocator()); + expected.push_back({V(7), V(0)}); expected.push_back({V(7), V(2)}); expected.push_back({V(7), V(7)}); - left.value_ = V(7); - right.value_ = Variable{"?target"}; - // T.computeTransitivePath<2, 2>(&result, sub, left, right); - // T->computeTransitivePath(&result, sub, left, right); - assertSameUnorderedContent(expected, result); + TransitivePathSide left(std::nullopt, 0, V(7), 0); + TransitivePathSide right(std::nullopt, 1, Variable{"?target"}, 1); + auto T = makePath(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, 2); + auto resultTable = T->computeResultOnlyForTesting(); + assertSameUnorderedContent(expected, resultTable.idTable()); +} + +TEST(TransitivePathTest, maxLength2ToId) { + IdTable sub(2, makeAllocator()); + sub.push_back({V(0), V(2)}); + sub.push_back({V(2), V(4)}); + sub.push_back({V(4), V(7)}); + sub.push_back({V(0), V(7)}); + sub.push_back({V(3), V(3)}); + sub.push_back({V(7), V(0)}); + // Disconnected component. + sub.push_back({V(10), V(11)}); + + IdTable expected(2, makeAllocator()); - result.clear(); - expected.clear(); expected.push_back({V(0), V(2)}); expected.push_back({V(7), V(2)}); - left.value_ = Variable{"?start"}; - right.value_ = V(2); - // T.computeTransitivePath<2, 2>(&result, sub, right, left); - // T->computeTransitivePath(&result, sub, right, left); - assertSameUnorderedContent(expected, result); + TransitivePathSide left(std::nullopt, 0, Variable{"?start"}, 0); + TransitivePathSide right(std::nullopt, 1, V(2), 1); + auto T = makePath(std::move(sub), {Variable{"?start"}, Variable{"?target"}}, + left, right, 1, 2); + auto resultTable = T->computeResultOnlyForTesting(); + assertSameUnorderedContent(expected, resultTable.idTable()); } From 4e64224119fdd0238ebf6e97fcdee3bb9c7a9714 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Wed, 13 Mar 2024 16:38:22 +0100 Subject: [PATCH 37/92] Fix merge conflicts. 
--- test/TransitivePathTest.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/TransitivePathTest.cpp b/test/TransitivePathTest.cpp index 0766ef5ccb..a60c928a80 100644 --- a/test/TransitivePathTest.cpp +++ b/test/TransitivePathTest.cpp @@ -8,6 +8,7 @@ #include "./util/AllocatorTestHelpers.h" #include "./util/IdTestHelpers.h" +#include "./util/IndexTestHelpers.h" #include "engine/QueryExecutionTree.h" #include "engine/TransitivePathBase.h" #include "engine/ValuesForTesting.h" From b79470dd79071064e97791e0b1ee7b04651c41bf Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Wed, 13 Mar 2024 16:39:08 +0100 Subject: [PATCH 38/92] Moved sort to constructor --- src/engine/TransitivePathBase.cpp | 12 +++++++----- src/engine/TransitivePathBinSearch.cpp | 8 +++++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/engine/TransitivePathBase.cpp b/src/engine/TransitivePathBase.cpp index 6c2e9297c7..4d22ae93ef 100644 --- a/src/engine/TransitivePathBase.cpp +++ b/src/engine/TransitivePathBase.cpp @@ -241,14 +241,16 @@ std::shared_ptr TransitivePathBase::bindLeftOrRightSide( // `Operation`, which would then ignore the changes in `variableColumnMap_` // made below (see `Operation::getInternallyVisibleVariableColumns` and // `Operation::getExternallyVariableColumns`). - std::shared_ptr p = - TransitivePathBase::makeTransitivePath(getExecutionContext(), subtree_, - lhs_, rhs_, minDist_, maxDist_); + auto lhs = lhs_; + auto rhs = rhs_; if (isLeft) { - p->lhs_.treeAndCol_ = {leftOrRightOp, inputCol}; + lhs.treeAndCol_ = {leftOrRightOp, inputCol}; } else { - p->rhs_.treeAndCol_ = {leftOrRightOp, inputCol}; + rhs.treeAndCol_ = {leftOrRightOp, inputCol}; } + std::shared_ptr p = + TransitivePathBase::makeTransitivePath(getExecutionContext(), subtree_, + lhs, rhs, minDist_, maxDist_); // Note: The `variable` in the following structured binding is `const`, even // if we bind by value. 
We deliberately make one unnecessary copy of the diff --git a/src/engine/TransitivePathBinSearch.cpp b/src/engine/TransitivePathBinSearch.cpp index a64899a5fd..4366a99e0e 100644 --- a/src/engine/TransitivePathBinSearch.cpp +++ b/src/engine/TransitivePathBinSearch.cpp @@ -20,7 +20,11 @@ TransitivePathBinSearch::TransitivePathBinSearch( QueryExecutionContext* qec, std::shared_ptr child, const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, size_t minDist, size_t maxDist) - : TransitivePathBase(qec, child, leftSide, rightSide, minDist, maxDist) {} + : TransitivePathBase(qec, child, leftSide, rightSide, minDist, maxDist) { + auto [startSide, targetSide] = decideDirection(); + subtree_ = QueryExecutionTree::createSortedTree( + subtree_, {startSide.subCol_, targetSide.subCol_}); +} // _____________________________________________________________________________ template @@ -130,8 +134,6 @@ ResultTable TransitivePathBinSearch::computeResult() { "not supported"); } auto [startSide, targetSide] = decideDirection(); - subtree_ = QueryExecutionTree::createSortedTree( - subtree_, {startSide.subCol_, targetSide.subCol_}); shared_ptr subRes = subtree_->getResult(); IdTable idTable{allocator()}; From 965d25a34442476d128a86829c7addc9b03b10c8 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sat, 16 Mar 2024 11:00:17 +0100 Subject: [PATCH 39/92] Removed GraphBlas references and implementation --- Dockerfile | 4 +- src/engine/CMakeLists.txt | 6 +- src/engine/GrbGlobalContext.h | 31 --- src/engine/GrbMatrix.cpp | 249 ----------------- src/engine/GrbMatrix.h | 194 ------------- src/engine/TransitivePathBase.cpp | 11 +- src/engine/TransitivePathBase.h | 14 +- src/engine/TransitivePathGraphblas.cpp | 370 ------------------------- src/engine/TransitivePathGraphblas.h | 211 -------------- src/global/Constants.h | 1 - test/CMakeLists.txt | 2 - test/GrbMatrixTest.cpp | 188 ------------- 12 files changed, 15 insertions(+), 1266 deletions(-) delete mode 100644 
src/engine/GrbGlobalContext.h delete mode 100644 src/engine/GrbMatrix.cpp delete mode 100644 src/engine/GrbMatrix.h delete mode 100644 src/engine/TransitivePathGraphblas.cpp delete mode 100644 src/engine/TransitivePathGraphblas.h delete mode 100644 test/GrbMatrixTest.cpp diff --git a/Dockerfile b/Dockerfile index 0af15c76f8..611c014507 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository -y ppa:mhier/libboost-latest FROM base as builder -RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev libgraphblas-dev +RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev COPY . 
/app/ @@ -21,7 +21,7 @@ RUN ctest --rerun-failed --output-on-failure FROM base as runtime WORKDIR /app ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev libgraphblas-dev +RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev ARG UID=1000 RUN groupadd -r qlever && useradd --no-log-init -r -u $UID -g qlever qlever && chown qlever:qlever /app diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index e16e00a210..6d3ca3de66 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -8,9 +8,9 @@ add_library(engine Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan.cpp Union.cpp MultiColumnJoin.cpp TransitivePathBase.cpp TransitivePathFallback.cpp - TransitivePathGraphblas.cpp TransitivePathBinSearch.cpp Service.cpp + TransitivePathBinSearch.cpp Service.cpp Values.cpp Bind.cpp Minus.cpp RuntimeInformation.cpp CheckUsePatternTrick.cpp VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp - idTable/CompressedExternalIdTable.h GrbMatrix.cpp) -qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams graphblas) + idTable/CompressedExternalIdTable.h) +qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams) diff --git a/src/engine/GrbGlobalContext.h b/src/engine/GrbGlobalContext.h 
deleted file mode 100644 index b979c7b117..0000000000 --- a/src/engine/GrbGlobalContext.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2024, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Herrmann (johannes.roland.herrmann@mars.uni-freiburg.de) - -extern "C" { -#include -} - -/** - * @class GrbGlobalContext - * @brief This Singleton class is based on the design by Scott Meyers. The basic - * idea is that the singleton object exists in a 'magic' state within the - * getContext() function. This is threadsafe. - * - * Reference: - * https://laristra.github.io/flecsi/src/developer-guide/patterns/meyers_singleton.html - * - */ -class GrbGlobalContext { - GrbGlobalContext() { GrB_init(GrB_NONBLOCKING); } - ~GrbGlobalContext() { GrB_finalize(); } - - public: - static GrbGlobalContext& getContext() { - static GrbGlobalContext context; - return context; - } - - GrbGlobalContext(const GrbGlobalContext&) = delete; - GrbGlobalContext& operator=(const GrbGlobalContext&) = delete; -}; diff --git a/src/engine/GrbMatrix.cpp b/src/engine/GrbMatrix.cpp deleted file mode 100644 index 1b6c1b19cf..0000000000 --- a/src/engine/GrbMatrix.cpp +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright 2024, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Author: Johannes Herrmann (johannes.roland.herrmann@mars.uni-freiburg.de) - -#include "GrbMatrix.h" - -extern "C" { -#include -} - -#include -#include - -#include "util/Exception.h" - -// _____________________________________________________________________________ -GrbMatrix::GrbMatrix(size_t numRows, size_t numCols) { - auto info = GrB_Matrix_new(rawMatrix(), GrB_BOOL, numRows, numCols); - handleError(info); -} - -// _____________________________________________________________________________ -GrbMatrix GrbMatrix::clone() const { - GrbMatrix matrixCopy; - auto info = - GrB_Matrix_new(matrixCopy.rawMatrix(), GrB_BOOL, numRows(), numCols()); - handleError(info); - info = GrB_Matrix_dup(matrixCopy.rawMatrix(), matrix()); - handleError(info); - - return matrixCopy; -} - -// _____________________________________________________________________________ -void GrbMatrix::setElement(size_t row, size_t col, bool value) { - auto info = GrB_Matrix_setElement_BOOL(matrix(), value, row, col); - handleError(info); -} - -// _____________________________________________________________________________ -bool GrbMatrix::getElement(size_t row, size_t col) const { - bool result; - auto info = GrB_Matrix_extractElement_BOOL(&result, matrix(), row, col); - if (info == GrB_NO_VALUE) { - return false; - } - handleError(info); - return result; -} - -// _____________________________________________________________________________ -GrbMatrix GrbMatrix::build(const std::vector& rowIndices, - const std::vector& colIndices, - size_t numRows, size_t numCols) { - auto matrix = GrbMatrix(numRows, numCols); - GrB_Index nvals = rowIndices.size(); - if (nvals == 0) { - return matrix; - } - - std::unique_ptr values{new bool[nvals]()}; - for (size_t i = 0; i < nvals; i++) { - values[i] = true; - } - auto info = GrB_Matrix_build_BOOL(matrix.matrix(), rowIndices.data(), - colIndices.data(), values.get(), nvals, - GxB_IGNORE_DUP); - GrbMatrix::handleError(info); - return matrix; -} - -// 
_____________________________________________________________________________ -GrbMatrix GrbMatrix::diag(size_t nvals) { - auto result = GrbMatrix(nvals, nvals); - - for (size_t i = 0; i < nvals; i++) { - result.setElement(i, i, true); - } - - return result; -} - -// _____________________________________________________________________________ -std::pair, std::vector> GrbMatrix::extractTuples() - const { - size_t nvals = numNonZero(); - std::vector rowIndices; - rowIndices.resize(nvals); - std::vector colIndices; - colIndices.resize(nvals); - std::unique_ptr values{new bool[nvals]()}; - auto info = GrB_Matrix_extractTuples_BOOL( - rowIndices.data(), colIndices.data(), values.get(), &nvals, matrix()); - handleError(info); - - return {rowIndices, colIndices}; -} - -// _____________________________________________________________________________ -std::vector GrbMatrix::extractColumn(size_t colIndex) const { - return extract(colIndex, GrB_NULL); -} - -// _____________________________________________________________________________ -std::vector GrbMatrix::extractRow(size_t rowIndex) const { - // The descriptor GrB_DESC_T0 transposes the second input, which is the matrix - return extract(rowIndex, GrB_DESC_T0); -} - -// _____________________________________________________________________________ -size_t GrbMatrix::numNonZero() const { - size_t nvals; - auto info = GrB_Matrix_nvals(&nvals, matrix()); - handleError(info); - return nvals; -} - -// _____________________________________________________________________________ -size_t GrbMatrix::numRows() const { - size_t nrows; - auto info = GrB_Matrix_nrows(&nrows, matrix()); - handleError(info); - return nrows; -} - -// _____________________________________________________________________________ -size_t GrbMatrix::numCols() const { - size_t ncols; - auto info = GrB_Matrix_ncols(&ncols, matrix()); - handleError(info); - return ncols; -} - -// _____________________________________________________________________________ 
-GrbMatrix GrbMatrix::transpose() const { - GrbMatrix transposed; - auto info = - GrB_Matrix_new(transposed.rawMatrix(), GrB_BOOL, numCols(), numRows()); - handleError(info); - info = GrB_transpose(transposed.matrix(), GrB_NULL, GrB_NULL, matrix(), - GrB_NULL); - handleError(info); - - return transposed; -} - -// _____________________________________________________________________________ -void GrbMatrix::accumulateMultiply(const GrbMatrix& otherMatrix) const { - auto info = GrB_mxm(matrix(), GrB_NULL, GrB_LOR, GrB_LOR_LAND_SEMIRING_BOOL, - matrix(), otherMatrix.matrix(), GrB_NULL); - handleError(info); -} - -// _____________________________________________________________________________ -GrbMatrix GrbMatrix::multiply(const GrbMatrix& otherMatrix) const { - size_t resultNumRows = numRows(); - size_t resultNumCols = otherMatrix.numCols(); - GrbMatrix result; - auto info = GrB_Matrix_new(result.rawMatrix(), GrB_BOOL, resultNumRows, - resultNumCols); - handleError(info); - - info = - GrB_mxm(result.matrix(), GrB_NULL, GrB_NULL, GrB_LOR_LAND_SEMIRING_BOOL, - matrix(), otherMatrix.matrix(), GrB_NULL); - handleError(info); - - return result; -} - -// _____________________________________________________________________________ -GrB_Matrix* GrbMatrix::rawMatrix() const { - if (matrix_.get() != nullptr) { - return matrix_.get(); - } - AD_THROW("GrbMatrix error: internal GrB_Matrix is null"); -} - -// _____________________________________________________________________________ -std::vector GrbMatrix::extract(size_t index, - GrB_Descriptor desc) const { - GrB_Vector vector; - size_t vectorSize; - if (desc == GrB_NULL) { - vectorSize = numRows(); - } else { - vectorSize = numCols(); - } - auto info = GrB_Vector_new(&vector, GrB_BOOL, vectorSize); - handleError(info); - - info = GrB_Col_extract(vector, GrB_NULL, GrB_NULL, matrix(), GrB_ALL, - vectorSize, index, desc); - handleError(info); - - size_t vectorNvals; - info = GrB_Vector_nvals(&vectorNvals, vector); - 
handleError(info); - - std::vector indices; - indices.resize(vectorNvals); - info = GrB_Vector_extractTuples_BOOL(indices.data(), nullptr, &vectorNvals, - vector); - handleError(info); - - info = GrB_Vector_free(&vector); - handleError(info); - - return indices; -} - -// _____________________________________________________________________________ -void GrbMatrix::handleError(GrB_Info info) { - switch (info) { - case GrB_SUCCESS: - return; - case GrB_NO_VALUE: - AD_THROW("GraphBLAS error: entry does not appear in the matrix"); - case GrB_UNINITIALIZED_OBJECT: - AD_THROW("GraphBLAS error: object has not been initialized"); - case GrB_NULL_POINTER: - AD_THROW("GraphBLAS error: input pointer is NULL"); - case GrB_INVALID_VALUE: - AD_THROW("GraphBLAS error: generic error code; some value is bad"); - case GrB_INVALID_INDEX: - AD_THROW("GraphBLAS error: a row or column index is out of bounds"); - case GrB_DOMAIN_MISMATCH: - AD_THROW("GraphBLAS error: object domains are not compatible"); - case GrB_DIMENSION_MISMATCH: - AD_THROW("GraphBLAS error: matrix dimensions do not match"); - case GrB_OUTPUT_NOT_EMPTY: - AD_THROW("GraphBLAS error: output matrix already has values in it"); - case GrB_NOT_IMPLEMENTED: - AD_THROW("GraphBLAS error: not implemented in SuiteSparse:GraphBLAS"); - case GrB_PANIC: - AD_THROW("GraphBLAS error: unrecoverable error"); - case GrB_OUT_OF_MEMORY: - AD_THROW("GraphBLAS error: out of memory"); - case GrB_INSUFFICIENT_SPACE: - AD_THROW("GraphBLAS error: output array not large enough"); - case GrB_INVALID_OBJECT: - AD_THROW("GraphBLAS error: object is corrupted"); - case GrB_INDEX_OUT_OF_BOUNDS: - AD_THROW("GraphBLAS error: a row or column is out of bounds"); - case GrB_EMPTY_OBJECT: - AD_THROW("GraphBLAS error: an input scalar has no entry"); - } - AD_FAIL(); -} diff --git a/src/engine/GrbMatrix.h b/src/engine/GrbMatrix.h deleted file mode 100644 index 82268e612f..0000000000 --- a/src/engine/GrbMatrix.h +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright 
2024, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Herrmann (johannes.roland.herrmann@mars.uni-freiburg.de) - -#pragma once - -#include -extern "C" { -#include -} - -#include -#include - -/** - * @class GrbMatrix - * @brief This class wraps the functionality of the GraphBLAS object GrB_Matrix. - * Currently only boolean matrices are supported. - */ -class GrbMatrix { - private: - using MatrixDeleter = - decltype([](GrB_Matrix* matrix) { GrB_Matrix_free(matrix); }); - using MatrixPtr = std::unique_ptr; - MatrixPtr matrix_ = - std::unique_ptr(new GrB_Matrix()); - - public: - /** - * @brief Construct a matrix with the given dimensions - * - * @param numRows - * @param numCols - */ - GrbMatrix(size_t numRows, size_t numCols); - - GrbMatrix() = default; - - // Move constructor - GrbMatrix(GrbMatrix&& otherMatrix) = default; - GrbMatrix& operator=(GrbMatrix&&) = default; - - // Disable copy constructor and assignment operator - GrbMatrix(const GrbMatrix&) = delete; - GrbMatrix& operator=(const GrbMatrix&) = delete; - - /** - * @brief Create a matrix and fill it with the data of this matrix. - * - * @return GrbMatrix duplicate matrix - */ - GrbMatrix clone() const; - - /** - * @brief Set an element in the matrix to a specified value. - * - * @param row Row index, must be smaller than numRows() - * @param col Column index, must be smaller than numCols() - * @param value Boolean, which value to set - */ - void setElement(size_t row, size_t col, bool value); - - /** - * @brief Get an element from the matrix. - * - * @param row Row index, must be smaller than numRows() - * @param col Column index, must be smaller than numCols() - * @return Boolean value - */ - bool getElement(size_t row, size_t col) const; - - /** - * @brief Create a matrix from the given lists of indices. For each given pair - * of indices, the corresponding entry in the result matrix is set to true. - * All other entries are false (by default). 
- * The vectors rowIndices and colIndices have to be the same length. Their - * entries have to be smaller than numRows and numCols respectively. - * - * @param rowIndices Vector of row indices, entries must be smaller than - * numRows - * @param colIndices Vector of column indices, entries must be smaller than - * numCols - * @param numRows Number of rows of the result matrix - * @param numCols Number of columns of the result matrix - * @return New matrix with given entries set to true - */ - static GrbMatrix build(const std::vector& rowIndices, - const std::vector& colIndices, size_t numRows, - size_t numCols); - - /** - * @brief Create a square, diagonal matrix. All entries on the diagonal are - * set to true, all others to false. The resulting matrix will have nvals rows - * and columns. - * - * @param nvals - * @return - */ - static GrbMatrix diag(size_t nvals); - - /** - * @brief Extract all true entries from the matrix. The first entry in the - * pair is the row index, the second entry is the column index. - */ - std::pair, std::vector> extractTuples() const; - - /** - * @brief Extract a column from the matrix. Returns all row indices where this - * column's entries are true. - * - * @param colIndex - */ - std::vector extractColumn(size_t colIndex) const; - - /** - * @brief Extract a row from the matrix. Returns all column indices where this - * rows's entries are true. - * - * @param rowIndex - */ - std::vector extractRow(size_t rowIndex) const; - - /** - * @brief Number of "true" values in the matrix. - * - * @return - */ - size_t numNonZero() const; - - /** - * @brief Number of rows of the matrix. - * - * @return - */ - size_t numRows() const; - - /** - * @brief Number of columns of the matrix. - * - * @return - */ - size_t numCols() const; - - /** - * @brief Create a new matrix, which is the transpose of this matrix. 
- * - * @return - */ - GrbMatrix transpose() const; - - /** - * @brief Multiply this matrix with the other matrix and accumulate the result - * in this matrix. Logical or is used for accumulation. - * - * @param otherMatrix - */ - void accumulateMultiply(const GrbMatrix& otherMatrix) const; - - /** - * @brief Multiply this matrix with another matrix and write the result to a - * new matrix. - * - * @param otherMatrix - * @return - */ - GrbMatrix multiply(const GrbMatrix& otherMatrix) const; - - /** - * @brief Get a reference to the internal matrix. - * - * @return - */ - GrB_Matrix& matrix() const { return *matrix_; } - - private: - /** - * @brief Get a raw pointer to the internal matrix. If this pointer is the - * nullptr, an Exception is thrown. - * - * @return - */ - GrB_Matrix* rawMatrix() const; - - std::vector extract(size_t index, GrB_Descriptor desc) const; - - /** - * @brief Handle the GrB_Info object. GrB_SUCCESS is ignored, all other return - * valus cause an Exception. - * See also GraphBLAS userguide, section 5.5 - * - * @param info - */ - static void handleError(GrB_Info info); -}; diff --git a/src/engine/TransitivePathBase.cpp b/src/engine/TransitivePathBase.cpp index 4d22ae93ef..1f90bcffd9 100644 --- a/src/engine/TransitivePathBase.cpp +++ b/src/engine/TransitivePathBase.cpp @@ -13,7 +13,6 @@ #include "engine/IndexScan.h" #include "engine/TransitivePathBinSearch.h" #include "engine/TransitivePathFallback.h" -#include "engine/TransitivePathGraphblas.h" #include "global/Constants.h" #include "util/Exception.h" @@ -192,21 +191,17 @@ std::shared_ptr TransitivePathBase::makeTransitivePath( QueryExecutionContext* qec, std::shared_ptr child, const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, size_t minDist, size_t maxDist) { - bool useGraphblas = RuntimeParameters().get<"use-graphblas">(); bool useBinSearch = RuntimeParameters().get<"use-binsearch">(); return makeTransitivePath(qec, child, leftSide, rightSide, minDist, maxDist, - 
useGraphblas, useBinSearch); + useBinSearch); } // _____________________________________________________________________________ std::shared_ptr TransitivePathBase::makeTransitivePath( QueryExecutionContext* qec, std::shared_ptr child, const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, - size_t minDist, size_t maxDist, bool useGraphblas, bool useBinSearch) { - if (useGraphblas) { - return std::make_shared( - qec, child, leftSide, rightSide, minDist, maxDist); - } else if (useBinSearch) { + size_t minDist, size_t maxDist, bool useBinSearch) { + if (useBinSearch) { return std::make_shared( qec, child, leftSide, rightSide, minDist, maxDist); } else { diff --git a/src/engine/TransitivePathBase.h b/src/engine/TransitivePathBase.h index a55ab724e3..09251d5742 100644 --- a/src/engine/TransitivePathBase.h +++ b/src/engine/TransitivePathBase.h @@ -128,8 +128,8 @@ class TransitivePathBase : public Operation { /** * @brief Make a concrete TransitivePath object using the given parameters. - * The concrete object will either be TransitivePathBase or - * TransitivePathGraphblas, depending on the useGraphblas flag. + * The concrete object will either be TransitivePathFallback or + * TransitivePathBinSearch, depending on the useBinSearch flag. * * @param qec QueryExecutionContext for the TransitivePath Operation * @param child QueryExecutionTree for the subquery of the TransitivePath @@ -139,18 +139,18 @@ class TransitivePathBase : public Operation { * number of nodes) * @param maxDist Maximum distance a resulting path may have (distance = * number of nodes) - * @param useGraphblas If true, the returned object will be a - * TransitivePathGraphblas. Else it will be a TransitivePathFallback + * @param useBinSearch If true, the returned object will be a + * TransitivePathBinSearch. 
Else it will be a TransitivePathFallback */ static std::shared_ptr makeTransitivePath( QueryExecutionContext* qec, std::shared_ptr child, const TransitivePathSide& leftSide, const TransitivePathSide& rightSide, - size_t minDist, size_t maxDist, bool useGraphblas, bool useBinSearch); + size_t minDist, size_t maxDist, bool useBinSearch); /** * @brief Make a concrete TransitivePath object using the given parameters. - * The concrete object will either be TransitivePathBase or - * TransitivePathGraphblas, depending on the runtime constant "use-graphblas". + * The concrete object will either be TransitivePathFallback or + * TransitivePathBinSearch, depending on the runtime constant "use-binsearch". * * @param qec QueryExecutionContext for the TransitivePath Operation * @param child QueryExecutionTree for the subquery of the TransitivePath diff --git a/src/engine/TransitivePathGraphblas.cpp b/src/engine/TransitivePathGraphblas.cpp deleted file mode 100644 index 5f726a23ae..0000000000 --- a/src/engine/TransitivePathGraphblas.cpp +++ /dev/null @@ -1,370 +0,0 @@ -// Copyright 2019, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) - -#include "TransitivePathGraphblas.h" - -#include -#include -#include -#include - -#include "engine/CallFixedSize.h" -#include "engine/GrbGlobalContext.h" -#include "engine/TransitivePathBase.h" -#include "util/Exception.h" -#include "util/Timer.h" - -// _____________________________________________________________________________ -TransitivePathGraphblas::TransitivePathGraphblas( - QueryExecutionContext* qec, std::shared_ptr child, - TransitivePathSide leftSide, TransitivePathSide rightSide, size_t minDist, - size_t maxDist) - : TransitivePathBase(qec, child, leftSide, rightSide, minDist, maxDist) {} - -// _____________________________________________________________________________ -template -void TransitivePathGraphblas::computeTransitivePathBound( - IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, const IdTable& startSideTable) const { - IdTableStatic res = std::move(*dynRes).toStatic(); - - const IdTableView sub = dynSub.asStaticView(); - decltype(auto) startCol = sub.getColumn(startSide.subCol_); - decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); - - auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); - timer.start(); - - GrbGlobalContext::getContext(); - auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); - - checkCancellation(); - - std::span startNodes = - startSideTable.getColumn(startSide.treeAndCol_->second); - GrbMatrix startNodeMatrix = - setupStartNodeMatrix(startNodes, graph.numRows(), mapping); - - timer.stop(); - auto initTime = timer.msecs(); - timer.start(); - - auto hull = transitiveHull(graph, std::move(startNodeMatrix)); - if (!targetSide.isVariable()) { - Id target = std::get(targetSide.value_); - size_t targetIndex = mapping.getIndex(target); - hull = getTargetRow(hull, targetIndex); - } - - timer.stop(); - auto hullTime = timer.msecs(); - timer.start(); - - 
TransitivePathGraphblas::fillTableWithHull( - res, hull, mapping, startSideTable, startNodes, startSide.outputCol_, - targetSide.outputCol_, startSide.treeAndCol_.value().second); - - timer.stop(); - auto fillTime = timer.msecs(); - - auto& info = runtimeInfo(); - info.addDetail("Initialization time", initTime.count()); - info.addDetail("Hull time", hullTime.count()); - info.addDetail("IdTable fill time", fillTime.count()); - - *dynRes = std::move(res).toDynamic(); -} - -// _____________________________________________________________________________ -template -void TransitivePathGraphblas::computeTransitivePath( - IdTable* dynRes, const IdTable& dynSub, const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const { - IdTableStatic res = std::move(*dynRes).toStatic(); - - const IdTableView sub = dynSub.asStaticView(); - decltype(auto) startCol = sub.getColumn(startSide.subCol_); - decltype(auto) targetCol = sub.getColumn(targetSide.subCol_); - - auto timer = ad_utility::Timer(ad_utility::Timer::Stopped); - timer.start(); - - GrbGlobalContext::getContext(); - auto [graph, mapping] = setupMatrix(startCol, targetCol, sub.size()); - - timer.stop(); - auto initTime = timer.msecs(); - timer.start(); - - GrbMatrix hull; - if (!startSide.isVariable()) { - std::vector startNode{std::get(startSide.value_)}; - GrbMatrix startMatrix = - setupStartNodeMatrix(startNode, graph.numRows(), mapping); - hull = transitiveHull(graph, std::move(startMatrix)); - } else { - hull = transitiveHull(graph, std::nullopt); - } - - timer.stop(); - auto hullTime = timer.msecs(); - timer.start(); - - if (!targetSide.isVariable()) { - Id target = std::get(targetSide.value_); - size_t targetIndex = mapping.getIndex(target); - hull = getTargetRow(hull, targetIndex); - } - - if (!startSide.isVariable()) { - std::vector startNode{std::get(startSide.value_)}; - TransitivePathGraphblas::fillTableWithHull( - res, hull, mapping, startNode, startSide.outputCol_, - 
targetSide.outputCol_); - } else { - TransitivePathGraphblas::fillTableWithHull( - res, hull, mapping, startSide.outputCol_, targetSide.outputCol_); - } - - timer.stop(); - auto fillTime = timer.msecs(); - - auto& info = runtimeInfo(); - info.addDetail("Initialization time", initTime.count()); - info.addDetail("Hull time", hullTime.count()); - info.addDetail("IdTable fill time", fillTime.count()); - - *dynRes = std::move(res).toDynamic(); -} - -// _____________________________________________________________________________ -ResultTable TransitivePathGraphblas::computeResult() { - if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && - rhs_.isVariable()) { - AD_THROW( - "This query might have to evalute the empty path, which is currently " - "not supported"); - } - shared_ptr subRes = subtree_->getResult(); - - IdTable idTable{allocator()}; - - idTable.setNumColumns(getResultWidth()); - - size_t subWidth = subRes->idTable().numColumns(); - - auto computeForOneSide = [this, &idTable, subRes, subWidth]( - auto& boundSide, - auto& otherSide) -> ResultTable { - shared_ptr sideRes = - boundSide.treeAndCol_.value().first->getResult(); - size_t sideWidth = sideRes->idTable().numColumns(); - - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), - &TransitivePathGraphblas::computeTransitivePathBound, this, - &idTable, subRes->idTable(), boundSide, otherSide, - sideRes->idTable()); - - return {std::move(idTable), resultSortedOn(), - ResultTable::getSharedLocalVocabFromNonEmptyOf(*sideRes, *subRes)}; - }; - - if (lhs_.isBoundVariable()) { - return computeForOneSide(lhs_, rhs_); - } else if (rhs_.isBoundVariable()) { - return computeForOneSide(rhs_, lhs_); - // Right side is an Id - } else if (!rhs_.isVariable()) { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePathGraphblas::computeTransitivePath, this, - &idTable, subRes->idTable(), rhs_, lhs_); - // No side is a bound variable, the right side is an unbound variable - // and the left 
side is either an unbound Variable or an ID. - } else { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePathGraphblas::computeTransitivePath, this, - &idTable, subRes->idTable(), lhs_, rhs_); - } - - // NOTE: The only place, where the input to a transitive path operation is not - // an index scan (which has an empty local vocabulary by default) is the - // `LocalVocabTest`. But it doesn't harm to propagate the local vocab here - // either. - return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; -} - -// _____________________________________________________________________________ -GrbMatrix TransitivePathGraphblas::transitiveHull( - const GrbMatrix& graph, std::optional startNodes) const { - size_t pathLength = 0; - GrbMatrix result; - - if (startNodes) { - result = std::move(startNodes.value()); - } else { - result = GrbMatrix::diag(graph.numRows()); - } - - if (minDist_ > 0) { - result = result.multiply(graph); - pathLength++; - } - - size_t previousNvals = 0; - size_t nvals = result.numNonZero(); - while (nvals > previousNvals && pathLength < maxDist_) { - previousNvals = result.numNonZero(); - result.accumulateMultiply(graph); - checkCancellation(); - nvals = result.numNonZero(); - pathLength++; - } - return result; -} - -// _____________________________________________________________________________ -template -void TransitivePathGraphblas::fillTableWithHull(IdTableStatic& table, - const GrbMatrix& hull, - const IdMapping& mapping, - size_t startSideCol, - size_t targetSideCol) { - auto [rowIndices, colIndices] = hull.extractTuples(); - - for (size_t i = 0; i < rowIndices.size(); i++) { - table.emplace_back(); - auto startIndex = rowIndices[i]; - auto targetIndex = colIndices[i]; - Id startId = mapping.getId(startIndex); - Id targetId = mapping.getId(targetIndex); - table(i, startSideCol) = startId; - table(i, targetSideCol) = targetId; - } -} - -// 
_____________________________________________________________________________ -template -void TransitivePathGraphblas::fillTableWithHull(IdTableStatic& table, - const GrbMatrix& hull, - const IdMapping& mapping, - std::span startNodes, - size_t startSideCol, - size_t targetSideCol) { - size_t resultRowIndex = 0; - size_t rowIndex = 0; - - for (auto startNode : startNodes) { - std::vector indices = hull.extractRow(rowIndex); - for (size_t index : indices) { - Id targetNode = mapping.getId(index); - table.emplace_back(); - table(resultRowIndex, startSideCol) = startNode; - table(resultRowIndex, targetSideCol) = targetNode; - resultRowIndex++; - } - rowIndex++; - } -} - -// _____________________________________________________________________________ -template -void TransitivePathGraphblas::fillTableWithHull( - IdTableStatic& table, const GrbMatrix& hull, - const IdMapping& mapping, const IdTable& startSideTable, - std::span startNodes, size_t startSideCol, size_t targetSideCol, - size_t skipCol) { - IdTableView startView = - startSideTable.asStaticView(); - - size_t resultRowIndex = 0; - size_t rowIndex = 0; - for (auto startNode : startNodes) { - std::vector indices = hull.extractRow(rowIndex); - for (size_t index : indices) { - Id targetNode = mapping.getId(index); - table.emplace_back(); - table(resultRowIndex, startSideCol) = startNode; - table(resultRowIndex, targetSideCol) = targetNode; - - TransitivePathGraphblas::copyColumns( - startView, table, rowIndex, resultRowIndex, skipCol); - resultRowIndex++; - } - rowIndex++; - } -} - -// _____________________________________________________________________________ -GrbMatrix TransitivePathGraphblas::getTargetRow(const GrbMatrix& hull, - size_t targetIndex) const { - auto transformer = GrbMatrix(hull.numCols(), hull.numCols()); - transformer.setElement(targetIndex, targetIndex, true); - return hull.multiply(transformer); -} - -// _____________________________________________________________________________ 
-std::tuple TransitivePathGraphblas::setupMatrix( - std::span startCol, std::span targetCol, - size_t numRows) const { - std::vector rowIndices; - std::vector colIndices; - IdMapping mapping; - - for (size_t i = 0; i < numRows; i++) { - auto startId = startCol[i]; - auto targetId = targetCol[i]; - auto startIndex = mapping.addId(startId); - auto targetIndex = mapping.addId(targetId); - - rowIndices.push_back(startIndex); - colIndices.push_back(targetIndex); - checkCancellation(); - } - - ad_utility::Timer t{ad_utility::Timer::Started}; - auto matrix = - GrbMatrix::build(rowIndices, colIndices, mapping.size(), mapping.size()); - runtimeInfo().addDetail("matrix-build-time-graphblas", t.msecs().count()); - return {std::move(matrix), std::move(mapping)}; -} - -// _____________________________________________________________________________ -GrbMatrix TransitivePathGraphblas::setupStartNodeMatrix( - std::span startIds, size_t numCols, IdMapping mapping) const { - // stardIds.size() is the maximum possible number of columns for the - // startMatrix, but if some start node does not have a link in the graph it - // will be skipped, resulting in a zero column at the end of the startMatrix - auto startMatrix = GrbMatrix(startIds.size(), numCols); - size_t rowIndex = 0; - for (Id id : startIds) { - if (!mapping.contains(id)) { - continue; - } - size_t colIndex = mapping.getIndex(id); - startMatrix.setElement(rowIndex, colIndex, true); - rowIndex++; - checkCancellation(); - } - return startMatrix; -} - -// _____________________________________________________________________________ -template -void TransitivePathGraphblas::copyColumns( - const IdTableView& inputTable, - IdTableStatic& outputTable, size_t inputRow, size_t outputRow, - size_t skipCol) { - size_t inCol = 0; - size_t outCol = 2; - while (inCol < inputTable.numColumns() && outCol < outputTable.numColumns()) { - if (skipCol == inCol) { - inCol++; - continue; - } - - outputTable(outputRow, outCol) = 
inputTable(inputRow, inCol); - inCol++; - outCol++; - } -} diff --git a/src/engine/TransitivePathGraphblas.h b/src/engine/TransitivePathGraphblas.h deleted file mode 100644 index 2a37107439..0000000000 --- a/src/engine/TransitivePathGraphblas.h +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright 2019, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Florian Kramer (florian.kramer@neptun.uni-freiburg.de) - -#pragma once - -#include - -#include "TransitivePathBase.h" -#include "engine/GrbMatrix.h" -#include "engine/Operation.h" -#include "engine/QueryExecutionTree.h" -#include "engine/idTable/IdTable.h" -#include "util/HashMap.h" - -// This struct keeps track of the mapping between Ids and matrix indices -struct IdMapping { - bool contains(Id id) const { return idMap_.contains(id); } - - size_t addId(Id id) { - if (!idMap_.contains(id)) { - indexMap_.push_back(id); - } - idMap_.try_emplace(id, indexMap_.size() - 1); - return idMap_[id]; - } - - Id getId(size_t index) const { return indexMap_.at(index); } - - size_t getIndex(Id id) const { return idMap_.at(id); } - - size_t size() const { return indexMap_.size(); } - - private: - ad_utility::HashMap idMap_; - - std::vector indexMap_; -}; - -class TransitivePathGraphblas : public TransitivePathBase { - public: - TransitivePathGraphblas(QueryExecutionContext* qec, - std::shared_ptr child, - TransitivePathSide leftSide, - TransitivePathSide rightSide, size_t minDist, - size_t maxDist); - - /** - * @brief Compute the transitive hull with a bound side. - * This function is called when the startSide is bound and - * it is a variable. The other IdTable contains the result - * of the start side and will be used to get the start nodes. 
- * - * @tparam RES_WIDTH Number of columns of the result table - * @tparam SUB_WIDTH Number of columns of the sub table - * @tparam SIDE_WIDTH Number of columns of the - * @param res The result table which will be filled in-place - * @param sub The IdTable for the sub result - * @param startSide The start side for the transitive hull - * @param targetSide The target side for the transitive hull - * @param startSideTable The IdTable of the startSide - */ - template - void computeTransitivePathBound(IdTable* res, const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide, - const IdTable& startSideTable) const; - - /** - * @brief Compute the transitive hull. - * This function is called when no side is bound (or an id). - * - * @tparam RES_WIDTH Number of columns of the result table - * @tparam SUB_WIDTH Number of columns of the sub table - * @param res The result table which will be filled in-place - * @param sub The IdTable for the sub result - * @param startSide The start side for the transitive hull - * @param targetSide The target side for the transitive hull - */ - template - void computeTransitivePath(IdTable* res, const IdTable& sub, - const TransitivePathSide& startSide, - const TransitivePathSide& targetSide) const; - - private: - /** - * @brief Compute the result for this TransitivePath operation - * This function chooses the start and target side for the transitive - * hull computation. This choice of the start side has a large impact - * on the time it takes to compute the hull. The set of nodes on the - * start side should be as small as possible. - * - * @return ResultTable The result of the TransitivePath operation - */ - ResultTable computeResult() override; - - /** - * @brief Compute the transitive hull of the graph. If given startNodes, - * compute the transitive hull starting at the startNodes. - * - * @param graph Boolean, square, sparse, adjacency matrix. Row i, column j is - * true, iff. 
there is an edge going from i to j in the graph. - * @param startNodes Boolean, sparse, adjacency matrix, marking the start - * nodes. There is one row for each start node. The number of columns has to - * be equal to the number of columns of the graph matrix. - * @return An adjacency matrix containing the transitive hull - */ - GrbMatrix transitiveHull(const GrbMatrix& graph, - std::optional startNodes) const; - - /** - * @brief Fill the IdTable with the given transitive hull. - * - * @tparam WIDTH The number of columns of the result table. - * @param table The result table which will be filled. - * @param hull The transitive hull. Represented by a sparse, boolean adjacency - * matrix - * @param mapping IdMapping, which maps Ids to matrix indices and vice versa. - * @param startSideCol The column of the result table for the startSide of the - * hull - * @param targetSideCol The column of the result table for the targetSide of - * the hull - */ - template - static void fillTableWithHull(IdTableStatic& table, - const GrbMatrix& hull, const IdMapping& mapping, - size_t startSideCol, size_t targetSideCol); - - /** - * @brief Fill the IdTable with the given transitive hull. This function is - * used in case the hull computation has one (or more) Ids as start nodes. - * - * @tparam WIDTH The number of columns of the result table. - * @param table The result table which will be filled. - * @param hull The transitive hull. Represented by a sparse, boolean adjacency - * matrix - * @param mapping IdMapping, which maps Ids to matrix indices and vice versa. - * @param startNodes Ids of the start nodes. 
- * @param startSideCol The column of the result table for the startSide of the - * hull - * @param targetSideCol The column of the result table for the targetSide of - * the hull - */ - template - static void fillTableWithHull(IdTableStatic& table, - const GrbMatrix& hull, const IdMapping& mapping, - std::span startNodes, - size_t startSideCol, size_t targetSideCol); - - /** - * @brief Fill the IdTable with the given transitive hull. This function is - * used if the start side was already bound and there is an IdTable from which - * data has to be copied to the result table. - * - * @tparam WIDTH The number of columns of the result table. - * @tparam START_WIDTH The number of columns of the start table. - * @param table The result table which will be filled. - * @param hull The transitive hull. Represented by a sparse, boolean adjacency - * matrix - * @param mapping IdMapping, which maps Ids to matrix indices and vice versa. - * @param startNodes Ids of the start nodes. - * @param startSideCol The column of the result table for the startSide of the - * hull - * @param targetSideCol The column of the result table for the targetSide of - * the hull - * @param skipCol This column contains the Ids of the start side in the - * startSideTable and will be skipped. - */ - template - static void fillTableWithHull(IdTableStatic& table, - const GrbMatrix& hull, const IdMapping& mapping, - const IdTable& startSideTable, - std::span startNodes, - size_t startSideCol, size_t targetSideCol, - size_t skipCol); - - GrbMatrix getTargetRow(const GrbMatrix& hull, size_t targetIndex) const; - - /** - * @brief Create a boolean, sparse adjacency matrix from the given edges. The - * edges are given as lists, where one list contains the start node of the - * edge and the other list contains the target node of the edge. - * Also create an IdMapping, which maps the given Ids to matrix indices. 
- * - * @param startCol Column from the IdTable, which contains edge start nodes - * @param targetCol Column from the IdTable, which contains edge target nodes - * @param numRows Number of rows in the IdTable - */ - std::tuple setupMatrix(std::span startCol, - std::span targetCol, - size_t numRows) const; - - /** - * @brief Create a boolean, sparse, adjacency matrix which holds the starting - * nodes for the transitive hull computation. - * - * @param startIds List of Ids where the transitive hull computation should - * start - * @param numRows Number of rows in the IdTable where startIds comes from - * @param mapping An IdMapping between Ids and matrix indices - * @return Matrix with one row for each start node - */ - GrbMatrix setupStartNodeMatrix(std::span startIds, size_t numRows, - IdMapping mapping) const; - - // Copy the columns from the input table to the output table - template - static void copyColumns(const IdTableView& inputTable, - IdTableStatic& outputTable, - size_t inputRow, size_t outputRow, size_t skipCol); -}; diff --git a/src/global/Constants.h b/src/global/Constants.h index c85e9db713..325fe9f5d5 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -242,7 +242,6 @@ inline auto& RuntimeParameters() { DurationParameter{ 30s}), SizeT<"lazy-index-scan-max-size-materialization">{1'000'000}, - Bool<"use-graphblas">{false}, Bool<"use-binsearch">{true}, Bool<"group-by-hash-map-enabled">{false}}; }(); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4578ecf3f3..e106effa80 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -149,8 +149,6 @@ addLinkAndDiscoverTest(MultiColumnJoinTest engine) addLinkAndDiscoverTest(IdTableTest util) -addLinkAndDiscoverTest(GrbMatrixTest engine graphblas) - addLinkAndDiscoverTest(TransitivePathTest engine) addLinkAndDiscoverTest(BatchedPipelineTest) diff --git a/test/GrbMatrixTest.cpp b/test/GrbMatrixTest.cpp deleted file mode 100644 index b3eba03f44..0000000000 --- 
a/test/GrbMatrixTest.cpp +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Herrmann (johannes.roland.herrmann@mars.uni-freiburg.de) - -#include -#include - -#include "engine/GrbGlobalContext.h" -#include "engine/GrbMatrix.h" - -// This helper function checks all important proprties of a matrix. -// One matrix consists of row index, column index and value in this order. -// Entries which do not appear in the entries vector are ignored. -using Entries = std::vector>; -void checkMatrix(GrbMatrix& matrix, size_t numRows, size_t numCols, - size_t numNonZero, Entries entries) { - EXPECT_THAT(matrix.numNonZero(), numNonZero); - EXPECT_THAT(matrix.numRows(), numRows); - EXPECT_THAT(matrix.numCols(), numCols); - - for (auto [rowIndex, colIndex, value] : entries) { - EXPECT_THAT(matrix.getElement(rowIndex, colIndex), value); - } -} - -TEST(GrbMatrixTest, constructor) { - GrbGlobalContext::getContext(); - - GrbMatrix matrix = GrbMatrix(2, 3); - - checkMatrix(matrix, 2, 3, 0, {}); -} - -TEST(GrbMatrixTest, clone) { - GrbGlobalContext::getContext(); - - GrbMatrix matrix1 = GrbMatrix(2, 2); - matrix1.setElement(0, 0, true); - - GrbMatrix matrix2 = matrix1.clone(); - - matrix1.setElement(1, 1, true); - - checkMatrix(matrix2, 2, 2, 1, - {{0, 0, true}, {0, 1, false}, {1, 0, false}, {1, 1, false}}); -} - -TEST(GrbMatrixTest, getSetElement) { - GrbGlobalContext::getContext(); - - GrbMatrix matrix = GrbMatrix(3, 3); - matrix.setElement(1, 0, true); - matrix.setElement(0, 2, true); - - checkMatrix(matrix, 3, 3, 2, {{1, 0, true}, {0, 2, true}}); -} - -TEST(GrbMatrixTest, build) { - GrbGlobalContext::getContext(); - - std::vector rowIndices{0, 0, 1}; - std::vector colIndices{1, 2, 2}; - - GrbMatrix matrix = GrbMatrix::build(rowIndices, colIndices, 3, 3); - - checkMatrix(matrix, 3, 3, 3, {{0, 1, true}, {0, 2, true}, {1, 2, true}}); -} - -TEST(GrbMatrixTest, diag) { - GrbGlobalContext::getContext(); 
- - auto matrix = GrbMatrix::diag(3); - - checkMatrix(matrix, 3, 3, 3, {{0, 0, true}, {1, 1, true}, {2, 2, true}}); -} - -TEST(GrbMatrixTest, extractTuples) { - GrbGlobalContext::getContext(); - - GrbMatrix matrix = GrbMatrix(3, 3); - - matrix.setElement(0, 1, true); - matrix.setElement(0, 2, true); - matrix.setElement(1, 2, true); - - auto [rowIndices, colIndices] = matrix.extractTuples(); - - std::vector expectedRowIndices{0, 0, 1}; - std::vector expectedColIndices{1, 2, 2}; - auto expected = {expectedRowIndices, expectedColIndices}; - auto got = {rowIndices, colIndices}; - - EXPECT_THAT(got, testing::UnorderedElementsAreArray(expected)); -} - -TEST(GrbMatrixTest, extractColumn) { - GrbGlobalContext::getContext(); - - GrbMatrix matrix = GrbMatrix(3, 3); - - matrix.setElement(0, 1, true); - matrix.setElement(2, 1, true); - - std::vector colIndices = matrix.extractColumn(1); - - std::vector expected{0, 2}; - - EXPECT_THAT(colIndices, testing::UnorderedElementsAreArray(expected)); -} - -TEST(GrbMatrixTest, extractRow) { - GrbGlobalContext::getContext(); - - GrbMatrix matrix = GrbMatrix(3, 3); - - matrix.setElement(1, 0, true); - matrix.setElement(1, 2, true); - - std::vector rowIndices = matrix.extractRow(1); - - std::vector expected{0, 2}; - - EXPECT_THAT(rowIndices, testing::UnorderedElementsAreArray(expected)); -} - -TEST(GrbMatrixTest, multiplySquareMatrices) { - GrbGlobalContext::getContext(); - - GrbMatrix matrix1 = GrbMatrix(2, 2); - matrix1.setElement(0, 0, true); - matrix1.setElement(1, 1, true); - - GrbMatrix matrix2 = GrbMatrix(2, 2); - matrix2.setElement(0, 0, true); - matrix2.setElement(1, 0, true); - - GrbMatrix matrix3 = matrix1.multiply(matrix2); - - checkMatrix(matrix3, 2, 2, 2, {{0, 0, true}, {1, 0, true}}); -} - -TEST(GrbMatrixTest, multiplyShapedMatrices) { - GrbGlobalContext::getContext(); - - GrbMatrix matrix1 = GrbMatrix(2, 3); - matrix1.setElement(0, 0, true); - matrix1.setElement(1, 1, true); - - GrbMatrix matrix2 = GrbMatrix(3, 2); - 
matrix2.setElement(0, 0, true); - matrix2.setElement(1, 0, true); - matrix2.setElement(2, 0, true); - - GrbMatrix matrix3 = matrix1.multiply(matrix2); - - checkMatrix(matrix3, 2, 2, 2, {{0, 0, true}, {1, 0, true}}); -} - -TEST(GrbMatrixTest, transpose) { - GrbGlobalContext::getContext(); - - auto matrix = GrbMatrix(2, 3); - - matrix.setElement(0, 0, true); - matrix.setElement(0, 1, true); - matrix.setElement(0, 2, true); - - GrbMatrix result = matrix.transpose(); - - checkMatrix(result, 3, 2, 3, {{0, 0, true}, {1, 0, true}, {2, 0, true}}); -} - -TEST(GrbMatrixTest, accumulateMultiply) { - GrbGlobalContext::getContext(); - - GrbMatrix matrix1 = GrbMatrix(2, 2); - matrix1.setElement(0, 0, true); - matrix1.setElement(1, 1, true); - - GrbMatrix matrix2 = GrbMatrix(2, 2); - matrix2.setElement(0, 1, true); - matrix2.setElement(1, 0, true); - - matrix1.accumulateMultiply(matrix2); - - checkMatrix(matrix1, 2, 2, 4, - {{0, 0, true}, {0, 1, true}, {1, 0, true}, {1, 1, true}}); -} From 9eea0f63d6953d4fcd5cd4b36d8dcf06717beb74 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Sat, 16 Mar 2024 11:58:08 +0100 Subject: [PATCH 40/92] Use TransitivePathFallback by default, so tests pass --- src/global/Constants.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/global/Constants.h b/src/global/Constants.h index 325fe9f5d5..53850f6302 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -242,7 +242,7 @@ inline auto& RuntimeParameters() { DurationParameter{ 30s}), SizeT<"lazy-index-scan-max-size-materialization">{1'000'000}, - Bool<"use-binsearch">{true}, + Bool<"use-binsearch">{false}, Bool<"group-by-hash-map-enabled">{false}}; }(); return params; From 7852bc310c55aff592066bfccac3d8069fb51f08 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Tue, 19 Mar 2024 10:52:30 +0100 Subject: [PATCH 41/92] Updated TransitiveFallback computeResult --- src/engine/TransitivePathBinSearch.h | 9 ------ src/engine/TransitivePathFallback.cpp | 43 
+++++++++++++-------------- src/engine/TransitivePathFallback.h | 15 ++++++++++ 3 files changed, 36 insertions(+), 31 deletions(-) diff --git a/src/engine/TransitivePathBinSearch.h b/src/engine/TransitivePathBinSearch.h index 4a7ef33928..e993c535e5 100644 --- a/src/engine/TransitivePathBinSearch.h +++ b/src/engine/TransitivePathBinSearch.h @@ -20,15 +20,6 @@ struct BinSearchMap { std::span targetIds_; std::span successors(const Id node) const { - // auto startIt = std::lower_bound(startIds_.begin(), startIds_.end(), - // node); auto endIt = std::upper_bound(startIds_.begin(), startIds_.end(), - // node); - // - // auto startIndex = std::distance(startIds_.begin(), startIt); - // auto endIndex = std::distance(startIds_.begin(), endIt); - // - // return targetIds_.subspan(startIndex, endIndex - startIndex); - auto range = std::ranges::equal_range(startIds_, node); auto startIndex = std::distance(startIds_.begin(), range.begin()); diff --git a/src/engine/TransitivePathFallback.cpp b/src/engine/TransitivePathFallback.cpp index d5d67714eb..26388a5de2 100644 --- a/src/engine/TransitivePathFallback.cpp +++ b/src/engine/TransitivePathFallback.cpp @@ -106,6 +106,20 @@ void TransitivePathFallback::computeTransitivePath( *dynRes = std::move(res).toDynamic(); } +// _____________________________________________________________________________ +std::pair +TransitivePathFallback::decideDirection() { + if (lhs_.isBoundVariable()) { + LOG(DEBUG) << "Computing TransitivePath left to right" << std::endl; + return {lhs_, rhs_}; + } else if (rhs_.isBoundVariable() || !rhs_.isVariable()) { + LOG(DEBUG) << "Computing TransitivePath right to left" << std::endl; + return {rhs_, lhs_}; + } + LOG(DEBUG) << "Computing TransitivePath left to right" << std::endl; + return {lhs_, rhs_}; +} + // _____________________________________________________________________________ ResultTable TransitivePathFallback::computeResult() { if (minDist_ == 0 && !isBoundOrId() && lhs_.isVariable() && @@ -114,6 
+128,7 @@ ResultTable TransitivePathFallback::computeResult() { "This query might have to evalute the empty path, which is currently " "not supported"); } + auto [startSide, targetSide] = decideDirection(); shared_ptr subRes = subtree_->getResult(); IdTable idTable{allocator()}; @@ -122,38 +137,22 @@ ResultTable TransitivePathFallback::computeResult() { size_t subWidth = subRes->idTable().numColumns(); - auto computeForOneSide = [this, &idTable, subRes, subWidth]( - auto& boundSide, - auto& otherSide) -> ResultTable { + if (startSide.isBoundVariable()) { shared_ptr sideRes = - boundSide.treeAndCol_.value().first->getResult(); + startSide.treeAndCol_.value().first->getResult(); size_t sideWidth = sideRes->idTable().numColumns(); CALL_FIXED_SIZE((std::array{resultWidth_, subWidth, sideWidth}), &TransitivePathFallback::computeTransitivePathBound, this, - &idTable, subRes->idTable(), boundSide, otherSide, + &idTable, subRes->idTable(), startSide, targetSide, sideRes->idTable()); return {std::move(idTable), resultSortedOn(), ResultTable::getSharedLocalVocabFromNonEmptyOf(*sideRes, *subRes)}; - }; - - if (lhs_.isBoundVariable()) { - return computeForOneSide(lhs_, rhs_); - } else if (rhs_.isBoundVariable()) { - return computeForOneSide(rhs_, lhs_); - // Right side is an Id - } else if (!rhs_.isVariable()) { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePathFallback::computeTransitivePath, this, - &idTable, subRes->idTable(), rhs_, lhs_); - // No side is a bound variable, the right side is an unbound variable - // and the left side is either an unbound Variable or an ID. 
- } else { - CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), - &TransitivePathFallback::computeTransitivePath, this, - &idTable, subRes->idTable(), lhs_, rhs_); } + CALL_FIXED_SIZE((std::array{resultWidth_, subWidth}), + &TransitivePathFallback::computeTransitivePath, this, + &idTable, subRes->idTable(), startSide, targetSide); // NOTE: The only place, where the input to a transitive path operation is not // an index scan (which has an empty local vocabulary by default) is the diff --git a/src/engine/TransitivePathFallback.h b/src/engine/TransitivePathFallback.h index 75680ead86..e897a17447 100644 --- a/src/engine/TransitivePathFallback.h +++ b/src/engine/TransitivePathFallback.h @@ -71,6 +71,21 @@ class TransitivePathFallback : public TransitivePathBase { const TransitivePathSide& targetSide) const; private: + /** + * @brief Decide on which transitive path side the hull computation should + * start and where it should end. The start and target side are chosen by + * the following criteria: + * + * 1. If a side is bound, then this side will be the start side. + * 2. If a side is an id, then this side will be the start side. + * 3. If both sides are variables, the left side is chosen as start + * (arbitrarily). + * + * @return std::pair The first entry + * of the pair is the start side, the second entry is the target side. 
+ */ + std::pair decideDirection(); + /** * @brief Compute the result for this TransitivePath operation * This function chooses the start and target side for the transitive From 2c9b11789b6f2da5c6ba065bb75fea212ac9285f Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 25 Mar 2024 13:57:08 +0100 Subject: [PATCH 42/92] Moved Map and Set definition --- src/engine/TransitivePathBase.h | 11 +++++++++++ src/engine/TransitivePathBinSearch.h | 12 ------------ src/engine/TransitivePathFallback.h | 12 ------------ 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/src/engine/TransitivePathBase.h b/src/engine/TransitivePathBase.h index 09251d5742..e7d84dab09 100644 --- a/src/engine/TransitivePathBase.h +++ b/src/engine/TransitivePathBase.h @@ -58,6 +58,17 @@ struct TransitivePathSide { class TransitivePathBase : public Operation { protected: + // We deliberately use the `std::` variants of a hash set and hash map because + // `absl`s types are not exception safe. + constexpr static auto hash = [](Id id) { + return std::hash{}(id.getBits()); + }; + using Set = std::unordered_set, + ad_utility::AllocatorWithLimit>; + using Map = std::unordered_map< + Id, Set, decltype(hash), std::equal_to, + ad_utility::AllocatorWithLimit>>; + std::shared_ptr subtree_; TransitivePathSide lhs_; TransitivePathSide rhs_; diff --git a/src/engine/TransitivePathBinSearch.h b/src/engine/TransitivePathBinSearch.h index e993c535e5..e1f458632c 100644 --- a/src/engine/TransitivePathBinSearch.h +++ b/src/engine/TransitivePathBinSearch.h @@ -6,7 +6,6 @@ #include -#include #include #include @@ -29,17 +28,6 @@ struct BinSearchMap { }; class TransitivePathBinSearch : public TransitivePathBase { - // We deliberately use the `std::` variants of a hash set and hash map because - // `absl`s types are not exception safe. 
- constexpr static auto hash = [](Id id) { - return std::hash{}(id.getBits()); - }; - using Set = std::unordered_set, - ad_utility::AllocatorWithLimit>; - using Map = std::unordered_map< - Id, Set, decltype(hash), std::equal_to, - ad_utility::AllocatorWithLimit>>; - public: TransitivePathBinSearch(QueryExecutionContext* qec, std::shared_ptr child, diff --git a/src/engine/TransitivePathFallback.h b/src/engine/TransitivePathFallback.h index e897a17447..8c0ad4564e 100644 --- a/src/engine/TransitivePathFallback.h +++ b/src/engine/TransitivePathFallback.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include "TransitivePathBase.h" @@ -13,17 +12,6 @@ #include "engine/idTable/IdTable.h" class TransitivePathFallback : public TransitivePathBase { - // We deliberately use the `std::` variants of a hash set and hash map because - // `absl`s types are not exception safe. - constexpr static auto hash = [](Id id) { - return std::hash{}(id.getBits()); - }; - using Set = std::unordered_set, - ad_utility::AllocatorWithLimit>; - using Map = std::unordered_map< - Id, Set, decltype(hash), std::equal_to, - ad_utility::AllocatorWithLimit>>; - public: TransitivePathFallback(QueryExecutionContext* qec, std::shared_ptr child, From 2fea8d7bdf2495c599913ac095d06360cf769503 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 25 Mar 2024 14:03:52 +0100 Subject: [PATCH 43/92] Added checkCancellation to BinSearch --- src/engine/TransitivePathBinSearch.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/engine/TransitivePathBinSearch.cpp b/src/engine/TransitivePathBinSearch.cpp index 4366a99e0e..3186cd6c18 100644 --- a/src/engine/TransitivePathBinSearch.cpp +++ b/src/engine/TransitivePathBinSearch.cpp @@ -191,10 +191,8 @@ TransitivePathBinSearch::Map TransitivePathBinSearch::transitiveHull( } while (stack.size() > 0) { - // auto [node, steps] = stack.back(); - auto pair = stack.back(); - auto node = pair.first; - auto steps = pair.second; + 
checkCancellation(); + auto [node, steps] = stack.back(); stack.pop_back(); if (steps <= maxDist_ && marks.count(node) == 0) { From 8039accf80b8aa885646468ab48f4c7a6e3e1c3e Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 25 Mar 2024 14:08:40 +0100 Subject: [PATCH 44/92] Removed suite-sparse from git workflows --- .github/workflows/install-dependencies-ubuntu/action.yml | 2 +- .github/workflows/macos.yml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/install-dependencies-ubuntu/action.yml b/.github/workflows/install-dependencies-ubuntu/action.yml index 204c8f0448..b5c248dfed 100644 --- a/.github/workflows/install-dependencies-ubuntu/action.yml +++ b/.github/workflows/install-dependencies-ubuntu/action.yml @@ -19,7 +19,7 @@ runs: - name: Install third-party libraries if: inputs.install-third-party-libraries == 'true' run: | - sudo apt-get install -y libicu-dev tzdata libzstd-dev libjemalloc-dev libgraphblas-dev + sudo apt-get install -y libicu-dev tzdata libzstd-dev libjemalloc-dev shell: bash - name: Install boost from PPA diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 2365ffd894..d0efe2c515 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -41,7 +41,6 @@ jobs: run: | brew install llvm@16 brew install conan@2 - brew install suite-sparse echo 'export PATH="/usr/local/opt/llvm@16/bin:$PATH"' >> ~/.bash_profile echo PATH="/usr/local/opt/llvm@16/bin:$PATH" >> $GITHUB_ENV echo 'export LDFLAGS="-L/usr/local/opt/llvm@16/lib -L/usr/local/opt/llvm@16/lib/c++ -Wl,-rpath,/usr/local/opt/llvm@16/lib/c++"' >> ~/.bash_profile From 95f6dad7bc7a9435b32862308a08a61f29246864 Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 25 Mar 2024 14:14:00 +0100 Subject: [PATCH 45/92] Style fix --- src/engine/ExportQueryExecutionTrees.cpp | 3 ++- test/QueryPlannerTestHelpers.h | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 75aee6ee1f..22066b076a 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -129,7 +129,8 @@ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( auto sz = local.size(); LOG(INFO) << "Local size " << sz << std::endl; auto* qec = qet.getQec(); - LOG(INFO) << qec->getIndex().getVocab().size() << "end of bla size" << std::endl; + LOG(INFO) << qec->getIndex().getVocab().size() << "end of bla size" + << std::endl; const auto& optionalStringAndXsdType = idToStringAndType( qet.getQec()->getIndex(), currentId, resultTable->localVocab()); if (!optionalStringAndXsdType.has_value()) { diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index b242d6c111..5941bc7fdd 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -149,8 +149,7 @@ inline auto CountAvailablePredicates = [](size_t subjectColumnIdx, const Variable& predicateVar, const Variable& countVar, const std::same_as auto&... 
childMatchers) - requires(sizeof...(childMatchers) <= 1) -{ + requires(sizeof...(childMatchers) <= 1) { return RootOperation<::CountAvailablePredicates>(AllOf( AD_PROPERTY(::CountAvailablePredicates, subjectColumnIndex, Eq(subjectColumnIdx)), From 75f45930ff87fcc091002bed655b5a4876edbbda Mon Sep 17 00:00:00 2001 From: Johannes Herrmann Date: Mon, 25 Mar 2024 19:04:51 +0100 Subject: [PATCH 46/92] Rebased against current master --- .github/workflows/native-build.yml | 4 +- CMakeLists.txt | 5 +- src/TurtleParserMain.cpp | 201 ------------------ src/engine/ExportQueryExecutionTrees.cpp | 25 +-- src/engine/QueryPlanner.cpp | 19 +- src/engine/Server.cpp | 17 +- .../sparqlExpressions/LiteralExpression.h | 28 ++- .../sparqlExpressions/RegexExpression.cpp | 27 +-- .../RelationalExpressions.cpp | 2 +- src/global/Constants.h | 4 +- src/index/Index.cpp | 15 +- src/index/Index.h | 7 +- src/index/IndexBuilderTypes.h | 38 ++-- src/index/IndexImpl.cpp | 55 +++-- src/index/IndexImpl.h | 23 +- src/index/Vocabulary.cpp | 17 +- src/index/Vocabulary.h | 14 +- src/index/VocabularyMerger.h | 3 +- src/parser/Iri.cpp | 37 +++- src/parser/Iri.h | 28 ++- src/parser/Literal.cpp | 103 ++++++--- src/parser/Literal.h | 71 ++++--- src/parser/LiteralOrIri.cpp | 12 +- src/parser/LiteralOrIri.h | 43 +++- src/parser/NormalizedString.h | 5 + src/parser/ParsedQuery.cpp | 8 +- src/parser/ParsedQuery.h | 6 +- src/parser/TripleComponent.cpp | 25 +-- src/parser/TripleComponent.h | 85 +++----- src/parser/TurtleParser.cpp | 129 ++++++----- src/parser/TurtleParser.h | 36 +--- .../sparqlParser/SparqlQleverVisitor.cpp | 48 ++--- src/parser/sparqlParser/SparqlQleverVisitor.h | 4 +- src/util/AsioHelpers.h | 69 +++--- src/util/CancellationHandle.cpp | 4 + src/util/CancellationHandle.h | 1 + src/util/Conversions.cpp | 13 +- src/util/Conversions.h | 13 +- src/util/FsstCompressor.h | 49 +++-- src/util/Serializer/Serializer.h | 2 +- src/util/StringUtils.h | 22 +- src/util/http/HttpClient.cpp | 4 +- 
test/AsioHelpersTest.cpp | 14 +- test/CancellationHandleTest.cpp | 18 ++ test/GroupByTest.cpp | 32 +-- test/HasPredicateScanTest.cpp | 10 +- test/IndexTest.cpp | 124 +++++------ test/JoinTest.cpp | 12 +- test/LocalVocabTest.cpp | 55 ++--- test/QueryPlannerTest.cpp | 74 ++++--- test/QueryPlannerTestHelpers.h | 20 +- test/ServiceTest.cpp | 6 +- test/SparqlAntlrParserTest.cpp | 118 ++++++---- test/SparqlExpressionTest.cpp | 48 +++-- test/SparqlParserTest.cpp | 57 ++--- test/TransitivePathTest.cpp | 1 + test/TripleComponentTest.cpp | 12 +- test/TurtleParserTest.cpp | 132 ++++++------ test/ValuesTest.cpp | 12 +- test/engine/IndexScanTest.cpp | 36 ++-- test/parser/LiteralOrIriTest.cpp | 20 +- test/util/IndexTestHelpers.cpp | 16 +- test/util/TripleComponentTestHelpers.h | 21 +- toolchains/clang14.cmake | 3 - toolchains/clang15.cmake | 3 - toolchains/clang18.cmake | 3 + toolchains/gcc13.cmake | 2 +- 67 files changed, 1141 insertions(+), 1029 deletions(-) delete mode 100644 src/TurtleParserMain.cpp delete mode 100644 toolchains/clang14.cmake delete mode 100644 toolchains/clang15.cmake create mode 100644 toolchains/clang18.cmake diff --git a/.github/workflows/native-build.yml b/.github/workflows/native-build.yml index 0590d4d0f5..cdfc2b1549 100644 --- a/.github/workflows/native-build.yml +++ b/.github/workflows/native-build.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: compiler: [gcc, clang] - compiler-version: [11, 12, 13, 16, 17] + compiler-version: [11, 12, 13, 16, 17, 18] warnings: [ "-Wall -Wextra -Werror " ] build-type: [Release] expensive-tests: [true] @@ -30,6 +30,8 @@ jobs: compiler-version: 16 - compiler: gcc compiler-version: 17 + - compiler: gcc + compiler-version: 18 - compiler: clang compiler-version: 11 - compiler: clang diff --git a/CMakeLists.txt b/CMakeLists.txt index bf68faaf09..379816c78b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -299,7 +299,7 @@ FetchContent_Declare( FetchContent_Declare( fsst GIT_REPOSITORY 
https://github.com/cwida/fsst.git - GIT_TAG c541ffdc2be43443c6a772eb6207eec1b587dbe8 + GIT_TAG c8719ef0aa3740da9685ad2738bb9c8ecc327944 ) @@ -414,9 +414,6 @@ add_executable(ServerMain src/ServerMain.cpp) qlever_target_link_libraries (ServerMain engine ${CMAKE_THREAD_LIBS_INIT} Boost::program_options) target_precompile_headers(ServerMain REUSE_FROM engine) -add_executable(TurtleParserMain src/TurtleParserMain.cpp) -qlever_target_link_libraries(TurtleParserMain parser ${CMAKE_THREAD_LIBS_INIT}) - add_executable(VocabularyMergerMain src/VocabularyMergerMain.cpp) qlever_target_link_libraries(VocabularyMergerMain index ${CMAKE_THREAD_LIBS_INIT}) diff --git a/src/TurtleParserMain.cpp b/src/TurtleParserMain.cpp deleted file mode 100644 index 50508f2fd8..0000000000 --- a/src/TurtleParserMain.cpp +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Kalmbach(joka921) - -#include - -#include -#include -#include - -#include "parser/TurtleParser.h" -#include "util/Log.h" - -using std::cout; -using std::endl; - -/** - * @brief Instantiate a Parser that parses filename and writes the resulting - * triples to argument out. - * - * @tparam Parser A Parser that supports a call to getline that yields a triple - * @param out the parsed triples are written to this file - * @param filename the filename from which the triples are parsed, can be - * "/dev/stdin" - */ -template -void writeNTImpl(std::ostream& out, const std::string& filename) { - Parser p(filename); - TurtleTriple triple; - size_t numTriples = 0; - while (p.getLine(triple)) { - out << triple.subject_ << " " << triple.predicate_ << " " - << triple.object_.toRdfLiteral() << " .\n"; - numTriples++; - if (numTriples % 10000000 == 0) { - LOG(INFO) << "Parsed " << numTriples << " triples" << std::endl; - } - } -} - -/** - * @brief Decide according to arg fileFormat which parser to use. 
- * Then call writeNTImpl with the appropriate parser - * @param out Parsed triples will be written here. - * @param fileFormat One of [ttl|mmap] - * @param filename Will read from this file, might be /dev/stdin - */ -template -void writeNT(std::ostream& out, const string& fileFormat, - const std::string& filename) { - if (fileFormat == "ttl" || fileFormat == "nt") { - writeNTImpl>(out, filename); - } else { - LOG(ERROR) << "writeNT was called with unknown file format " << fileFormat - << ". This should never happen, terminating" << std::endl; - LOG(ERROR) << "Please specify a valid file format" << std::endl; - exit(1); - } -} - -void writeNTDispatch(std::ostream& out, const string& fileFormat, - const std::string& filename, - const std::string& regexEngine) { - if (regexEngine == "re2") { - writeNT(out, fileFormat, filename); - } else if (regexEngine == "ctre") { - LOG(INFO) << WARNING_ASCII_ONLY_PREFIXES << std::endl; - writeNT(out, fileFormat, filename); - } else { - LOG(ERROR) - << "Please specify a valid regex engine via the -r flag. " - "Options are \"re2\" or \"ctre\" (The latter only works correct if " - "prefix names only use ASCII characters but is faster" - << std::endl; - exit(1); - } -} - -// _______________________________________________________________________________________________________________ -void printUsage(char* execName) { - std::ios coutState(nullptr); - coutState.copyfmt(cout); - cout << std::setfill(' ') << std::left; - - cout << "Usage: " << execName << " -i [OPTIONS]" << endl << endl; - cout << "Options" << endl; - cout << " " << std::setw(20) << "F, file-format" << std::setw(1) << " " - << " Specify format of the input file. Must be one of " - "[nt|ttl|mmap]." 
- << " " << std::setw(36) - << "If not set, we will try to deduce from the filename" << endl - << " " << std::setw(36) - << "(mmap assumes an on-disk turtle file that can be mmapped to memory)" - << endl; - cout << " " << std::setw(20) << "i, input-file" << std::setw(1) << " " - << " The file to be parsed from. If omitted, we will read from stdin" - << endl; - cout << " " << std::setw(20) << "o, output-file" << std::setw(1) << " " - << " The NTriples file to be Written to. If omitted, we will write to " - "stdout" - << endl; - cout << " " << std::setw(20) << "r, regex-engine" << std::setw(1) << " " - << R"( The regex engine used for lexing. Must be one of "re2" or "ctre")" - << endl; - cout.copyfmt(coutState); -} - -// ________________________________________________________________________ -int main(int argc, char** argv) { - // we possibly write to stdout to pipe it somewhere else, so redirect all - // logging output to std::err - ad_utility::setGlobalLoggingStream(&std::cerr); - struct option options[] = {{"help", no_argument, NULL, 'h'}, - {"file-format", required_argument, NULL, 'F'}, - {"input-file", required_argument, NULL, 'i'}, - {"output-file", required_argument, NULL, 'o'}, - {"regex-engine", required_argument, NULL, 'r'}, - {NULL, 0, NULL, 0}}; - - string inputFile, outputFile, fileFormat, regexEngine; - while (true) { - int c = getopt_long(argc, argv, "F:i:o:r:h", options, nullptr); - if (c == -1) { - break; - } - switch (c) { - case 'h': - printUsage(argv[0]); - return 0; - case 'i': - inputFile = optarg; - break; - case 'o': - outputFile = optarg; - break; - case 'F': - fileFormat = optarg; - break; - case 'r': - regexEngine = optarg; - break; - default: - cout << endl - << "! 
ERROR in processing options (getopt returned '" << c - << "' = 0x" << std::setbase(16) << c << ")" << endl - << endl; - printUsage(argv[0]); - exit(1); - } - } - - if (fileFormat.empty()) { - bool filetypeDeduced = false; - if (inputFile.ends_with(".nt")) { - fileFormat = "nt"; - filetypeDeduced = true; - } else if (inputFile.ends_with(".ttl")) { - fileFormat = "ttl"; - filetypeDeduced = true; - } else { - LOG(WARN) - << " Could not deduce the type of the input knowledge-base-file by " - "its extension. Assuming the input to be turtle. Please specify " - "--file-format (-F) if this is not correct" - << std::endl; - } - if (filetypeDeduced) { - LOG(INFO) << "Assuming input file format to be " << fileFormat - << " due to the input file's extension." << std::endl; - LOG(INFO) - << "If this is wrong, please manually specify the --file-format " - "(-F) flag" - << std::endl; - } - } - - if (inputFile.empty()) { - LOG(INFO) << "No input file was specified, parsing from stdin" << std::endl; - inputFile = "/dev/stdin"; - } else if (inputFile == "-") { - LOG(INFO) << "Parsing from stdin" << std::endl; - inputFile = "/dev/stdin"; - } - - LOG(INFO) << "Trying to parse from input file " << inputFile << std::endl; - - if (!outputFile.empty()) { - std::ofstream of(outputFile); - if (!of) { - LOG(ERROR) << "Error opening '" << outputFile << "'" << std::endl; - printUsage(argv[0]); - exit(1); - } - LOG(INFO) << "Writing to file " << outputFile << std::endl; - writeNTDispatch(of, fileFormat, inputFile, regexEngine); - of.close(); - } else { - LOG(INFO) << "Writing to stdout" << std::endl; - writeNTDispatch(std::cout, fileFormat, inputFile, regexEngine); - } -} diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 22066b076a..8f3b49f0ca 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -117,7 +117,9 @@ nlohmann::json ExportQueryExecutionTrees::idTableToQLeverJSONArray( 
nlohmann::json json = nlohmann::json::array(); for (size_t rowIndex : getRowIndices(limitAndOffset, data)) { - json.emplace_back(); + // We need the explicit `array` constructor for the special case of zero + // variables. + json.push_back(nlohmann::json::array()); auto& row = json.back(); for (const auto& opt : columns) { if (!opt) { @@ -212,15 +214,19 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id, std::optional entity = index.idToOptionalString(id.getVocabIndex()); AD_CONTRACT_CHECK(entity.has_value()); + // TODO make this more efficient AND more correct + auto litOrIri = + ad_utility::triple_component::LiteralOrIri::fromStringRepresentation( + entity.value()); if constexpr (onlyReturnLiterals) { - if (!entity.value().starts_with('"')) { + if (!litOrIri.isLiteral()) { return std::nullopt; } } if constexpr (removeQuotesAndAngleBrackets) { - entity = RdfEscaping::normalizedContentFromLiteralOrIri( - std::move(entity.value())); + entity = asStringViewUnsafe(litOrIri.getContent()); } + // TODO handle the exporting of literals more correctly. return std::pair{escapeFunction(std::move(entity.value())), nullptr}; } case LocalVocabIndex: { @@ -396,12 +402,6 @@ nlohmann::json ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON( QueryExecutionTree::ColumnIndicesAndTypes selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, true); - // This can never happen, because empty SELECT clauses are not supported by - // QLever. Should we ever support triples without variables then this might - // theoretically happen in combination with `SELECT *`, but then this still - // can be changed. 
- AD_CORRECTNESS_CHECK(!selectedColumnIndices.empty()); - return ExportQueryExecutionTrees::idTableToQLeverJSONArray( qet, limitAndOffset, selectedColumnIndices, std::move(resultTable), std::move(cancellationHandle)); @@ -431,11 +431,6 @@ ExportQueryExecutionTrees::selectQueryResultToStream( << std::endl; auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, true); - // This case should only fail if we have no variables selected at all. - // This case should be handled earlier by the parser. - // TODO What do we want to do for variables that don't - // appear in the query body? - AD_CONTRACT_CHECK(!selectedColumnIndices.empty()); const auto& idTable = resultTable->idTable(); // special case : binary export of IdTable diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index ac5c060a45..261057838b 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -180,6 +180,10 @@ QueryExecutionTree QueryPlanner::createExecutionTree(ParsedQuery& pq) { std::vector QueryPlanner::optimize( ParsedQuery::GraphPattern* rootPattern) { + // Handle the empty pattern + if (rootPattern->_graphPatterns.empty()) { + return {makeSubtreePlan(_qec)}; + } // here we collect a set of possible plans for each of our children. 
// always only holds plans for children that can be joined in an // arbitrary order @@ -215,11 +219,6 @@ std::vector QueryPlanner::optimize( // find a single best candidate for a given graph pattern auto optimizeSingle = [this](const auto pattern) -> SubtreePlan { auto v = optimize(pattern); - if (v.empty()) { - throw std::runtime_error( - "grandchildren or lower of a Plan to be optimized may never be " - "empty"); - } auto idx = findCheapestExecutionTree(v); return std::move(v[idx]); }; @@ -279,11 +278,8 @@ std::vector QueryPlanner::optimize( } else { static_assert( std::is_same_v, std::decay_t>); - if (v.empty()) { - throw std::runtime_error( - "grandchildren or lower of a Plan to be optimized may never be " - "empty. Please report this"); - } + // Empty group graph patterns should have been handled previously. + AD_CORRECTNESS_CHECK(!v.empty()); // optionals that occur before any of their variables have been bound // actually behave like ordinary (Group)GraphPatterns @@ -382,6 +378,7 @@ std::vector QueryPlanner::optimize( makeSubtreePlan(_qec, left._qet, right._qet); joinCandidates(std::vector{std::move(candidate)}); } else if constexpr (std::is_same_v) { + ParsedQuery& subquery = arg.get(); // TODO We currently do not optimize across subquery borders // but abuse them as "optimization hints". In theory, one could even // remove the ORDER BY clauses of a subquery if we can prove that @@ -389,7 +386,7 @@ std::vector QueryPlanner::optimize( // For a subquery, make sure that one optimal result for each ordering // of the result (by a single column) is contained. - auto candidatesForSubquery = createExecutionTrees(arg.get()); + auto candidatesForSubquery = createExecutionTrees(subquery); // Make sure that variables that are not selected by the subquery are // not visible. 
auto setSelectedVariables = [&](SubtreePlan& plan) { diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index 2984a6e5be..ba0476a279 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -793,9 +793,18 @@ boost::asio::awaitable Server::processQuery( template Awaitable Server::computeInNewThread(Function function, SharedCancellationHandle handle) { - auto timerRunning = std::make_shared(true); - auto inner = [function = std::move(function), timerRunning]() mutable -> T { - timerRunning->clear(); + // `interruptible` will set the shared state of this promise + // with a function that can be used to cancel the timer. + std::promise> cancelTimerPromise{}; + auto cancelTimerFuture = cancelTimerPromise.get_future(); + + auto inner = [function = std::move(function), + cancelTimerFuture = + std::move(cancelTimerFuture)]() mutable -> T { + // Ensure future is ready by the time this is called. + AD_CORRECTNESS_CHECK(cancelTimerFuture.wait_for(std::chrono::milliseconds{ + 0}) == std::future_status::ready); + cancelTimerFuture.get()(); return std::invoke(std::move(function)); }; // interruptible doesn't make the awaitable return faster when cancelled, @@ -804,7 +813,7 @@ Awaitable Server::computeInNewThread(Function function, return ad_utility::interruptible( ad_utility::runFunctionOnExecutor(threadPool_.get_executor(), std::move(inner), net::use_awaitable), - std::move(handle), std::move(timerRunning)); + std::move(handle), std::move(cancelTimerPromise)); } // _____________________________________________________________________________ diff --git a/src/engine/sparqlExpressions/LiteralExpression.h b/src/engine/sparqlExpressions/LiteralExpression.h index a040f855fd..729a240dc0 100644 --- a/src/engine/sparqlExpressions/LiteralExpression.h +++ b/src/engine/sparqlExpressions/LiteralExpression.h @@ -5,6 +5,7 @@ #pragma once #include "engine/sparqlExpressions/SparqlExpression.h" +#include "util/TypeTraits.h" namespace sparqlExpression { namespace detail { @@ 
-38,24 +39,27 @@ class LiteralExpression : public SparqlExpression { // Evaluating just returns the constant/literal value. ExpressionResult evaluate(EvaluationContext* context) const override { - // Common code for the `Literal` and `std::string` case. - auto getIdOrString = [this, - &context](const std::string& s) -> ExpressionResult { + // Common code for the `Literal` and `Iri` case. + auto getIdOrString = + [this, + &context](const ad_utility::SameAsAny auto& s) + -> ExpressionResult { if (auto ptr = cachedResult_.load(std::memory_order_relaxed)) { return *ptr; } - Id id; - bool idWasFound = context->_qec.getIndex().getId(s, &id); - IdOrString result = idWasFound ? IdOrString{id} : IdOrString{s}; + auto id = context->_qec.getIndex().getId(s); + IdOrString result = + id.has_value() ? IdOrString{id.value()} + : IdOrString{std::string{s.toStringRepresentation()}}; auto ptrForCache = std::make_unique(result); ptrForCache.reset(std::atomic_exchange_explicit( &cachedResult_, ptrForCache.release(), std::memory_order_relaxed)); context->cancellationHandle_->throwIfCancelled(); return result; }; - if constexpr (std::is_same_v) { - return getIdOrString(_value.rawContent()); - } else if constexpr (std::is_same_v) { + if constexpr (ad_utility::SameAsAny) { return getIdOrString(_value); } else if constexpr (std::is_same_v) { return evaluateIfVariable(context, _value); @@ -99,7 +103,9 @@ class LiteralExpression : public SparqlExpression { } else if constexpr (std::is_same_v) { return absl::StrCat("#valueId ", _value.getBits(), "#"); } else if constexpr (std::is_same_v) { - return absl::StrCat("#literal: ", _value.rawContent()); + return absl::StrCat("#literal: ", _value.toStringRepresentation()); + } else if constexpr (std::is_same_v) { + return absl::StrCat("#iri: ", _value.toStringRepresentation()); } else if constexpr (std::is_same_v>) { // We should never cache this, as objects of this type of expression are // used exactly *once* in the HashMap optimization of the 
GROUP BY @@ -175,7 +181,7 @@ class LiteralExpression : public SparqlExpression { /// The actual instantiations and aliases of LiteralExpressions. using VariableExpression = detail::LiteralExpression<::Variable>; -using IriExpression = detail::LiteralExpression; +using IriExpression = detail::LiteralExpression; using StringLiteralExpression = detail::LiteralExpression; using IdExpression = detail::LiteralExpression; diff --git a/src/engine/sparqlExpressions/RegexExpression.cpp b/src/engine/sparqlExpressions/RegexExpression.cpp index 3b4e20f995..e5454e6e72 100644 --- a/src/engine/sparqlExpressions/RegexExpression.cpp +++ b/src/engine/sparqlExpressions/RegexExpression.cpp @@ -67,15 +67,6 @@ std::optional getPrefixRegex(std::string regex) { return regex; } -// Assert that `input` starts and ends with double quotes `"` and remove those -// quotes. -std::string removeQuotes(std::string_view input) { - AD_CORRECTNESS_CHECK(input.size() >= 2 && input.starts_with('"') && - input.ends_with('"')); - input.remove_prefix(1); - input.remove_suffix(1); - return std::string{input}; -} } // namespace sparqlExpression::detail namespace sparqlExpression { @@ -93,16 +84,15 @@ RegexExpression::RegexExpression( "REGEX expressions are currently supported only on variables."); } std::string regexString; - std::string originalRegexString; if (auto regexPtr = dynamic_cast(regex.get())) { - originalRegexString = regexPtr->value().normalizedLiteralContent().get(); - if (!regexPtr->value().datatypeOrLangtag().empty()) { + const auto& regexLiteral = regexPtr->value(); + regexString = asStringViewUnsafe(regexLiteral.getContent()); + if (regexLiteral.hasDatatype() || regexLiteral.hasLanguageTag()) { throw std::runtime_error( "The second argument to the REGEX function (which contains the " "regular expression) must not contain a language tag or a datatype"); } - regexString = detail::removeQuotes(originalRegexString); } else { throw std::runtime_error( "The second argument to the REGEX function 
must be a " @@ -111,15 +101,14 @@ RegexExpression::RegexExpression( if (optionalFlags.has_value()) { if (auto flagsPtr = dynamic_cast( optionalFlags.value().get())) { - std::string_view originalFlags = - flagsPtr->value().normalizedLiteralContent().get(); - if (!flagsPtr->value().datatypeOrLangtag().empty()) { + const auto& flagsLiteral = flagsPtr->value(); + std::string_view flags = asStringViewUnsafe(flagsLiteral.getContent()); + if (flagsLiteral.hasDatatype() || flagsLiteral.hasLanguageTag()) { throw std::runtime_error( "The third argument to the REGEX function (which contains optional " "flags to configure the evaluation) must not contain a language " "tag or a datatype"); } - auto flags = detail::removeQuotes(originalFlags); auto firstInvalidFlag = flags.find_first_not_of("imsu"); if (firstInvalidFlag != std::string::npos) { throw std::runtime_error{absl::StrCat( @@ -148,8 +137,8 @@ RegexExpression::RegexExpression( const auto& r = std::get(regex_); if (r.error_code() != RE2::NoError) { throw std::runtime_error{absl::StrCat( - "The regex ", originalRegexString, - " is not supported by QLever (which uses Google's RE2 library). " + "The regex \"", regexString, + "\" is not supported by QLever (which uses Google's RE2 library). " "Error from RE2 is: ", r.error())}; } diff --git a/src/engine/sparqlExpressions/RelationalExpressions.cpp b/src/engine/sparqlExpressions/RelationalExpressions.cpp index 424f5670b8..0a328c7bf5 100644 --- a/src/engine/sparqlExpressions/RelationalExpressions.cpp +++ b/src/engine/sparqlExpressions/RelationalExpressions.cpp @@ -299,7 +299,7 @@ RelationalExpression::getLanguageFilterExpression() const { // TODO Is this even allowed by the grammar? 
return LangFilterData{ varPtr->variable(), - std::string{langPtr->value().normalizedLiteralContent().get()}}; + std::string{asStringViewUnsafe(langPtr->value().getContent())}}; }; const auto& child1 = children_[0]; diff --git a/src/global/Constants.h b/src/global/Constants.h index 53850f6302..6e70367ff4 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -64,9 +64,9 @@ static const std::string HAS_PREDICATE_PREDICATE = makeInternalIri("has-predicate"); static const std::string HAS_PATTERN_PREDICATE = makeInternalIri("has-pattern"); static constexpr std::pair GEOF_PREFIX = { - "geof:", " MATH_PREFIX = { - "math:", " Index::idToOptionalString(WordVocabIndex id) const { } // ____________________________________________________________________________ -bool Index::getId(const std::string& element, Id* id) const { - return pimpl_->getId(element, id); +std::optional Index::getId( + const ad_utility::triple_component::LiteralOrIri& element) const { + return pimpl_->getId(element); +} +// ____________________________________________________________________________ +std::optional Index::getId( + const ad_utility::triple_component::Iri& element) const { + return pimpl_->getId(element); +} +// ____________________________________________________________________________ +std::optional Index::getId( + const ad_utility::triple_component::Literal& element) const { + return pimpl_->getId(element); } // ____________________________________________________________________________ diff --git a/src/index/Index.h b/src/index/Index.h index a0ccbd8c42..ce0426b9cc 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -124,7 +124,12 @@ class Index { [[nodiscard]] std::optional idToOptionalString( WordVocabIndex id) const; - bool getId(const std::string& element, Id* id) const; + std::optional getId( + const ad_utility::triple_component::LiteralOrIri& element) const; + std::optional getId( + const ad_utility::triple_component::Literal& element) const; + std::optional 
getId( + const ad_utility::triple_component::Iri& element) const; [[nodiscard]] Vocab::PrefixRanges prefixRanges(std::string_view prefix) const; diff --git a/src/index/IndexBuilderTypes.h b/src/index/IndexBuilderTypes.h index d9645138ca..bdbbde28bc 100644 --- a/src/index/IndexBuilderTypes.h +++ b/src/index/IndexBuilderTypes.h @@ -13,6 +13,7 @@ #include "global/Id.h" #include "index/ConstantsIndexBuilding.h" #include "index/StringSortComparator.h" +#include "parser/TripleComponent.h" #include "util/Conversions.h" #include "util/HashMap.h" #include "util/Serializer/Serializer.h" @@ -22,11 +23,11 @@ // An IRI or a literal together with the information, whether it should be part // of the external vocabulary struct PossiblyExternalizedIriOrLiteral { - PossiblyExternalizedIriOrLiteral(std::string iriOrLiteral, + PossiblyExternalizedIriOrLiteral(TripleComponent iriOrLiteral, bool isExternal = false) : iriOrLiteral_{std::move(iriOrLiteral)}, isExternal_{isExternal} {} PossiblyExternalizedIriOrLiteral() = default; - std::string iriOrLiteral_; + TripleComponent iriOrLiteral_; bool isExternal_ = false; AD_SERIALIZE_FRIEND_FUNCTION(PossiblyExternalizedIriOrLiteral) { @@ -155,17 +156,19 @@ struct alignas(256) ItemMapManager { const auto& key = std::get(keyOrId); auto& map = map_.map_; auto& buffer = map_.buffer_; - auto it = map.find(key.iriOrLiteral_); + auto repr = key.iriOrLiteral_.toRdfLiteral(); + auto it = map.find(repr); if (it == map.end()) { uint64_t res = map.size() + minId_; // We have to first add the string to the buffer, otherwise we don't have // a persistent `string_view` to add to the `map`. - auto keyView = buffer.addString(key.iriOrLiteral_); + auto keyView = buffer.addString(repr); + // TODO The LocalVocabIndexAndSplitVal should work on + // `Literal|Iri|BlankNode` directly. 
map.try_emplace( keyView, LocalVocabIndexAndSplitVal{ res, comparator_->extractAndTransformComparableNonOwning( - key.iriOrLiteral_, - TripleComponentComparator::Level::TOTAL, + repr, TripleComponentComparator::Level::TOTAL, key.isExternal_, &buffer.charAllocator())}); return Id::makeFromVocabIndex(VocabIndex::make(res)); } else { @@ -236,7 +239,8 @@ auto getIdMapLambdas( // The LANGUAGE_PREDICATE gets the first ID in each map. TODO // This is not necessary for the actual QLever code, but certain unit tests // currently fail without it. - itemArray[j]->getId(LANGUAGE_PREDICATE); + itemArray[j]->getId(TripleComponent{ + ad_utility::triple_component::Iri::fromIriref(LANGUAGE_PREDICATE)}); } using OptionalIds = std::array>, 3>; @@ -257,14 +261,14 @@ auto getIdMapLambdas( if (!lt.langtag_.empty()) { // the object of the triple was a literal // with a language tag // get the Id for the corresponding langtag Entity - auto langTagId = - map.getId(ad_utility::convertLangtagToEntityUri(lt.langtag_)); + auto langTagId = map.getId(TripleComponent{ + ad_utility::convertLangtagToEntityUri(lt.langtag_)}); // get the Id for the tagged predicate, e.g. @en@rdfs:label - auto langTaggedPredId = - map.getId(ad_utility::convertToLanguageTaggedPredicate( - std::get(lt.triple_[1]) - .iriOrLiteral_, - lt.langtag_)); + const auto& iri = + std::get(lt.triple_[1]) + .iriOrLiteral_.getIri(); + auto langTaggedPredId = map.getId(TripleComponent{ + ad_utility::convertToLanguageTaggedPredicate(iri, lt.langtag_)}); auto& spoIds = *res[0]; // ids of original triple // TODO replace the std::array by an explicit IdTriple class, // then the emplace calls don't need the explicit type. 
@@ -273,7 +277,11 @@ auto getIdMapLambdas( std::array{spoIds[0], langTaggedPredId, spoIds[2]}); // extra triple ql:language-tag <@language> res[2].emplace(std::array{ - spoIds[2], map.getId(LANGUAGE_PREDICATE), langTagId}); + spoIds[2], + map.getId( + TripleComponent{ad_utility::triple_component::Iri::fromIriref( + LANGUAGE_PREDICATE)}), + langTagId}); } return res; }; diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 8a21b4ee1c..db8633f9bf 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -936,7 +936,13 @@ LangtagAndTriple IndexImpl::tripleToInternalRepresentation( LangtagAndTriple result{"", {}}; auto& resultTriple = result.triple_; resultTriple[0] = std::move(triple.subject_); - resultTriple[1] = std::move(triple.predicate_); + resultTriple[1] = TripleComponent{std::move(triple.predicate_)}; + if (triple.object_.isLiteral()) { + const auto& lit = triple.object_.getLiteral(); + if (lit.hasLanguageTag()) { + result.langtag_ = std::string(asStringViewUnsafe(lit.getLanguageTag())); + } + } // If the object of the triple can be directly folded into an ID, do so. Note // that the actual folding is done by the `TripleComponent`. @@ -948,7 +954,7 @@ LangtagAndTriple IndexImpl::tripleToInternalRepresentation( resultTriple[2] = idIfNotString.value(); } else { // `toRdfLiteral` handles literals as well as IRIs correctly. - resultTriple[2] = std::move(triple.object_).toRdfLiteral(); + resultTriple[2] = std::move(triple.object_); } for (size_t i = 0; i < 3; ++i) { @@ -958,15 +964,13 @@ LangtagAndTriple IndexImpl::tripleToInternalRepresentation( continue; } auto& component = std::get(el); - auto& iriOrLiteral = component.iriOrLiteral_; - iriOrLiteral = vocab_.getLocaleManager().normalizeUtf8(iriOrLiteral); - if (vocab_.shouldBeExternalized(iriOrLiteral)) { + const auto& iriOrLiteral = component.iriOrLiteral_; + // TODO Perform this normalization right at the beginning of the + // parsing. 
iriOrLiteral = + // vocab_.getLocaleManager().normalizeUtf8(iriOrLiteral); + if (vocab_.shouldBeExternalized(iriOrLiteral.toRdfLiteral())) { component.isExternal_ = true; } - // Only the third element (the object) might contain a language tag. - if (i == 2 && isLiteral(iriOrLiteral)) { - result.langtag_ = decltype(vocab_)::getLanguage(iriOrLiteral); - } } return result; } @@ -1288,13 +1292,32 @@ std::optional IndexImpl::idToOptionalString(WordVocabIndex id) const { } // ___________________________________________________________________________ -bool IndexImpl::getId(const string& element, Id* id) const { - // TODO we should parse doubles correctly in the SparqlParser and - // then return the correct ids here or somewhere else. - VocabIndex vocabId; - auto success = getVocab().getId(element, &vocabId); - *id = Id::makeFromVocabIndex(vocabId); - return success; +std::optional IndexImpl::getIdImpl(const auto& element) const { + VocabIndex vocabIndex; + auto success = + getVocab().getId(element.toStringRepresentation(), &vocabIndex); + if (!success) { + return std::nullopt; + } + return Id::makeFromVocabIndex(vocabIndex); +} + +// ___________________________________________________________________________ +std::optional IndexImpl::getId( + const ad_utility::triple_component::LiteralOrIri& element) const { + return getIdImpl(element); +} + +// ___________________________________________________________________________ +std::optional IndexImpl::getId( + const ad_utility::triple_component::Literal& element) const { + return getIdImpl(element); +} + +// ___________________________________________________________________________ +std::optional IndexImpl::getId( + const ad_utility::triple_component::Iri& element) const { + return getIdImpl(element); } // ___________________________________________________________________________ diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index edd2e604a9..d8622f450f 100644 --- a/src/index/IndexImpl.h +++ 
b/src/index/IndexImpl.h @@ -260,8 +260,18 @@ class IndexImpl { std::optional idToOptionalString(WordVocabIndex id) const; + private: + // ___________________________________________________________________________ + std::optional getIdImpl(const auto& element) const; + + public: // ___________________________________________________________________________ - bool getId(const string& element, Id* id) const; + std::optional getId( + const ad_utility::triple_component::LiteralOrIri& element) const; + std::optional getId( + const ad_utility::triple_component::Literal& element) const; + std::optional getId( + const ad_utility::triple_component::Iri& element) const; // ___________________________________________________________________________ Index::Vocab::PrefixRanges prefixRanges(std::string_view prefix) const; @@ -654,10 +664,13 @@ class IndexImpl { std::vector> ignoredRanges; ignoredRanges.emplace_back(qlever::getBoundsForSpecialIds()); - auto literalRanges = getVocab().prefixRanges("\""); - auto taggedPredicatesRanges = getVocab().prefixRanges("@"); - auto internalEntitiesRanges = - getVocab().prefixRanges(INTERNAL_ENTITIES_URI_PREFIX); + auto literalRanges = + getVocab().prefixRanges(ad_utility::triple_component::literalPrefix); + auto taggedPredicatesRanges = + getVocab().prefixRanges(ad_utility::languageTaggedPredicatePrefix); + auto internal = INTERNAL_ENTITIES_URI_PREFIX; + internal[0] = ad_utility::triple_component::iriPrefixChar; + auto internalEntitiesRanges = getVocab().prefixRanges(internal); auto pushIgnoredRange = [&ignoredRanges](const auto& ranges) { for (const auto& range : ranges.ranges()) { diff --git a/src/index/Vocabulary.cpp b/src/index/Vocabulary.cpp index f64ef4daae..af4f37ea9c 100644 --- a/src/index/Vocabulary.cpp +++ b/src/index/Vocabulary.cpp @@ -93,13 +93,13 @@ void Vocabulary::createFromSet( // _____________________________________________________________________________ template -bool Vocabulary::stringIsLiteral(const string& s) { +bool 
Vocabulary::stringIsLiteral(std::string_view s) { return s.starts_with('"'); } // _____________________________________________________________________________ template -bool Vocabulary::shouldBeExternalized(const string& s) const { +bool Vocabulary::shouldBeExternalized(string_view s) const { // TODO Completely refactor the Vocabulary on the different // Types, it is a mess. @@ -118,7 +118,8 @@ bool Vocabulary::shouldBeExternalized(const string& s) const { // ___________________________________________________________________ template -bool Vocabulary::shouldEntityBeExternalized(const string& word) const { +bool Vocabulary::shouldEntityBeExternalized( + std::string_view word) const { // Never externalize the internal IRIs as they are sometimes added before or // after the externalization happens and we thus get inconsistent behavior // etc. for `ql:langtag`. @@ -144,7 +145,7 @@ bool Vocabulary::shouldEntityBeExternalized(const string& word) const { // ___________________________________________________________________ template bool Vocabulary::shouldLiteralBeExternalized( - const string& word) const { + std::string_view word) const { for (const auto& p : externalizedPrefixes_) { if (word.starts_with(p)) { return true; @@ -155,7 +156,7 @@ bool Vocabulary::shouldLiteralBeExternalized( return true; } - const string lang = getLanguage(word); + const std::string_view lang = getLanguage(word); if (lang == "") { return false; } @@ -169,7 +170,7 @@ bool Vocabulary::shouldLiteralBeExternalized( } // _____________________________________________________________________________ template -string Vocabulary::getLanguage(const string& literal) { +std::string_view Vocabulary::getLanguage(std::string_view literal) { auto lioAt = literal.rfind('@'); if (lioAt != string::npos) { auto lioQ = literal.rfind('\"'); @@ -231,7 +232,7 @@ auto Vocabulary::upper_bound(const string& word, // _____________________________________________________________________________ template -auto 
Vocabulary::lower_bound(const string& word, +auto Vocabulary::lower_bound(std::string_view word, const SortLevel level) const -> IndexType { return IndexType::make(internalVocabulary_.lower_bound(word, level)._index); @@ -258,7 +259,7 @@ AccessReturnType_t Vocabulary::at( // _____________________________________________________________________________ template -bool Vocabulary::getId(const string& word, IndexType* idx) const { +bool Vocabulary::getId(std::string_view word, IndexType* idx) const { if (!shouldBeExternalized(word)) { // need the TOTAL level because we want the unique word. *idx = lower_bound(word, SortLevel::TOTAL); diff --git a/src/index/Vocabulary.h b/src/index/Vocabulary.h index 94d7120d1f..96940ea265 100644 --- a/src/index/Vocabulary.h +++ b/src/index/Vocabulary.h @@ -171,7 +171,7 @@ class Vocabulary { //! Get an Id from the vocabulary for some "normal" word. //! Return value signals if something was found at all. - bool getId(const string& word, IndexType* idx) const; + bool getId(std::string_view word, IndexType* idx) const; // Get the index range for the given prefix or `std::nullopt` if no word with // the given prefix exists in the vocabulary. 
@@ -190,20 +190,20 @@ class Vocabulary { void createFromSet(const ad_utility::HashSet& set, const std::string& filename); - static bool stringIsLiteral(const string& s); + static bool stringIsLiteral(std::string_view s); bool isIri(IndexT index) const { return prefixRangesIris_.contain(index); } bool isLiteral(IndexT index) const { return prefixRangesLiterals_.contain(index); } - bool shouldBeExternalized(const string& word) const; + bool shouldBeExternalized(std::string_view word) const; - bool shouldEntityBeExternalized(const string& word) const; + bool shouldEntityBeExternalized(std::string_view word) const; - bool shouldLiteralBeExternalized(const string& word) const; + bool shouldLiteralBeExternalized(std::string_view word) const; - static string getLanguage(const string& literal); + static string_view getLanguage(std::string_view literal); // set the list of prefixes for words which will become part of the // externalized vocabulary. Good for entity names that normally don't appear @@ -240,7 +240,7 @@ class Vocabulary { } // Wraps std::lower_bound and returns an index instead of an iterator - IndexType lower_bound(const string& word, + IndexType lower_bound(std::string_view word, const SortLevel level = SortLevel::QUARTERNARY) const; // _______________________________________________________________ diff --git a/src/index/VocabularyMerger.h b/src/index/VocabularyMerger.h index 3abe63fc7d..e718e361f2 100644 --- a/src/index/VocabularyMerger.h +++ b/src/index/VocabularyMerger.h @@ -72,7 +72,8 @@ struct VocabularyMetaData { // The number of distinct blank nodes that were found and immediately // converted to an ID without becoming part of the vocabulary. 
size_t numBlankNodesTotal_ = 0; - IdRangeForPrefix langTaggedPredicates_{"@"}; + IdRangeForPrefix langTaggedPredicates_{ + std::string{ad_utility::languageTaggedPredicatePrefix}}; IdRangeForPrefix internalEntities_{INTERNAL_ENTITIES_URI_PREFIX}; // Return true iff the `id` belongs to one of the two ranges that contain diff --git a/src/parser/Iri.cpp b/src/parser/Iri.cpp index 8bdadcb856..0fd8254f89 100644 --- a/src/parser/Iri.cpp +++ b/src/parser/Iri.cpp @@ -6,28 +6,49 @@ #include +#include "parser/LiteralOrIri.h" #include "util/StringUtils.h" namespace ad_utility::triple_component { // __________________________________________ -Iri::Iri(NormalizedString iri) : iri_{std::move(iri)} {} +Iri::Iri(std::string iri) : iri_{std::move(iri)} {} // __________________________________________ Iri::Iri(const Iri& prefix, NormalizedStringView suffix) - : iri_{NormalizedString{prefix.getContent()} + suffix} {}; + : iri_{absl::StrCat("<"sv, asStringViewUnsafe(prefix.getContent()), + asStringViewUnsafe(suffix), ">"sv)} {}; // __________________________________________ -NormalizedStringView Iri::getContent() const { return iri_; } +NormalizedStringView Iri::getContent() const { + return asNormalizedStringViewUnsafe(iri_).substr(1, iri_.size() - 2); +} + +// __________________________________________ +Iri Iri::fromIriref(std::string_view stringWithBrackets) { + auto first = stringWithBrackets.find('<'); + AD_CORRECTNESS_CHECK(first != std::string_view::npos); + return Iri{ + absl::StrCat(stringWithBrackets.substr(0, first + 1), + asStringViewUnsafe(RdfEscaping::normalizeIriWithBrackets( + stringWithBrackets.substr(first))), + ">"sv)}; +} // __________________________________________ -Iri Iri::iriref(std::string_view stringWithBrackets) { - return Iri{RdfEscaping::normalizeIriWithBrackets(stringWithBrackets)}; +Iri Iri::fromPrefixAndSuffix(const Iri& prefix, std::string_view suffix) { + auto suffixNormalized = RdfEscaping::unescapePrefixedIri(suffix); + return Iri{prefix, 
asNormalizedStringViewUnsafe(suffixNormalized)}; } // __________________________________________ -Iri Iri::prefixed(const Iri& prefix, std::string_view suffix) { - return Iri{std::move(prefix), - RdfEscaping::normalizeIriWithoutBrackets(suffix)}; +Iri Iri::fromStringRepresentation(std::string s) { + AD_CORRECTNESS_CHECK(s.starts_with("<") || s.starts_with("@")); + return Iri{std::move(s)}; } +// __________________________________________ +const std::string& Iri::toStringRepresentation() const { return iri_; } +// __________________________________________ +std::string& Iri::toStringRepresentation() { return iri_; } + } // namespace ad_utility::triple_component diff --git a/src/parser/Iri.h b/src/parser/Iri.h index 07197ba10c..69f3ee8085 100644 --- a/src/parser/Iri.h +++ b/src/parser/Iri.h @@ -10,29 +10,37 @@ namespace ad_utility::triple_component { -// A class to hold IRIs. It does not store the leading or trailing -// angled bracket. -// -// E.g. For the input "", -// only "http://example.org/books/book1" is to be stored in the iri_ variable. +// A class to hold IRIs. class Iri { private: - // Store the string value of the IRI without any leading or trailing angled + // Store the string value of the IRI including the angle brackets. // brackets. - NormalizedString iri_; + std::string iri_; // Create a new iri object - explicit Iri(NormalizedString iri); + explicit Iri(std::string iri); // Create a new iri using a prefix Iri(const Iri& prefix, NormalizedStringView suffix); public: + // A default constructed IRI is empty. 
+ Iri() = default; + template + friend H AbslHashValue(H h, const std::same_as auto& iri) { + return H::combine(std::move(h), iri.iri_); + } + bool operator==(const Iri&) const = default; + static Iri fromStringRepresentation(std::string s); + + const std::string& toStringRepresentation() const; + std::string& toStringRepresentation(); + // Create a new iri given an iri with brackets - static Iri iriref(std::string_view stringWithBrackets); + static Iri fromIriref(std::string_view stringWithBrackets); // Create a new iri given a prefix iri and its suffix - static Iri prefixed(const Iri& prefix, std::string_view suffix); + static Iri fromPrefixAndSuffix(const Iri& prefix, std::string_view suffix); // Return the string value of the iri object without any leading or trailing // angled brackets. diff --git a/src/parser/Literal.cpp b/src/parser/Literal.cpp index 055aced2c6..dca504da59 100644 --- a/src/parser/Literal.cpp +++ b/src/parser/Literal.cpp @@ -7,37 +7,45 @@ #include #include -namespace ad_utility::triple_component { -// __________________________________________ -Literal::Literal(NormalizedString content) : content_{std::move(content)} {} +#include "parser/LiteralOrIri.h" -// __________________________________________ -Literal::Literal(NormalizedString content, Iri datatype) - : content_{std::move(content)}, descriptor_{std::move(datatype)} {} +static constexpr char quote{'"'}; +static constexpr char at{'@'}; +static constexpr char hat{'^'}; +namespace ad_utility::triple_component { // __________________________________________ -Literal::Literal(NormalizedString content, NormalizedString languageTag) - : content_{std::move(content)}, descriptor_{std::move(languageTag)} {} +Literal::Literal(std::string content, size_t beginOfSuffix) + : content_{std::move(content)}, beginOfSuffix_{beginOfSuffix} { + AD_CORRECTNESS_CHECK(content_.starts_with(quote)); + AD_CORRECTNESS_CHECK(beginOfSuffix_ >= 2); + AD_CORRECTNESS_CHECK(content_[beginOfSuffix_ - 1] == quote); + 
AD_CORRECTNESS_CHECK(beginOfSuffix_ == content_.size() || + content_[beginOfSuffix] == at || + content_[beginOfSuffix] == hat); +} // __________________________________________ -bool Literal::hasLanguageTag() const { - return std::holds_alternative(descriptor_); -} +bool Literal::hasLanguageTag() const { return getSuffix().starts_with(at); } // __________________________________________ -bool Literal::hasDatatype() const { - return std::holds_alternative(descriptor_); -} +bool Literal::hasDatatype() const { return getSuffix().starts_with(hat); } // __________________________________________ -NormalizedStringView Literal::getContent() const { return content_; } +NormalizedStringView Literal::getContent() const { + return content().substr(1, beginOfSuffix_ - 2); +} // __________________________________________ -Iri Literal::getDatatype() const { +NormalizedStringView Literal::getDatatype() const { if (!hasDatatype()) { AD_THROW("The literal does not have an explicit datatype."); } - return std::get(descriptor_); + // We don't return the enclosing + NormalizedStringView result = content(); + result.remove_prefix(beginOfSuffix_ + 3); + result.remove_suffix(1); + return result; } // __________________________________________ @@ -45,13 +53,13 @@ NormalizedStringView Literal::getLanguageTag() const { if (!hasLanguageTag()) { AD_THROW("The literal does not have an explicit language tag."); } - return std::get(descriptor_); + return content().substr(beginOfSuffix_ + 1); } // __________________________________________ -Literal Literal::literalWithQuotes( +Literal Literal::fromEscapedRdfLiteral( std::string_view rdfContentWithQuotes, - std::optional> descriptor) { + std::optional> descriptor) { NormalizedString content = RdfEscaping::normalizeLiteralWithQuotes(rdfContentWithQuotes); @@ -72,24 +80,59 @@ Literal Literal::literalWithoutQuotes( Literal Literal::literalWithNormalizedContent( NormalizedString normalizedRdfContent, std::optional> descriptor) { + auto quotes = 
"\""sv; + auto actualContent = + absl::StrCat(quotes, asStringViewUnsafe(normalizedRdfContent), quotes); + auto sz = actualContent.size(); + auto literal = Literal{std::move(actualContent), sz}; if (!descriptor.has_value()) { - return Literal(std::move(normalizedRdfContent)); + return literal; } using namespace RdfEscaping; - auto visitLanguageTag = - [&normalizedRdfContent](std::string&& languageTag) -> Literal { - return {std::move(normalizedRdfContent), - normalizeLanguageTag(std::move(languageTag))}; + auto visitLanguageTag = [&literal](std::string_view languageTag) { + literal.addLanguageTag(languageTag); }; - auto visitDatatype = [&normalizedRdfContent](Iri&& datatype) -> Literal { - return {std::move(normalizedRdfContent), std::move(datatype)}; + auto visitDatatype = [&literal](const Iri& datatype) { + literal.addDatatype(datatype); }; - return std::visit( - ad_utility::OverloadCallOperator{visitDatatype, visitLanguageTag}, - std::move(descriptor.value())); + std::visit(ad_utility::OverloadCallOperator{visitDatatype, visitLanguageTag}, + std::move(descriptor.value())); + return literal; +} + +// __________________________________________ +void Literal::addLanguageTag(std::string_view languageTag) { + AD_CORRECTNESS_CHECK(!hasDatatype() && !hasLanguageTag()); + if (languageTag.starts_with('@')) { + absl::StrAppend(&content_, languageTag); + } else { + absl::StrAppend(&content_, "@"sv, languageTag); + } +} + +// __________________________________________ +void Literal::addDatatype(const Iri& datatype) { + AD_CORRECTNESS_CHECK(!hasDatatype() && !hasLanguageTag()); + absl::StrAppend(&content_, "^^"sv, datatype.toStringRepresentation()); +} + +// __________________________________________ +const std::string& Literal::toStringRepresentation() const { return content_; } + +// __________________________________________ +std::string& Literal::toStringRepresentation() { return content_; } + +// __________________________________________ +Literal 
Literal::fromStringRepresentation(std::string internal) { + // TODO This is a little dangerous as there might be quotes in the + // IRI which might lead to unexpected results here. + AD_CORRECTNESS_CHECK(internal.starts_with('"')); + auto endIdx = internal.rfind('"'); + AD_CORRECTNESS_CHECK(endIdx > 0); + return Literal{std::move(internal), endIdx + 1}; } } // namespace ad_utility::triple_component diff --git a/src/parser/Literal.h b/src/parser/Literal.h index 3b79a54b78..65ac735c86 100644 --- a/src/parser/Literal.h +++ b/src/parser/Literal.h @@ -11,36 +11,50 @@ namespace ad_utility::triple_component { // A class to hold literal values. class Literal { private: - // Store the string value of the literal without the surrounding quotation - // marks or trailing descriptor. - // "Hello World"@en -> Hello World - NormalizedString content_; - - using LiteralDescriptorVariant = - std::variant; - - // Store the optional language tag or the optional datatype if applicable - // without their prefixes. - // "Hello World"@en -> en - // "Hello World"^^test:type -> test:type - LiteralDescriptorVariant descriptor_; + // Store the normalized version of the literal, including possible datatypes + // and descriptors. + // For example `"Hello World"@en` or `"With"Quote"^^` (note + // that the quote in the middle is unescaped because this is the normalized + // form that QLever stores. + std::string content_; + // The position after the closing `"`, so either the size of the string, or + // the position of the `@` or `^^` for literals with language tags or + // datatypes. 
+ std::size_t beginOfSuffix_; // Create a new literal without any descriptor - explicit Literal(NormalizedString content); - - // Create a new literal with a datatype - Literal(NormalizedString content, Iri datatype); + explicit Literal(std::string content, size_t beginOfSuffix_); - // Create a new literal with a language tag - Literal(NormalizedString content, NormalizedString languageTag); - - // Similar to `literalWithQuotes`, except the rdfContent is expected to + // Similar to `fromEscapedRdfLiteral`, except the rdfContent is expected to // already be normalized static Literal literalWithNormalizedContent( NormalizedString normalizedRdfContent, - std::optional> descriptor = std::nullopt); + std::optional> descriptor = std::nullopt); + + // Internal helper function. Return either the empty string (for a plain + // literal), `@langtag` or `^^`. + std::string_view getSuffix() const { + std::string_view result = content_; + result.remove_prefix(beginOfSuffix_); + return result; + } + + NormalizedStringView content() const { + return asNormalizedStringViewUnsafe(content_); + } public: + template + friend H AbslHashValue(H h, const std::same_as auto& literal) { + return H::combine(std::move(h), literal.content_); + } + bool operator==(const Literal&) const = default; + + const std::string& toStringRepresentation() const; + std::string& toStringRepresentation(); + + static Literal fromStringRepresentation(std::string internal); + // Return true if the literal has an assigned language tag bool hasLanguageTag() const; @@ -57,18 +71,21 @@ class Literal { // Return the datatype of the literal, if available, without leading ^^ // prefix. Throws an exception if the literal has no datatype. 
- Iri getDatatype() const; + NormalizedStringView getDatatype() const; // For documentation, see documentation of function - // LiteralORIri::literalWithQuotes - static Literal literalWithQuotes( + // LiteralORIri::fromEscapedRdfLiteral + static Literal fromEscapedRdfLiteral( std::string_view rdfContentWithQuotes, - std::optional> descriptor = std::nullopt); + std::optional> descriptor = std::nullopt); + + void addLanguageTag(std::string_view languageTag); + void addDatatype(const Iri& datatype); // For documentation, see documentation of function // LiteralORIri::literalWithoutQuotes static Literal literalWithoutQuotes( std::string_view rdfContentWithoutQuotes, - std::optional> descriptor = std::nullopt); + std::optional> descriptor = std::nullopt); }; } // namespace ad_utility::triple_component diff --git a/src/parser/LiteralOrIri.cpp b/src/parser/LiteralOrIri.cpp index c7d0878928..3fb41d3eb2 100644 --- a/src/parser/LiteralOrIri.cpp +++ b/src/parser/LiteralOrIri.cpp @@ -66,7 +66,9 @@ NormalizedStringView LiteralOrIri::getLanguageTag() const { } // __________________________________________ -Iri LiteralOrIri::getDatatype() const { return getLiteral().getDatatype(); } +NormalizedStringView LiteralOrIri::getDatatype() const { + return getLiteral().getDatatype(); +} // __________________________________________ NormalizedStringView LiteralOrIri::getContent() const { @@ -80,21 +82,21 @@ NormalizedStringView LiteralOrIri::getContent() const { // __________________________________________ LiteralOrIri LiteralOrIri::iriref(const std::string& stringWithBrackets) { - return LiteralOrIri{Iri::iriref(stringWithBrackets)}; + return LiteralOrIri{Iri::fromIriref(stringWithBrackets)}; } // __________________________________________ LiteralOrIri LiteralOrIri::prefixedIri(const Iri& prefix, std::string_view suffix) { - return LiteralOrIri{Iri::prefixed(prefix, suffix)}; + return LiteralOrIri{Iri::fromPrefixAndSuffix(prefix, suffix)}; } // __________________________________________ 
LiteralOrIri LiteralOrIri::literalWithQuotes( std::string_view rdfContentWithQuotes, std::optional> descriptor) { - return LiteralOrIri( - Literal::literalWithQuotes(rdfContentWithQuotes, std::move(descriptor))); + return LiteralOrIri(Literal::fromEscapedRdfLiteral(rdfContentWithQuotes, + std::move(descriptor))); } // __________________________________________ diff --git a/src/parser/LiteralOrIri.h b/src/parser/LiteralOrIri.h index 301d7b2183..ba34b7c8e9 100644 --- a/src/parser/LiteralOrIri.h +++ b/src/parser/LiteralOrIri.h @@ -4,18 +4,26 @@ #pragma once +#include + #include #include "parser/Iri.h" #include "parser/Literal.h" +#include "util/Exception.h" namespace ad_utility::triple_component { +static constexpr char literalPrefixChar = '"'; +static constexpr char iriPrefixChar = '<'; +static constexpr std::string_view iriPrefix{&iriPrefixChar, 1}; +static constexpr std::string_view literalPrefix{&literalPrefixChar, 1}; // A wrapper class that can contain either an Iri or a Literal object. 
class LiteralOrIri { private: using LiteralOrIriVariant = std::variant; LiteralOrIriVariant data_; + public: // Return contained Iri object if available, throw exception otherwise const Iri& getIri() const; @@ -23,13 +31,36 @@ class LiteralOrIri { // otherwise const Literal& getLiteral() const; - public: // Create a new LiteralOrIri based on a Literal object explicit LiteralOrIri(Literal literal); // Create a new LiteralOrIri based on an Iri object explicit LiteralOrIri(Iri iri); + const std::string& toStringRepresentation() const { + auto impl = [](const auto& val) -> decltype(auto) { + return val.toStringRepresentation(); + }; + return std::visit(impl, data_); + } + + static LiteralOrIri fromStringRepresentation(std::string internal) { + char tag = internal.front(); + if (tag == iriPrefixChar) { + return LiteralOrIri{Iri::fromStringRepresentation(std::move(internal))}; + } else { + AD_CORRECTNESS_CHECK(tag == literalPrefixChar); + return LiteralOrIri{ + Literal::fromStringRepresentation(std::move(internal))}; + } + } + template + friend H AbslHashValue(H h, + const std::same_as auto& literalOrIri) { + return H::combine(std::move(h), literalOrIri.data_); + } + bool operator==(const LiteralOrIri&) const = default; + // Return true if object contains an Iri object bool isIri() const; @@ -60,7 +91,7 @@ class LiteralOrIri { // Return the datatype of the contained Literal without "^^" prefix. // Throw exception if no Literal object is contained or object has no // datatype. - Iri getDatatype() const; + NormalizedStringView getDatatype() const; // Return the content of the contained Iri, or the contained Literal NormalizedStringView getContent() const; @@ -77,13 +108,13 @@ class LiteralOrIri { // without any descriptor. 
static LiteralOrIri literalWithQuotes( std::string_view rdfContentWithQuotes, - std::optional> descriptor = std::nullopt); + std::optional> descriptor = std::nullopt); - // Similar to `literalWithQuotes`, except the rdfContent is expected to NOT BE - // surrounded by quotation marks. + // Similar to `fromEscapedRdfLiteral`, except the rdfContent is expected to + // NOT BE surrounded by quotation marks. static LiteralOrIri literalWithoutQuotes( std::string_view rdfContentWithoutQuotes, - std::optional> descriptor = std::nullopt); + std::optional> descriptor = std::nullopt); // Create a new iri given an iri with surrounding brackets static LiteralOrIri iriref(const std::string& stringWithBrackets); diff --git a/src/parser/NormalizedString.h b/src/parser/NormalizedString.h index ead416ae09..c5ddad5b23 100644 --- a/src/parser/NormalizedString.h +++ b/src/parser/NormalizedString.h @@ -9,6 +9,7 @@ struct NormalizedChar { char c_; + auto operator<=>(const NormalizedChar&) const = default; }; // A bespoke string representation that ensures the content @@ -24,3 +25,7 @@ inline std::string_view asStringViewUnsafe( return {reinterpret_cast(normalizedStringView.data()), normalizedStringView.size()}; } +inline NormalizedStringView asNormalizedStringViewUnsafe( + std::string_view input) { + return {reinterpret_cast(input.data()), input.size()}; +} diff --git a/src/parser/ParsedQuery.cpp b/src/parser/ParsedQuery.cpp index 03b415adf7..869edf503b 100644 --- a/src/parser/ParsedQuery.cpp +++ b/src/parser/ParsedQuery.cpp @@ -388,9 +388,8 @@ void ParsedQuery::GraphPattern::recomputeIds(size_t* id_count) { ParsedQuery::GraphPattern::GraphPattern() : _optional(false) {} // __________________________________________________________________________ -void ParsedQuery::GraphPattern::addLanguageFilter( - const Variable& variable, const std::string& languageInQuotes) { - auto langTag = languageInQuotes.substr(1, languageInQuotes.size() - 2); +void 
ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable, + const std::string& langTag) { // Find all triples where the object is the `variable` and the predicate is // a simple `IRIREF` (neither a variable nor a complex property path). // Search in all the basic graph patterns, as filters have the complete @@ -418,7 +417,8 @@ void ParsedQuery::GraphPattern::addLanguageFilter( // Replace all the matching triples. for (auto* triplePtr : matchingTriples) { - triplePtr->p_._iri = '@' + langTag + '@' + triplePtr->p_._iri; + triplePtr->p_._iri = ad_utility::convertToLanguageTaggedPredicate( + triplePtr->p_._iri, langTag); } // Handle the case, that no suitable triple (see above) was found. In this diff --git a/src/parser/ParsedQuery.h b/src/parser/ParsedQuery.h index d3e0a802e6..d0ff26d53f 100644 --- a/src/parser/ParsedQuery.h +++ b/src/parser/ParsedQuery.h @@ -110,8 +110,10 @@ class SparqlTriple : public SparqlTripleBase { // actually is a property path. SparqlTripleSimple getSimple() const { AD_CONTRACT_CHECK(p_.isIri()); - TripleComponent p = isVariable(p_._iri) ? TripleComponent{Variable{p_._iri}} - : TripleComponent(p_._iri); + TripleComponent p = + isVariable(p_._iri) + ? 
TripleComponent{Variable{p_._iri}} + : TripleComponent(TripleComponent::Iri::fromIriref(p_._iri)); return {s_, p, o_, additionalScanColumns_}; } }; diff --git a/src/parser/TripleComponent.cpp b/src/parser/TripleComponent.cpp index 9309c2bc18..4b77bbb198 100644 --- a/src/parser/TripleComponent.cpp +++ b/src/parser/TripleComponent.cpp @@ -17,7 +17,9 @@ std::ostream& operator<<(std::ostream& stream, const TripleComponent& obj) { } else if constexpr (std::is_same_v) { stream << "UNDEF"; } else if constexpr (std::is_same_v) { - stream << value.rawContent(); + stream << value.toStringRepresentation(); + } else if constexpr (std::is_same_v) { + stream << value.toStringRepresentation(); } else if constexpr (std::is_same_v) { stream << "DATE: " << value.toStringAndType().first; } else if constexpr (std::is_same_v) { @@ -37,26 +39,11 @@ std::ostream& operator<<(std::ostream& stream, const TripleComponent& obj) { return std::move(stream).str(); } -// ____________________________________________________________________________ -TripleComponent::Literal::Literal( - const RdfEscaping::NormalizedRDFString& literal, - std::string_view langtagOrDatatype) { - const std::string& l = literal.get(); - AD_CORRECTNESS_CHECK(l.starts_with('"') && l.ends_with('"') && l.size() >= 2); - // TODO there also should be a strong type for the - // `langtagOrDatatype`. 
- AD_CONTRACT_CHECK(langtagOrDatatype.empty() || - langtagOrDatatype.starts_with('@') || - langtagOrDatatype.starts_with("^^")); - content_ = absl::StrCat(l, langtagOrDatatype); - startOfDatatype_ = l.size(); -} - // ____________________________________________________________________________ std::optional TripleComponent::toValueIdIfNotString() const { auto visitor = [](const T& value) -> std::optional { if constexpr (std::is_same_v || - std::is_same_v) { + std::is_same_v || std::is_same_v) { return std::nullopt; } else if constexpr (std::is_same_v) { return Id::makeFromInt(value); @@ -85,7 +72,9 @@ std::string TripleComponent::toRdfLiteral() const { } else if (isString()) { return getString(); } else if (isLiteral()) { - return getLiteral().rawContent(); + return getLiteral().toStringRepresentation(); + } else if (isIri()) { + return getIri().toStringRepresentation(); } else { auto [value, type] = ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue( diff --git a/src/parser/TripleComponent.h b/src/parser/TripleComponent.h index d807e752c7..474a65f18e 100644 --- a/src/parser/TripleComponent.h +++ b/src/parser/TripleComponent.h @@ -15,6 +15,7 @@ #include "global/Constants.h" #include "global/Id.h" #include "global/SpecialIds.h" +#include "parser/LiteralOrIri.h" #include "parser/RdfEscaping.h" #include "parser/data/Variable.h" #include "util/Date.h" @@ -28,6 +29,8 @@ /// of any other type). class TripleComponent { public: + using Literal = ad_utility::triple_component::Literal; + using Iri = ad_utility::triple_component::Iri; // Own class for the UNDEF value. struct UNDEF { // Default equality operator. @@ -40,53 +43,10 @@ class TripleComponent { } }; - // A class that stores a normalized RDF literal together with its datatype or - // language tag. - struct Literal { - private: - // The underlying storage. It consists of the normalized RDF literal - // concatenated with its datatype or language tag. 
- std::string content_; - // The index in the `content_` member, where the datatype or language tag - // begins. - size_t startOfDatatype_ = 0; - - public: - // Construct from a normalized literal and the (possibly empty) language tag - // or datatype. - explicit Literal(const RdfEscaping::NormalizedRDFString& literal, - std::string_view langtagOrDatatype = ""); - - // Get the literal in the form in which it is stored (the normalized literal - // concatenated with the language tag or datatype). It is only allowed to - // read the content or to move it out. That way the `Literal` can never - // become invalid via the `rawContent` method. - const std::string& rawContent() const& { return content_; } - std::string&& rawContent() && { return std::move(content_); } - - // Only get the normalized literal without the language tag or datatype. - RdfEscaping::NormalizedRDFStringView normalizedLiteralContent() const { - return RdfEscaping::NormalizedRDFStringView::make( - std::string_view{content_}.substr(0, startOfDatatype_)); - } - - // Only get the datatype or language tag. - std::string_view datatypeOrLangtag() const { - return std::string_view{content_}.substr(startOfDatatype_); - } - - // Equality and hashing are needed to store a `Literal` in a `HashMap`. - bool operator==(const Literal&) const = default; - template - friend H AbslHashValue(H h, const Literal& l) { - return H::combine(std::move(h), l.content_); - } - }; - private: // The underlying variant type. 
using Variant = std::variant; + Variable, Literal, Iri, DateOrLargeYear>; Variant _variant; public: @@ -177,6 +137,13 @@ class TripleComponent { } bool isLiteral() const { return std::holds_alternative(_variant); } + Literal& getLiteral() { return std::get(_variant); } + const Literal& getLiteral() const { return std::get(_variant); } + + bool isIri() const { return std::holds_alternative(_variant); } + + Iri& getIri() { return std::get(_variant); } + const Iri& getIri() const { return std::get(_variant); } bool isUndef() const { return std::holds_alternative(_variant); } @@ -201,9 +168,6 @@ class TripleComponent { } [[nodiscard]] Variable& getVariable() { return std::get(_variant); } - const Literal& getLiteral() const { return std::get(_variant); } - Literal& getLiteral() { return std::get(_variant); } - /// Convert to an RDF literal. `std::strings` will be emitted directly, /// `int64_t` is converted to a `xsd:integer` literal, and a `double` is /// converted to a `xsd:double`. @@ -224,10 +188,16 @@ class TripleComponent { template [[nodiscard]] std::optional toValueId( const Vocabulary& vocabulary) const { - if (isString() || isLiteral()) { + AD_CONTRACT_CHECK(!isString()); + if (isLiteral() || isIri()) { VocabIndex idx; - const std::string& content = - isString() ? getString() : getLiteral().rawContent(); + const std::string& content = [&]() -> const std::string& { + if (isLiteral()) { + return getLiteral().toStringRepresentation(); + } else { + return getIri().toStringRepresentation(); + } + }(); if (vocabulary.getId(content, &idx)) { return Id::makeFromVocabIndex(idx); } else if (qlever::specialIds.contains(content)) { @@ -253,11 +223,20 @@ class TripleComponent { if (!id) { // If `toValueId` could not convert to `Id`, we have a string, which we // look up in (and potentially add to) our local vocabulary. 
- AD_CORRECTNESS_CHECK(isString() || isLiteral()); + AD_CORRECTNESS_CHECK(isString() || isLiteral() || isIri()); + std::string& newWord = [&]() -> std::string& { + if (isString()) { + return getString(); + } else { + if (isLiteral()) { + return getLiteral().toStringRepresentation(); + } else { + return getIri().toStringRepresentation(); + } + } + }(); // NOTE: There is a `&&` version of `getIndexAndAddIfNotContained`. // Otherwise, `newWord` would be copied here despite the `std::move`. - std::string&& newWord = isString() ? std::move(getString()) - : std::move(getLiteral()).rawContent(); id = Id::makeFromLocalVocabIndex( localVocab.getIndexAndAddIfNotContained(std::move(newWord))); } diff --git a/src/parser/TurtleParser.cpp b/src/parser/TurtleParser.cpp index a6f412a4b4..bdd7dbb096 100644 --- a/src/parser/TurtleParser.cpp +++ b/src/parser/TurtleParser.cpp @@ -5,6 +5,8 @@ #include "parser/TurtleParser.h" +#include + #include #include "parser/RdfEscaping.h" @@ -32,8 +34,7 @@ bool TurtleParser::prefixID() { if (check(pnameNS()) && check(iriref()) && check(skip())) { // strip the angled brackes -> bla - prefixMap_[activePrefix_] = - stripAngleBrackets(lastParseResult_.getString()); + prefixMap_[activePrefix_] = lastParseResult_.getIri(); return true; } else { raise("Parsing @prefix definition failed"); @@ -48,7 +49,7 @@ template bool TurtleParser::base() { if (skip()) { if (iriref() && check(skip())) { - prefixMap_[""] = stripAngleBrackets(lastParseResult_.getString()); + prefixMap_[""] = lastParseResult_.getIri(); return true; } else { raise("Parsing @base definition failed"); @@ -63,8 +64,7 @@ template bool TurtleParser::sparqlPrefix() { if (skip()) { if (pnameNS() && iriref()) { - prefixMap_[activePrefix_] = - stripAngleBrackets(lastParseResult_.getString()); + prefixMap_[activePrefix_] = lastParseResult_.getIri(); return true; } else { raise("Parsing PREFIX definition failed"); @@ -79,7 +79,7 @@ template bool TurtleParser::sparqlBase() { if (skip()) { if 
(iriref()) { - prefixMap_[""] = stripAngleBrackets(lastParseResult_.getString()); + prefixMap_[""] = lastParseResult_.getIri(); return true; } else { raise("Parsing BASE definition failed"); @@ -100,7 +100,7 @@ bool TurtleParser::triples() { } } else { if (blankNodePropertyList()) { - activeSubject_ = lastParseResult_.getString(); + activeSubject_ = lastParseResult_; predicateObjectList(); return true; } else { @@ -150,7 +150,8 @@ bool TurtleParser::predicateSpecialA() { if (auto [success, word] = tok_.template getNextToken(); success) { (void)word; - activePredicate_ = ""s; + activePredicate_ = TripleComponent::Iri::fromIriref( + ""); return true; } else { return false; @@ -161,7 +162,7 @@ bool TurtleParser::predicateSpecialA() { template bool TurtleParser::subject() { if (blankNode() || iri() || collection()) { - activeSubject_ = lastParseResult_.getString(); + activeSubject_ = lastParseResult_; return true; } else { return false; @@ -172,7 +173,7 @@ bool TurtleParser::subject() { template bool TurtleParser::predicate() { if (iri()) { - activePredicate_ = lastParseResult_.getString(); + activePredicate_ = lastParseResult_.getIri(); return true; } else { return false; @@ -208,8 +209,8 @@ bool TurtleParser::blankNodePropertyList() { return false; } // save subject and predicate - string savedSubject = activeSubject_; - string savedPredicate = activePredicate_; + auto savedSubject = activeSubject_; + auto savedPredicate = activePredicate_; // new triple with blank node as object string blank = createAnonNode(); // the following triples have the blank node as subject @@ -239,17 +240,18 @@ bool TurtleParser::collection() { triples_.resize(triples_.size() - objects.size()); // TODO Move such functionality into a general util. 
auto makeIri = [](std::string_view suffix) { - return absl::StrCat("<", RDF_PREFIX, suffix, ">"); + return TripleComponent::Iri::fromIriref( + absl::StrCat("<", RDF_PREFIX, suffix, ">")); }; - static const std::string nil = makeIri("nil"); - static const std::string first = makeIri("first"); - static const std::string rest = makeIri("rest"); + static const auto nil = TripleComponent{makeIri("nil")}; + static const auto first = makeIri("first"); + static const auto rest = makeIri("rest"); if (objects.empty()) { lastParseResult_ = nil; } else { // Create a new blank node for each collection element. - std::vector blankNodes; + std::vector blankNodes; blankNodes.reserve(objects.size()); for (size_t i = 0; i < objects.size(); ++i) { blankNodes.push_back(createAnonNode()); @@ -272,19 +274,15 @@ bool TurtleParser::collection() { // ____________________________________________________________________________ template -void TurtleParser::parseDoubleConstant(const std::string& input) { - size_t position; - - bool errorOccured = false; - TripleComponent result; - try { - // We cannot directly store this in `lastParseResult_` because this might - // overwrite `input`. - result = std::stod(input, &position); - } catch (const std::exception& e) { - errorOccured = true; +void TurtleParser::parseDoubleConstant(std::string_view input) { + double result; + // The functions used below cannot deal with leading redundant '+' signs. 
+ if (input.starts_with('+')) { + input.remove_prefix(1); } - if (errorOccured || position != input.size()) { + auto [firstNonMatching, errorCode] = + absl::from_chars(input.data(), input.data() + input.size(), result); + if (firstNonMatching != input.end() || errorCode != std::errc{}) { auto errorMessage = absl::StrCat( "Value ", input, " could not be parsed as a floating point value"); raiseOrIgnoreTriple(errorMessage); @@ -294,20 +292,21 @@ void TurtleParser::parseDoubleConstant(const std::string& input) { // ____________________________________________________________________________ template -void TurtleParser::parseIntegerConstant(const std::string& input) { +void TurtleParser::parseIntegerConstant(std::string_view input) { if (integerOverflowBehavior() == TurtleParserIntegerOverflowBehavior::AllToDouble) { return parseDoubleConstant(input); } - size_t position = 0; - - bool errorOccured = false; - TripleComponent result; - try { - // We cannot directly store this in `lastParseResult_` because this might - // overwrite `input`. - result = std::stoll(input, &position); - } catch (const std::out_of_range&) { + int64_t result{0}; + // The functions used below cannot deal with leading redundant '+' signs. + if (input.starts_with('+')) { + input.remove_prefix(1); + } + // We cannot directly store this in `lastParseResult_` because this might + // overwrite `input`. 
+ auto [firstNonMatching, errorCode] = + std::from_chars(input.data(), input.data() + input.size(), result); + if (errorCode == std::errc::result_out_of_range) { if (integerOverflowBehavior() == TurtleParserIntegerOverflowBehavior::OverflowingToDouble) { return parseDoubleConstant(input); @@ -319,10 +318,7 @@ void TurtleParser::parseIntegerConstant(const std::string& input) { "\"parser-integer-overflow-behavior\""); raiseOrIgnoreTriple(errorMessage); } - } catch (const std::invalid_argument& e) { - errorOccured = true; - } - if (errorOccured || position != input.size()) { + } else if (firstNonMatching != input.end()) { auto errorMessage = absl::StrCat( "Value ", input, " could not be parsed as an integer value"); raiseOrIgnoreTriple(errorMessage); @@ -375,18 +371,18 @@ bool TurtleParser::rdfLiteral() { if (!stringParse()) { return false; } - RdfEscaping::NormalizedRDFString literalString{ - lastParseResult_.getLiteral().normalizedLiteralContent()}; + auto previous = lastParseResult_.getLiteral(); if (langtag()) { - lastParseResult_ = - TripleComponent::Literal{literalString, lastParseResult_.getString()}; + previous.addLanguageTag(lastParseResult_.getString()); + lastParseResult_ = std::move(previous); return true; // TODO this allows spaces here since the ^^ is unique in the // sparql syntax. is this correct? } else if (skip() && check(iri())) { - const auto typeIri = std::move(lastParseResult_.getString()); - auto type = stripAngleBrackets(typeIri); - std::string strippedLiteral{stripDoubleQuotes(literalString.get())}; + auto typeIri = std::move(lastParseResult_.getIri()); + std::string_view type = asStringViewUnsafe(typeIri.getContent()); + std::string_view strippedLiteral = + asStringViewUnsafe(previous.getContent()); try { // TODO clean this up by moving the check for the types to a // separate module. 
@@ -406,12 +402,12 @@ bool TurtleParser::rdfLiteral() { lastParseResult_ = false; } else { LOG(DEBUG) - << literalString.get() - << " could not be parsed as a boolean object of type " << typeIri + << strippedLiteral + << " could not be parsed as a boolean object of type " << type << ". It is treated as a plain string literal without datatype " "instead" << std::endl; - lastParseResult_ = TripleComponent::Literal{literalString}; + lastParseResult_ = std::move(previous); } } else if (type == XSD_DECIMAL_TYPE || type == XSD_DOUBLE_TYPE || type == XSD_FLOAT_TYPE) { @@ -425,28 +421,28 @@ bool TurtleParser::rdfLiteral() { } else if (type == XSD_GYEAR_TYPE) { lastParseResult_ = DateOrLargeYear::parseGYear(strippedLiteral); } else { - lastParseResult_ = TripleComponent::Literal{ - literalString, absl::StrCat("^^", typeIri)}; + previous.addDatatype(typeIri); + lastParseResult_ = std::move(previous); } return true; } catch (const DateParseException&) { LOG(DEBUG) - << literalString.get() - << " could not be parsed as a date object of type " << typeIri + << strippedLiteral << " could not be parsed as a date object of type " + << type << ". It is treated as a plain string literal without datatype " "instead" << std::endl; - lastParseResult_ = TripleComponent::Literal{literalString}; + lastParseResult_ = std::move(previous); return true; } catch (const DateOutOfRangeException& ex) { LOG(DEBUG) - << literalString.get() + << strippedLiteral << " could not be parsed as a date object for the following reason: " << ex.what() << ". 
It is treated as a plain string literal without datatype " "instead" << std::endl; - lastParseResult_ = TripleComponent::Literal{literalString}; + lastParseResult_ = std::move(previous); return true; } catch (const std::exception& e) { raise(e.what()); @@ -514,8 +510,8 @@ bool TurtleParser::stringParse() { raise("Unterminated string literal"); } // also include the quotation marks in the word - lastParseResult_ = TripleComponent::Literal{ - RdfEscaping::normalizeRDFLiteral(view.substr(0, endPos + startPos)), ""}; + lastParseResult_ = TripleComponent::Literal::fromEscapedRdfLiteral( + view.substr(0, endPos + startPos)); tok_.remove_prefix(endPos + startPos); return true; } @@ -541,9 +537,8 @@ bool TurtleParser::prefixedName() { } parseTerminal(); } - lastParseResult_ = - '<' + expandPrefix(activePrefix_) + - RdfEscaping::unescapePrefixedIri(lastParseResult_.getString()) + '>'; + lastParseResult_ = TripleComponent::Iri::fromPrefixAndSuffix( + expandPrefix(activePrefix_), lastParseResult_.getString()); return true; } @@ -643,7 +638,7 @@ bool TurtleParser::iriref() { } else { tok_.remove_prefix(endPos + 1); lastParseResult_ = - RdfEscaping::unescapeIriref(view.substr(0, endPos + 1)); + TripleComponent::Iri::fromIriref(view.substr(0, endPos + 1)); return true; } } else { @@ -654,7 +649,7 @@ bool TurtleParser::iriref() { return false; } lastParseResult_ = - RdfEscaping::unescapeIriref(lastParseResult_.getString()); + TripleComponent::Iri::fromIriref(lastParseResult_.getString()); return true; } } diff --git a/src/parser/TurtleParser.h b/src/parser/TurtleParser.h index 2840db59cd..75c1e9fa6f 100644 --- a/src/parser/TurtleParser.h +++ b/src/parser/TurtleParser.h @@ -38,28 +38,14 @@ enum class TurtleParserIntegerOverflowBehavior { }; struct TurtleTriple { - std::string subject_; - std::string predicate_; + // TODO The subject can only be IRI or BlankNode. 
+ TripleComponent subject_; + TripleComponent::Iri predicate_; TripleComponent object_; bool operator==(const TurtleTriple&) const = default; }; -inline std::string_view stripAngleBrackets(std::string_view input) { - AD_CONTRACT_CHECK(input.starts_with('<') && input.ends_with('>')); - input.remove_prefix(1); - input.remove_suffix(1); - return input; -} - -inline std::string_view stripDoubleQuotes(std::string_view input) { - AD_CONTRACT_CHECK(input.starts_with('"') && input.ends_with('"') && - input.size() >= 2); - input.remove_prefix(1); - input.remove_suffix(1); - return input; -} - // A base class for all the different turtle parsers. class TurtleParserBase { private: @@ -168,13 +154,13 @@ class TurtleParser : public TurtleParserBase { // Maps prefixes to their expanded form, initialized with the empty base // (i.e. the prefix ":" maps to the empty IRI). - ad_utility::HashMap prefixMap_{{"", ""}}; + ad_utility::HashMap prefixMap_{{{}, {}}}; // There are turtle constructs that reuse prefixes, subjects and predicates // so we have to save the last seen ones. std::string activePrefix_; - std::string activeSubject_; - std::string activePredicate_; + TripleComponent activeSubject_; + TripleComponent::Iri activePredicate_; size_t numBlankNodes_ = 0; bool currentTripleIgnoredBecauseOfInvalidLiteral_ = false; @@ -194,8 +180,8 @@ class TurtleParser : public TurtleParserBase { void clear() { lastParseResult_ = ""; - activeSubject_.clear(); - activePredicate_.clear(); + activeSubject_ = TripleComponent::Iri::fromIriref("<>"); + activePredicate_ = TripleComponent::Iri::fromIriref("<>"); activePrefix_.clear(); prefixMap_.clear(); @@ -282,8 +268,8 @@ class TurtleParser : public TurtleParserBase { bool doubleParse(); // Two helper functions for the actual conversion from strings to numbers. 
- void parseDoubleConstant(const std::string& input); - void parseIntegerConstant(const std::string& input); + void parseDoubleConstant(std::string_view input); + void parseIntegerConstant(std::string_view input); // This version only works if no escape sequences were used. bool pnameLnRelaxed(); @@ -337,7 +323,7 @@ class TurtleParser : public TurtleParserBase { // map a turtle prefix to its expanded form. Throws if the prefix was not // properly registered before - string expandPrefix(const string& prefix) { + TripleComponent::Iri expandPrefix(const std::string& prefix) { if (!prefixMap_.count(prefix)) { raise("Prefix " + prefix + " was not previously defined using a PREFIX or @prefix " diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 8ba0f12752..9176765d18 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -55,20 +55,18 @@ std::string Visitor::getOriginalInputForContext( // ___________________________________________________________________________ ExpressionPtr Visitor::processIriFunctionCall( - const std::string& iri, std::vector argList, + const TripleComponent::Iri& iri, std::vector argList, const antlr4::ParserRuleContext* ctx) { - std::string_view functionName = iri; + std::string_view functionName = asStringViewUnsafe(iri.getContent()); std::string_view prefixName; // Helper lambda that checks if `functionName` starts with the given prefix. - // If yes, remove the prefix and the final `>` from `functionName` and set + // If yes, remove the prefix from `functionName` and set // `prefixName` to the short name of the prefix; see `global/Constants.h`. 
auto checkPrefix = [&functionName, &prefixName]( std::pair prefix) { if (functionName.starts_with(prefix.second)) { prefixName = prefix.first; functionName.remove_prefix(prefix.second.size()); - AD_CONTRACT_CHECK(functionName.ends_with('>')); - functionName.remove_suffix(1); return true; } else { return false; @@ -123,7 +121,8 @@ ExpressionPtr Visitor::processIriFunctionCall( return sparqlExpression::makeTanExpression(std::move(argList[0])); } } - reportNotSupported(ctx, "Function \"" + iri + "\" is"); + reportNotSupported(ctx, + "Function \""s + iri.toStringRepresentation() + "\" is"); } void Visitor::addVisibleVariable(Variable var) { @@ -336,13 +335,6 @@ GraphPattern Visitor::visit(Parser::GroupGraphPatternContext* ctx) { } else { AD_CORRECTNESS_CHECK(ctx->groupGraphPatternSub()); auto [subOps, filters] = visit(ctx->groupGraphPatternSub()); - - if (subOps.empty()) { - reportError(ctx, - "QLever currently doesn't support empty GroupGraphPatterns " - "and WHERE clauses"); - } - pattern._graphPatterns = std::move(subOps); for (auto& filter : filters) { if (auto langFilterData = @@ -404,7 +396,8 @@ BasicGraphPattern Visitor::visit(Parser::TriplesBlockContext* ctx) { return graphTerm.visit([](const T& element) -> TripleComponent { if constexpr (std::is_same_v) { return element; - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v || + std::is_same_v) { return TurtleStringParser::parseTripleObject( element.toSparql()); } else { @@ -615,11 +608,11 @@ RdfEscaping::NormalizedRDFString Visitor::visit(Parser::StringContext* ctx) { } // ____________________________________________________________________________________ -string Visitor::visit(Parser::IriContext* ctx) { - // TODO return an IRI, not a std::string. +TripleComponent::Iri Visitor::visit(Parser::IriContext* ctx) { string langtag = ctx->PREFIX_LANGTAG() ? 
ctx->PREFIX_LANGTAG()->getText() : ""; - return langtag + visitAlternative(ctx->iriref(), ctx->prefixedName()); + return TripleComponent::Iri::fromIriref( + langtag + visitAlternative(ctx->iriref(), ctx->prefixedName())); } // ____________________________________________________________________________________ @@ -1280,7 +1273,8 @@ PropertyPath Visitor::visit(Parser::PathPrimaryContext* ctx) { // simple `return visit(...)`. Then the three cases which are not the // `special a` case can be merged into a `visitAlternative(...)`. if (ctx->iri()) { - return PropertyPath::fromIri(visit(ctx->iri())); + return PropertyPath::fromIri( + std::string{visit(ctx->iri()).toStringRepresentation()}); } else if (ctx->path()) { return visit(ctx->path()); } else if (ctx->pathNegatedPropertySet()) { @@ -1428,7 +1422,11 @@ GraphTerm Visitor::visit(Parser::VarOrIriContext* ctx) { // TODO If `visit` returns an `Iri` and `GraphTerm` can be // constructed from an `Iri`, this whole function becomes // `visitAlternative`. - return GraphTerm{Iri{visit(ctx->iri())}}; + // TODO If we unify the two IRI and Literal types (the ones from + // the parser and from the `TripleComponent`, then this becomes much + // simpler. + return GraphTerm{ + Iri{std::string{visit(ctx->iri()).toStringRepresentation()}}}; } } @@ -1437,7 +1435,8 @@ GraphTerm Visitor::visit(Parser::GraphTermContext* ctx) { if (ctx->blankNode()) { return visit(ctx->blankNode()); } else if (ctx->iri()) { - return Iri{visit(ctx->iri())}; + // TODO Unify. 
+ return Iri{std::string{visit(ctx->iri()).toStringRepresentation()}}; } else if (ctx->NIL()) { return Iri{""}; } else { @@ -1669,9 +1668,9 @@ ExpressionPtr Visitor::visit(Parser::PrimaryExpressionContext* ctx) { if (ctx->rdfLiteral()) { auto tripleComponent = TurtleStringParser::parseTripleObject( visit(ctx->rdfLiteral())); - if (tripleComponent.isString()) { - return make_unique(tripleComponent.getString()); - } else if (tripleComponent.isLiteral()) { + AD_CORRECTNESS_CHECK(!tripleComponent.isIri() && + !tripleComponent.isString()); + if (tripleComponent.isLiteral()) { return make_unique(tripleComponent.getLiteral()); } else { return make_unique( @@ -1963,7 +1962,8 @@ std::string Visitor::visit(Parser::RdfLiteralContext* ctx) { if (ctx->LANGTAG()) { ret += ctx->LANGTAG()->getText(); } else if (ctx->iri()) { - ret += ("^^" + visit(ctx->iri())); + // TODO Also unify the two Literal classes... + ret += ("^^" + std::string{visit(ctx->iri()).toStringRepresentation()}); } return ret; } diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h index 0f79dccdb8..0b00af4aeb 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.h +++ b/src/parser/sparqlParser/SparqlQleverVisitor.h @@ -448,7 +448,7 @@ class SparqlQleverVisitor { [[nodiscard]] static RdfEscaping::NormalizedRDFString visit( Parser::StringContext* ctx); - [[nodiscard]] string visit(Parser::IriContext* ctx); + [[nodiscard]] TripleComponent::Iri visit(Parser::IriContext* ctx); [[nodiscard]] static string visit(Parser::IrirefContext* ctx); @@ -485,7 +485,7 @@ class SparqlQleverVisitor { // Process an IRI function call. This is used in both `visitFunctionCall` and // `visitIriOrFunction`. 
[[nodiscard]] static ExpressionPtr processIriFunctionCall( - const std::string& iri, std::vector argList, + const TripleComponent::Iri& iri, std::vector argList, const antlr4::ParserRuleContext*); void addVisibleVariable(Variable var); diff --git a/src/util/AsioHelpers.h b/src/util/AsioHelpers.h index b946d6f168..826a01dbe0 100644 --- a/src/util/AsioHelpers.h +++ b/src/util/AsioHelpers.h @@ -112,28 +112,37 @@ auto runFunctionOnExecutor(Executor executor, Function function, /// on an awaitable object that doesn't do this on its own. It's always better /// to integrate cancellation checks right into the awaitable itself, but /// sometimes this doesn't work because it's part of a library or boost asio -/// itself. Once `timerRunning` resolves to `false` (or the awaitable is -/// finished, whatever happens first), the cancellation checks are stopped. -/// This needs to be called on a strand or in a single-threaded environment, -/// otherwise this may lead to race conditions, due to issues with boost asio. +/// itself. Once the value for `cancelCallback` is set and called by the caller +/// of this function (or the awaitable is finished, whatever happens first), the +/// cancellation checks are stopped. This needs to be called on a strand or in a +/// single-threaded environment, otherwise this will lead to race conditions, +/// due to issues with boost asio. 
template inline net::awaitable interruptible( net::awaitable awaitable, ad_utility::SharedCancellationHandle handle, - std::shared_ptr timerRunning = - std::make_shared(true), + std::promise> cancelCallback, ad_utility::source_location loc = ad_utility::source_location::current()) { using namespace net::experimental::awaitable_operators; + auto timer = + std::make_shared(co_await net::this_coro::executor); + auto running = std::make_shared(true); + auto cancelTimer = [timer, running]() mutable { + auto strand = timer->get_executor(); + running->clear(); + net::dispatch(strand, [timer = std::move(timer)]() { timer->cancel(); }); + }; + // Provide callback to outer world in order to cancel the timer pre-emptively. + cancelCallback.set_value(cancelTimer); - auto timerLoop = [](std::shared_ptr timerRunning, + auto timerLoop = [](std::shared_ptr timer, + std::shared_ptr running, ad_utility::SharedCancellationHandle handle, ad_utility::source_location loc) -> net::awaitable { constexpr auto timeout = DESIRED_CANCELLATION_CHECK_INTERVAL / 2; - absl::Cleanup cleanup{[&timerRunning]() { timerRunning->clear(); }}; - net::steady_timer timer{co_await net::this_coro::executor}; - while (timerRunning->test()) { + while (running->test()) { handle->throwIfCancelled(loc); - timer.expires_after(timeout); - auto [ec] = co_await timer.async_wait(net::as_tuple(net::deferred)); + timer->expires_after(timeout); + auto [ec] = co_await timer->async_wait(net::as_tuple(net::deferred)); if (ec) { AD_CORRECTNESS_CHECK(ec == net::error::operation_aborted); break; @@ -141,24 +150,30 @@ inline net::awaitable interruptible( } }; auto wrapper = [](net::awaitable awaitable, - std::shared_ptr timerRunning) mutable - -> net::awaitable { - absl::Cleanup cleanup{ - [timerRunning = std::move(timerRunning)]() { timerRunning->clear(); }}; + auto cancelTimer) mutable -> net::awaitable { + absl::Cleanup cleanup{std::move(cancelTimer)}; co_return co_await std::move(awaitable); }; - auto timerClone = 
timerRunning; try { - co_return co_await ( - timerLoop(std::move(timerClone), std::move(handle), std::move(loc)) && - wrapper(std::move(awaitable), std::move(timerRunning))); + co_return co_await (timerLoop(std::move(timer), std::move(running), + std::move(handle), std::move(loc)) && + wrapper(std::move(awaitable), std::move(cancelTimer))); } catch (const net::multiple_exceptions& e) { // Ignore other exceptions std::rethrow_exception(e.first_exception()); } } +/// Overload without an explicit promise object passed for convenience. +template +inline net::awaitable interruptible( + net::awaitable awaitable, ad_utility::SharedCancellationHandle handle, + ad_utility::source_location loc = ad_utility::source_location::current()) { + return interruptible(std::move(awaitable), std::move(handle), + std::promise>{}, std::move(loc)); +} + /// Helper function to wait for an awaitable that is supposed to be run on an io /// context. template @@ -167,14 +182,14 @@ inline T runAndWaitForAwaitable(net::awaitable awaitable, auto future = net::co_spawn(net::make_strand(ioContext), std::move(awaitable), net::use_future); - while (true) { - auto futureStatus = future.wait_for(std::chrono::milliseconds{0}); - if (futureStatus == std::future_status::ready) { - break; - } + std::future_status futureStatus; + do { + bool computedSomething = ioContext.poll_one(); + // 5ms is an arbitrarily chosen interval to not overload the CPU. + std::chrono::milliseconds timeout{computedSomething ? 
0 : 5}; + futureStatus = future.wait_for(timeout); AD_CORRECTNESS_CHECK(futureStatus != std::future_status::deferred); - ioContext.poll_one(); - } + } while (futureStatus != std::future_status::ready); return future.get(); } } // namespace ad_utility diff --git a/src/util/CancellationHandle.cpp b/src/util/CancellationHandle.cpp index f17e2b5c26..7f14dab010 100644 --- a/src/util/CancellationHandle.cpp +++ b/src/util/CancellationHandle.cpp @@ -43,6 +43,10 @@ void CancellationHandle::startWatchDogInternal() requires WatchDogEnabled startTimeoutWindow_ = steady_clock::now(); cancellationState_.compare_exchange_strong(state, CHECK_WINDOW_MISSED, std::memory_order_relaxed); + } else if (detail::isCancelled(state)) { + // No need to keep the watchdog running if the handle was cancelled + // already + break; } } while (!watchDogState_.conditionVariable_.wait_for( lock, DESIRED_CANCELLATION_CHECK_INTERVAL, diff --git a/src/util/CancellationHandle.h b/src/util/CancellationHandle.h index a7bcacec8e..c928772eca 100644 --- a/src/util/CancellationHandle.h +++ b/src/util/CancellationHandle.h @@ -290,6 +290,7 @@ class CancellationHandle { FRIEND_TEST(CancellationHandle, verifyIsCancelledDoesPleaseWatchDog); FRIEND_TEST(CancellationHandle, verifyPleaseWatchDogDoesNotAcceptInvalidState); + FRIEND_TEST(CancellationHandle, verifyWatchDogEndsEarlyIfCancelled); }; using SharedCancellationHandle = std::shared_ptr>; diff --git a/src/util/Conversions.cpp b/src/util/Conversions.cpp index 0ea52a8d0c..1d56040aac 100644 --- a/src/util/Conversions.cpp +++ b/src/util/Conversions.cpp @@ -22,14 +22,21 @@ namespace ad_utility { // _________________________________________________________ -string convertLangtagToEntityUri(const string& tag) { - return makeInternalIri("@", tag); +triple_component::Iri convertLangtagToEntityUri(const string& tag) { + return triple_component::Iri::fromIriref(makeInternalIri("@", tag)); } // _________________________________________________________ std::string 
convertToLanguageTaggedPredicate(const string& pred, const string& langtag) { - return '@' + langtag + '@' + pred; + return absl::StrCat("@", langtag, "@", pred); +} + +// _________________________________________________________ +triple_component::Iri convertToLanguageTaggedPredicate( + const triple_component::Iri& pred, const std::string& langtag) { + return triple_component::Iri::fromIriref(absl::StrCat( + "@", langtag, "@<", asStringViewUnsafe(pred.getContent()), ">")); } } // namespace ad_utility diff --git a/src/util/Conversions.h b/src/util/Conversions.h index 9adf1aa435..c49f877c41 100644 --- a/src/util/Conversions.h +++ b/src/util/Conversions.h @@ -8,11 +8,20 @@ #include #include +#include "parser/LiteralOrIri.h" +#include "util/StringUtils.h" + namespace ad_utility { +static constexpr std::string_view languageTaggedPredicatePrefix = "@"; //! Convert a language tag like "@en" to the corresponding entity uri -//! for the efficient language filter -std::string convertLangtagToEntityUri(const std::string& tag); +//! for the efficient language filter. +// TODO The overload that takes and returns `std::string` can be +// removed as soon as we also store strongly-typed IRIs in the predicates of the +// `SparqlTriple` class. 
+triple_component::Iri convertLangtagToEntityUri(const std::string& tag); std::string convertToLanguageTaggedPredicate(const std::string& pred, const std::string& langtag); +triple_component::Iri convertToLanguageTaggedPredicate( + const triple_component::Iri& pred, const std::string& langtag); } // namespace ad_utility diff --git a/src/util/FsstCompressor.h b/src/util/FsstCompressor.h index 672cc01534..1e673f33ae 100644 --- a/src/util/FsstCompressor.h +++ b/src/util/FsstCompressor.h @@ -15,18 +15,27 @@ #include "util/Exception.h" #include "util/Log.h" +#include "util/TypeTraits.h" + +namespace detail { +// A helper function to cast `char*` to `unsigned char*` and `const char*` to +// `const unsigned char*` which is used below because FSST always works on +// unsigned character types. Note that this is one of the few cases where a +// `reinterpret_cast` is safe. +constexpr auto castToUnsignedPtr = + [] T>(T ptr) { + using Res = std::conditional_t, + const unsigned char*, unsigned char*>; + return reinterpret_cast(ptr); + }; +} // namespace detail // A simple C++ wrapper around the C-API of the `FSST` library. It consists of // two types, a thredsafe `FsstDecoder` that can be used to perform // decompression, and a single-threaded `FsstEncoder` for compression. -// TODO There are a lot of `const_cast`s that look rather fishy because -// they cast away constness. `FSST` currently has many function parameters that -// are logically const, but are not marked as const. I have opened a PR for FSST -// to make them const, as soon as this is merged, get rid of all the -// `const_cast`s and `mutable`s in this file. class FsstDecoder { private: - mutable fsst_decoder_t decoder_; + fsst_decoder_t decoder_; public: // The default constructor does lead to an invalid decoder, but is required @@ -41,11 +50,10 @@ class FsstDecoder { // Decompress a single string. 
std::string decompress(std::string_view str) const { std::string output; + auto cast = detail::castToUnsignedPtr; output.resize(8 * str.size()); - size_t size = fsst_decompress( - &decoder_, str.size(), - reinterpret_cast(const_cast(str.data())), - output.size(), reinterpret_cast(output.data())); + size_t size = fsst_decompress(&decoder_, str.size(), cast(str.data()), + output.size(), cast(output.data())); // FSST compresses at most by a factor of 8. AD_CORRECTNESS_CHECK(size <= output.size()); output.resize(size); @@ -91,8 +99,10 @@ class FsstRepeatedDecoder { return result; } // Allow this type to be trivially serializable, - friend std::true_type allowTrivialSerialization( - std::same_as auto, auto); + [[maybe_unused]] friend std::true_type allowTrivialSerialization( + std::same_as auto, auto) { + return {}; + } }; // The encoder class. @@ -105,6 +115,7 @@ class FsstEncoder { }; using Encoder = std::unique_ptr; Encoder encoder_; + static constexpr auto cast = detail::castToUnsignedPtr; public: // Create an `FsstEncoder`. 
The given `strings` are used to create the @@ -118,13 +129,11 @@ class FsstEncoder { std::string output; output.resize(7 + 2 * len); unsigned char* dummyOutput; - auto data = - reinterpret_cast(const_cast(word.data())); + auto data = cast(word.data()); size_t outputLen = 0; size_t numCompressed = fsst_compress(encoder_.get(), 1, &len, &data, output.size(), - reinterpret_cast(output.data()), - &outputLen, &dummyOutput); + cast(output.data()), &outputLen, &dummyOutput); AD_CORRECTNESS_CHECK(numCompressed == 1); output.resize(outputLen); return output; @@ -155,13 +164,12 @@ class FsstEncoder { static std::conditional_t makeEncoder( const auto& strings) { std::vector lengths; - std::vector pointers; + std::vector pointers; [[maybe_unused]] size_t totalSize = 0; for (const auto& string : strings) { lengths.push_back(string.size()); totalSize += string.size(); - pointers.push_back( - reinterpret_cast(const_cast(string.data()))); + pointers.push_back(cast(string.data())); } auto encoder = fsst_create(strings.size(), lengths.data(), pointers.data(), 0); @@ -179,8 +187,7 @@ class FsstEncoder { while (true) { size_t numCompressed = fsst_compress( encoder, strings.size(), lengths.data(), pointers.data(), - output.size(), reinterpret_cast(output.data()), - outputLengths.data(), + output.size(), cast(output.data()), outputLengths.data(), reinterpret_cast(outputPtrs.data())); // Typically one iteration should suffice, we repeat in a loop with // exponential growth of the output buffer. diff --git a/src/util/Serializer/Serializer.h b/src/util/Serializer/Serializer.h index 2a3c219678..b66a49d94b 100644 --- a/src/util/Serializer/Serializer.h +++ b/src/util/Serializer/Serializer.h @@ -234,7 +234,7 @@ void serialize(S& serializer, T&& t) { /// serialized. 
template requires std::is_arithmetic_v> -std::true_type allowTrivialSerialization(T, auto) { +[[maybe_unused]] std::true_type allowTrivialSerialization(T, auto) { return {}; } diff --git a/src/util/StringUtils.h b/src/util/StringUtils.h index bde650fa4c..7c0a49e2bd 100644 --- a/src/util/StringUtils.h +++ b/src/util/StringUtils.h @@ -310,8 +310,8 @@ std::string insertThousandSeparator(const std::string_view str, // these overloads are missing in the STL // TODO they can be constexpr once the compiler completely supports C++20 template -inline std::basic_string operator+(const std::basic_string& a, - std::basic_string_view b) { +inline std::basic_string strCatImpl(const std::basic_string_view& a, + std::basic_string_view b) { std::basic_string res; res.reserve(a.size() + b.size()); res += a; @@ -319,10 +319,18 @@ inline std::basic_string operator+(const std::basic_string& a, return res; } +template +inline std::basic_string operator+(const std::basic_string& a, + std::basic_string_view b) { + return strCatImpl(std::basic_string_view{a}, b); +} + +template +inline std::basic_string operator+(const std::basic_string_view& a, + std::basic_string b) { + return strCatImpl(a, std::basic_string_view{b}); +} + inline std::string operator+(char c, std::string_view b) { - std::string res; - res.reserve(1 + b.size()); - res += c; - res += b; - return res; + return strCatImpl(std::string_view(&c, 1), b); } diff --git a/src/util/http/HttpClient.cpp b/src/util/http/HttpClient.cpp index 3acef47af9..1f7429255d 100644 --- a/src/util/http/HttpClient.cpp +++ b/src/util/http/HttpClient.cpp @@ -107,9 +107,7 @@ HttpClientImpl::sendRequest( ad_utility::source_location loc = ad_utility::source_location::current()) -> T { return ad_utility::runAndWaitForAwaitable( - ad_utility::interruptible(std::move(awaitable), handle, - std::make_shared(true), - std::move(loc)), + ad_utility::interruptible(std::move(awaitable), handle, std::move(loc)), ioContext_); }; diff --git a/test/AsioHelpersTest.cpp 
b/test/AsioHelpersTest.cpp index 7bc7d0acb9..0537b69e6d 100644 --- a/test/AsioHelpersTest.cpp +++ b/test/AsioHelpersTest.cpp @@ -160,15 +160,21 @@ ASYNC_TEST(AsioHelpers, verifyInterruptibleDoesPropagateError) { } // _________________________________________________________________________ -ASYNC_TEST(AsioHelpers, verifyNoCheckIsPerformedWhenTimerIsCancelledEarly) { +ASYNC_TEST(AsioHelpers, verifyEarlyCancellationOfCallbackDoesCancelEarly) { ad_utility::SharedCancellationHandle handle = std::make_shared>(); - handle->cancel(ad_utility::CancellationState::MANUAL); auto sleeperTask = []() -> net::awaitable { co_return; }(); + std::promise> promise{}; + auto future = promise.get_future(); + ad_utility::JThread cancelTask{[&future, &handle]() { + future.get()(); + // Make sure first iteration is not affected + std::this_thread::sleep_for(std::chrono::milliseconds{5}); + handle->cancel(ad_utility::CancellationState::MANUAL); + }}; EXPECT_NO_THROW(co_await ad_utility::interruptible( - std::move(sleeperTask), handle, - std::make_shared(false))); + std::move(sleeperTask), handle, std::move(promise))); } // _________________________________________________________________________ diff --git a/test/CancellationHandleTest.cpp b/test/CancellationHandleTest.cpp index d7643af7a8..3515165e3f 100644 --- a/test/CancellationHandleTest.cpp +++ b/test/CancellationHandleTest.cpp @@ -439,6 +439,24 @@ TEST(CancellationHandle, verifyIsCancelledDoesPleaseWatchDog) { // _____________________________________________________________________________ +TEST(CancellationHandle, verifyWatchDogEndsEarlyIfCancelled) { + CancellationHandle handle; + handle.cancel(MANUAL); + + handle.startWatchDog(); + // Wait for Watchdog to start + std::this_thread::sleep_for(1ms); + + handle.cancellationState_ = WAITING_FOR_CHECK; + + // Wait for one watchdog cycle + tolerance + std::this_thread::sleep_for(DESIRED_CANCELLATION_CHECK_INTERVAL + 1ms); + // If the watchdog were running it would've set this to 
CHECK_WINDOW_MISSED + EXPECT_EQ(handle.cancellationState_, WAITING_FOR_CHECK); +} + +// _____________________________________________________________________________ + TEST(CancellationHandle, expectDisabledHandleIsAlwaysFalse) { CancellationHandle handle; diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index 6979395d08..cbe51171b5 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -8,6 +8,7 @@ #include "./util/GTestHelpers.h" #include "./util/IdTableHelpers.h" +#include "./util/TripleComponentTestHelpers.h" #include "engine/GroupBy.h" #include "engine/IndexScan.h" #include "engine/Join.h" @@ -377,7 +378,7 @@ struct GroupByOptimizations : ::testing::Test { makeExecutionTree(qec, Permutation::Enum::POS, xyzTriple); Tree xScan = makeExecutionTree( qec, Permutation::Enum::PSO, - SparqlTriple{{""}, {"