diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index d0efe2c515..9d03073613 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -66,7 +66,7 @@ jobs: conan install .. -pr:b=../conanprofiles/clang-16-macos -pr:h=../conanprofiles/clang-16-macos -of=. --build=missing; - name: Configure CMake # For std::ranges::join_view we need the -fexperimental-library flag on libc++16, which on Mac requires to manually tinker with the linking flags. - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.build-type}} -DCMAKE_TOOLCHAIN_FILE="$(pwd)/build/conan_toolchain.cmake" -DUSE_PARALLEL=true -DRUN_EXPENSIVE_TESTS=false -DENABLE_EXPENSIVE_CHECKS=true -DCMAKE_CXX_COMPILER=clang++ -DADDITIONAL_COMPILER_FLAGS="-fexperimental-library" -DADDITIONAL_LINKER_FLAGS="-L$(brew --prefix llvm)/lib/c++" + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.build-type}} -DCMAKE_TOOLCHAIN_FILE="$(pwd)/build/conan_toolchain.cmake" -DUSE_PARALLEL=true -DRUN_EXPENSIVE_TESTS=false -DENABLE_EXPENSIVE_CHECKS=true -DCMAKE_CXX_COMPILER=clang++ -DADDITIONAL_COMPILER_FLAGS="-fexperimental-library" -D_NO_TIMING_TESTS=ON -DADDITIONAL_LINKER_FLAGS="-L$(brew --prefix llvm)/lib/c++" - name: Build # Build your program with the given configuration diff --git a/CMakeLists.txt b/CMakeLists.txt index 379816c78b..45f338ade8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,10 +7,10 @@ set(CMAKE_C_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) -if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) +if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) # will not take effect without FORCE set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR} CACHE PATH "Install top-level directory" FORCE) -endif() +endif () # Boost::ASIO currently seems to have a bug when multiple coroutine streams are # concurrently in flight because there were multiple calls to `co_spawn`. @@ -24,20 +24,20 @@ add_definitions("-DBOOST_ASIO_DISABLE_AWAITABLE_FRAME_RECYCLING") # Coroutines require an additional compiler flag that is called differently # on clang and g++ -if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "11.0.0") - MESSAGE(FATAL_ERROR "G++ versions older than 11.0 are not supported by QLever") - else() - add_compile_options(-fcoroutines) - endif() - -elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "16.0.0") +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "11.0.0") + MESSAGE(FATAL_ERROR "G++ versions older than 11.0 are not supported by QLever") + else () + add_compile_options(-fcoroutines) + endif () + +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "16.0.0") MESSAGE(FATAL_ERROR "Clang++ versions older than 16.0 are not supported by QLever") - endif() -else() + endif () +else () MESSAGE(FATAL_ERROR "QLever currently only supports the G++ or LLVM-Clang++ compilers. Found ${CMAKE_CXX_COMPILER_ID}") -endif() +endif () ## Build targets for address sanitizer # AddressSanitize @@ -70,7 +70,7 @@ include(FetchContent) FetchContent_Declare( googletest GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG f8d7d77c06936315286eb55f8de22cd23c188571 # release-1.14.0 + GIT_TAG f8d7d77c06936315286eb55f8de22cd23c188571 # release-1.14.0 ) ################################ @@ -87,7 +87,7 @@ FetchContent_GetProperties(nlohmann-json) if (NOT nlohmann-json_POPULATED) FetchContent_Populate(nlohmann-json) include_directories(SYSTEM ${nlohmann-json_SOURCE_DIR}/single_include) -endif() +endif () ############################### @@ -96,7 +96,7 @@ endif() FetchContent_Declare( antlr GIT_REPOSITORY https://github.com/antlr/antlr4.git - GIT_TAG 9239e6ff444420516b44b7621e8dc7691fcf0e16 + GIT_TAG 9239e6ff444420516b44b7621e8dc7691fcf0e16 ) # From ANTLR we actually don't want the toplevel directory (which doesn't @@ -108,10 +108,10 @@ FetchContent_GetProperties(antlr) if (NOT antlr_POPULATED) FetchContent_Populate(antlr) set(ANTLR_BUILD_CPP_TESTS OFF CACHE BOOL "don't try to build googletest twice") - add_subdirectory(${antlr_SOURCE_DIR}/runtime/Cpp EXCLUDE_FROM_ALL ) + add_subdirectory(${antlr_SOURCE_DIR}/runtime/Cpp EXCLUDE_FROM_ALL) target_compile_options(antlr4_static PRIVATE -Wno-all -Wno-extra -Wno-unqualified-std-cast-call -Wno-error -Wno-deprecated-declarations) - include_directories(SYSTEM ${antlr_SOURCE_DIR}/runtime/Cpp/runtime/src ) -endif() + include_directories(SYSTEM ${antlr_SOURCE_DIR}/runtime/Cpp/runtime/src) +endif () ################################ # Threading @@ -129,38 +129,37 @@ find_package(ICU 60 REQUIRED COMPONENTS uc i18n) find_package(jemalloc QUIET) if (TARGET jemalloc::jemalloc) - MESSAGE(STATUS "Use jemalloc that was installed via conan") - link_libraries(jemalloc::jemalloc) + MESSAGE(STATUS "Use jemalloc that was installed via conan") + link_libraries(jemalloc::jemalloc) elseif (${JEMALLOC_MANUALLY_INSTALLED}) - link_libraries(jemalloc) -else() - find_package(PkgConfig) - pkg_check_modules (JEMALLOC jemalloc) - - pkg_search_module(JEMALLOC jemalloc) - if (${JEMALLOC_FOUND}) - include_directories(${JEMALLOC_INCLUDE_DIRS}) - link_libraries(${JEMALLOC_LIBRARIES}) - else () - message(WARNING "Jemalloc could not be found via + link_libraries(jemalloc) +else () + find_package(PkgConfig) + pkg_check_modules(JEMALLOC jemalloc) + + pkg_search_module(JEMALLOC jemalloc) + if (${JEMALLOC_FOUND}) + include_directories(${JEMALLOC_INCLUDE_DIRS}) + link_libraries(${JEMALLOC_LIBRARIES}) + else () + message(WARNING "Jemalloc could not be found via pkg-config. If you are sure that you have installed jemalloc on your system (e.g. via `apt install libjemalloc-dev` on Ubuntu), you might try rerunning cmake with `-DJEMALLOC_MANUALLY_INSTALLED=True`. This is currently necessary on Ubuntu 18.04, where pkg-config does not find jemalloc. Continuing without jemalloc, this will impact the performance, most notably of the IndexBuilder") - endif() -endif() + endif () +endif () ### ZSTD find_package(ZSTD QUIET) if (TARGET zstd::libzstd_static) MESSAGE(STATUS "Use zstd that was installed via conan") link_libraries(zstd::libzstd_static) -else() -link_libraries(zstd) -endif() - +else () + link_libraries(zstd) +endif () ###################################### @@ -182,15 +181,15 @@ find_package(OpenSSL REQUIRED) # function `qlever_target_link_libraries` for all libraries and executables. It # is a drop-in replacement for `target_link_libraries` that additionally links # against the common libraries. -function (qlever_target_link_libraries target) +function(qlever_target_link_libraries target) target_link_libraries(${target} ${ARGN} absl::flat_hash_map - absl::flat_hash_set absl::strings absl::str_format ICU::uc - ICU::i18n OpenSSL::SSL OpenSSL::Crypto GTest::gtest GTest::gmock stxxl fsst) + absl::flat_hash_set absl::strings absl::str_format ICU::uc + ICU::i18n OpenSSL::SSL OpenSSL::Crypto GTest::gtest GTest::gmock stxxl fsst) # memorySize is a utility library for defining memory sizes. if (NOT ${target} STREQUAL "memorySize") target_link_libraries(${target} memorySize) - endif() + endif () endfunction() @@ -207,7 +206,6 @@ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ADDITIONAL_LINKER_FLAGS} set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ADDITIONAL_LINKER_FLAGS}") - if (${PERFTOOLS_PROFILER}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lprofiler") message(STATUS "Adding -lprofiler (make sure your have google-perftools installed.)") @@ -229,9 +227,9 @@ set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") # CTRE, Compile-Time-Regular-Expressions ################################ FetchContent_Declare( - ctre - GIT_REPOSITORY https://github.com/hanickadot/compile-time-regular-expressions.git - GIT_TAG b3d7788b559e34d985c8530c3e0e7260b67505a6 # v3.8.1 + ctre + GIT_REPOSITORY https://github.com/hanickadot/compile-time-regular-expressions.git + GIT_TAG b3d7788b559e34d985c8530c3e0e7260b67505a6 # v3.8.1 ) ################################ @@ -254,29 +252,34 @@ if (USE_PARALLEL) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") add_definitions("-D_PARALLEL_SORT") endif () -endif() +endif () + +OPTION(_NO_TIMING_TESTS "Disable timing tests on platforms where `sleep` is unreliable" OFF) +if (_NO_TIMING_TESTS) + add_definitions("-D_QLEVER_NO_TIMING_TESTS") +endif () if (USE_TREE_BASED_CACHE) add_definitions("-D_QLEVER_USE_TREE_BASED_CACHE") -endif() +endif () if (RUN_EXPENSIVE_TESTS) message(STATUS "Running expensive unit tests. This is only recommended in release builds") add_definitions("-DQLEVER_RUN_EXPENSIVE_TESTS") -endif() +endif () if (ENABLE_EXPENSIVE_CHECKS) message(STATUS "Enabling checks that potentially have a significant runtime overhead") add_definitions("-DAD_ENABLE_EXPENSIVE_CHECKS") -endif() +endif () set(QUERY_CANCELLATION_MODE "ENABLED" CACHE STRING "Option to allow disabling cancellation checks partially or completely to reduce the overhead of this mechanism during query computation.") # Hint for cmake gui, but not actually enforced set_property(CACHE QUERY_CANCELLATION_MODE PROPERTY STRINGS "ENABLED" "NO_WATCH_DOG" "DISABLED") # So enforce this ourselves -if(QUERY_CANCELLATION_MODE AND NOT QUERY_CANCELLATION_MODE MATCHES "ENABLED|NO_WATCH_DOG|DISABLED") +if (QUERY_CANCELLATION_MODE AND NOT QUERY_CANCELLATION_MODE MATCHES "ENABLED|NO_WATCH_DOG|DISABLED") message(FATAL_ERROR "Invalid value for QUERY_CANCELLATION_MODE '${QUERY_CANCELLATION_MODE}'. Please remove the option entirely or change it to ENABLED, NO_WATCH_DOG or DISABLED.") -endif() +endif () add_definitions("-DQUERY_CANCELLATION_MODE=${QUERY_CANCELLATION_MODE}") ################################ @@ -322,9 +325,9 @@ FetchContent_MakeAvailable(googletest ctre abseil re2 stxxl fsst) # Disable some warnings in RE2, STXXL, and GTEST target_compile_options(re2 PRIVATE -Wno-unused-parameter) target_compile_options(stxxl PRIVATE -Wno-deprecated-declarations) -if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - target_compile_options(gtest PRIVATE -Wno-maybe-uninitialized) -endif() +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + target_compile_options(gtest PRIVATE -Wno-maybe-uninitialized) +endif () include_directories(${ctre_SOURCE_DIR}/single-header) target_compile_options(fsst PRIVATE -Wno-extra -Wno-all -Wno-error) target_compile_options(fsst12 PRIVATE -Wno-extra -Wno-all -Wno-error) @@ -353,13 +356,13 @@ if (NOT DONT_UPDATE_COMPILATION_INFO) # The first output which is never created is necessary s.t. the command is never cached and # always rerun. add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/FileThatNeverExists.cpp" - "${CMAKE_CURRENT_BINARY_DIR}/CompilationInfo.cpp" - COMMAND cmake -P ${CMAKE_CURRENT_SOURCE_DIR}/CompilationInfo.cmake) -else() + "${CMAKE_CURRENT_BINARY_DIR}/CompilationInfo.cpp" + COMMAND cmake -P ${CMAKE_CURRENT_SOURCE_DIR}/CompilationInfo.cmake) +else () add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/CompilationInfo.cpp" COMMAND cmake -P ${CMAKE_CURRENT_SOURCE_DIR}/CompilationInfo.cmake) -endif() +endif () set(LOG_LEVEL_FATAL FATAL) set(LOG_LEVEL_ERROR ERROR) @@ -370,11 +373,11 @@ set(LOG_LEVEL_TIMING TIMING) set(LOG_LEVEL_TRACE TRACE) -if(CMAKE_BUILD_TYPE MATCHES DEBUG) - set(LOGLEVEL DEBUG CACHE STRING "The loglevel") -else() - set(LOGLEVEL INFO CACHE STRING "The loglevel") -endif() +if (CMAKE_BUILD_TYPE MATCHES DEBUG) + set(LOGLEVEL DEBUG CACHE STRING "The loglevel") +else () + set(LOGLEVEL INFO CACHE STRING "The loglevel") +endif () set_property(CACHE LOGLEVEL PROPERTY STRINGS FATAL ERROR WARN INFO DEBUG TIMING TRACE) add_definitions(-DLOGLEVEL=${LOG_LEVEL_${LOGLEVEL}}) @@ -382,9 +385,9 @@ add_definitions(-DLOGLEVEL=${LOG_LEVEL_${LOGLEVEL}}) ################################################## # Warnings about incorrect combination of CMake variables -if(LOGLEVEL MATCHES "FATAL|ERROR" AND QUERY_CANCELLATION_MODE EQUAL "ENABLED") +if (LOGLEVEL MATCHES "FATAL|ERROR" AND QUERY_CANCELLATION_MODE EQUAL "ENABLED") message(WARNING "Log level is not printing logs with level WARN, which is necessary when QUERY_CANCELLATION_MODE=ENABLED for it to work properly") -endif() +endif () ################################################## # Precompiled headers @@ -411,7 +414,7 @@ add_executable(IndexBuilderMain src/index/IndexBuilderMain.cpp) qlever_target_link_libraries(IndexBuilderMain index ${CMAKE_THREAD_LIBS_INIT} Boost::program_options) add_executable(ServerMain src/ServerMain.cpp) -qlever_target_link_libraries (ServerMain engine ${CMAKE_THREAD_LIBS_INIT} Boost::program_options) +qlever_target_link_libraries(ServerMain engine ${CMAKE_THREAD_LIBS_INIT} Boost::program_options) target_precompile_headers(ServerMain REUSE_FROM engine) add_executable(VocabularyMergerMain src/VocabularyMergerMain.cpp) diff --git a/Dockerfile b/Dockerfile index 611c014507..b2d2945de8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ WORKDIR /app/ ENV DEBIAN_FRONTEND=noninteractive WORKDIR /app/build/ -RUN cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -GNinja .. && ninja +RUN cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -D_NO_TIMING_TESTS=ON -GNinja .. && ninja RUN ctest --rerun-failed --output-on-failure FROM base as runtime diff --git a/benchmark/GroupByHashMapBenchmark.cpp b/benchmark/GroupByHashMapBenchmark.cpp index 6c05da3b7a..8b32efa0fd 100644 --- a/benchmark/GroupByHashMapBenchmark.cpp +++ b/benchmark/GroupByHashMapBenchmark.cpp @@ -6,12 +6,12 @@ #include #include "../benchmark/infrastructure/Benchmark.h" +#include "../test/engine/ValuesForTesting.h" #include "../test/util/IdTableHelpers.h" #include "../test/util/IndexTestHelpers.h" #include "engine/GroupBy.h" #include "engine/Sort.h" #include "engine/Values.h" -#include "engine/ValuesForTesting.h" #include "engine/sparqlExpressions/AggregateExpression.h" #include "engine/sparqlExpressions/GroupConcatExpression.h" #include "engine/sparqlExpressions/LiteralExpression.h" diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index 160015b493..4bd05f36f2 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -915,7 +915,7 @@ void GroupBy::extractValues( auto targetIterator = resultTable->getColumn(outCol).begin() + evaluationContext._beginIndex; - for (sparqlExpression::IdOrString val : generator) { + for (sparqlExpression::IdOrLiteralOrIri val : generator) { *targetIterator = sparqlExpression::detail::constantExpressionResultToId( std::move(val), *localVocab); ++targetIterator; diff --git a/src/engine/GroupByHashMapOptimization.h b/src/engine/GroupByHashMapOptimization.h index 76537960a3..afc57a6f77 100644 --- a/src/engine/GroupByHashMapOptimization.h +++ b/src/engine/GroupByHashMapOptimization.h @@ -69,11 +69,11 @@ struct CountAggregationData { // Data to perform MIN/MAX aggregation using the HashMap optimization. template struct ExtremumAggregationData { - sparqlExpression::IdOrString currentValue_; + sparqlExpression::IdOrLiteralOrIri currentValue_; bool firstValueSet_ = false; // _____________________________________________________________________________ - void addValue(const sparqlExpression::IdOrString& value, + void addValue(const sparqlExpression::IdOrLiteralOrIri& value, const sparqlExpression::EvaluationContext* ctx) { if (!firstValueSet_) { currentValue_ = value; @@ -88,10 +88,12 @@ struct ExtremumAggregationData { // _____________________________________________________________________________ [[nodiscard]] ValueId calculateResult(LocalVocab* localVocab) const { auto valueIdResultGetter = [](ValueId id) { return id; }; - auto stringResultGetter = [localVocab](const std::string& str) { - auto localVocabIndex = localVocab->getIndexAndAddIfNotContained(str); - return ValueId::makeFromLocalVocabIndex(localVocabIndex); - }; + auto stringResultGetter = + [localVocab](const ad_utility::triple_component::LiteralOrIri& str) { + auto localVocabIndex = localVocab->getIndexAndAddIfNotContained( + str.toStringRepresentation()); + return ValueId::makeFromLocalVocabIndex(localVocabIndex); + }; return std::visit(ad_utility::OverloadCallOperator(valueIdResultGetter, stringResultGetter), currentValue_); diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index c04774e74b..15ecb9fa83 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -4,10 +4,11 @@ #include "engine/IndexScan.h" +#include + #include #include -#include "absl/strings/str_join.h" #include "index/IndexImpl.h" #include "index/TriplesView.h" #include "parser/ParsedQuery.h" diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index 26f8c3f694..900917f407 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -7,8 +7,6 @@ #include "./Operation.h" -using std::string; - class SparqlTriple; class SparqlTripleSimple; @@ -43,7 +41,7 @@ class IndexScan : public Operation { const std::vector& additionalColumns() const { return additionalColumns_; } - string getDescriptor() const override; + std::string getDescriptor() const override; size_t getResultWidth() const override; @@ -93,6 +91,10 @@ class IndexScan : public Operation { bool knownEmptyResult() override { return getExactSize() == 0; } + bool isIndexScanWithNumVariables(size_t target) const override { + return numVariables() == target; + } + // Currently only the full scans support a limit clause. [[nodiscard]] bool supportsLimit() const override { return getResultWidth() == 3; @@ -114,7 +116,7 @@ class IndexScan : public Operation { size_t computeSizeEstimate() const; - string getCacheKeyImpl() const override; + std::string getCacheKeyImpl() const override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index aac3cbad80..cba6ab70fb 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -47,8 +47,8 @@ Join::Join(QueryExecutionContext* qec, std::shared_ptr t1, // If one of the inputs is a SCAN and the other one is not, always make the // SCAN the right child (which also gives a deterministic order of the // subtrees). This simplifies several branches in the `computeResult` method. - if (t1->getType() == QueryExecutionTree::SCAN && - t2->getType() != QueryExecutionTree::SCAN) { + if (std::dynamic_pointer_cast(t1->getRootOperation()) && + !std::dynamic_pointer_cast(t2->getRootOperation())) { swapChildren(); } _left = std::move(t1); @@ -131,12 +131,14 @@ ResultTable Join::computeResult() { auto rightResIfCached = getCachedOrSmallResult(*_right, _rightJoinCol); checkCancellation(); - if (_left->getType() == QueryExecutionTree::SCAN && - _right->getType() == QueryExecutionTree::SCAN) { + auto leftIndexScan = + std::dynamic_pointer_cast(_left->getRootOperation()); + if (leftIndexScan && + std::dynamic_pointer_cast(_right->getRootOperation())) { if (rightResIfCached && !leftResIfCached) { idTable = computeResultForIndexScanAndIdTable( - rightResIfCached->idTable(), _rightJoinCol, - dynamic_cast(*_left->getRootOperation()), _leftJoinCol); + rightResIfCached->idTable(), _rightJoinCol, *leftIndexScan, + _leftJoinCol); checkCancellation(); return {std::move(idTable), resultSortedOn(), LocalVocab{}}; @@ -169,11 +171,11 @@ ResultTable Join::computeResult() { const auto& leftIdTable = leftRes->idTable(); auto leftHasUndef = !leftIdTable.empty() && leftIdTable.at(0, _leftJoinCol).isUndefined(); - if (_right->getType() == QueryExecutionTree::SCAN && !rightResIfCached && - !leftHasUndef) { + auto rightIndexScan = + std::dynamic_pointer_cast(_right->getRootOperation()); + if (rightIndexScan && !rightResIfCached && !leftHasUndef) { idTable = computeResultForIndexScanAndIdTable( - leftRes->idTable(), _leftJoinCol, - dynamic_cast(*_right->getRootOperation()), _rightJoinCol); + leftRes->idTable(), _leftJoinCol, *rightIndexScan, _rightJoinCol); checkCancellation(); return {std::move(idTable), resultSortedOn(), leftRes->getSharedLocalVocab()}; @@ -565,8 +567,11 @@ void updateRuntimeInfoForLazyScan( // ______________________________________________________________________________________________________ IdTable Join::computeResultForTwoIndexScans() { - AD_CORRECTNESS_CHECK(_left->getType() == QueryExecutionTree::SCAN && - _right->getType() == QueryExecutionTree::SCAN); + auto leftScan = + std::dynamic_pointer_cast(_left->getRootOperation()); + auto rightScan = + std::dynamic_pointer_cast(_right->getRootOperation()); + AD_CORRECTNESS_CHECK(leftScan && rightScan); // The join column already is the first column in both inputs, so we don't // have to permute the inputs and results for the `AddCombinedRowToIdTable` // class to work correctly. @@ -575,12 +580,9 @@ IdTable Join::computeResultForTwoIndexScans() { 1, IdTable{getResultWidth(), getExecutionContext()->getAllocator()}, cancellationHandle_}; - auto& leftScan = dynamic_cast(*_left->getRootOperation()); - auto& rightScan = dynamic_cast(*_right->getRootOperation()); - ad_utility::Timer timer{ad_utility::timer::Timer::InitialStatus::Started}; auto [leftBlocksInternal, rightBlocksInternal] = - IndexScan::lazyScanForJoinOfTwoScans(leftScan, rightScan); + IndexScan::lazyScanForJoinOfTwoScans(*leftScan, *rightScan); runtimeInfo().addDetail("time-for-filtering-blocks", timer.msecs()); auto leftBlocks = convertGenerator(std::move(leftBlocksInternal)); @@ -589,8 +591,8 @@ IdTable Join::computeResultForTwoIndexScans() { ad_utility::zipperJoinForBlocksWithoutUndef(leftBlocks, rightBlocks, std::less{}, rowAdder); - updateRuntimeInfoForLazyScan(leftScan, leftBlocks.details()); - updateRuntimeInfoForLazyScan(rightScan, rightBlocks.details()); + updateRuntimeInfoForLazyScan(*leftScan, leftBlocks.details()); + updateRuntimeInfoForLazyScan(*rightScan, rightBlocks.details()); AD_CORRECTNESS_CHECK(leftBlocks.details().numBlocksRead_ <= rightBlocks.details().numElementsRead_); diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index ee888276ed..46649e7e96 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -100,8 +100,7 @@ shared_ptr Operation::getResult(bool isRoot, auto lock = getExecutionContext()->getQueryTreeCache().pinnedSizes().wlock(); forAllDescendants([&lock](QueryExecutionTree* child) { - if (child->getType() == QueryExecutionTree::OperationType::SCAN && - child->getResultWidth() == 1) { + if (child->getRootOperation()->isIndexScanWithNumVariables(1)) { (*lock)[child->getRootOperation()->getCacheKey()] = child->getSizeEstimate(); } diff --git a/src/engine/Operation.h b/src/engine/Operation.h index af44ed2a53..07ea31dce7 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -7,9 +7,7 @@ #pragma once #include -#include #include -#include #include "engine/QueryExecutionContext.h" #include "engine/ResultTable.h" @@ -19,9 +17,6 @@ #include "parser/data/Variable.h" #include "util/CancellationHandle.h" #include "util/CompilerExtensions.h" -#include "util/Exception.h" -#include "util/Log.h" -#include "util/TypeTraits.h" // forward declaration needed to break dependencies class QueryExecutionTree; @@ -117,6 +112,14 @@ class Operation { virtual void setSelectedVariablesForSubquery( const std::vector& selectedVariables) final; + /// Return true if this object is an instance of `IndexScan` and has the + /// specified number of variables. For this to work this function needs to + /// be overridden by `IndexScan` to do the right thing. + virtual bool isIndexScanWithNumVariables( + [[maybe_unused]] size_t target) const { + return false; + } + RuntimeInformation& runtimeInfo() const { return *_runtimeInfo; } std::shared_ptr getRuntimeInfoPointer() { diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index 831193d386..0b195338bc 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -6,37 +6,13 @@ #include "./QueryExecutionTree.h" -#include -#include +#include +#include +#include #include -#include +#include -#include "engine/Bind.h" -#include "engine/CartesianProductJoin.h" -#include "engine/CountAvailablePredicates.h" -#include "engine/Distinct.h" -#include "engine/ExportQueryExecutionTrees.h" -#include "engine/Filter.h" -#include "engine/GroupBy.h" -#include "engine/HasPredicateScan.h" -#include "engine/IndexScan.h" -#include "engine/Join.h" -#include "engine/Minus.h" -#include "engine/MultiColumnJoin.h" -#include "engine/NeutralElementOperation.h" -#include "engine/OptionalJoin.h" -#include "engine/OrderBy.h" -#include "engine/Service.h" #include "engine/Sort.h" -#include "engine/TextIndexScanForEntity.h" -#include "engine/TextIndexScanForWord.h" -#include "engine/TransitivePathBase.h" -#include "engine/Union.h" -#include "engine/Values.h" -#include "engine/ValuesForTesting.h" -#include "parser/RdfEscaping.h" - -using std::string; using parsedQuery::SelectClause; @@ -45,7 +21,7 @@ QueryExecutionTree::QueryExecutionTree(QueryExecutionContext* const qec) : qec_(qec) {} // _____________________________________________________________________________ -string QueryExecutionTree::getCacheKey() const { +std::string QueryExecutionTree::getCacheKey() const { return rootOperation_->getCacheKey(); } @@ -94,7 +70,7 @@ size_t QueryExecutionTree::getCostEstimate() { // result is pinned in cache. Nothing to compute return 0; } - if (type_ == QueryExecutionTree::SCAN && getResultWidth() == 1) { + if (getRootOperation()->isIndexScanWithNumVariables(1)) { return getSizeEstimate(); } else { return rootOperation_->getCostEstimate(); @@ -142,95 +118,6 @@ void QueryExecutionTree::readFromCache() { } } -template -void QueryExecutionTree::setOperation(std::shared_ptr operation) { - if constexpr (std::is_same_v) { - type_ = SCAN; - } else if constexpr (std::is_same_v) { - type_ = UNION; - } else if constexpr (std::is_same_v) { - type_ = BIND; - } else if constexpr (std::is_same_v) { - type_ = SORT; - } else if constexpr (std::is_same_v) { - type_ = DISTINCT; - } else if constexpr (std::is_same_v) { - type_ = VALUES; - } else if constexpr (std::is_same_v) { - type_ = SERVICE; - } else if constexpr (std::is_same_v) { - type_ = TRANSITIVE_PATH; - } else if constexpr (std::is_same_v) { - type_ = ORDER_BY; - } else if constexpr (std::is_same_v) { - type_ = GROUP_BY; - } else if constexpr (std::is_same_v) { - type_ = HAS_PREDICATE_SCAN; - } else if constexpr (std::is_same_v) { - type_ = FILTER; - } else if constexpr (std::is_same_v) { - type_ = NEUTRAL_ELEMENT; - } else if constexpr (std::is_same_v) { - type_ = JOIN; - } else if constexpr (std::is_same_v) { - type_ = TEXT_INDEX_SCAN_FOR_WORD; - } else if constexpr (std::is_same_v) { - type_ = TEXT_INDEX_SCAN_FOR_ENTITY; - } else if constexpr (std::is_same_v) { - type_ = COUNT_AVAILABLE_PREDICATES; - } else if constexpr (std::is_same_v) { - type_ = MINUS; - } else if constexpr (std::is_same_v) { - type_ = OPTIONAL_JOIN; - } else if constexpr (std::is_same_v) { - type_ = MULTICOLUMN_JOIN; - } else if constexpr (std::is_same_v || - std::is_same_v) { - type_ = DUMMY; - } else if constexpr (std::is_same_v) { - type_ = CARTESIAN_PRODUCT_JOIN; - } else { - static_assert(ad_utility::alwaysFalse, - "New type of operation that was not yet registered"); - } - rootOperation_ = std::move(operation); - readFromCache(); -} - -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation( - std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation( - std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation( - std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation( - std::shared_ptr); -template void QueryExecutionTree::setOperation( - std::shared_ptr); -template void QueryExecutionTree::setOperation( - std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation(std::shared_ptr); -template void QueryExecutionTree::setOperation( - std::shared_ptr); -template void QueryExecutionTree::setOperation( - std::shared_ptr); -template void QueryExecutionTree::setOperation( - std::shared_ptr); -template void QueryExecutionTree::setOperation( - std::shared_ptr); - // ________________________________________________________________________________________________________________ std::shared_ptr QueryExecutionTree::createSortedTree( std::shared_ptr qet, diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h index 10c2d74457..8f8b28eff5 100644 --- a/src/engine/QueryExecutionTree.h +++ b/src/engine/QueryExecutionTree.h @@ -6,64 +6,27 @@ #include #include #include -#include -#include +#include #include "engine/Operation.h" #include "engine/QueryExecutionContext.h" #include "parser/ParsedQuery.h" -#include "parser/data/ConstructQueryExportContext.h" #include "parser/data/Types.h" -#include "util/Conversions.h" -#include "util/Generator.h" -#include "util/HashSet.h" #include "util/stream_generator.h" -using std::shared_ptr; -using std::string; - // A query execution tree. Processed bottom up, which gives an ordering to the // operations needed to solve a query. class QueryExecutionTree { public: - explicit QueryExecutionTree(QueryExecutionContext* const qec); - template - QueryExecutionTree(QueryExecutionContext* const qec, - std::shared_ptr operation) + explicit QueryExecutionTree(QueryExecutionContext* qec); + QueryExecutionTree(QueryExecutionContext* qec, + std::shared_ptr operation) : QueryExecutionTree(qec) { - setOperation(std::move(operation)); + rootOperation_ = std::move(operation); + readFromCache(); } - enum OperationType { - UNDEFINED, - SCAN, - JOIN, - SORT, - ORDER_BY, - FILTER, - DISTINCT, - TEXT_INDEX_SCAN_FOR_WORD, - TEXT_INDEX_SCAN_FOR_ENTITY, - OPTIONAL_JOIN, - COUNT_AVAILABLE_PREDICATES, - GROUP_BY, - HAS_PREDICATE_SCAN, - UNION, - MULTICOLUMN_JOIN, - TRANSITIVE_PATH, - VALUES, - SERVICE, - BIND, - MINUS, - NEUTRAL_ELEMENT, - DUMMY, - CARTESIAN_PRODUCT_JOIN - }; - - template - void setOperation(std::shared_ptr); - - string getCacheKey() const; + std::string getCacheKey() const; const QueryExecutionContext* getQec() const { return qec_; } @@ -82,17 +45,13 @@ class QueryExecutionTree { std::shared_ptr getRootOperation() const { return rootOperation_; } - const OperationType& getType() const { return type_; } - - bool isEmpty() const { - return type_ == OperationType::UNDEFINED || !rootOperation_; - } + bool isEmpty() const { return !rootOperation_; } size_t getVariableColumn(const Variable& variable) const; size_t getResultWidth() const { return rootOperation_->getResultWidth(); } - shared_ptr getResult() const { + std::shared_ptr getResult() const { return rootOperation_->getResult(isRoot()); } @@ -103,7 +62,8 @@ class QueryExecutionTree { size_t columnIndex_; }; - using ColumnIndicesAndTypes = vector>; + using ColumnIndicesAndTypes = + std::vector>; // Returns a vector where the i-th element contains the column index and // `ResultType` of the i-th `selectVariable` in the `resultTable` @@ -144,7 +104,7 @@ class QueryExecutionTree { void readFromCache(); // recursively get all warnings from descendant operations - vector collectWarnings() const { + std::vector collectWarnings() const { return rootOperation_->collectWarnings(); } @@ -181,17 +141,17 @@ class QueryExecutionTree { // sorted accordingly, it is simply returned. static std::shared_ptr createSortedTree( std::shared_ptr qet, - const vector& sortColumns); + const std::vector& sortColumns); // Similar to `createSortedTree` (see directly above), but create the sorted // trees for two different trees, the sort columns of which are specified as // a vector of two-dimensional arrays. This format often appears in // `QueryPlanner.cpp`. static std::pair, - shared_ptr> + std::shared_ptr> createSortedTrees(std::shared_ptr qetA, std::shared_ptr qetB, - const vector>& sortColumns); + const std::vector>& sortColumns); // The return type of the `getSortedTreesAndJoinColumns` function below. It is // deliberately stored as a tuple vs. a struct with named members, so that we @@ -233,7 +193,6 @@ class QueryExecutionTree { QueryExecutionContext* qec_; // No ownership std::shared_ptr rootOperation_ = nullptr; // Owned child. Will be deleted at deconstruction. - OperationType type_ = OperationType::UNDEFINED; std::optional sizeEstimate_ = std::nullopt; bool isRoot_ = false; // used to distinguish the root from child // operations/subtrees when pinning only the result. diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 3f56dcaeea..65d1b040ea 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -487,7 +487,7 @@ std::vector QueryPlanner::optimize( // it might be, that we have not yet applied all the filters // (it might be, that the last join was optional and introduced new variables) if (!candidatePlans.empty()) { - applyFiltersIfPossible(candidatePlans[0], rootPattern->_filters, true); + applyFiltersIfPossible(candidatePlans[0], rootPattern->_filters); checkCancellation(); } @@ -1344,9 +1344,10 @@ string QueryPlanner::getPruningKey( } // _____________________________________________________________________________ +template void QueryPlanner::applyFiltersIfPossible( - vector& row, const vector& filters, - bool replace) const { + vector& row, + const vector& filters) const { // Apply every filter possible. // It is possible when, // 1) the filter has not already been applied @@ -1371,11 +1372,12 @@ void QueryPlanner::applyFiltersIfPossible( // in one go. Changing `row` inside the loop would invalidate the iterators. std::vector addedPlans; for (auto& plan : row) { - if (plan._qet->getType() == QueryExecutionTree::SCAN && - plan._qet->getResultWidth() == 3 && !replace) { - // Do not apply filters to dummies, except at the very end of query - // planning. - continue; + if constexpr (!replace) { + if (plan._qet->getRootOperation()->isIndexScanWithNumVariables(3)) { + // Do not apply filters to dummies, except at the very end of query + // planning. + continue; + } } for (size_t i = 0; i < filters.size(); ++i) { if (((plan._idsOfIncludedFilters >> i) & 1) != 0) { @@ -1393,7 +1395,7 @@ void QueryPlanner::applyFiltersIfPossible( newPlan._idsOfIncludedFilters |= (size_t(1) << i); newPlan._idsOfIncludedNodes = plan._idsOfIncludedNodes; newPlan.type = plan.type; - if (replace) { + if constexpr (replace) { plan = std::move(newPlan); } else { addedPlans.push_back(std::move(newPlan)); @@ -1414,7 +1416,7 @@ QueryPlanner::runDynamicProgrammingOnConnectedComponent( // (there might be duplicates because we already have multiple candidates // for each index scan with different permutations. dpTab.push_back(std::move(connectedComponent)); - applyFiltersIfPossible(dpTab.back(), filters, false); + applyFiltersIfPossible(dpTab.back(), filters); ad_utility::HashSet uniqueNodeIds; std::ranges::copy( dpTab.back() | std::views::transform(&SubtreePlan::_idsOfIncludedNodes), @@ -1429,7 +1431,7 @@ QueryPlanner::runDynamicProgrammingOnConnectedComponent( checkCancellation(); auto newPlans = merge(dpTab[i - 1], dpTab[k - i - 1], tg); dpTab[k - 1].insert(dpTab[k - 1].end(), newPlans.begin(), newPlans.end()); - applyFiltersIfPossible(dpTab.back(), filters, false); + applyFiltersIfPossible(dpTab.back(), filters); } // As we only passed in connected components, we expect the result to always // be nonempty. @@ -1467,7 +1469,7 @@ vector> QueryPlanner::fillDpTab( } if (numConnectedComponents == 1) { // A Cartesian product is not needed if there is only one component. - applyFiltersIfPossible(lastDpRowFromComponents.back(), filters, true); + applyFiltersIfPossible(lastDpRowFromComponents.back(), filters); return lastDpRowFromComponents; } // More than one connected component, set up a Cartesian product. @@ -1483,7 +1485,7 @@ vector> QueryPlanner::fillDpTab( std::back_inserter(subtrees)); result.at(0).push_back( makeSubtreePlan(_qec, std::move(subtrees))); - applyFiltersIfPossible(result.at(0), filters, true); + applyFiltersIfPossible(result.at(0), filters); return result; } @@ -1793,9 +1795,6 @@ std::vector QueryPlanner::createJoinCandidates( const auto& b = !swapForTesting ? bin : ain; std::vector candidates; - // We often query for the type of an operation, so we shorten these checks. - using enum QueryExecutionTree::OperationType; - // TODO find out, what is ACTUALLY the use case for the triple // graph. Is it only meant for (questionable) performance reasons // or does it change the meaning. @@ -1849,8 +1848,8 @@ std::vector QueryPlanner::createJoinCandidates( // CASE: JOIN ON ONE COLUMN ONLY. // Skip if we have two operations, where all three positions are variables. - if (a._qet->getType() == SCAN && a._qet->getResultWidth() == 3 && - b._qet->getType() == SCAN && b._qet->getResultWidth() == 3) { + if (a._qet->getRootOperation()->isIndexScanWithNumVariables(3) && + b._qet->getRootOperation()->isIndexScanWithNumVariables(3)) { return candidates; } @@ -1883,18 +1882,16 @@ auto QueryPlanner::createJoinWithTransitivePath( SubtreePlan a, SubtreePlan b, const std::vector>& jcs) -> std::optional { - using enum QueryExecutionTree::OperationType; - const bool aIsTransPath = a._qet->getType() == TRANSITIVE_PATH; - const bool bIsTransPath = b._qet->getType() == TRANSITIVE_PATH; + auto aTransPath = + std::dynamic_pointer_cast(a._qet->getRootOperation()); + auto bTransPath = + std::dynamic_pointer_cast(b._qet->getRootOperation()); - if (!(aIsTransPath || bIsTransPath)) { + if (!(aTransPath || bTransPath)) { return std::nullopt; } - std::shared_ptr otherTree = - aIsTransPath ? b._qet : a._qet; - auto& transPathTree = aIsTransPath ? a._qet : b._qet; - auto transPathOperation = std::dynamic_pointer_cast( - transPathTree->getRootOperation()); + std::shared_ptr otherTree = aTransPath ? b._qet : a._qet; + auto transPathOperation = aTransPath ? aTransPath : bTransPath; // TODO: Handle the case of two or more common variables if (jcs.size() > 1) { @@ -1902,8 +1899,8 @@ auto QueryPlanner::createJoinWithTransitivePath( "Transitive Path operation with more than" " two common variables is not supported"); } - const size_t otherCol = aIsTransPath ? jcs[0][1] : jcs[0][0]; - const size_t thisCol = aIsTransPath ? jcs[0][0] : jcs[0][1]; + const size_t otherCol = aTransPath ? jcs[0][1] : jcs[0][0]; + const size_t thisCol = aTransPath ? jcs[0][0] : jcs[0][1]; // Do not bind the side of a path twice if (transPathOperation->isBoundOrId()) { return std::nullopt; @@ -1934,11 +1931,14 @@ auto QueryPlanner::createJoinWithHasPredicateScan( // If the join column corresponds to the has-predicate scan's // subject column we can use a specialized join that avoids // loading the full has-predicate predicate. - using enum QueryExecutionTree::OperationType; auto isSuitablePredicateScan = [](const auto& tree, size_t joinColumn) { - return tree._qet->getType() == HAS_PREDICATE_SCAN && joinColumn == 0 && - static_cast(tree._qet->getRootOperation().get()) - ->getType() == HasPredicateScan::ScanType::FULL_SCAN; + if (joinColumn == 0) { + auto rootOperation = std::dynamic_pointer_cast( + tree._qet->getRootOperation()); + return rootOperation && + rootOperation->getType() == HasPredicateScan::ScanType::FULL_SCAN; + } + return false; }; const bool aIsSuitablePredicateScan = isSuitablePredicateScan(a, jcs[0][0]); diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 4078bd04c4..a08cf48e02 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -374,9 +374,9 @@ class QueryPlanner { const SubtreePlan& plan, const vector& orderedOnColumns) const; - [[nodiscard]] void applyFiltersIfPossible( - std::vector& row, const std::vector& filters, - bool replaceInsteadOfAddPlans) const; + template + void applyFiltersIfPossible(std::vector& row, + const std::vector& filters) const; /** * @brief Optimize a set of triples, filters and precomputed candidates diff --git a/src/engine/idTable/CompressedExternalIdTable.h b/src/engine/idTable/CompressedExternalIdTable.h index aacb154d4f..d444db8e69 100644 --- a/src/engine/idTable/CompressedExternalIdTable.h +++ b/src/engine/idTable/CompressedExternalIdTable.h @@ -667,6 +667,9 @@ class CompressedExternalIdTableSorter // requested for the output, and the single block is larger than this // blocksize, we manually have to split it into chunks. auto& block = this->currentBlock_; + if (block.empty()) { + co_return; + } const auto blocksizeOutput = blocksize.value_or(block.numRows()); if (block.numRows() <= blocksizeOutput) { if (this->moveResultOnMerge_) { @@ -735,7 +738,9 @@ class CompressedExternalIdTableSorter } } numPopped += result.numRows(); - co_yield std::move(result).template toStatic(); + if (!result.empty()) { + co_yield std::move(result).template toStatic(); + } AD_CORRECTNESS_CHECK(numPopped == this->numElementsPushed_); } diff --git a/src/engine/sparqlExpressions/AggregateExpression.h b/src/engine/sparqlExpressions/AggregateExpression.h index 23bc06dec6..e19646ea11 100644 --- a/src/engine/sparqlExpressions/AggregateExpression.h +++ b/src/engine/sparqlExpressions/AggregateExpression.h @@ -238,32 +238,32 @@ using AvgExpression = // TODO Comment template inline const auto compareIdsOrStrings = - [](const T& a, const U& b, - const EvaluationContext* ctx) -> IdOrString { + []( + const T& a, const U& b, + const EvaluationContext* ctx) -> IdOrLiteralOrIri { // TODO moveTheStrings. return toBoolNotUndef( sparqlExpression::compareIdsOrStrings< Comp, valueIdComparators::ComparisonForIncompatibleTypes:: CompareByType>(a, b, ctx)) - ? IdOrString{a} - : IdOrString{b}; + ? IdOrLiteralOrIri{a} + : IdOrLiteralOrIri{b}; }; // Min and Max. template -inline const auto minMaxLambdaForAllTypes = []( - const T& a, const T& b, - const EvaluationContext* ctx) { - auto actualImpl = [ctx](const auto& x, const auto& y) { - return compareIdsOrStrings(x, y, ctx); - }; - if constexpr (ad_utility::isSimilar) { - return std::get(actualImpl(a, b)); - } else { - auto base = [](const IdOrString& i) -> const IdOrStringBase& { return i; }; - // TODO We should definitely move strings here. - return std::visit(actualImpl, base(a), base(b)); - } -}; +inline const auto minMaxLambdaForAllTypes = + [](const T& a, const T& b, + const EvaluationContext* ctx) { + auto actualImpl = [ctx](const auto& x, const auto& y) { + return compareIdsOrStrings(x, y, ctx); + }; + if constexpr (ad_utility::isSimilar) { + return std::get(actualImpl(a, b)); + } else { + // TODO We should definitely move strings here. + return std::visit(actualImpl, a, b); + } + }; constexpr inline auto minLambdaForAllTypes = minMaxLambdaForAllTypes; diff --git a/src/engine/sparqlExpressions/ConditionalExpressions.cpp b/src/engine/sparqlExpressions/ConditionalExpressions.cpp index df66810448..29aec5a7b2 100644 --- a/src/engine/sparqlExpressions/ConditionalExpressions.cpp +++ b/src/engine/sparqlExpressions/ConditionalExpressions.cpp @@ -14,8 +14,8 @@ using namespace sparqlExpression::detail; [[maybe_unused]] auto ifImpl = []( EffectiveBooleanValueGetter::Result condition, T&& i, - U&& e) -> IdOrString requires std::is_rvalue_reference_v && - std::is_rvalue_reference_v { + U&& e) -> IdOrLiteralOrIri requires std::is_rvalue_reference_v && + std::is_rvalue_reference_v { if (condition == EffectiveBooleanValueGetter::Result::True) { return AD_FWD(i); } else { @@ -50,16 +50,16 @@ class CoalesceExpression : public VariadicExpression { 0, ctx->size(), [&unboundIndices](size_t i) { unboundIndices.push_back(i); }, [ctx]() { ctx->cancellationHandle_->throwIfCancelled(); }); - VectorWithMemoryLimit result{ctx->_allocator}; + VectorWithMemoryLimit result{ctx->_allocator}; std::fill_n(std::back_inserter(result), ctx->size(), - IdOrString{Id::makeUndefined()}); + IdOrLiteralOrIri{Id::makeUndefined()}); if (result.empty()) { return result; } ctx->cancellationHandle_->throwIfCancelled(); - auto isUnbound = [](const IdOrString& x) { + auto isUnbound = [](const IdOrLiteralOrIri& x) { return (std::holds_alternative(x) && std::get(x) == Id::makeUndefined()); }; @@ -68,7 +68,7 @@ class CoalesceExpression : public VariadicExpression { [&nextUnboundIndices, &unboundIndices, &isUnbound, &result, ctx ](T && childResult) requires isConstantResult { - IdOrString constantResult{AD_FWD(childResult)}; + IdOrLiteralOrIri constantResult{AD_FWD(childResult)}; if (isUnbound(constantResult)) { nextUnboundIndices = std::move(unboundIndices); return; @@ -109,7 +109,7 @@ class CoalesceExpression : public VariadicExpression { // Skip all the indices where the result is already bound from a // previous child. if (i == *unboundIdxIt) { - if (IdOrString val{std::move(*generatorIterator)}; + if (IdOrLiteralOrIri val{std::move(*generatorIterator)}; isUnbound(val)) { nextUnboundIndices.push_back(i); } else { diff --git a/src/engine/sparqlExpressions/GroupConcatExpression.h b/src/engine/sparqlExpressions/GroupConcatExpression.h index f6b985386b..793857b676 100644 --- a/src/engine/sparqlExpressions/GroupConcatExpression.h +++ b/src/engine/sparqlExpressions/GroupConcatExpression.h @@ -53,7 +53,9 @@ class GroupConcatExpression : public SparqlExpression { groupConcatImpl(std::move(generator)); } result.shrink_to_fit(); - return IdOrString(std::move(result)); + return IdOrLiteralOrIri(ad_utility::triple_component::LiteralOrIri{ + ad_utility::triple_component::Literal::literalWithNormalizedContent( + asNormalizedStringViewUnsafe(result))}); }; auto childRes = child_->evaluate(context); diff --git a/src/engine/sparqlExpressions/LiteralExpression.h b/src/engine/sparqlExpressions/LiteralExpression.h index 729a240dc0..92e84957db 100644 --- a/src/engine/sparqlExpressions/LiteralExpression.h +++ b/src/engine/sparqlExpressions/LiteralExpression.h @@ -21,7 +21,7 @@ class LiteralExpression : public SparqlExpression { // make the `const` evaluate function threadsafe and lock-free. // TODO Make this unnecessary by completing multiple small groups at // once during the GROUP BY. - mutable std::atomic cachedResult_ = nullptr; + mutable std::atomic cachedResult_ = nullptr; public: // _________________________________________________________________________ @@ -49,10 +49,11 @@ class LiteralExpression : public SparqlExpression { return *ptr; } auto id = context->_qec.getIndex().getId(s); - IdOrString result = - id.has_value() ? IdOrString{id.value()} - : IdOrString{std::string{s.toStringRepresentation()}}; - auto ptrForCache = std::make_unique(result); + IdOrLiteralOrIri result = + id.has_value() + ? IdOrLiteralOrIri{id.value()} + : IdOrLiteralOrIri{ad_utility::triple_component::LiteralOrIri{s}}; + auto ptrForCache = std::make_unique(result); ptrForCache.reset(std::atomic_exchange_explicit( &cachedResult_, ptrForCache.release(), std::memory_order_relaxed)); context->cancellationHandle_->throwIfCancelled(); diff --git a/src/engine/sparqlExpressions/RelationalExpressionHelpers.h b/src/engine/sparqlExpressions/RelationalExpressionHelpers.h index 4d1cd53a7e..4c9f24d8be 100644 --- a/src/engine/sparqlExpressions/RelationalExpressionHelpers.h +++ b/src/engine/sparqlExpressions/RelationalExpressionHelpers.h @@ -113,13 +113,16 @@ constexpr Comparison getComparisonForSwappedArguments(Comparison comp) { // collation level. // TODO Make the collation level configurable. inline std::pair getRangeFromVocab( - const std::string& s, const EvaluationContext* context) { + const ad_utility::triple_component::LiteralOrIri& s, + const EvaluationContext* context) { auto level = TripleComponentComparator::Level::QUARTERNARY; // TODO This should be `Vocab::equal_range` - const ValueId lower = Id::makeFromVocabIndex( - context->_qec.getIndex().getVocab().lower_bound(s, level)); - const ValueId upper = Id::makeFromVocabIndex( - context->_qec.getIndex().getVocab().upper_bound(s, level)); + const ValueId lower = + Id::makeFromVocabIndex(context->_qec.getIndex().getVocab().lower_bound( + s.toStringRepresentation(), level)); + const ValueId upper = + Id::makeFromVocabIndex(context->_qec.getIndex().getVocab().upper_bound( + s.toStringRepresentation(), level)); return {lower, upper}; } @@ -127,10 +130,11 @@ inline std::pair getRangeFromVocab( // consecutive range of IDs. For its usage see below. template concept StoresStringOrId = - ad_utility::SimilarToAny>; -// Convert a string or `IdOrString` value into the (possibly empty) range of -// corresponding `ValueIds` (denoted by a `std::pair`, see + ad_utility::SimilarToAny>; +// Convert a string or `IdOrLiteralOrIri` value into the (possibly empty) range +// of corresponding `ValueIds` (denoted by a `std::pair`, see // `getRangeFromVocab` above for details). This function also takes `ValueId`s // and `pair` which are simply returned unchanged. This makes // the usage of this function easier. @@ -140,7 +144,7 @@ auto makeValueId(const S& value, const EvaluationContext* context) { return value; } else if constexpr (ad_utility::isSimilar>) { return value; - } else if constexpr (ad_utility::isSimilar) { + } else if constexpr (ad_utility::isSimilar) { auto visitor = [context](const auto& x) { auto res = makeValueId(x, context); if constexpr (ad_utility::isSimilar) { @@ -152,10 +156,11 @@ auto makeValueId(const S& value, const EvaluationContext* context) { return res; } }; - return value.visit(visitor); + return std::visit(visitor, value); } else { - static_assert(ad_utility::isSimilar); + static_assert( + ad_utility::isSimilar); return getRangeFromVocab(value, context); } }; @@ -171,11 +176,13 @@ inline const auto compareIdsOrStrings = []( const T& a, const U& b, const EvaluationContext* ctx) -> valueIdComparators::ComparisonResult { - if constexpr (ad_utility::isSimilar && - ad_utility::isSimilar) { + using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; + if constexpr (ad_utility::isSimilar && + ad_utility::isSimilar) { // TODO integrate comparison via ICU and proper handling for // IRIs/ Literals/etc. - return valueIdComparators::fromBool(applyComparison(a, b)); + return valueIdComparators::fromBool(applyComparison( + a.toStringRepresentation(), b.toStringRepresentation())); } else { auto x = makeValueId(a, ctx); auto y = makeValueId(b, ctx); diff --git a/src/engine/sparqlExpressions/RelationalExpressions.cpp b/src/engine/sparqlExpressions/RelationalExpressions.cpp index 0a328c7bf5..a99d67180c 100644 --- a/src/engine/sparqlExpressions/RelationalExpressions.cpp +++ b/src/engine/sparqlExpressions/RelationalExpressions.cpp @@ -159,9 +159,9 @@ requires AreComparable ExpressionResult evaluateRelationalExpression( return std::nullopt; }; std::optional resultFromBinarySearch; - if constexpr (ad_utility::isSimilar) { + if constexpr (ad_utility::isSimilar) { resultFromBinarySearch = - value2.visit([&impl](const auto& x) { return impl(x); }); + std::visit([&impl](const auto& x) { return impl(x); }, value2); } else { resultFromBinarySearch = impl(value2); } @@ -186,14 +186,7 @@ requires AreComparable ExpressionResult evaluateRelationalExpression( AlwaysUndef>(x, y, context))); } }; - auto base = [](const I& arg) -> decltype(auto) { - if constexpr (ad_utility::isSimilar) { - return static_cast(arg); - } else { - return arg; - } - }; - ad_utility::visitWithVariantsAndParameters(impl, base(*itA), base(*itB)); + ad_utility::visitWithVariantsAndParameters(impl, *itA, *itB); ++itA; ++itB; context->cancellationHandle_->throwIfCancelled(); diff --git a/src/engine/sparqlExpressions/SparqlExpressionTypes.cpp b/src/engine/sparqlExpressions/SparqlExpressionTypes.cpp index 9dd5a0be07..36fec1d212 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionTypes.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionTypes.cpp @@ -6,17 +6,18 @@ namespace sparqlExpression { -IdOrString::IdOrString(std::optional s) { - if (s.has_value()) { - emplace(std::move(s.value())); - } else { - emplace(Id::makeUndefined()); - } -} - // _____________________________________________________________________________ -void PrintTo(const IdOrString& var, std::ostream* os) { - std::visit([&os](const auto& s) { *os << s; }, var); +void PrintTo(const IdOrLiteralOrIri& var, std::ostream* os) { + std::visit( + [&os](const T& s) { + auto& stream = *os; + if constexpr (std::same_as) { + stream << s; + } else { + stream << s.toStringRepresentation(); + } + }, + var); } // _____________________________________________________________________________ diff --git a/src/engine/sparqlExpressions/SparqlExpressionTypes.h b/src/engine/sparqlExpressions/SparqlExpressionTypes.h index 297a73f19d..dc753e5b04 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionTypes.h +++ b/src/engine/sparqlExpressions/SparqlExpressionTypes.h @@ -11,6 +11,8 @@ #include "engine/QueryExecutionContext.h" #include "engine/sparqlExpressions/SetOfIntervals.h" #include "global/Id.h" +#include "parser/LiteralOrIri.h" +#include "parser/TripleComponent.h" #include "parser/data/Variable.h" #include "util/AllocatorWithLimit.h" #include "util/HashSet.h" @@ -51,19 +53,11 @@ class VectorWithMemoryLimit // A class to store the results of expressions that can yield strings or IDs as // their result (for example IF and COALESCE). It is also used for expressions -// that can only yield strings. It is currently implemented as a thin wrapper -// around a `variant`, but a more sophisticated implementation could be done in -// the future. -using IdOrStringBase = std::variant; -class IdOrString : public IdOrStringBase, - public VisitMixin { - public: - using IdOrStringBase::IdOrStringBase; - explicit IdOrString(std::optional s); -}; - -// Print an `IdOrString` for googletest. -void PrintTo(const IdOrString& var, std::ostream* os); +// that can only yield strings. +using IdOrLiteralOrIri = + std::variant; +// Printing for GTest. +void PrintTo(const IdOrLiteralOrIri& var, std::ostream* os); /// The result of an expression can either be a vector of bool/double/int/string /// a variable (e.g. in BIND (?x as ?y)) or a "Set" of indices, which identifies @@ -72,7 +66,7 @@ void PrintTo(const IdOrString& var, std::ostream* os); namespace detail { // For each type T in this tuple, T as well as VectorWithMemoryLimit are // possible expression result types. -using ConstantTypes = std::tuple; +using ConstantTypes = std::tuple; using ConstantTypesAsVector = ad_utility::LiftedTuple; @@ -195,12 +189,14 @@ requires isConstantResult && std::is_rvalue_reference_v Id constantExpressionResultToId(T&& result, LocalVocabT& localVocab) { if constexpr (ad_utility::isSimilar) { return result; - } else if constexpr (ad_utility::isSimilar) { + } else if constexpr (ad_utility::isSimilar) { return std::visit( [&localVocab](R&& el) mutable { - if constexpr (ad_utility::isSimilar) { + if constexpr (ad_utility::isSimilar< + R, ad_utility::triple_component::LiteralOrIri>) { return Id::makeFromLocalVocabIndex( - localVocab.getIndexAndAddIfNotContained(std::forward(el))); + localVocab.getIndexAndAddIfNotContained( + AD_FWD(el).toStringRepresentation())); } else { static_assert(ad_utility::isSimilar); return el; diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h index f124121b12..95c9f270b7 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h @@ -20,6 +20,8 @@ namespace sparqlExpression::detail { +using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; + // An empty struct to represent a non-numeric value in a context where only // numeric values make sense. struct NotNumeric {}; @@ -51,11 +53,12 @@ Id makeNumericId(T t) { } // All the numeric value getters have an `operator()` for `ValueId` and one for -// `std::string`. This mixin adds the `operator()` for the `IdOrString` variant -// via the CRTP pattern. +// `std::string`. This mixin adds the `operator()` for the `IdOrLiteralOrIri` +// variant via the CRTP pattern. template struct Mixin { - decltype(auto) operator()(IdOrString s, const EvaluationContext* ctx) const { + decltype(auto) operator()(IdOrLiteralOrIri s, + const EvaluationContext* ctx) const { return std::visit( [this, ctx](auto el) { return static_cast(this)->operator()(el, ctx); @@ -67,7 +70,7 @@ struct Mixin { // Return `NumericValue` which is then used as the input to numeric expressions. struct NumericValueGetter : Mixin { using Mixin::operator(); - NumericValue operator()(const string&, const EvaluationContext*) const { + NumericValue operator()(const LiteralOrIri&, const EvaluationContext*) const { return NotNumeric{}; } @@ -91,7 +94,7 @@ struct IsValidValueGetter : Mixin { // check for NULL/UNDEF values. bool operator()(ValueId id, const EvaluationContext*) const; - bool operator()(const string&, const EvaluationContext*) const { + bool operator()(const LiteralOrIri&, const EvaluationContext*) const { return true; } }; @@ -105,8 +108,8 @@ struct EffectiveBooleanValueGetter : Mixin { Result operator()(ValueId id, const EvaluationContext*) const; // Nonempty strings are true. - Result operator()(const std::string& s, const EvaluationContext*) const { - return s.empty() ? Result::False : Result::True; + Result operator()(const LiteralOrIri& s, const EvaluationContext*) const { + return s.getContent().empty() ? Result::False : Result::True; } }; @@ -116,13 +119,11 @@ struct StringValueGetter : Mixin { using Mixin::operator(); std::optional operator()(ValueId, const EvaluationContext*) const; - std::optional operator()(string s, const EvaluationContext*) const { - // Strip quotes - // TODO Use stronger types to encode literals/ IRIs/ ETC - if (s.size() >= 2 && s.starts_with('"') && s.ends_with('"')) { - return s.substr(1, s.size() - 2); - } - return s; + // TODO probably we should return a reference or a view here. + // TODO use a `NormalizedStringView` inside the expressions. + std::optional operator()(const LiteralOrIri& s, + const EvaluationContext*) const { + return std::string(asStringViewUnsafe(s.getContent())); } }; @@ -133,20 +134,24 @@ struct IsBlankNodeValueGetter : Mixin { return Id::makeFromBool(id.getDatatype() == Datatype::BlankNodeIndex); } - Id operator()(const std::string&, const EvaluationContext*) const { + Id operator()(const LiteralOrIri&, const EvaluationContext*) const { return Id::makeFromBool(false); } }; // Value getters for `isIRI`, `isBlank`, and `isLiteral`. -template +template struct IsSomethingValueGetter - : Mixin> { + : Mixin> { using Mixin::operator(); Id operator()(ValueId id, const EvaluationContext* context) const; - Id operator()(const std::string& s, const EvaluationContext*) const { - return Id::makeFromBool(s.starts_with(prefix)); + Id operator()(const LiteralOrIri& s, const EvaluationContext*) const { + // TODO Use the `isLiteral` etc. functions directly as soon as the + // local vocabulary also stores `LiteralOrIri`. + return Id::makeFromBool(s.toStringRepresentation().starts_with( + isLiteralOrIriSomethingFunction)); } }; static constexpr auto isIriPrefix = ad_utility::ConstexprSmallString<2>{"<"}; @@ -166,7 +171,7 @@ struct IsNumericValueGetter : Mixin { return Id::makeFromBool(datatype == Datatype::Double || datatype == Datatype::Int); } - Id operator()(const std::string&, const EvaluationContext*) const { + Id operator()(const LiteralOrIri&, const EvaluationContext*) const { return Id::makeFromBool(false); } }; @@ -185,7 +190,7 @@ struct DateValueGetter : Mixin { } } - Opt operator()(const std::string&, const EvaluationContext*) const { + Opt operator()(const LiteralOrIri&, const EvaluationContext*) const { return std::nullopt; } }; @@ -198,14 +203,12 @@ struct LiteralFromIdGetter : Mixin { using Mixin::operator(); std::optional operator()(ValueId id, const EvaluationContext* context) const; - std::optional operator()(std::string s, + std::optional operator()(const LiteralOrIri& s, const EvaluationContext*) const { - // Strip quotes - // TODO Use stronger types to encode literals/ IRIs/ ETC - if (s.size() >= 2 && s.starts_with('"') && s.ends_with('"')) { - return s.substr(1, s.size() - 2); + if (s.isIri()) { + return std::nullopt; } - return s; + return std::string{asStringViewUnsafe(s.getContent())}; } }; diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 43c3272dc4..7e48871a94 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -9,9 +9,26 @@ namespace sparqlExpression { namespace detail::string_expressions { + +using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; + +// Convert a `string_view` to a `LiteralOrIri` that stores a `Literal`. +// Note: This currently requires a copy of a string since the `Literal` class +// has to add the quotation marks. +constexpr auto toLiteral = [](std::string_view normalizedContent) { + return LiteralOrIri{ + ad_utility::triple_component::Literal::literalWithNormalizedContent( + asNormalizedStringViewUnsafe(normalizedContent))}; +}; + // String functions. -[[maybe_unused]] auto strImpl = [](std::optional s) { - return IdOrString{std::move(s)}; +[[maybe_unused]] auto strImpl = + [](std::optional s) -> IdOrLiteralOrIri { + if (s.has_value()) { + return IdOrLiteralOrIri{toLiteral(s.value())}; + } else { + return Id::makeUndefined(); + } }; NARY_EXPRESSION(StrExpressionImpl, 1, FV); @@ -70,10 +87,11 @@ class StringExpressionImpl : public SparqlExpression { // Lift a `Function` that takes one or multiple `std::string`s (possibly via // references) and returns an `Id` or `std::string` to a function that takes the -// same number of `std::optional` and returns `Id` or `IdOrString`. -// If any of the optionals is `std::nullopt`, then UNDEF is returned, else the -// result of the `Function` with the values of the optionals. This is a useful -// helper function for implementing expressions that work on strings. +// same number of `std::optional` and returns `Id` or +// `IdOrLiteralOrIri`. If any of the optionals is `std::nullopt`, then UNDEF is +// returned, else the result of the `Function` with the values of the optionals. +// This is a useful helper function for implementing expressions that work on +// strings. template struct LiftStringFunction { template >... Arguments> @@ -81,12 +99,12 @@ struct LiftStringFunction { using ResultOfFunction = decltype(std::invoke(Function{}, std::move(arguments.value())...)); static_assert(std::same_as || - std::same_as, + std::same_as, "Template argument of `LiftStringFunction` must return `Id` " "or `std::string`"); using Result = std::conditional_t, Id, - IdOrString>; + IdOrLiteralOrIri>; if ((... || !arguments.has_value())) { return Result{Id::makeUndefined()}; } @@ -104,22 +122,22 @@ using StrlenExpression = // LCASE [[maybe_unused]] auto lowercaseImpl = - [](std::optional input) -> IdOrString { + [](std::optional input) -> IdOrLiteralOrIri { if (!input.has_value()) { return Id::makeUndefined(); } else { - return ad_utility::utf8ToLower(input.value()); + return toLiteral(ad_utility::utf8ToLower(input.value())); } }; using LowercaseExpression = StringExpressionImpl<1, decltype(lowercaseImpl)>; // UCASE [[maybe_unused]] auto uppercaseImpl = - [](std::optional input) -> IdOrString { + [](std::optional input) -> IdOrLiteralOrIri { if (!input.has_value()) { return Id::makeUndefined(); } else { - return ad_utility::utf8ToUpper(input.value()); + return toLiteral(ad_utility::utf8ToUpper(input.value())); } }; using UppercaseExpression = StringExpressionImpl<1, decltype(uppercaseImpl)>; @@ -149,15 +167,15 @@ class SubstrImpl { }; public: - IdOrString operator()(std::optional s, NumericValue start, - NumericValue length) const { + IdOrLiteralOrIri operator()(std::optional s, NumericValue start, + NumericValue length) const { if (!s.has_value() || std::holds_alternative(start) || std::holds_alternative(length)) { return Id::makeUndefined(); } if (isNan(start) || isNan(length)) { - return std::string{}; + return toLiteral(""); } // In SPARQL indices are 1-based, but the implementation we use is 0 based, @@ -185,8 +203,8 @@ class SubstrImpl { return static_cast(n); }; - return std::string{ - ad_utility::getUTF8Substring(str, clamp(startInt), clamp(lengthInt))}; + return toLiteral( + ad_utility::getUTF8Substring(str, clamp(startInt), clamp(lengthInt))); } }; @@ -226,22 +244,22 @@ using ContainsExpression = // STRAFTER / STRBEFORE template [[maybe_unused]] auto strAfterOrBeforeImpl = - [](std::string text, std::string_view pattern) -> std::string { - // Required by the SPARQL standard. - if (pattern.empty()) { - return text; - } - auto pos = text.find(pattern); - if (pos >= text.size()) { - return ""; - } - if constexpr (isStrAfter) { - return text.substr(pos + pattern.size()); - } else { - // STRBEFORE - return text.substr(0, pos); - } -}; + [](std::string_view text, std::string_view pattern) { + // Required by the SPARQL standard. + if (pattern.empty()) { + return toLiteral(text); + } + auto pos = text.find(pattern); + if (pos >= text.size()) { + return toLiteral(""); + } + if constexpr (isStrAfter) { + return toLiteral(text.substr(pos + pattern.size())); + } else { + // STRBEFORE + return toLiteral(text.substr(0, pos)); + } + }; auto strAfter = strAfterOrBeforeImpl; @@ -257,7 +275,7 @@ using StrBeforeExpression = [[maybe_unused]] auto replaceImpl = [](std::optional input, const std::unique_ptr& pattern, - const std::optional& replacement) -> IdOrString { + const std::optional& replacement) -> IdOrLiteralOrIri { if (!input.has_value() || !pattern || !replacement.has_value()) { return Id::makeUndefined(); } @@ -269,7 +287,7 @@ using StrBeforeExpression = } const auto& repl = replacement.value(); re2::RE2::GlobalReplace(&in, pat, repl); - return std::move(in); + return toLiteral(in); }; using ReplaceExpression = @@ -352,15 +370,16 @@ class ConcatExpression : public detail::VariadicExpression { std::visit(visitSingleExpressionResult, child->evaluate(ctx)); }); - // Lift the result from `string` to `IdOrString` which is needed for the - // expression module. + // Lift the result from `string` to `IdOrLiteralOrIri` which is needed for + // the expression module. if (std::holds_alternative(result)) { - return IdOrString{std::move(std::get(result))}; + return IdOrLiteralOrIri{toLiteral(std::get(result))}; } else { auto& stringVec = std::get(result); - VectorWithMemoryLimit resultAsVec{ - std::make_move_iterator(stringVec.begin()), - std::make_move_iterator(stringVec.end()), ctx->_allocator}; + VectorWithMemoryLimit resultAsVec(ctx->_allocator); + resultAsVec.reserve(stringVec.size()); + std::ranges::copy(stringVec | std::views::transform(toLiteral), + std::back_inserter(resultAsVec)); return resultAsVec; } } @@ -368,20 +387,13 @@ class ConcatExpression : public detail::VariadicExpression { // ENCODE_FOR_URI [[maybe_unused]] auto encodeForUriImpl = - [](std::optional input) -> IdOrString { + [](std::optional input) -> IdOrLiteralOrIri { if (!input.has_value()) { return Id::makeUndefined(); } else { std::string_view value{input.value()}; - if (value.starts_with("\"")) { - auto contentEnd = ad_utility::findLiteralEnd(value, "\""); - if (contentEnd != 0) { - std::string_view content = value.substr(1, contentEnd - 1); - return boost::urls::encode(content, boost::urls::unreserved_chars); - } - } - return boost::urls::encode(value, boost::urls::unreserved_chars); + return toLiteral(boost::urls::encode(value, boost::urls::unreserved_chars)); } }; using EncodeForUriExpression = diff --git a/src/index/IndexFormatVersion.h b/src/index/IndexFormatVersion.h index 101134ce73..3c1fbf4260 100644 --- a/src/index/IndexFormatVersion.h +++ b/src/index/IndexFormatVersion.h @@ -36,5 +36,5 @@ struct IndexFormatVersion { // The actual index version. Change it once the binary format of the index // changes. inline const IndexFormatVersion& indexFormatVersion{ - 1296, DateOrLargeYear{Date{2024, 3, 8}}}; + 1320, DateOrLargeYear{Date{2024, 4, 17}}}; } // namespace qlever diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 6a6ab048c1..705dc182dc 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -208,6 +208,11 @@ std::unique_ptr> IndexImpl::buildOspWithPatterns( // S P O PatternOfS PatternOfO, sorted by OPS. auto blockGenerator = [](auto& queue) -> cppcoro::generator> { + // If an exception occurs in the block that is consuming the blocks yielded + // from this generator, we have to explicitly finish the `queue`, otherwise + // there will be a deadlock because the threads involved in the queue can + // never join. + absl::Cleanup cl{[&queue]() { queue.finish(); }}; while (auto block = queue.pop()) { co_yield fixBlockAfterPatternJoin(std::move(block)); } diff --git a/src/index/VocabularyOnDisk.cpp b/src/index/VocabularyOnDisk.cpp index 51ec204806..a16838be95 100644 --- a/src/index/VocabularyOnDisk.cpp +++ b/src/index/VocabularyOnDisk.cpp @@ -12,35 +12,19 @@ using OffsetAndSize = VocabularyOnDisk::OffsetAndSize; // ____________________________________________________________________________ -std::optional VocabularyOnDisk::getOffsetAndSize( - uint64_t idx) const { - IndexAndOffset idAndDummyOffset{idx, 0}; - auto it = std::ranges::lower_bound(idsAndOffsets_, idAndDummyOffset); - if (it >= idsAndOffsets_.end() - 1 || it->idx_ != idx) { - return std::nullopt; - } - return getOffsetAndSizeForIthElement(it - idsAndOffsets_.begin()); -} - -// ____________________________________________________________________________ -VocabularyOnDisk::OffsetSizeId VocabularyOnDisk::getOffsetSizeIdForIthElement( - uint64_t i) const { - AD_CONTRACT_CHECK(i < size()); - const auto offset = idsAndOffsets_[i].offset_; - const auto nextOffset = idsAndOffsets_[i + 1].offset_; - return OffsetSizeId{offset, nextOffset - offset, idsAndOffsets_[i].idx_}; +OffsetAndSize VocabularyOnDisk::getOffsetAndSize(uint64_t i) const { + AD_CORRECTNESS_CHECK(i < size()); + const auto offset = offsets_[i]; + const auto nextOffset = offsets_[i + 1]; + return {offset, nextOffset - offset}; } // _____________________________________________________________________________ -std::optional VocabularyOnDisk::operator[](uint64_t idx) const { - auto optionalOffsetAndSize = getOffsetAndSize(idx); - if (!optionalOffsetAndSize.has_value()) { - return std::nullopt; - } - - string result(optionalOffsetAndSize->_size, '\0'); - file_.read(result.data(), optionalOffsetAndSize->_size, - optionalOffsetAndSize->_offset); +std::string VocabularyOnDisk::operator[](uint64_t idx) const { + AD_CONTRACT_CHECK(idx < size()); + auto offsetAndSize = getOffsetAndSize(idx); + string result(offsetAndSize._size, '\0'); + file_.read(result.data(), offsetAndSize._size, offsetAndSize._offset); return result; } @@ -50,27 +34,22 @@ void VocabularyOnDisk::buildFromIterable(Iterable&& it, const string& fileName) { { file_.open(fileName.c_str(), "w"); - ad_utility::MmapVector idsAndOffsets( - fileName + offsetSuffix_, ad_utility::CreateTag{}); + ad_utility::MmapVector offsets(fileName + offsetSuffix_, + ad_utility::CreateTag{}); uint64_t currentOffset = 0; - std::optional previousId = std::nullopt; + uint64_t nextId = 0; for (const auto& [word, id] : it) { - AD_CONTRACT_CHECK(!previousId.has_value() || previousId.value() < id); - idsAndOffsets.push_back(IndexAndOffset{id, currentOffset}); + AD_CONTRACT_CHECK(nextId == id); + ++nextId; + offsets.push_back(currentOffset); currentOffset += file_.write(word.data(), word.size()); - previousId = id; } // End offset of last vocabulary entry, also consistent with the empty // vocabulary. - if (previousId.has_value()) { - idsAndOffsets.push_back( - IndexAndOffset{previousId.value() + 1, currentOffset}); - } else { - idsAndOffsets.push_back(IndexAndOffset{highestIdx_ + 1, currentOffset}); - } + offsets.push_back(currentOffset); file_.close(); - } // After this close, the destructor of MmapVector is called, whoch dumps + } // After this close, the destructor of MmapVector is called, which dumps // everything to disk. open(fileName); } @@ -78,17 +57,13 @@ void VocabularyOnDisk::buildFromIterable(Iterable&& it, // _____________________________________________________________________________ VocabularyOnDisk::WordWriter::WordWriter(const std::string& outFilename) : file_{outFilename.c_str(), "w"}, - idsAndOffsets_{absl::StrCat(outFilename, VocabularyOnDisk::offsetSuffix_), - ad_utility::CreateTag{}} {} + offsets_{absl::StrCat(outFilename, VocabularyOnDisk::offsetSuffix_), + ad_utility::CreateTag{}} {} // _____________________________________________________________________________ void VocabularyOnDisk::WordWriter::operator()(std::string_view word) { - AD_CONTRACT_CHECK(!previousId_.has_value() || - previousId_.value() < currentIndex_); - idsAndOffsets_.push_back(IndexAndOffset{currentIndex_, currentOffset_}); + offsets_.push_back(currentOffset_); currentOffset_ += file_.write(word.data(), word.size()); - previousId_ = currentIndex_; - ++currentIndex_; } // _____________________________________________________________________________ @@ -98,10 +73,9 @@ void VocabularyOnDisk::WordWriter::finish() { } // End offset of last vocabulary entry, also consistent with the empty // vocabulary. - auto endIndex = previousId_.value_or(VocabularyOnDisk::highestIndexEmpty_); - idsAndOffsets_.push_back(IndexAndOffset{endIndex, currentOffset_}); + offsets_.push_back(currentOffset_); file_.close(); - idsAndOffsets_.close(); + offsets_.close(); } // _____________________________________________________________________________ @@ -120,21 +94,10 @@ void VocabularyOnDisk::buildFromStringsAndIds( // _____________________________________________________________________________ void VocabularyOnDisk::open(const std::string& filename) { file_.open(filename.c_str(), "r"); - idsAndOffsets_.open(filename + offsetSuffix_); - AD_CONTRACT_CHECK(idsAndOffsets_.size() > 0); - size_ = idsAndOffsets_.size() - 1; + offsets_.open(filename + offsetSuffix_); + AD_CORRECTNESS_CHECK(offsets_.size() > 0); + size_ = offsets_.size() - 1; if (size_ > 0) { - highestIdx_ = (*(end() - 1))._index; + highestIdx_ = size_ - 1; } } - -// ____________________________________________________________________________ -WordAndIndex VocabularyOnDisk::getIthElement(size_t n) const { - AD_CONTRACT_CHECK(n < idsAndOffsets_.size()); - auto offsetSizeId = getOffsetSizeIdForIthElement(n); - - std::string result(offsetSizeId._size, '\0'); - file_.read(result.data(), offsetSizeId._size, offsetSizeId._offset); - - return {std::move(result), offsetSizeId._id}; -} diff --git a/src/index/VocabularyOnDisk.h b/src/index/VocabularyOnDisk.h index dd45b2e98e..9bc258459a 100644 --- a/src/index/VocabularyOnDisk.h +++ b/src/index/VocabularyOnDisk.h @@ -19,24 +19,13 @@ // then binary search for a string can be performed. class VocabularyOnDisk { private: - // An ID and the offset of the corresponding word in the underlying file. - struct IndexAndOffset { - uint64_t idx_; - uint64_t offset_; - // Compare only by the IDs. - auto operator<=>(const IndexAndOffset& rhs) const { - return idx_ <=> rhs.idx_; - } - // Equality comparison is currently unused, but it must be declared to be - // able to use `std::ranges::lower_bound` etc. - bool operator==(const IndexAndOffset& rhs) const; - }; - + // The offset of a word in the underlying file. + using Offset = uint64_t; // The file in which the words are stored. mutable ad_utility::File file_; // The IDs and offsets of the words. - ad_utility::MmapVectorView idsAndOffsets_; + ad_utility::MmapVectorView offsets_; // The highest ID that occurs in the vocabulary. If the vocabulary is empty, // this will be Id(-1), s.t. highestIdx_ + 1 will overflow to 0. @@ -48,7 +37,7 @@ class VocabularyOnDisk { // This suffix is appended to the filename of the main file, in order to get // the name for the file in which IDs and offsets are stored. - static constexpr std::string_view offsetSuffix_ = ".idsAndOffsets"; + static constexpr std::string_view offsetSuffix_ = ".offsets"; public: // A helper class that is used to build a vocabulary word by word. @@ -59,10 +48,8 @@ class VocabularyOnDisk { class WordWriter { private: ad_utility::File file_; - ad_utility::MmapVector idsAndOffsets_; + ad_utility::MmapVector offsets_; uint64_t currentOffset_ = 0; - std::optional previousId_ = std::nullopt; - uint64_t currentIndex_ = 0; bool isFinished_ = false; ad_utility::ThrowInDestructorIfSafe throwInDestructorIfSafe_; @@ -87,9 +74,9 @@ class VocabularyOnDisk { /// this file, for example via `buildFromVector` or `buildFromTextFile`. void open(const std::string& filename); - /// If an entry with this `idx` exists, return the corresponding string, else - /// `std::nullopt` - std::optional operator[](uint64_t idx) const; + // Return the word that is stored at the index. Throw an exception if `idx >= + // size`. + std::string operator[](uint64_t idx) const; /// Get the number of words in the vocabulary. size_t size() const { return size_; } @@ -154,24 +141,15 @@ class VocabularyOnDisk { return iteratorToWordAndIndex(it); } - // The offset of a word in `file_`, its size in number of bytes and its ID - struct OffsetSizeId { - uint64_t _offset; - uint64_t _size; - uint64_t _id; - }; - // The offset of a word in `file_` and its size in number of bytes. struct OffsetAndSize { uint64_t _offset; uint64_t _size; - OffsetAndSize(OffsetSizeId osi) : _offset{osi._offset}, _size{osi._size} {} - OffsetAndSize() = default; }; // Helper function for implementing a random access iterator. using Accessor = decltype([](const auto& vocabulary, uint64_t index) { - return vocabulary.getIthElement(index); + return vocabulary[index]; }); // Const random access iterators, implemented via the // `IteratorForAccessOperator` template. @@ -181,11 +159,6 @@ class VocabularyOnDisk { const_iterator end() const { return {this, size()}; } private: - // Return the `i`-th element from this vocabulary. Note that this is (in - // general) NOT the element with the ID `i`, because the ID space is not - // contiguous. - WordAndIndex getIthElement(size_t n) const; - // Takes a lambda that compares two string-like objects and returns a lambda // that compares two objects, either of which can be string-like or // `WordAndIndex`. @@ -212,23 +185,13 @@ class VocabularyOnDisk { if (it == end()) { return {std::nullopt, getHighestId() + 1}; } else { - return *it; + return {*it, static_cast(it - begin())}; } } // Get the `OffsetAndSize` for the element with the `idx`. Return // `std::nullopt` if `idx` is not contained in the vocabulary. - std::optional getOffsetAndSize(uint64_t idx) const; - - // Return the `OffsetSizeId` for the element with the i-th smallest ID. - // Requires that i < size(). - OffsetSizeId getOffsetSizeIdForIthElement(uint64_t i) const; - - // Return the `OffsetAndSize` for the element with the i-th smallest ID. - // Requires that i < size(). - OffsetAndSize getOffsetAndSizeForIthElement(uint64_t i) const { - return getOffsetSizeIdForIthElement(i); - } + OffsetAndSize getOffsetAndSize(uint64_t idx) const; // Build a vocabulary from any type that is forward-iterable and yields // pairs of (string-like, ID). Used as the common implementation for diff --git a/src/parser/Literal.cpp b/src/parser/Literal.cpp index dca504da59..a9fb4364e6 100644 --- a/src/parser/Literal.cpp +++ b/src/parser/Literal.cpp @@ -78,7 +78,7 @@ Literal Literal::literalWithoutQuotes( // __________________________________________ Literal Literal::literalWithNormalizedContent( - NormalizedString normalizedRdfContent, + NormalizedStringView normalizedRdfContent, std::optional> descriptor) { auto quotes = "\""sv; auto actualContent = diff --git a/src/parser/Literal.h b/src/parser/Literal.h index 65ac735c86..5367c261ae 100644 --- a/src/parser/Literal.h +++ b/src/parser/Literal.h @@ -4,6 +4,9 @@ #pragma once +#include +#include + #include "parser/Iri.h" #include "parser/NormalizedString.h" @@ -25,12 +28,6 @@ class Literal { // Create a new literal without any descriptor explicit Literal(std::string content, size_t beginOfSuffix_); - // Similar to `fromEscapedRdfLiteral`, except the rdfContent is expected to - // already be normalized - static Literal literalWithNormalizedContent( - NormalizedString normalizedRdfContent, - std::optional> descriptor = std::nullopt); - // Internal helper function. Return either the empty string (for a plain // literal), `@langtag` or `^^`. std::string_view getSuffix() const { @@ -79,6 +76,12 @@ class Literal { std::string_view rdfContentWithQuotes, std::optional> descriptor = std::nullopt); + // Similar to `fromEscapedRdfLiteral`, except the rdfContent is expected to + // already be normalized + static Literal literalWithNormalizedContent( + NormalizedStringView normalizedRdfContent, + std::optional> descriptor = std::nullopt); + void addLanguageTag(std::string_view languageTag); void addDatatype(const Iri& datatype); diff --git a/src/parser/LiteralOrIri.h b/src/parser/LiteralOrIri.h index ba34b7c8e9..5a6724a15d 100644 --- a/src/parser/LiteralOrIri.h +++ b/src/parser/LiteralOrIri.h @@ -121,5 +121,12 @@ class LiteralOrIri { // Create a new iri given a prefix iri and its suffix static LiteralOrIri prefixedIri(const Iri& prefix, std::string_view suffix); + + // Printing for GTest + friend void PrintTo(const LiteralOrIri& literalOrIri, std::ostream* os) { + auto& s = *os; + s << literalOrIri.toStringRepresentation(); + } }; + } // namespace ad_utility::triple_component diff --git a/src/util/Conversions.h b/src/util/Conversions.h index c49f877c41..bbd4e901f2 100644 --- a/src/util/Conversions.h +++ b/src/util/Conversions.h @@ -4,16 +4,14 @@ #pragma once -#include #include #include #include "parser/LiteralOrIri.h" -#include "util/StringUtils.h" namespace ad_utility { -static constexpr std::string_view languageTaggedPredicatePrefix = "@"; +constexpr std::string_view languageTaggedPredicatePrefix = "@"; //! Convert a language tag like "@en" to the corresponding entity uri //! for the efficient language filter. // TODO The overload that takes and returns `std::string` can be diff --git a/test/AggregateExpressionTest.cpp b/test/AggregateExpressionTest.cpp index 40bb0a3106..0f86fd6f50 100644 --- a/test/AggregateExpressionTest.cpp +++ b/test/AggregateExpressionTest.cpp @@ -6,6 +6,7 @@ #include "./util/GTestHelpers.h" #include "./util/IdTableHelpers.h" #include "./util/IdTestHelpers.h" +#include "./util/TripleComponentTestHelpers.h" #include "engine/ValuesForTesting.h" #include "engine/sparqlExpressions/AggregateExpression.h" #include "gtest/gtest.h" @@ -20,6 +21,10 @@ auto V = VocabId; auto U = Id::makeUndefined(); auto L = LocalVocabId; auto D = DoubleId; +auto lit = [](auto s) { + return IdOrLiteralOrIri( + ad_utility::triple_component::LiteralOrIri(tripleComponentLiteral(s))); +}; static const Id NaN = D(std::numeric_limits::quiet_NaN()); } // namespace @@ -50,9 +55,10 @@ TEST(AggregateExpression, max) { testMaxId({V(7), U, V(2), V(4)}, V(7)); testMaxId({I(3), U, V(0), L(3), U, (I(-1))}, L(3)); - auto testMaxString = testAggregate; + auto testMaxString = testAggregate; // TODO Implement correct comparison on strings - testMaxString({"alpha", "äpfel", "Beta", "unfug"}, "äpfel"); + testMaxString({lit("alpha"), lit("äpfel"), lit("Beta"), lit("unfug")}, + lit("äpfel")); } // ______________________________________________________________________________ @@ -63,9 +69,10 @@ TEST(AggregateExpression, min) { testMinId({V(7), U, V(2), V(4)}, U); testMinId({I(3), V(0), L(3), (I(-1))}, I(-1)); - auto testMinString = testAggregate; + auto testMinString = testAggregate; // TODO Implement correct comparison on strings - testMinString({"alpha", "äpfel", "Beta", "unfug"}, "Beta"); + testMinString({lit("alpha"), lit("äpfel"), lit("Beta"), lit("unfug")}, + lit("Beta")); } // ______________________________________________________________________________ @@ -77,9 +84,8 @@ TEST(AggregateExpression, sum) { testSumId({I(3), U}, U); testSumId({I(3), NaN}, NaN); - auto testSumString = testAggregate; - // TODO The result should be `UNDEF` not `NaN` - testSumString({"alpha", "äpfel", "Beta", "unfug"}, U); + auto testSumString = testAggregate; + testSumString({lit("alpha"), lit("äpfel"), lit("Beta"), lit("unfug")}, U); } // ______________________________________________________________________________ @@ -91,6 +97,6 @@ TEST(AggregateExpression, count) { testCountId({U, I(3), U}, I(1)); testCountId({I(3), NaN, NaN}, I(2), true); - auto testCountString = testAggregate; - testCountString({"alpha", "äpfel", "", "unfug"}, I(4)); + auto testCountString = testAggregate; + testCountString({lit("alpha"), lit("äpfel"), lit(""), lit("unfug")}, I(4)); } diff --git a/test/CancellationHandleTest.cpp b/test/CancellationHandleTest.cpp index 3515165e3f..241e2c9005 100644 --- a/test/CancellationHandleTest.cpp +++ b/test/CancellationHandleTest.cpp @@ -153,8 +153,8 @@ TEST(CancellationHandle, ensureObjectLifetimeIsValidWithoutWatchDogStarted) { namespace ad_utility { TEST(CancellationHandle, verifyWatchDogDoesChangeState) { -#ifdef __APPLE__ - GTEST_SKIP_("sleep_for is unreliable for macos builds"); +#ifdef _QLEVER_NO_TIMING_TESTS + GTEST_SKIP_("because _QLEVER_NO_TIMING_TESTS defined"); #endif CancellationHandle handle; @@ -172,8 +172,8 @@ TEST(CancellationHandle, verifyWatchDogDoesChangeState) { // _____________________________________________________________________________ TEST(CancellationHandle, verifyWatchDogDoesNotChangeStateAfterCancel) { -#ifdef __APPLE__ - GTEST_SKIP_("sleep_for is unreliable for macos builds"); +#ifdef _QLEVER_NO_TIMING_TESTS + GTEST_SKIP_("because _QLEVER_NO_TIMING_TESTS defined"); #endif CancellationHandle handle; handle.startWatchDog(); diff --git a/test/LocalVocabTest.cpp b/test/LocalVocabTest.cpp index bdba4a9b68..9b2615a737 100644 --- a/test/LocalVocabTest.cpp +++ b/test/LocalVocabTest.cpp @@ -278,8 +278,8 @@ TEST(LocalVocab, propagation) { testQec, {Variable{"?x"}}, {Alias{groupConcatExpression("?y", "|"), Variable{"?concat"}}}, qet(values1)); - checkLocalVocab( - groupBy, std::vector{"", "", "", "yN1|yN2"}); + checkLocalVocab(groupBy, std::vector{"", "", "", + "\"yN1|yN2\""}); // DISTINCT again, but after something has been added to the local vocabulary // (to check that the "y1|y2" added by the GROUP BY does not also appear here, diff --git a/test/ProgressBarTest.cpp b/test/ProgressBarTest.cpp index 53e2dca917..2df1601c1c 100644 --- a/test/ProgressBarTest.cpp +++ b/test/ProgressBarTest.cpp @@ -29,7 +29,7 @@ TEST(ProgressBar, typicalUsage) { // NOTE: For macOS, `std::this_thread::sleep_for` can take much longer // than indicated, resulting in a much lower speed than expected. std::string expectedSpeedRegex = -#ifndef __APPLE__ +#ifndef _QLEVER_NO_TIMING_TESTS "\\[average speed [234]\\.[0-9] M/s, last batch [234]\\.[0-9] M/s" ", fastest [234]\\.[0-9] M/s, slowest [234]\\.[0-9] M/s\\] "; #else @@ -67,7 +67,7 @@ TEST(ProgressBar, numberOfStepsLessThanBatchSize) { ProgressBar progressBar(numSteps, "Steps: ", 5'000); std::this_thread::sleep_for(std::chrono::milliseconds(1)); std::string expectedUpdateRegex = -#ifndef __APPLE__ +#ifndef _QLEVER_NO_TIMING_TESTS "Steps: 3,000 \\[average speed [234]\\.[0-9] M/s\\] \n"; #else "Steps: 3,000 \\[average speed [0-9]\\.[0-9] M/s\\] \n"; diff --git a/test/RelationalExpressionTest.cpp b/test/RelationalExpressionTest.cpp index bd5e364cca..22be1fa43e 100644 --- a/test/RelationalExpressionTest.cpp +++ b/test/RelationalExpressionTest.cpp @@ -8,6 +8,7 @@ #include "./SparqlExpressionTestHelpers.h" #include "./util/AllocatorTestHelpers.h" #include "./util/GTestHelpers.h" +#include "./util/TripleComponentTestHelpers.h" #include "engine/sparqlExpressions/RelationalExpressions.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -23,6 +24,14 @@ using valueIdComparators::Comparison; // First some internal helper functions and constants. namespace { +auto lit = [](std::string_view s) { + return ad_utility::triple_component::LiteralOrIri(tripleComponentLiteral(s)); +}; + +auto iriref = [](std::string_view s) { + return ad_utility::triple_component::LiteralOrIri(iri(s)); +}; + // Convenient access to constants for "infinity" and "not a number". The // spelling `NaN` was chosen because `nan` conflicts with the standard library. const auto inf = std::numeric_limits::infinity(); @@ -295,30 +304,31 @@ TEST(RelationalExpression, DoubleAndDouble) { } TEST(RelationalExpression, StringAndString) { - testLessThanGreaterThanEqualHelper( - {"alpha", "beta"}, {"sigma", "delta"}, {"epsilon", "epsilon"}); + testLessThanGreaterThanEqualHelper( + {lit("alpha"), lit("beta")}, {lit("sigma"), lit("delta")}, + {lit("epsilon"), lit("epsilon")}); // TODO These tests only work, when we actually use unicode // comparisons for the string based expressions. // TODO Add an example for strings that are bytewise different but // equal on the unicode level (e.g.`ä` vs `a + dots`. - // testLessThanGreaterThanEqualHelper({"Alpha", - // "beta"}, + // testLessThanGreaterThanEqualHelper({"Alpha", "beta"}, // {"beta", "äpfel"}, {"xxx", "xxx"}); } TEST(RelationalExpression, NumericAndStringAreNeverEqual) { - auto stringVec = - VectorWithMemoryLimit({"hallo", "by", ""}, makeAllocator()); + auto stringVec = VectorWithMemoryLimit( + {lit("hallo"), lit("by"), lit("")}, makeAllocator()); auto intVec = VectorWithMemoryLimit({-12365, 0, 12}, makeAllocator()); auto doubleVec = VectorWithMemoryLimit({-12.365, 0, 12.1e5}, makeAllocator()); - testUndefHelper(int64_t{3}, IdOrString{"hallo"}); - testUndefHelper(int64_t{3}, IdOrString{"3"}); - testUndefHelper(-12.0, IdOrString{"hallo"}); - testUndefHelper(-12.0, IdOrString{"-12.0"}); - testUndefHelper(intVec.clone(), IdOrString{"someString"}); - testUndefHelper(doubleVec.clone(), IdOrString{"someString"}); + testUndefHelper(int64_t{3}, IdOrLiteralOrIri{lit("hallo")}); + testUndefHelper(int64_t{3}, IdOrLiteralOrIri{lit("3")}); + testUndefHelper(-12.0, IdOrLiteralOrIri{lit("hallo")}); + testUndefHelper(-12.0, IdOrLiteralOrIri{lit("-12.0")}); + testUndefHelper(intVec.clone(), IdOrLiteralOrIri{lit("someString")}); + testUndefHelper(doubleVec.clone(), IdOrLiteralOrIri{lit("someString")}); testUndefHelper(int64_t{3}, stringVec.clone()); testUndefHelper(intVec.clone(), stringVec.clone()); testUndefHelper(doubleVec.clone(), stringVec.clone()); @@ -535,21 +545,21 @@ TEST(RelationalExpression, NumericConstantAndNumericVector) { } TEST(RelationalExpression, StringConstantsAndStringVector) { - VectorWithMemoryLimit vec( - {"alpha", "alpaka", "bertram", "sigma", "zeta", "kaulquappe", "caesar", - "caesar", "caesar"}, + VectorWithMemoryLimit vec( + {lit("alpha"), lit("alpaka"), lit("bertram"), lit("sigma"), lit("zeta"), + lit("kaulquappe"), lit("caesar"), lit("caesar"), lit("caesar")}, makeAllocator()); - testLessThanGreaterThanEqualMultipleValuesHelper(IdOrString{"caesar"}, - vec.clone()); + testLessThanGreaterThanEqualMultipleValuesHelper( + IdOrLiteralOrIri{lit("caesar")}, vec.clone()); // TODO These tests only work, when we actually use unicode // comparisons for the string based expressions. TODDO Add an example // for strings that are bytewise different but equal on the unicode level // (e.g.`ä` vs `a + dots`. - // VectorWithMemoryLimit vec2({"AlpHa", "älpaka", "Æ", "sigma", - // "Eta", "kaulQuappe", "Caesar", "Caesar", "Caesare"}, alloc); - // testLessThanGreaterThanEqualHelper({"Alpha", - // "beta"}, + // VectorWithMemoryLimit vec2({"AlpHa", "älpaka", "Æ", + // "sigma", "Eta", "kaulQuappe", "Caesar", "Caesar", "Caesare"}, alloc); + // testLessThanGreaterThanEqualHelper({"Alpha", "beta"}, // {"beta", "äpfel"}, {"xxx", "xxx"}); } @@ -592,13 +602,13 @@ TEST(RelationalExpression, DoubleVectorAndIntVector) { } TEST(RelationalExpression, StringVectorAndStringVector) { - VectorWithMemoryLimit vecA{ - {"alpha", "beta", "g", "epsilon", "fraud", "capitalism", "", "bo'sä30", - "Me"}, + VectorWithMemoryLimit vecA{ + {lit("alpha"), lit("beta"), lit("g"), lit("epsilon"), lit("fraud"), + lit("capitalism"), lit(""), lit("bo'sä30"), lit("Me")}, makeAllocator()}; - VectorWithMemoryLimit vecB{ - {"alph", "alpha", "f", "epsiloo", "freud", "communism", "", "bo'sä30", - "Me"}, + VectorWithMemoryLimit vecB{ + {lit("alph"), lit("alpha"), lit("f"), lit("epsiloo"), lit("freud"), + lit("communism"), lit(""), lit("bo'sä30"), lit("Me")}, makeAllocator()}; testLessThanGreaterThanEqualMultipleValuesHelper(vecA.clone(), vecB.clone()); // TODO Add a test case for correct unicode collation as soon as that @@ -656,20 +666,22 @@ TEST(RelationalExpression, VariableAndConstant) { {false, false, true}); // ?vocab column is `"Beta", "alpha", "älpha" - testWithExplicitResult(Variable{"?vocab"}, IdOrString{"\"älpha\""}, + testWithExplicitResult(Variable{"?vocab"}, + IdOrLiteralOrIri{lit("\"älpha\"")}, {false, true, true}); - testWithExplicitResult(Variable{"?vocab"}, IdOrString{"\"alpha\""}, + testWithExplicitResult(Variable{"?vocab"}, + IdOrLiteralOrIri{lit("\"alpha\"")}, {true, false, true}); - testWithExplicitResult(IdOrString{"\"atm\""}, Variable{"?vocab"}, - {true, false, false}); + testWithExplicitResult(IdOrLiteralOrIri{lit("\"atm\"")}, + Variable{"?vocab"}, {true, false, false}); // ?mixed column is `1, -0.1, ` auto U = Id::makeUndefined(); auto B = ad_utility::testing::BoolId; - testWithExplicitIdResult(IdOrString{""}, Variable{"?mixed"}, - {U, U, B(true)}); - testWithExplicitIdResult(IdOrString{""}, Variable{"?mixed"}, - {U, U, B(true)}); + testWithExplicitIdResult(IdOrLiteralOrIri{iriref("")}, + Variable{"?mixed"}, {U, U, B(true)}); + testWithExplicitIdResult(IdOrLiteralOrIri{iriref("")}, + Variable{"?mixed"}, {U, U, B(true)}); // Note: `1` and `` are "not compatible", so even the "not equal" // comparison returns false. @@ -754,7 +766,8 @@ TEST(RelationalExpression, VariableAndConstantBinarySearch) { testSortedVariableAndConstant(ints, int64_t{-1}, {{{0, 3}}}); testSortedVariableAndConstant(ints, 0.3, {{{0, 1}, {2, 3}}}); // ints and strings are always incompatible. - testSortedVariableAndConstant(ints, IdOrString{"a string"}, {}); + testSortedVariableAndConstant(ints, IdOrLiteralOrIri{lit("a string")}, + {}); testSortedVariableAndConstant(doubles, int64_t{0}, {{{0, 2}}}); testSortedVariableAndConstant(doubles, 2.8, {{{1, 2}}}); @@ -764,10 +777,13 @@ TEST(RelationalExpression, VariableAndConstantBinarySearch) { testSortedVariableAndConstant(numeric, 1.0, {{{0, 1}}}); testSortedVariableAndConstant(numeric, 3.4, {{{0, 1}, {2, 3}}}); - testSortedVariableAndConstant(vocab, IdOrString{"\"alpha\""}, {{{1, 3}}}); - testSortedVariableAndConstant(vocab, IdOrString{"\"alpha\""}, {{{0, 3}}}); - testSortedVariableAndConstant(vocab, IdOrString{"\"ball\""}, {{{0, 2}}}); - testSortedVariableAndConstant(vocab, IdOrString{"\"älpha\""}, + testSortedVariableAndConstant(vocab, IdOrLiteralOrIri{lit("\"alpha\"")}, + {{{1, 3}}}); + testSortedVariableAndConstant(vocab, IdOrLiteralOrIri{lit("\"alpha\"")}, + {{{0, 3}}}); + testSortedVariableAndConstant(vocab, IdOrLiteralOrIri{lit("\"ball\"")}, + {{{0, 2}}}); + testSortedVariableAndConstant(vocab, IdOrLiteralOrIri{lit("\"älpha\"")}, {{{0, 1}, {2, 3}}}); testSortedVariableAndConstant(vocab, inf, {}); @@ -778,7 +794,8 @@ TEST(RelationalExpression, VariableAndConstantBinarySearch) { // Note: only *numeric* values that are not equal to 1.0 are considered here. testSortedVariableAndConstant(mixed, 1.0, {{{1, 2}}}); testSortedVariableAndConstant(mixed, -inf, {{{0, 2}}}); - testSortedVariableAndConstant(mixed, IdOrString{""}, {{{2, 3}}}); + testSortedVariableAndConstant(mixed, IdOrLiteralOrIri{iriref("")}, + {{{2, 3}}}); } // TODO We currently do not have tests for the `LocalVocab` case, diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index c5c2bd5334..5dee81b2e3 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -39,7 +39,29 @@ auto Voc = ad_utility::testing::VocabId; auto U = Id::makeUndefined(); using Ids = std::vector; -using IdOrStrings = std::vector; +using IdOrLiteralOrIriVec = std::vector; + +auto lit = [](std::string_view s, std::string_view langtagOrDatatype = "") { + return ad_utility::triple_component::LiteralOrIri( + ad_utility::testing::tripleComponentLiteral(s, langtagOrDatatype)); +}; + +auto iriref = [](std::string_view s) { + return ad_utility::triple_component::LiteralOrIri(iri(s)); +}; + +auto idOrLitOrStringVec = + [](const std::vector>& input) { + IdOrLiteralOrIriVec result; + for (const auto& el : input) { + if (std::holds_alternative(el)) { + result.push_back(std::get(el)); + } else { + result.push_back(lit(std::get(el))); + } + } + return result; + }; // All the helper functions `testUnaryExpression` etc. below internally evaluate // the given expressions using the `TestContext` class, so it is possible to use @@ -78,7 +100,8 @@ SingleExpressionResult auto toExpressionResult(T vec) { if constexpr (SingleExpressionResult) { return vec; } else if constexpr (std::convertible_to) { - return IdOrString{std::string{std::string_view{vec}}}; + // TODO Make a generic testing utility for the string case... + return IdOrLiteralOrIri{lit(vec)}; } else { return VectorWithMemoryLimit{ std::make_move_iterator(vec.begin()), @@ -107,16 +130,16 @@ ::testing::Matcher matchId(Id id) { } // Return a matcher that matches a result of an expression that is not a vector. -// If it is an ID (possibly contained in the `IdOrString` variant), then the -// `matchId` matcher from above is used, else we test for equality. +// If it is an ID (possibly contained in the `IdOrLiteralOrIri` variant), then +// the `matchId` matcher from above is used, else we test for equality. template requires(!isVectorResult) auto nonVectorResultMatcher(const T& expected) { if constexpr (std::is_same_v) { return matchId(expected); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { return std::visit( [](const U& el) { - return ::testing::MatcherCast( + return ::testing::MatcherCast( ::testing::VariantWith(nonVectorResultMatcher(el))); }, expected); @@ -256,7 +279,7 @@ TEST(SparqlExpression, logicalOperators) { alloc}; V dAsBool{{B(true), B(true), B(false), B(false)}, alloc}; - V s{{"true", "", "false", ""}, alloc}; + V s{{lit("true"), lit(""), lit("false"), lit("")}, alloc}; V sAsBool{{B(true), B(false), B(true), B(false)}, alloc}; V i{{I(32), I(-42), I(0), I(5)}, alloc}; @@ -325,10 +348,10 @@ TEST(SparqlExpression, logicalOperators) { testAnd(allFalse, b, D(nan)); testAnd(b, b, D(2839.123)); - testOr(allTrue, b, IdOrString{"halo"}); - testOr(b, b, IdOrString("")); - testAnd(allFalse, b, IdOrString("")); - testAnd(b, b, IdOrString("yellow")); + testOr(allTrue, b, IdOrLiteralOrIri{lit("halo")}); + testOr(b, b, IdOrLiteralOrIri(lit(""))); + testAnd(allFalse, b, IdOrLiteralOrIri(lit(""))); + testAnd(b, b, IdOrLiteralOrIri(lit("yellow"))); // Test the behavior in the presence of UNDEF values. Id t = B(true); @@ -492,7 +515,7 @@ TEST(SparqlExpression, dateOperators) { auto testYear = testUnaryExpression<&makeYearExpression>; testYear(Ids{Id::makeFromDouble(42.0)}, Ids{U}); testYear(Ids{Id::makeFromBool(false)}, Ids{U}); - testYear(IdOrStrings{"noDate"}, Ids{U}); + testYear(IdOrLiteralOrIriVec{lit("noDate")}, Ids{U}); } // _____________________________________________________________________________________ @@ -504,22 +527,28 @@ static auto makeStrlenWithStr = [](auto arg) { auto checkStrlenWithStrChild = testUnaryExpression; TEST(SparqlExpression, stringOperators) { // Test `StrlenExpression` and `StrExpression`. - checkStrlen(IdOrStrings{"one", "two", "three", ""}, - Ids{I(3), I(3), I(5), I(0)}); - checkStrlenWithStrChild(IdOrStrings{"one", "two", "three", ""}, - Ids{I(3), I(3), I(5), I(0)}); + checkStrlen( + IdOrLiteralOrIriVec{lit("one"), lit("two"), lit("three"), lit("")}, + Ids{I(3), I(3), I(5), I(0)}); + checkStrlenWithStrChild( + IdOrLiteralOrIriVec{lit("one"), lit("two"), lit("three"), lit("")}, + Ids{I(3), I(3), I(5), I(0)}); // Test the different (optimized) behavior depending on whether the STR() // function was applied to the argument. - checkStrlen(IdOrStrings{"one", I(1), D(3.6), ""}, Ids{I(3), U, U, I(0)}); - checkStrlenWithStrChild(IdOrStrings{"one", I(1), D(3.6), ""}, - Ids{I(3), I(1), I(3), I(0)}); - checkStr(Ids{I(1), I(2), I(3)}, IdOrStrings{"1", "2", "3"}); - checkStr(Ids{D(-1.0), D(1.0), D(2.34)}, IdOrStrings{"-1", "1", "2.34"}); + checkStrlen(IdOrLiteralOrIriVec{lit("one"), I(1), D(3.6), lit("")}, + Ids{I(3), U, U, I(0)}); + checkStrlenWithStrChild( + IdOrLiteralOrIriVec{lit("one"), I(1), D(3.6), lit("")}, + Ids{I(3), I(1), I(3), I(0)}); + checkStr(Ids{I(1), I(2), I(3)}, + IdOrLiteralOrIriVec{lit("1"), lit("2"), lit("3")}); + checkStr(Ids{D(-1.0), D(1.0), D(2.34)}, + IdOrLiteralOrIriVec{lit("-1"), lit("1"), lit("2.34")}); checkStr(Ids{B(true), B(false), B(true)}, - IdOrStrings{"true", "false", "true"}); - checkStr(IdOrStrings{"one", "two", "three"}, - IdOrStrings{"one", "two", "three"}); + IdOrLiteralOrIriVec{lit("true"), lit("false"), lit("true")}); + checkStr(IdOrLiteralOrIriVec{lit("one"), lit("two"), lit("three")}, + IdOrLiteralOrIriVec{lit("one"), lit("two"), lit("three")}); // A simple test for uniqueness of the cache key. auto c1a = makeStrlenExpression(std::make_unique(iri(""))) @@ -552,10 +581,10 @@ TEST(SparqlExpression, stringOperators) { auto checkUcase = testUnaryExpression<&makeUppercaseExpression>; auto checkLcase = testUnaryExpression<&makeLowercaseExpression>; TEST(SparqlExpression, uppercaseAndLowercase) { - checkLcase(IdOrStrings{"One", "tWÖ", U, I(12)}, - IdOrStrings{"one", "twö", U, U}); - checkUcase(IdOrStrings{"One", "tWÖ", U, I(12)}, - IdOrStrings{"ONE", "TWÖ", U, U}); + checkLcase(IdOrLiteralOrIriVec{lit("One"), lit("tWÖ"), U, I(12)}, + IdOrLiteralOrIriVec{lit("one"), lit("twö"), U, U}); + checkUcase(IdOrLiteralOrIriVec{lit("One"), lit("tWÖ"), U, I(12)}, + IdOrLiteralOrIriVec{lit("ONE"), lit("TWÖ"), U, U}); } // _____________________________________________________________________________________ @@ -570,35 +599,55 @@ auto checkStrBefore = std::bind_front(testNaryExpression, &makeStrBeforeExpression); TEST(SparqlExpression, binaryStringOperations) { // Test STRSTARTS, STRENDS, CONTAINS, STRBEFORE, and STRAFTER. - using S = IdOrStrings; auto F = Id::makeFromBool(false); auto T = Id::makeFromBool(true); + auto S = [](const std::vector>& input) { + IdOrLiteralOrIriVec result; + for (const auto& el : input) { + if (std::holds_alternative(el)) { + result.push_back(std::get(el)); + } else { + result.push_back(lit(std::get(el))); + } + } + return result; + }; checkStrStarts( Ids{T, F, T, F, T, T, F, F, F}, - S{"", "", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo"}, - S{"", "x", "", "Hallo", "Häl", "Hällo", "Hällox", "ll", "lo"}); - checkStrEnds( - Ids{T, F, T, F, T, T, F, F, F}, - S{"", "", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo"}, - S{"", "x", "", "Hallo", "o", "Hällo", "Hällox", "ll", "H"}); + S({"", "", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo", + "Hällo"}), + S({"", "x", "", "Hallo", "Häl", "Hällo", "Hällox", "ll", "lo"})); + checkStrEnds(Ids{T, F, T, F, T, T, F, F, F}, + S({"", "", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo", + "Hällo"}), + S({"", "x", "", "Hallo", "o", "Hällo", "Hällox", "ll", "H"})); checkContains(Ids{T, F, T, F, T, T, F}, - S{"", "", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo"}, - S{"", "x", "", "ullo", "ll", "Hällo", "Hällox"}); - checkStrAfter(S{"", "", "Hällo", "", "o", "", "", "lo"}, - S{"", "", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo"}, - S{"", "x", "", "ullo", "ll", "Hällo", "Hällox", "l"}); + S({"", "", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo"}), + S({"", "x", "", "ullo", "ll", "Hällo", "Hällox"})); + checkStrAfter( + S({"", "", "Hällo", "", "o", "", "", "lo"}), + S({"", "", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo"}), + S({"", "x", "", "ullo", "ll", "Hällo", "Hällox", "l"})); checkStrBefore( - S{"", "", "Hällo", "", "Hä", "", "", "Hä"}, - S{"", "", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo"}, - S{"", "x", "", "ullo", "ll", "Hällo", "Hällox", "l"}); + S({"", "", "Hällo", "", "Hä", "", "", "Hä"}), + S({"", "", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo", "Hällo"}), + S({"", "x", "", "ullo", "ll", "Hällo", "Hällox", "l"})); } // ______________________________________________________________________________ static auto checkSubstr = std::bind_front(testNaryExpression, makeSubstrExpression); TEST(SparqlExpression, substr) { - auto strs = [](const IdOrStrings& input) { - return VectorWithMemoryLimit(input.begin(), input.end(), alloc); + auto strs = [](const std::vector>& input) { + VectorWithMemoryLimit result(alloc); + for (const auto& el : input) { + if (std::holds_alternative(el)) { + result.push_back(std::get(el)); + } else { + result.push_back(lit(std::get(el))); + } + } + return result; }; // Remember: The start position (the second argument to the SUBSTR expression) @@ -642,15 +691,17 @@ TEST(SparqlExpression, substr) { // Invalid datatypes // First must be string. - auto Ux = IdOrString{U}; + auto Ux = IdOrLiteralOrIri{U}; checkSubstr(Ux, I(3), I(4), I(7)); checkSubstr(Ux, U, I(4), I(7)); checkSubstr(Ux, Ux, I(4), I(7)); // Second and third must be numeric; - checkSubstr(Ux, IdOrString{"hello"}, U, I(4)); - checkSubstr(Ux, IdOrString{"hello"}, IdOrString{"bye"}, I(17)); - checkSubstr(Ux, IdOrString{"hello"}, I(4), U); - checkSubstr(Ux, IdOrString{"hello"}, I(4), IdOrString{"bye"}); + checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, U, I(4)); + checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, IdOrLiteralOrIri{lit("bye")}, + I(17)); + checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, I(4), U); + checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, I(4), + IdOrLiteralOrIri{lit("bye")}); } // _____________________________________________________________________________________ @@ -662,7 +713,7 @@ TEST(SparqlExpression, unaryNegate) { Ids{B(true), B(false), I(0), I(3), D(0), D(12), D(naN), U}, Ids{B(false), B(true), B(true), B(false), B(true), B(false), B(true), U}); // Empty strings are considered to be true. - checkNegate(IdOrStrings{"true", "false", "", "blibb"}, + checkNegate(idOrLitOrStringVec({"true", "false", "", "blibb"}), Ids{B(false), B(false), B(true), B(false)}); } @@ -673,7 +724,8 @@ TEST(SparqlExpression, unaryMinus) { checkMinus( Ids{B(true), B(false), I(0), I(3), D(0), D(12.8), D(naN), U, Voc(6)}, Ids{I(-1), I(0), I(0), I(-3), D(-0.0), D(-12.8), D(-naN), U, U}); - checkMinus(IdOrStrings{"true", "false", "", ""}, Ids{U, U, U, U}); + checkMinus(idOrLitOrStringVec({"true", "false", "", ""}), + Ids{U, U, U, U}); } // _____________________________________________________________________________________ @@ -738,8 +790,9 @@ TEST(SparqlExpression, isSomethingFunctions) { Id blank2 = Id::makeFromBlankNodeIndex(BlankNodeIndex::make(12)); Id localLiteral = testContext().notInVocabA; - IdOrStrings testIdOrStrings{"", "\"l\"", blank2, iri, literal, - blank, localLiteral, I(42), D(1), U}; + IdOrLiteralOrIriVec testIdOrStrings = + IdOrLiteralOrIriVec{iriref(""), lit("\"l\""), blank2, iri, literal, + blank, localLiteral, I(42), D(1), U}; testUnaryExpression(testIdOrStrings, Ids{T, F, F, T, F, F, F, F, F, F}); testUnaryExpression(testIdOrStrings, @@ -758,16 +811,20 @@ TEST(SparqlExpression, geoSparqlExpressions) { auto checkLong = testUnaryExpression<&makeLongitudeExpression>; auto checkDist = std::bind_front(testNaryExpression, &makeDistExpression); - checkLat(IdOrStrings{"POINT(24.3 26.8)", "NotAPoint", I(12)}, + checkLat(idOrLitOrStringVec({"POINT(24.3 26.8)", "NotAPoint", I(12)}), Ids{D(26.8), U, U}); - checkLong(IdOrStrings{D(4.2), "POINT(24.3 26.8)", "NotAPoint"}, + checkLong(idOrLitOrStringVec({D(4.2), "POINT(24.3 26.8)", "NotAPoint"}), Ids{U, D(24.3), U}); - checkDist(D(0.0), IdOrString{"POINT(24.3 26.8)"}, - IdOrString{"POINT(24.3 26.8)"}); - checkDist(U, IdOrString{"POINT(24.3 26.8)"}, IdOrString{I(12)}); - checkDist(U, IdOrString{I(12)}, IdOrString{"POINT(24.3 26.8)"}); - checkDist(U, IdOrString{"POINT(24.3 26.8)"s}, IdOrString{"NotAPoint"}); - checkDist(U, IdOrString{"NotAPoint"}, IdOrString{"POINT(24.3 26.8)"}); + checkDist(D(0.0), IdOrLiteralOrIri{lit("POINT(24.3 26.8)")}, + IdOrLiteralOrIri{lit("POINT(24.3 26.8)")}); + checkDist(U, IdOrLiteralOrIri{lit("POINT(24.3 26.8)")}, + IdOrLiteralOrIri{I(12)}); + checkDist(U, IdOrLiteralOrIri{I(12)}, + IdOrLiteralOrIri{lit("POINT(24.3 26.8)")}); + checkDist(U, IdOrLiteralOrIri{lit("POINT(24.3 26.8)"s)}, + IdOrLiteralOrIri{lit("NotAPoint")}); + checkDist(U, IdOrLiteralOrIri{lit("NotAPoint")}, + IdOrLiteralOrIri{lit("POINT(24.3 26.8)")}); } // ________________________________________________________________________________________ @@ -778,29 +835,31 @@ TEST(SparqlExpression, ifAndCoalesce) { const auto T = Id::makeFromBool(true); const auto F = Id::makeFromBool(false); - checkIf( - IdOrStrings{I(0), "eins", I(2), I(3), "vier", "fünf"}, + checkIf(idOrLitOrStringVec({I(0), "eins", I(2), I(3), "vier", "fünf"}), + // UNDEF and the empty string are considered to be `false`. + std::tuple{idOrLitOrStringVec({T, F, T, "true", U, ""}), + Ids{I(0), I(1), I(2), I(3), I(4), I(5)}, + idOrLitOrStringVec( + {"null", "eins", "zwei", "drei", "vier", "fünf"})}); + checkCoalesce( + idOrLitOrStringVec({I(0), "eins", I(2), I(3), U, D(5.0)}), // UNDEF and the empty string are considered to be `false`. - std::tuple{IdOrStrings{T, F, T, "true", U, ""}, - Ids{I(0), I(1), I(2), I(3), I(4), I(5)}, - IdOrStrings{"null", "eins", "zwei", "drei", "vier", "fünf"}}); - checkCoalesce(IdOrStrings{I(0), "eins", I(2), I(3), U, D(5.0)}, - // UNDEF and the empty string are considered to be `false`. - std::tuple{Ids{I(0), U, I(2), I(3), U, D(5.0)}, - IdOrStrings{"null", "eins", "zwei", "drei", U, U}, - Ids{U, U, U, U, U, D(5.0)}}); + std::tuple{Ids{I(0), U, I(2), I(3), U, D(5.0)}, + idOrLitOrStringVec({"null", "eins", "zwei", "drei", U, U}), + Ids{U, U, U, U, U, D(5.0)}}); // Example for COALESCE where we have a constant input and evaluating the last // input is not necessary. - checkCoalesce(IdOrStrings{I(0), "eins", I(2), I(3), "eins", D(5.0)}, - // UNDEF and the empty string are considered to be `false`. - std::tuple{Ids{I(0), U, I(2), I(3), U, D(5.0)}, U, - IdOrString{"eins"}, Ids{U, U, U, U, U, D(5.0)}}); + checkCoalesce( + idOrLitOrStringVec({I(0), "eins", I(2), I(3), "eins", D(5.0)}), + // UNDEF and the empty string are considered to be `false`. + std::tuple{Ids{I(0), U, I(2), I(3), U, D(5.0)}, U, + IdOrLiteralOrIri{lit("eins")}, Ids{U, U, U, U, U, D(5.0)}}); // Check COALESCE with no arguments or empty arguments. - checkCoalesce(IdOrStrings{}, std::tuple{}); - checkCoalesce(IdOrStrings{}, std::tuple{Ids{}}); - checkCoalesce(IdOrStrings{}, std::tuple{Ids{}, Ids{}}); - checkCoalesce(IdOrStrings{}, std::tuple{Ids{}, Ids{}, Ids{}}); + checkCoalesce(IdOrLiteralOrIriVec{}, std::tuple{}); + checkCoalesce(IdOrLiteralOrIriVec{}, std::tuple{Ids{}}); + checkCoalesce(IdOrLiteralOrIriVec{}, std::tuple{Ids{}, Ids{}}); + checkCoalesce(IdOrLiteralOrIriVec{}, std::tuple{Ids{}, Ids{}, Ids{}}); auto coalesceExpr = makeCoalesceExpressionVariadic( std::make_unique(iri("")), @@ -816,25 +875,28 @@ TEST(SparqlExpression, concatExpression) { auto checkConcat = testNaryExpressionVec; const auto T = Id::makeFromBool(true); - checkConcat(IdOrStrings{"0null", "eins", "2zwei", "3drei", "", "5.35.2"}, - // UNDEF evaluates to an empty string.. - std::tuple{Ids{I(0), U, I(2), I(3), U, D(5.3)}, - IdOrStrings{"null", "eins", "zwei", "drei", U, U}, - Ids{U, U, U, U, U, D(5.2)}}); + checkConcat( + idOrLitOrStringVec({"0null", "eins", "2zwei", "3drei", "", "5.35.2"}), + // UNDEF evaluates to an empty string.. + std::tuple{Ids{I(0), U, I(2), I(3), U, D(5.3)}, + idOrLitOrStringVec({"null", "eins", "zwei", "drei", U, U}), + Ids{U, U, U, U, U, D(5.2)}}); // Example with some constants in the middle. - checkConcat(IdOrStrings{"0trueeins", "trueeins", "2trueeins", "3trueeins", - "trueeins", "12.3trueeins-2.1"}, - // UNDEF and the empty string are considered to be `false`. - std::tuple{Ids{I(0), U, I(2), I(3), U, D(12.3)}, T, - IdOrString{"eins"}, Ids{U, U, U, U, U, D(-2.1)}}); + checkConcat( + idOrLitOrStringVec({"0trueeins", "trueeins", "2trueeins", "3trueeins", + "trueeins", "12.3trueeins-2.1"}), + // UNDEF and the empty string are considered to be `false`. + std::tuple{Ids{I(0), U, I(2), I(3), U, D(12.3)}, T, + IdOrLiteralOrIri{lit("eins")}, Ids{U, U, U, U, U, D(-2.1)}}); // Only constants - checkConcat(IdOrString{"trueMe1"}, std::tuple{T, IdOrString{"Me"}, I(1)}); + checkConcat(IdOrLiteralOrIri{lit("trueMe1")}, + std::tuple{T, IdOrLiteralOrIri{lit("Me")}, I(1)}); // Constants at the beginning. - checkConcat(IdOrStrings{"trueMe1", "trueMe2"}, - std::tuple{T, IdOrString{"Me"}, Ids{I(1), I(2)}}); + checkConcat(IdOrLiteralOrIriVec{lit("trueMe1"), lit("trueMe2")}, + std::tuple{T, IdOrLiteralOrIri{lit("Me")}, Ids{I(1), I(2)}}); - checkConcat(IdOrString{""}, std::tuple{}); + checkConcat(IdOrLiteralOrIri{lit("")}, std::tuple{}); auto coalesceExpr = makeConcatExpressionVariadic( std::make_unique(iri("")), std::make_unique(iri(""))); @@ -848,36 +910,43 @@ TEST(SparqlExpression, concatExpression) { TEST(SparqlExpression, ReplaceExpression) { auto checkReplace = testNaryExpressionVec<&makeReplaceExpression>; // A simple replace( no regexes involved). - checkReplace(IdOrStrings{"null", "Eins", "zwEi", "drEi", U, U}, - std::tuple{IdOrStrings{"null", "eins", "zwei", "drei", U, U}, - IdOrString{"e"}, IdOrString{"E"}}); + checkReplace( + idOrLitOrStringVec({"null", "Eins", "zwEi", "drEi", U, U}), + std::tuple{idOrLitOrStringVec({"null", "eins", "zwei", "drei", U, U}), + IdOrLiteralOrIri{lit("e")}, IdOrLiteralOrIri{lit("E")}}); // A somewhat more involved regex - checkReplace(IdOrStrings{"null", "Xs", "zwei", "drei", U, U}, - std::tuple{IdOrStrings{"null", "eins", "zwei", "drei", U, U}, - IdOrString{"e.[a-z]"}, IdOrString{"X"}}); + checkReplace( + idOrLitOrStringVec({"null", "Xs", "zwei", "drei", U, U}), + std::tuple{idOrLitOrStringVec({"null", "eins", "zwei", "drei", U, U}), + IdOrLiteralOrIri{lit("e.[a-z]")}, IdOrLiteralOrIri{lit("X")}}); // Case-insensitive matching using the hack for google regex: - checkReplace(IdOrStrings{"null", "xxns", "zwxx", "drxx"}, - std::tuple{IdOrStrings{"null", "eIns", "zwEi", "drei"}, - IdOrString{"(?i)[ei]"}, IdOrString{"x"}}); + checkReplace(idOrLitOrStringVec({"null", "xxns", "zwxx", "drxx"}), + std::tuple{idOrLitOrStringVec({"null", "eIns", "zwEi", "drei"}), + IdOrLiteralOrIri{lit("(?i)[ei]")}, + IdOrLiteralOrIri{lit("x")}}); // Multiple matches withing the same string checkReplace( - IdOrString{"wEeDEflE"}, - std::tuple{IdOrString{"weeeDeeflee"}, IdOrString{"ee"}, IdOrString{"E"}}); + IdOrLiteralOrIri{lit("wEeDEflE")}, + std::tuple{IdOrLiteralOrIri{lit("weeeDeeflee")}, + IdOrLiteralOrIri{lit("ee")}, IdOrLiteralOrIri{lit("E")}}); // Illegal regex. - checkReplace(IdOrStrings{U, U, U, U, U, U}, - std::tuple{IdOrStrings{"null", "Xs", "zwei", "drei", U, U}, - IdOrString{"e.[a-z"}, IdOrString{"X"}}); + checkReplace( + IdOrLiteralOrIriVec{U, U, U, U, U, U}, + std::tuple{idOrLitOrStringVec({"null", "Xs", "zwei", "drei", U, U}), + IdOrLiteralOrIri{lit("e.[a-z")}, IdOrLiteralOrIri{lit("X")}}); // Undefined regex - checkReplace(IdOrStrings{U, U, U, U, U, U}, - std::tuple{IdOrStrings{"null", "Xs", "zwei", "drei", U, U}, U, - IdOrString{"X"}}); + checkReplace( + IdOrLiteralOrIriVec{U, U, U, U, U, U}, + std::tuple{idOrLitOrStringVec({"null", "Xs", "zwei", "drei", U, U}), U, + IdOrLiteralOrIri{lit("X")}}); // Illegal replacement. - checkReplace(IdOrStrings{U, U, U, U, U, U}, - std::tuple{IdOrStrings{"null", "Xs", "zwei", "drei", U, U}, - IdOrString{"e"}, Id::makeUndefined()}); + checkReplace( + IdOrLiteralOrIriVec{U, U, U, U, U, U}, + std::tuple{idOrLitOrStringVec({"null", "Xs", "zwei", "drei", U, U}), + IdOrLiteralOrIri{lit("e")}, Id::makeUndefined()}); } // ______________________________________________________________________________ @@ -887,15 +956,16 @@ TEST(SparqlExpression, literalExpression) { "\"notInTheVocabulary\"")}; // Evaluate multiple times to test the caching behavior. for (size_t i = 0; i < 15; ++i) { - ASSERT_EQ((ExpressionResult{IdOrString{"\"notInTheVocabulary\""}}), - expr.evaluate(&ctx.context)); + ASSERT_EQ( + (ExpressionResult{IdOrLiteralOrIri{lit("\"notInTheVocabulary\"")}}), + expr.evaluate(&ctx.context)); } // A similar test with a constant entry that is part of the vocabulary and can // therefore be converted to an ID. IriExpression iriExpr{TripleComponent::Iri::fromIriref("")}; Id idOfX = ctx.x; for (size_t i = 0; i < 15; ++i) { - ASSERT_EQ((ExpressionResult{IdOrString{idOfX}}), + ASSERT_EQ((ExpressionResult{IdOrLiteralOrIri{idOfX}}), iriExpr.evaluate(&ctx.context)); } } @@ -903,13 +973,16 @@ TEST(SparqlExpression, literalExpression) { // ______________________________________________________________________________ TEST(SparqlExpression, encodeForUri) { auto checkEncodeForUri = testUnaryExpression<&makeEncodeForUriExpression>; - using IoS = IdOrString; + using IoS = IdOrLiteralOrIri; + auto l = [](std::string_view s, std::string_view langOrDatatype = "") { + return IoS{lit(s, langOrDatatype)}; + }; checkEncodeForUri("Los Angeles", "Los%20Angeles"); - checkEncodeForUri("\"Los Angeles\"@en", "Los%20Angeles"); - checkEncodeForUri("\"Los Angeles\"^^xsd:string", "Los%20Angeles"); - checkEncodeForUri("\"Los \\\"Angeles\"^^xsd:string", "Los%20%5C%22Angeles"); - checkEncodeForUri("\"Los Angeles", "%22Los%20Angeles"); - checkEncodeForUri("L\"os \"Angeles", "L%22os%20%22Angeles"); + checkEncodeForUri(l("Los Angeles", "@en"), "Los%20Angeles"); + checkEncodeForUri(l("Los Angeles", "^^"), "Los%20Angeles"); + checkEncodeForUri(l("Los Ängeles", "^^"), "Los%20%C3%84ngeles"); + checkEncodeForUri(l("\"Los Angeles"), "%22Los%20Angeles"); + checkEncodeForUri(l("L\"os \"Angeles"), "L%22os%20%22Angeles"); // Literals from the global and local vocab. checkEncodeForUri(testContext().aelpha, "%C3%A4lpha"); checkEncodeForUri(testContext().notInVocabA, "notInVocabA"); diff --git a/test/SparqlExpressionTypesTest.cpp b/test/SparqlExpressionTypesTest.cpp index af2bc66f99..a05d650185 100644 --- a/test/SparqlExpressionTypesTest.cpp +++ b/test/SparqlExpressionTypesTest.cpp @@ -29,13 +29,15 @@ TEST(SparqlExpressionTypes, expressionResult) { } TEST(SparqlExpressionTypes, printIdOrString) { + using namespace ad_utility::triple_component; std::stringstream str; - IdOrString idOrString{Id::makeUndefined()}; + + IdOrLiteralOrIri idOrString{Id::makeUndefined()}; PrintTo(idOrString, &str); ASSERT_EQ(str.str(), "Undefined:Undefined"); - idOrString = "bimm"; + idOrString = LiteralOrIri::literalWithoutQuotes("bimm"); // Clear the stringstream. str.str({}); PrintTo(idOrString, &str); - ASSERT_EQ(str.str(), "bimm"); + ASSERT_EQ(str.str(), "\"bimm\""); } diff --git a/test/SparqlParserTest.cpp b/test/SparqlParserTest.cpp index e67c9857f8..529d10c144 100644 --- a/test/SparqlParserTest.cpp +++ b/test/SparqlParserTest.cpp @@ -8,10 +8,11 @@ #include -#include "./util/TripleComponentTestHelpers.h" #include "SparqlAntlrParserTestHelpers.h" #include "global/Constants.h" #include "parser/SparqlParser.h" +#include "util/Conversions.h" +#include "util/TripleComponentTestHelpers.h" namespace m = matchers; namespace p = parsedQuery; diff --git a/test/StringUtilsTest.cpp b/test/StringUtilsTest.cpp index 2aca0783dd..5c4763f926 100644 --- a/test/StringUtilsTest.cpp +++ b/test/StringUtilsTest.cpp @@ -313,3 +313,11 @@ TEST(StringUtilsTest, insertThousandSeparator) { forbiddenSymbolTest .template operator()<'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'>(); } + +TEST(StringUtilsTest, findLiteralEnd) { + using namespace ad_utility; + EXPECT_EQ(findLiteralEnd("nothing", "\""), std::string_view::npos); + EXPECT_EQ(findLiteralEnd("no\"thing", "\""), 2u); + EXPECT_EQ(findLiteralEnd("no\\\"thi\"ng", "\""), 7u); + EXPECT_EQ(findLiteralEnd("no\\\\\"thing", "\""), 4u); +} diff --git a/test/TimerTest.cpp b/test/TimerTest.cpp index 4a0f462ffd..f14e808333 100644 --- a/test/TimerTest.cpp +++ b/test/TimerTest.cpp @@ -32,8 +32,8 @@ void testTime(const ad_utility::Timer& timer, } TEST(Timer, BasicWorkflow) { -#ifdef __APPLE__ - GTEST_SKIP_("sleep_for is unreliable for macos builds"); +#ifdef _QLEVER_NO_TIMING_TESTS + GTEST_SKIP_("because _QLEVER_NO_TIMING_TESTS defined"); #endif Timer t{Timer::Started}; ASSERT_TRUE(t.isRunning()); @@ -85,8 +85,8 @@ TEST(Timer, BasicWorkflow) { } TEST(Timer, InitiallyStopped) { -#ifdef __APPLE__ - GTEST_SKIP_("sleep_for is unreliable for macos builds"); +#ifdef _QLEVER_NO_TIMING_TESTS + GTEST_SKIP_("because _QLEVER_NO_TIMING_TESTS defined"); #endif Timer t{Timer::Stopped}; ASSERT_FALSE(t.isRunning()); @@ -102,8 +102,8 @@ TEST(Timer, InitiallyStopped) { } TEST(TimeBlockAndLog, TimeBlockAndLog) { -#ifdef __APPLE__ - GTEST_SKIP_("sleep_for is unreliable for macos builds"); +#ifdef _QLEVER_NO_TIMING_TESTS + GTEST_SKIP_("because _QLEVER_NO_TIMING_TESTS defined"); #endif std::string s; { diff --git a/test/TurtleParserTest.cpp b/test/TurtleParserTest.cpp index 85c02977fd..215fd065bd 100644 --- a/test/TurtleParserTest.cpp +++ b/test/TurtleParserTest.cpp @@ -808,8 +808,8 @@ TEST(TurtleParserTest, exceptionPropagationFileBufferReading) { // blocking, even when there are still lots of blocks in the pipeline that are // currently being parsed. TEST(TurtleParserTest, stopParsingOnOutsideFailure) { -#ifdef __APPLE__ - GTEST_SKIP_("sleep_for is unreliable for macos builds"); +#ifdef _QLEVER_NO_TIMING_TESTS + GTEST_SKIP_("because _QLEVER_NO_TIMING_TESTS defined"); #endif std::string filename{"turtleParserStopParsingOnOutsideFailure.dat"}; auto testWithParser = [&]( diff --git a/test/VocabularyOnDiskTest.cpp b/test/VocabularyOnDiskTest.cpp index 130ff64577..22774821a1 100644 --- a/test/VocabularyOnDiskTest.cpp +++ b/test/VocabularyOnDiskTest.cpp @@ -110,18 +110,6 @@ TEST(VocabularyOnDisk, LowerUpperBoundStdLess) { createVocabularyFromDisk("lowerUpperBoundStdLess2")); } -TEST(VocabularyOnDisk, LowerUpperBoundStdLessNonContiguousIds) { - std::vector words{"alpha", "betta", "chimes", "someVery123Word"}; - std::vector ids{2, 4, 8, 42}; - VocabularyCreator creator1{"lowerUppperBoundStdLessNonContiguousIds1"}; - testUpperAndLowerBoundWithStdLessFromWordsAndIds( - creator1.createVocabularyImpl(words, ids), words, ids); - - VocabularyCreator creator2{"lowerUppperBoundStdLessNonContiguousIds2"}; - testUpperAndLowerBoundWithStdLessFromWordsAndIds( - creator2.createVocabularyFromDiskImpl(words, ids), words, ids); -} - TEST(VocabularyOnDisk, LowerUpperBoundNumeric) { testUpperAndLowerBoundWithNumericComparator( createVocabulary("lowerUpperBoundNumeric1")); @@ -129,18 +117,6 @@ TEST(VocabularyOnDisk, LowerUpperBoundNumeric) { createVocabularyFromDisk("lowerUpperBoundNumeric2")); } -TEST(VocabularyOnDisk, LowerUpperBoundNumericNonContiguousIds) { - std::vector words{"4", "33", "222", "1111"}; - std::vector ids{2, 4, 8, 42}; - - VocabularyCreator creator1{"lowerUpperBoundNumericNonContiguousIds1"}; - testUpperAndLowerBoundWithNumericComparatorFromWordsAndIds( - creator1.createVocabularyImpl(words, ids), words, ids); - VocabularyCreator creator2{"lowerUpperBoundNumericNonContiguousIds2"}; - testUpperAndLowerBoundWithNumericComparatorFromWordsAndIds( - creator2.createVocabularyFromDiskImpl(words, ids), words, ids); -} - TEST(VocabularyOnDisk, AccessOperator) { testAccessOperatorForUnorderedVocabulary(createVocabulary("AccessOperator1")); testAccessOperatorForUnorderedVocabulary( diff --git a/test/engine/CartesianProductJoinTest.cpp b/test/engine/CartesianProductJoinTest.cpp index 26782ad410..33eebf82c3 100644 --- a/test/engine/CartesianProductJoinTest.cpp +++ b/test/engine/CartesianProductJoinTest.cpp @@ -5,12 +5,12 @@ #include #include +#include "../engine/ValuesForTesting.h" #include "../util/GTestHelpers.h" #include "../util/IdTableHelpers.h" #include "../util/IndexTestHelpers.h" #include "engine/CartesianProductJoin.h" #include "engine/QueryExecutionTree.h" -#include "engine/ValuesForTesting.h" using namespace ad_utility::testing; using ad_utility::source_location; diff --git a/src/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h similarity index 100% rename from src/engine/ValuesForTesting.h rename to test/engine/ValuesForTesting.h diff --git a/test/engine/idTable/CompressedExternalIdTableTest.cpp b/test/engine/idTable/CompressedExternalIdTableTest.cpp index 5d679ef814..9bc253f29b 100644 --- a/test/engine/idTable/CompressedExternalIdTableTest.cpp +++ b/test/engine/idTable/CompressedExternalIdTableTest.cpp @@ -107,8 +107,13 @@ void testExternalSorterImpl(size_t numDynamicColumns, size_t numRows, } for (size_t k = 0; k < 5; ++k) { - auto generator = writer.sortedView(); + // Also test the case that the blocksize does not exactly divides the + // number of inputs. + auto blocksize = k == 1 ? 1 : 17; using namespace ::testing; + auto generator = k == 0 ? std::views::join(ad_utility::OwningView{ + writer.getSortedBlocks(blocksize)}) + : writer.sortedView(); if (mergeMultipleTimes || k == 0) { auto result = idTableFromRowGenerator( generator, numDynamicColumns); diff --git a/test/parser/LiteralOrIriTest.cpp b/test/parser/LiteralOrIriTest.cpp index 5afcc5e8d5..0c5486fc7b 100644 --- a/test/parser/LiteralOrIriTest.cpp +++ b/test/parser/LiteralOrIriTest.cpp @@ -10,6 +10,7 @@ #include "parser/Literal.h" #include "parser/LiteralOrIri.h" #include "parser/NormalizedString.h" +#include "util/HashSet.h" using namespace ad_utility::triple_component; @@ -194,3 +195,17 @@ TEST(LiteralOrIri, EnsureLiteralsAreEncoded) { EXPECT_THAT(R"(This is to be "\ encoded)", asStringViewUnsafe(literal2.getContent())); } + +TEST(LiteralOrIri, Printing) { + LiteralOrIri literal1 = LiteralOrIri::literalWithoutQuotes("hallo"); + std::stringstream str; + PrintTo(literal1, &str); + EXPECT_EQ(str.str(), "\"hallo\""); +} + +TEST(LiteralOrIri, Hashing) { + auto lit = LiteralOrIri::literalWithoutQuotes("bimbamm"); + auto iri = LiteralOrIri::iriref(""); + ad_utility::HashSet set{lit, iri}; + EXPECT_THAT(set, ::testing::UnorderedElementsAre(lit, iri)); +} diff --git a/test/util/IdTableHelpers.cpp b/test/util/IdTableHelpers.cpp index 99360cd8a9..bdb0c3a865 100644 --- a/test/util/IdTableHelpers.cpp +++ b/test/util/IdTableHelpers.cpp @@ -4,12 +4,10 @@ #include "../test/util/IdTableHelpers.h" -#include - #include #include -#include "engine/ValuesForTesting.h" +#include "../engine/ValuesForTesting.h" #include "engine/idTable/IdTable.h" #include "global/ValueId.h" #include "util/Algorithm.h" diff --git a/test/util/IdTableHelpers.h b/test/util/IdTableHelpers.h index 37a6f71846..75d34bad0a 100644 --- a/test/util/IdTableHelpers.h +++ b/test/util/IdTableHelpers.h @@ -12,6 +12,7 @@ #include #include +#include "../engine/ValuesForTesting.h" #include "./AllocatorTestHelpers.h" #include "./GTestHelpers.h" #include "./IdTestHelpers.h" @@ -20,7 +21,6 @@ #include "engine/Join.h" #include "engine/OptionalJoin.h" #include "engine/QueryExecutionTree.h" -#include "engine/ValuesForTesting.h" #include "engine/idTable/IdTable.h" #include "global/ValueId.h" #include "util/Algorithm.h" diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index 30e7b49062..83d216a6bc 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -46,7 +46,7 @@ std::vector getAllIndexFilenames( indexBasename + ".prefixes", indexBasename + ".vocabulary.internal", indexBasename + ".vocabulary.external", - indexBasename + ".vocabulary.external.idsAndOffsets"}; + indexBasename + ".vocabulary.external.offsets"}; } namespace { diff --git a/test/util/OperationTestHelpers.h b/test/util/OperationTestHelpers.h index 7c7402cb99..8764aab4b7 100644 --- a/test/util/OperationTestHelpers.h +++ b/test/util/OperationTestHelpers.h @@ -85,10 +85,4 @@ class ShallowParentOperation : public Operation { } }; -template <> -void QueryExecutionTree::setOperation( - std::shared_ptr operation) { - rootOperation_ = std::move(operation); -} - #endif // QLEVER_OPERATIONTESTHELPERS_H diff --git a/test/util/TripleComponentTestHelpers.h b/test/util/TripleComponentTestHelpers.h index 5c108fe8bc..dcaad52808 100644 --- a/test/util/TripleComponentTestHelpers.h +++ b/test/util/TripleComponentTestHelpers.h @@ -13,7 +13,12 @@ namespace ad_utility::testing { // `literal` (which must be enclosed in double quotes) and the optional // `langtagOrDatatype` (which must start with `@` or `^^`). constexpr auto tripleComponentLiteral = - [](const std::string& literal, std::string_view langtagOrDatatype = "") { + [](std::string_view literal, std::string_view langtagOrDatatype = "") { + std::string dummy; + if (!(literal.starts_with('"') && literal.ends_with('"'))) { + dummy = absl::StrCat("\"", literal, "\""); + literal = dummy; + } if (langtagOrDatatype.starts_with("@")) { return TripleComponent::Literal::fromEscapedRdfLiteral( literal, std::string(langtagOrDatatype));