From d50499f96b2ce5978218d012cdd8292bd286d2be Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 27 Jul 2023 20:51:00 +0200 Subject: [PATCH] Refactor SPARQL expression definitions + add ABS, CEIL, FLOOR, ROUND (#1043) 1. Refactor the code for the various SPARQL expressions and functions. The definitions are now in `.cpp` files, which reduces compile time. Each expression is defined in three parts: the actual behavior (e.g., `extractYear`), the corresponding type (e.g., `YearExpression` defined via the macro `NARY_EXPRESSION`), and the corresponding factory function that is used in the SPARQL parser (e.g., `makeYearExpression`). 2. Add the following four unary functions: `ABS`, `CEIL`, `FLOOR`, `ROUND`. Pay attention to the detail that according to the SPARQL standard, `ROUND` of negative numbers that lie exactly between two integers (e.g., `-42.5`) rounds towards zero (`-42`), unlike `std::round`, which rounds away from zero (`-43`). --- .github/workflows/code-coverage.yml | 10 +- .github/workflows/format-check.yml | 2 + src/engine/sparqlExpressions/CMakeLists.txt | 18 +- .../sparqlExpressions/DateExpressions.cpp | 60 ++++ .../sparqlExpressions/NaryExpression.cpp | 95 ++--- src/engine/sparqlExpressions/NaryExpression.h | 335 ++---------------- .../sparqlExpressions/NaryExpressionImpl.h | 197 ++++++++++ .../NumericBinaryExpressions.cpp | 99 ++++++ .../NumericUnaryExpressions.cpp | 96 +++++ .../sparqlExpressions/RegexExpression.cpp | 2 +- .../sparqlExpressions/SparqlExpression.h | 4 + .../sparqlExpressions/StringExpressions.cpp | 29 ++ .../sparqlParser/SparqlQleverVisitor.cpp | 64 ++-- src/parser/sparqlParser/SparqlQleverVisitor.h | 2 +- test/GroupByTest.cpp | 12 +- test/RegexExpressionTest.cpp | 3 +- test/SparqlAntlrParserTest.cpp | 217 +++++++++--- test/SparqlExpressionTest.cpp | 184 ++++++---- 18 files changed, 881 insertions(+), 548 deletions(-) create mode 100644 src/engine/sparqlExpressions/DateExpressions.cpp create mode 100644 
src/engine/sparqlExpressions/NaryExpressionImpl.h create mode 100644 src/engine/sparqlExpressions/NumericBinaryExpressions.cpp create mode 100644 src/engine/sparqlExpressions/NumericUnaryExpressions.cpp create mode 100644 src/engine/sparqlExpressions/StringExpressions.cpp diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml index 95e7929b1f..1ad53e974d 100644 --- a/.github/workflows/code-coverage.yml +++ b/.github/workflows/code-coverage.yml @@ -41,6 +41,10 @@ jobs: with: submodules: "recursive" + - name: Install dependencies + run: | + sudo gem install apt-spy2 && sudo apt-spy2 fix --commit --launchpad --country=US + sudo apt-get update - name: Install clang 16 # The sed command fixes a bug in `llvm.sh` in combination with the latest version of # `apt-key`. Without it the GPG key for the llvm repository is downloaded but deleted @@ -49,8 +53,6 @@ jobs: wget https://apt.llvm.org/llvm.sh sudo chmod +x llvm.sh sed 's/apt-key del/echo/' llvm.sh -iy - sudo ./llvm.sh 15 - sudo apt install clang-15 llvm-15 sudo ./llvm.sh 16 all - name: Install dependencies run: | @@ -89,8 +91,8 @@ jobs: - name: Process coverage info working-directory: ${{github.workspace}}/build/test run: > - llvm-profdata-15 merge -sparse *.profraw -o default.profdata; - xargs -a tests.txt llvm-cov-15 export --dump --format=lcov --instr-profile ./default.profdata --ignore-filename-regex="/third_party/" --ignore-filename-regex="/generated/" --ignore-filename-regex="/nlohmann/" --ignore-filename-regex="/ctre/" --ignore-filename-regex="/test/" --ignore-filename-regex="/benchmark/" > ./coverage.lcov + llvm-profdata-16 merge -sparse *.profraw -o default.profdata; + xargs -a tests.txt llvm-cov-16 export --dump --format=lcov --instr-profile ./default.profdata --ignore-filename-regex="/third_party/" --ignore-filename-regex="/generated/" --ignore-filename-regex="/nlohmann/" --ignore-filename-regex="/ctre/" --ignore-filename-regex="/test/" --ignore-filename-regex="/benchmark/" > 
./coverage.lcov # Only upload the coverage directly if this is not a pull request. In this # case we are on the master branch and have access to the Codecov token. diff --git a/.github/workflows/format-check.yml b/.github/workflows/format-check.yml index 9e704a3ab2..f0f00d583a 100644 --- a/.github/workflows/format-check.yml +++ b/.github/workflows/format-check.yml @@ -19,6 +19,8 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies run: | + # The following line currently seems to be necessary to work around a bug in the installation. + sudo apt remove python3-lldb-* wget https://apt.llvm.org/llvm.sh sudo chmod +x llvm.sh sed 's/apt-key del/echo/' llvm.sh -iy diff --git a/src/engine/sparqlExpressions/CMakeLists.txt b/src/engine/sparqlExpressions/CMakeLists.txt index 7d8d584ffb..4738f71d3a 100644 --- a/src/engine/sparqlExpressions/CMakeLists.txt +++ b/src/engine/sparqlExpressions/CMakeLists.txt @@ -1,16 +1,10 @@ add_library(sparqlExpressions - SparqlExpressionTypes.h - SparqlExpression.h - AggregateExpression.h - GroupConcatExpression.h - SparqlExpressionGenerators.h - SparqlExpressionValueGetters.h SparqlExpressionValueGetters.cpp - NaryExpression.h NaryExpression.cpp - SetOfIntervals.h SetOfIntervals.cpp - LiteralExpression.h GroupConcatExpression.h - SparqlExpressionPimpl.h SparqlExpressionPimpl.cpp - SampleExpression.h SampleExpression.cpp + SparqlExpressionValueGetters.cpp + NaryExpression.cpp + SetOfIntervals.cpp + SparqlExpressionPimpl.cpp + SampleExpression.cpp RelationalExpressions.cpp AggregateExpression.cpp RegexExpression.cpp - LangExpression.cpp) + LangExpression.cpp NumericUnaryExpressions.cpp NumericBinaryExpressions.cpp DateExpressions.cpp StringExpressions.cpp) qlever_target_link_libraries(sparqlExpressions index) diff --git a/src/engine/sparqlExpressions/DateExpressions.cpp b/src/engine/sparqlExpressions/DateExpressions.cpp new file mode 100644 index 0000000000..b502922d04 --- /dev/null +++ 
b/src/engine/sparqlExpressions/DateExpressions.cpp @@ -0,0 +1,60 @@ +// Copyright 2023, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Kalmbach + +#include "engine/sparqlExpressions/NaryExpressionImpl.h" + +namespace sparqlExpression { +namespace detail { +// Date functions. +// The input is `std::nullopt` if the argument to the expression is not a date. +inline auto extractYear = [](std::optional d) { + if (!d.has_value()) { + return Id::makeUndefined(); + } else { + return Id::makeFromInt(d->getYear()); + } +}; + +inline auto extractMonth = [](std::optional d) { + // TODO Use the monadic operations for std::optional + if (!d.has_value()) { + return Id::makeUndefined(); + } + auto optionalMonth = d.value().getMonth(); + if (!optionalMonth.has_value()) { + return Id::makeUndefined(); + } + return Id::makeFromInt(optionalMonth.value()); +}; + +inline auto extractDay = [](std::optional d) { + // TODO Use the monadic operations for `std::optional`. 
+ if (!d.has_value()) { + return Id::makeUndefined(); + } + auto optionalDay = d.value().getDay(); + if (!optionalDay.has_value()) { + return Id::makeUndefined(); + } + return Id::makeFromInt(optionalDay.value()); +}; + +NARY_EXPRESSION(YearExpression, 1, FV); +NARY_EXPRESSION(MonthExpression, 1, + FV); +NARY_EXPRESSION(DayExpression, 1, FV); +} // namespace detail +using namespace detail; +SparqlExpression::Ptr makeYearExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); +} + +SparqlExpression::Ptr makeDayExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); +} + +SparqlExpression::Ptr makeMonthExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); +} +} // namespace sparqlExpression diff --git a/src/engine/sparqlExpressions/NaryExpression.cpp b/src/engine/sparqlExpressions/NaryExpression.cpp index ccb6383ee9..700c61caa5 100644 --- a/src/engine/sparqlExpressions/NaryExpression.cpp +++ b/src/engine/sparqlExpressions/NaryExpression.cpp @@ -4,87 +4,34 @@ #include "engine/sparqlExpressions/NaryExpression.h" +#include "engine/sparqlExpressions/NaryExpressionImpl.h" +#include "util/GeoSparqlHelpers.h" + namespace sparqlExpression { namespace detail { +NARY_EXPRESSION(LongitudeExpression, 1, + FV, + StringValueGetter>); +NARY_EXPRESSION(LatitudeExpression, 1, + FV, + StringValueGetter>); +NARY_EXPRESSION(DistExpression, 2, + FV, + StringValueGetter>); -// _____________________________________________________________________________ -template -requires(isOperation) -NaryExpression::NaryExpression(Children&& children) - : _children{std::move(children)} {} - -// _____________________________________________________________________________ - -template -requires(isOperation) -ExpressionResult NaryExpression::evaluate( - EvaluationContext* context) const { - auto resultsOfChildren = ad_utility::applyFunctionToEachElementOfTuple( - [context](const auto& child) { return 
child->evaluate(context); }, - _children); - - // A function that only takes several `ExpressionResult`s, - // and evaluates the expression. - auto evaluateOnChildrenResults = - std::bind_front(ad_utility::visitWithVariantsAndParameters, - evaluateOnChildrenOperands, NaryOperation{}, context); +} // namespace detail - return std::apply(evaluateOnChildrenResults, std::move(resultsOfChildren)); +using namespace detail; +SparqlExpression::Ptr makeDistExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2) { + return std::make_unique(std::move(child1), std::move(child2)); } -// _____________________________________________________________________________ -template -requires(isOperation) -std::span NaryExpression::children() { - return {_children.data(), _children.size()}; +SparqlExpression::Ptr makeLatitudeExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); } - -// __________________________________________________________________________ -template -requires(isOperation) [[nodiscard]] string NaryExpression::getCacheKey( - const VariableToColumnMap& varColMap) const { - string key = typeid(*this).name(); - for (const auto& child : _children) { - key += child->getCacheKey(varColMap); - } - return key; +SparqlExpression::Ptr makeLongitudeExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); } -#define INSTANTIATE_NARY(N, X, ...) 
\ - template class NaryExpression> - -INSTANTIATE_NARY(2, FV, - SET); - -INSTANTIATE_NARY(2, FV, - SET); - -INSTANTIATE_NARY(1, FV, - SET); - -INSTANTIATE_NARY(1, FV); - -INSTANTIATE_NARY(2, FV); - -INSTANTIATE_NARY(2, FV); - -INSTANTIATE_NARY(2, FV); - -INSTANTIATE_NARY(2, FV); - -INSTANTIATE_NARY(1, - FV, - StringValueGetter>); -INSTANTIATE_NARY(1, - FV, - StringValueGetter>); -INSTANTIATE_NARY(2, FV, - StringValueGetter>); - -INSTANTIATE_NARY(1, FV); -INSTANTIATE_NARY(1, FV); -INSTANTIATE_NARY(1, FV); -INSTANTIATE_NARY(1, FV); -INSTANTIATE_NARY(1, FV); -} // namespace detail } // namespace sparqlExpression diff --git a/src/engine/sparqlExpressions/NaryExpression.h b/src/engine/sparqlExpressions/NaryExpression.h index b1e6269836..a5ad1a5551 100644 --- a/src/engine/sparqlExpressions/NaryExpression.h +++ b/src/engine/sparqlExpressions/NaryExpression.h @@ -10,308 +10,43 @@ #include #include "engine/sparqlExpressions/SparqlExpression.h" -#include "engine/sparqlExpressions/SparqlExpressionGenerators.h" -#include "util/Conversions.h" -#include "util/GeoSparqlHelpers.h" +// Factory functions for all kinds of expressions that only have other +// expressions as arguments. The actual types and implementations of the +// expressions are hidden in the respective `.cpp` file to reduce compile times. namespace sparqlExpression { -namespace detail { -// TODO: This comment is out of date. It refers to `BinaryOperations`, -// `ValueGetter`, and `create`, none of which can be found in this file. -// -// A sequence of binary operations, which is executed from left to right, for -// example (?a or ?b), (?a and ?b ?and ?c), (3 * 5 / 7 * ?x) . Different -// operations in the same expression, like (?a + ?b - ?c) are supported by -// passing in multiple operations as the `BinaryOperations` template parameter -// and by choosing the corresponding operation for each sub-expression via the -// `tags` argument to the `create` function (see there). 
-// -// @tparam ValueGetter A callable type that takes a -// double/int64_t/Bool/string/StrongIdWithResultType and extracts the actual -// input to the operation. Can be used to perform type conversions before the -// actual operation. -// -// @tparam BinaryOperations The actual binary operations. They must be callable -// with the result types of the `ValueGetter`. -template -requires(isOperation) -class NaryExpression : public SparqlExpression { - public: - static constexpr size_t N = NaryOperation::N; - using Children = std::array; - - // Construct from an array of `N` child expressions. - explicit NaryExpression(Children&& children); - - // Construct from `N` child expressions. Each of the children must have a type - // `std::unique_ptr`. - explicit NaryExpression( - std::convertible_to auto... children) - requires(sizeof...(children) == N) - : NaryExpression{Children{std::move(children)...}} {} - - public: - // __________________________________________________________________________ - ExpressionResult evaluate(EvaluationContext* context) const override; - - // _________________________________________________________________________ - std::span children() override; - - // _________________________________________________________________________ - [[nodiscard]] string getCacheKey( - const VariableToColumnMap& varColMap) const override; - - private: - // Evaluate the `naryOperation` on the `operands` using the `context`. - static inline auto evaluateOnChildrenOperands = - []( - NaryOperation naryOperation, EvaluationContext* context, - Operands&&... operands) -> ExpressionResult { - // Perform a more efficient calculation if a specialized function exists - // that matches all operands. 
- if (isAnySpecializedFunctionPossible(naryOperation._specializedFunctions, - operands...)) { - auto optionalResult = evaluateOnSpecializedFunctionsIfPossible( - naryOperation._specializedFunctions, - std::forward(operands)...); - AD_CONTRACT_CHECK(optionalResult); - return std::move(optionalResult.value()); - } - - // We have to first determine the number of results we will produce. - auto targetSize = getResultSize(*context, operands...); - - // The result is a constant iff all the results are constants. - constexpr static bool resultIsConstant = - (... && isConstantResult); - - // The generator for the result of the operation. - auto resultGenerator = - applyOperation(targetSize, naryOperation, context, AD_FWD(operands)...); - - // Compute the result. - using ResultType = typename decltype(resultGenerator)::value_type; - VectorWithMemoryLimit result{context->_allocator}; - result.reserve(targetSize); - for (auto&& singleResult : resultGenerator) { - result.push_back(std::forward(singleResult)); - } - - if constexpr (resultIsConstant) { - AD_CONTRACT_CHECK(result.size() == 1); - return std::move(result[0]); - } else { - return result; - } - }; - Children _children; -}; - -// Takes a `Function` that returns a numeric value (integral or floating point) -// and converts it to a function, that takes the same arguments and returns the -// same result, but the return type is the `NumericValue` variant. -template -struct NumericIdWrapper { - // Note: Sonarcloud suggests `[[no_unique_address]]` for the following member, - // but adding it causes an internal compiler error in Clang 16. - Function function_{}; - Id operator()(auto&&... args) const { - return makeNumericId(function_(AD_FWD(args)...)); - } -}; - -// Takes a `Function` that takes and returns numeric values (integral or -// floating point) and converts it to a function, that takes the same arguments -// and returns the same result, but the arguments and the return type are the -// `NumericValue` variant. 
-template -inline auto makeNumericExpression() { - return [](const std::same_as auto&... args) { - auto visitor = [](const Ts&... t) { - if constexpr ((... || std::is_same_v)) { - return Id::makeUndefined(); - } else { - using C = std::common_type_t; - return makeNumericId(Function{}(static_cast(t)...)); - } - }; - return std::visit(visitor, args...); - }; -} - -// Two short aliases to make the instantiations more readable. -template -using FV = FunctionAndValueGetters; - -template -using NARY = NaryExpression>; - -// True iff all types `Ts` are `SetOfIntervals`. -inline auto areAllSetOfIntervals = [](const Ts&...) constexpr { - return (... && ad_utility::isSimilar); -}; -template -using SET = SpecializedFunction; - -using ad_utility::SetOfIntervals; - -// The types for the concrete MultiBinaryExpressions and UnaryExpressions. -using TernaryBool = EffectiveBooleanValueGetter::Result; - -// Or -inline auto orLambda = [](TernaryBool a, TernaryBool b) { - using enum TernaryBool; - if (a == True || b == True) { - return Id::makeFromBool(true); - } - if (a == False && b == False) { - return Id::makeFromBool(false); - } - return Id::makeUndefined(); -}; -using OrExpression = - NARY<2, FV, - SET>; - -// And -inline auto andLambda = [](TernaryBool a, TernaryBool b) { - using enum TernaryBool; - if (a == True && b == True) { - return Id::makeFromBool(true); - } - if (a == False || b == False) { - return Id::makeFromBool(false); - } - return Id::makeUndefined(); -}; -using AndExpression = - NARY<2, FV, - SET>; - -// Unary Negation -inline auto unaryNegate = [](TernaryBool a) { - using enum TernaryBool; - switch (a) { - case True: - return Id::makeFromBool(false); - case False: - return Id::makeFromBool(true); - case Undef: - return Id::makeUndefined(); - } - AD_FAIL(); -}; -using UnaryNegateExpression = - NARY<1, FV, - SET>; - -// Unary Minus. -inline auto unaryMinus = makeNumericExpression>(); -using UnaryMinusExpression = - NARY<1, FV>; - -// Multiplication. 
-inline auto multiply = makeNumericExpression>(); -using MultiplyExpression = NARY<2, FV>; - -// Division. -// -// TODO If `b == 0` this is technically undefined behavior and -// should lead to an expression error in SPARQL. Fix this as soon as we -// introduce the proper semantics for expression errors. -// Update: I checked it, and the standard differentiates between `xsd:decimal` -// (error) and `xsd:float/xsd:double` where we have `NaN` and `inf` results. We -// currently implement the latter behavior. Note: The result of a division in -// SPARQL is always a decimal number, so there is no integer division. -inline auto divide = makeNumericExpression>(); -using DivideExpression = NARY<2, FV>; - -// Addition and subtraction, currently all results are converted to double. -inline auto add = makeNumericExpression>(); -using AddExpression = NARY<2, FV>; - -inline auto subtract = makeNumericExpression>(); -using SubtractExpression = NARY<2, FV>; - -// Basic GeoSPARQL functions (code in util/GeoSparqlHelpers.h). -using LongitudeExpression = - NARY<1, FV, - StringValueGetter>>; -using LatitudeExpression = - NARY<1, FV, - StringValueGetter>>; -using DistExpression = - NARY<2, FV, - StringValueGetter>>; - -// Date functions. -// -inline auto extractYear = [](std::optional d) { - if (!d.has_value()) { - return Id::makeUndefined(); - } else { - return Id::makeFromInt(d->getYear()); - } -}; - -inline auto extractMonth = [](std::optional d) { - // TODO Use the monadic operations for std::optional - if (!d.has_value()) { - return Id::makeUndefined(); - } - auto optionalMonth = d.value().getMonth(); - if (!optionalMonth.has_value()) { - return Id::makeUndefined(); - } - return Id::makeFromInt(optionalMonth.value()); -}; - -inline auto extractDay = [](std::optional d) { - // TODO Use the monadic operations for `std::optional`. 
- if (!d.has_value()) { - return Id::makeUndefined(); - } - auto optionalDay = d.value().getDay(); - if (!optionalDay.has_value()) { - return Id::makeUndefined(); - } - return Id::makeFromInt(optionalDay.value()); -}; - -using YearExpression = NARY<1, FV>; -using MonthExpression = NARY<1, FV>; -using DayExpression = NARY<1, FV>; - -// String functions. -using StrExpression = NARY<1, FV>; - -// Compute string length. -inline auto strlen = [](const auto& s) -> Id { - return Id::makeFromInt(s.size()); -}; -using StrlenExpression = NARY<1, FV>; - -} // namespace detail - -using detail::AddExpression; -using detail::AndExpression; -using detail::DivideExpression; -using detail::MultiplyExpression; -using detail::OrExpression; -using detail::SubtractExpression; -using detail::UnaryMinusExpression; -using detail::UnaryNegateExpression; - -using detail::DistExpression; -using detail::LatitudeExpression; -using detail::LongitudeExpression; - -using detail::DayExpression; -using detail::MonthExpression; -using detail::YearExpression; - -using detail::StrExpression; -using detail::StrlenExpression; +SparqlExpression::Ptr makeAddExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2); +SparqlExpression::Ptr makeAndExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2); +SparqlExpression::Ptr makeDivideExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2); +SparqlExpression::Ptr makeMultiplyExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2); +SparqlExpression::Ptr makeOrExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2); +SparqlExpression::Ptr makeSubtractExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2); + +SparqlExpression::Ptr makeUnaryMinusExpression(SparqlExpression::Ptr child); +SparqlExpression::Ptr makeUnaryNegateExpression(SparqlExpression::Ptr child); + +SparqlExpression::Ptr makeRoundExpression(SparqlExpression::Ptr child); +SparqlExpression::Ptr 
makeAbsExpression(SparqlExpression::Ptr child); +SparqlExpression::Ptr makeCeilExpression(SparqlExpression::Ptr child); +SparqlExpression::Ptr makeFloorExpression(SparqlExpression::Ptr child); + +SparqlExpression::Ptr makeDistExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2); +SparqlExpression::Ptr makeLatitudeExpression(SparqlExpression::Ptr child); +SparqlExpression::Ptr makeLongitudeExpression(SparqlExpression::Ptr child); + +SparqlExpression::Ptr makeDayExpression(SparqlExpression::Ptr child); +SparqlExpression::Ptr makeMonthExpression(SparqlExpression::Ptr child); +SparqlExpression::Ptr makeYearExpression(SparqlExpression::Ptr child); + +SparqlExpression::Ptr makeStrExpression(SparqlExpression::Ptr child); +SparqlExpression::Ptr makeStrlenExpression(SparqlExpression::Ptr child); } // namespace sparqlExpression diff --git a/src/engine/sparqlExpressions/NaryExpressionImpl.h b/src/engine/sparqlExpressions/NaryExpressionImpl.h new file mode 100644 index 0000000000..96d7148eb4 --- /dev/null +++ b/src/engine/sparqlExpressions/NaryExpressionImpl.h @@ -0,0 +1,197 @@ +// Copyright 2023, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Kalmbach + +#pragma once + +#include + +#include "engine/sparqlExpressions/NaryExpression.h" +#include "engine/sparqlExpressions/SparqlExpressionGenerators.h" + +namespace sparqlExpression::detail { +template +requires(isOperation) +class NaryExpression : public SparqlExpression { + public: + static constexpr size_t N = NaryOperation::N; + using Children = std::array; + + private: + Children children_; + + public: + // Construct from an array of `N` child expressions. + explicit NaryExpression(Children&& children); + + // Construct from `N` child expressions. Each of the children must have a type + // `std::unique_ptr`. + explicit NaryExpression( + std::convertible_to auto... 
children) + requires(sizeof...(children) == N) + : NaryExpression{Children{std::move(children)...}} {} + + // __________________________________________________________________________ + ExpressionResult evaluate(EvaluationContext* context) const override; + + // _________________________________________________________________________ + std::span children() override; + + // _________________________________________________________________________ + [[nodiscard]] string getCacheKey( + const VariableToColumnMap& varColMap) const override; + + private: + // Evaluate the `naryOperation` on the `operands` using the `context`. + template + static ExpressionResult evaluateOnChildrenOperands( + NaryOperation naryOperation, EvaluationContext* context, + Operands&&... operands) { + // Perform a more efficient calculation if a specialized function exists + // that matches all operands. + if (isAnySpecializedFunctionPossible(naryOperation._specializedFunctions, + operands...)) { + auto optionalResult = evaluateOnSpecializedFunctionsIfPossible( + naryOperation._specializedFunctions, + std::forward(operands)...); + AD_CORRECTNESS_CHECK(optionalResult); + return std::move(optionalResult.value()); + } + + // We have to first determine the number of results we will produce. + auto targetSize = getResultSize(*context, operands...); + + // The result is a constant iff all the results are constants. + constexpr static bool resultIsConstant = + (... && isConstantResult); + + // The generator for the result of the operation. + auto resultGenerator = + applyOperation(targetSize, naryOperation, context, AD_FWD(operands)...); + + // Compute the result. 
+ using ResultType = typename decltype(resultGenerator)::value_type; + VectorWithMemoryLimit result{context->_allocator}; + result.reserve(targetSize); + std::ranges::move(resultGenerator, std::back_inserter(result)); + + if constexpr (resultIsConstant) { + AD_CORRECTNESS_CHECK(result.size() == 1); + return std::move(result[0]); + } else { + return result; + } + } +}; + +// Takes a `Function` that returns a numeric value (integral or floating point) +// and converts it to a function, that takes the same arguments and returns the +// same result, but the return type is the `NumericValue` variant. +template +struct NumericIdWrapper { + // Note: Sonarcloud suggests `[[no_unique_address]]` for the following member, + // but adding it causes an internal compiler error in Clang 16. + Function function_{}; + Id operator()(auto&&... args) const { + return makeNumericId(function_(AD_FWD(args)...)); + } +}; + +// Takes a `Function` that takes and returns numeric values (integral or +// floating point) and converts it to a function, that takes the same arguments +// and returns the same result, but the arguments and the return type are the +// `NumericValue` variant. +template +inline auto makeNumericExpression() { + return [](const std::same_as auto&... args) { + auto visitor = [](const Ts&... t) { + if constexpr ((... || std::is_same_v)) { + return Id::makeUndefined(); + } else { + return makeNumericId(Function{}(t...)); + } + }; + return std::visit(visitor, args...); + }; +} + +// Two short aliases to make the instantiations more readable. +template +using FV = FunctionAndValueGetters; + +template +using NARY = NaryExpression>; + +// True iff all types `Ts` are `SetOfIntervals`. +inline auto areAllSetOfIntervals = [](const Ts&...) constexpr { + return (... && ad_utility::isSimilar); +}; +template +using SET = SpecializedFunction; + +using ad_utility::SetOfIntervals; + +// The types for the concrete MultiBinaryExpressions and UnaryExpressions. 
+using TernaryBool = EffectiveBooleanValueGetter::Result; + +// _____________________________________________________________________________ +template +requires(isOperation) +NaryExpression::NaryExpression(Children&& children) + : children_{std::move(children)} {} + +// _____________________________________________________________________________ + +template +requires(isOperation) +ExpressionResult NaryExpression::evaluate( + EvaluationContext* context) const { + auto resultsOfChildren = ad_utility::applyFunctionToEachElementOfTuple( + [context](const auto& child) { return child->evaluate(context); }, + children_); + + // Bind the `evaluateOnChildrenOperands` to a lambda. + auto evaluateOnChildOperandsAsLambda = [](auto&&... args) { + return evaluateOnChildrenOperands(AD_FWD(args)...); + }; + + // A function that only takes several `ExpressionResult`s, + // and evaluates the expression. + auto evaluateOnChildrenResults = std::bind_front( + ad_utility::visitWithVariantsAndParameters, + evaluateOnChildOperandsAsLambda, NaryOperation{}, context); + + return std::apply(evaluateOnChildrenResults, std::move(resultsOfChildren)); +} + +// _____________________________________________________________________________ +template +requires(isOperation) +std::span NaryExpression::children() { + return {children_.data(), children_.size()}; +} + +// __________________________________________________________________________ +template +requires(isOperation) [[nodiscard]] string NaryExpression::getCacheKey( + const VariableToColumnMap& varColMap) const { + string key = typeid(*this).name(); + key += ad_utility::lazyStrJoin( + children_ | std::views::transform([&varColMap](const auto& child) { + return child->getCacheKey(varColMap); + }), + ""); + return key; +} + +// Define a class `Name` that is a strong typedef (via inheritance) from +// `NaryExpresssion`. The strong typedef (vs. 
a simple `using` +// declaration) is used to improve compiler messages as the resulting class has +// a short and descriptive name. +#define NARY_EXPRESSION(Name, N, X, ...) \ + class Name : public NaryExpression> { \ + using Base = NaryExpression>; \ + using Base::Base; \ + }; + +} // namespace sparqlExpression::detail diff --git a/src/engine/sparqlExpressions/NumericBinaryExpressions.cpp b/src/engine/sparqlExpressions/NumericBinaryExpressions.cpp new file mode 100644 index 0000000000..fca4da662d --- /dev/null +++ b/src/engine/sparqlExpressions/NumericBinaryExpressions.cpp @@ -0,0 +1,99 @@ +// Copyright 2023, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Kalmbach +#include "engine/sparqlExpressions/NaryExpressionImpl.h" + +namespace sparqlExpression { +namespace detail { +// Multiplication. +inline auto multiply = makeNumericExpression>(); +NARY_EXPRESSION(MultiplyExpression, 2, + FV); + +// Division. +// +// TODO If `b == 0` this is technically undefined behavior and +// should lead to an expression error in SPARQL. Fix this as soon as we +// introduce the proper semantics for expression errors. +// Update: I checked it, and the standard differentiates between `xsd:decimal` +// (error) and `xsd:float/xsd:double` where we have `NaN` and `inf` results. We +// currently implement the latter behavior. Note: The result of a division in +// SPARQL is always a decimal number, so there is no integer division. +[[maybe_unused]] inline auto divideImpl = [](auto x, auto y) { + return static_cast(x) / static_cast(y); +}; +inline auto divide = makeNumericExpression(); +NARY_EXPRESSION(DivideExpression, 2, FV); + +// Addition and subtraction, currently all results are converted to double. 
+inline auto add = makeNumericExpression>(); +NARY_EXPRESSION(AddExpression, 2, FV); + +inline auto subtract = makeNumericExpression>(); +NARY_EXPRESSION(SubtractExpression, 2, + FV); + +// Or +inline auto orLambda = [](TernaryBool a, TernaryBool b) { + using enum TernaryBool; + if (a == True || b == True) { + return Id::makeFromBool(true); + } + if (a == False && b == False) { + return Id::makeFromBool(false); + } + return Id::makeUndefined(); +}; + +NARY_EXPRESSION(OrExpression, 2, + FV, + SET); + +// And +inline auto andLambda = [](TernaryBool a, TernaryBool b) { + using enum TernaryBool; + if (a == True && b == True) { + return Id::makeFromBool(true); + } + if (a == False || b == False) { + return Id::makeFromBool(false); + } + return Id::makeUndefined(); +}; +NARY_EXPRESSION(AndExpression, 2, + FV, + SET); + +} // namespace detail + +using namespace detail; +SparqlExpression::Ptr makeAddExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2) { + return std::make_unique(std::move(child1), std::move(child2)); +} + +SparqlExpression::Ptr makeDivideExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2) { + return std::make_unique(std::move(child1), + std::move(child2)); +} +SparqlExpression::Ptr makeMultiplyExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2) { + return std::make_unique(std::move(child1), + std::move(child2)); +} +SparqlExpression::Ptr makeSubtractExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2) { + return std::make_unique(std::move(child1), + std::move(child2)); +} + +SparqlExpression::Ptr makeAndExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2) { + return std::make_unique(std::move(child1), std::move(child2)); +} +SparqlExpression::Ptr makeOrExpression(SparqlExpression::Ptr child1, + SparqlExpression::Ptr child2) { + return std::make_unique(std::move(child1), std::move(child2)); +} +} // namespace sparqlExpression diff --git 
a/src/engine/sparqlExpressions/NumericUnaryExpressions.cpp b/src/engine/sparqlExpressions/NumericUnaryExpressions.cpp new file mode 100644 index 0000000000..0b2b99af3e --- /dev/null +++ b/src/engine/sparqlExpressions/NumericUnaryExpressions.cpp @@ -0,0 +1,96 @@ +// Copyright 2023, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Kalmbach + +#include "engine/sparqlExpressions/NaryExpressionImpl.h" + +namespace sparqlExpression { +namespace detail { + +// Unary negation. +inline auto unaryNegate = [](TernaryBool a) { + using enum TernaryBool; + switch (a) { + case True: + return Id::makeFromBool(false); + case False: + return Id::makeFromBool(true); + case Undef: + return Id::makeUndefined(); + } + AD_FAIL(); +}; +NARY_EXPRESSION(UnaryNegateExpression, 1, + FV, + SET); + +// Unary Minus. +inline auto unaryMinus = makeNumericExpression>(); +NARY_EXPRESSION(UnaryMinusExpression, 1, + FV); +// Abs +inline const auto absImpl = [](T num) { return std::abs(num); }; +inline const auto abs = makeNumericExpression(); +NARY_EXPRESSION(AbsExpression, 1, FV); + +// Rounding. +inline const auto roundImpl = [](T num) { + if constexpr (std::is_floating_point_v) { + auto res = std::round(num); + // In SPARQL, negative numbers are rounded towards zero if they lie exactly + // between two integers. + return (num < 0 && std::abs(res - num) == 0.5) ? res + 1 : res; + } else { + return num; + } +}; + +inline const auto round = makeNumericExpression(); +NARY_EXPRESSION(RoundExpression, 1, FV); + +// Ceiling. +inline const auto ceilImpl = [](T num) { + if constexpr (std::is_floating_point_v) { + return std::ceil(num); + } else { + return num; + } +}; +inline const auto ceil = makeNumericExpression(); +NARY_EXPRESSION(CeilExpression, 1, FV); + +// Flooring. 
+inline const auto floorImpl = [](T num) { + if constexpr (std::is_floating_point_v) { + return std::floor(num); + } else { + return num; + } +}; +inline const auto floor = makeNumericExpression(); +using FloorExpression = NARY<1, FV>; +} // namespace detail + +using namespace detail; +SparqlExpression::Ptr makeRoundExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); +} +SparqlExpression::Ptr makeAbsExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); +} +SparqlExpression::Ptr makeCeilExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); +} +SparqlExpression::Ptr makeFloorExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); +} + +SparqlExpression::Ptr makeUnaryMinusExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); +} + +SparqlExpression::Ptr makeUnaryNegateExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); +} + +} // namespace sparqlExpression diff --git a/src/engine/sparqlExpressions/RegexExpression.cpp b/src/engine/sparqlExpressions/RegexExpression.cpp index 09fc33d01c..41bef36569 100644 --- a/src/engine/sparqlExpressions/RegexExpression.cpp +++ b/src/engine/sparqlExpressions/RegexExpression.cpp @@ -82,7 +82,7 @@ RegexExpression::RegexExpression( SparqlExpression::Ptr child, SparqlExpression::Ptr regex, std::optional optionalFlags) : child_{std::move(child)} { - if (dynamic_cast(child_.get())) { + if (child_->isStrExpression()) { child_ = std::move(std::move(*child_).moveChildrenOut().at(0)); childIsStrExpression_ = true; } diff --git a/src/engine/sparqlExpressions/SparqlExpression.h b/src/engine/sparqlExpressions/SparqlExpression.h index d2327d1d48..bb1d61e352 100644 --- a/src/engine/sparqlExpressions/SparqlExpression.h +++ b/src/engine/sparqlExpressions/SparqlExpression.h @@ -138,6 +138,10 @@ class SparqlExpression { // implementation returns `false`. 
virtual bool isConstantExpression() const { return false; } + // Returns true iff this expression is a STR(...) expression. Default + // implementation returns `false`. + virtual bool isStrExpression() const { return false; } + // __________________________________________________________________________ virtual ~SparqlExpression() = default; diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp new file mode 100644 index 0000000000..5ea92e96bc --- /dev/null +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -0,0 +1,29 @@ +// Copyright 2023, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Kalmbach +#include "engine/sparqlExpressions/NaryExpressionImpl.h" +namespace sparqlExpression { +namespace detail { +// String functions. +NARY_EXPRESSION(StrExpressionImpl, 1, FV); + +class StrExpression : public StrExpressionImpl { + using StrExpressionImpl::StrExpressionImpl; + bool isStrExpression() const override { return true; } +}; + +// Compute string length. 
+inline auto strlen = [](std::string_view s) { + return Id::makeFromInt(s.size()); +}; +NARY_EXPRESSION(StrlenExpression, 1, FV); + +} // namespace detail +using namespace detail; +SparqlExpression::Ptr makeStrExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); +} +SparqlExpression::Ptr makeStrlenExpression(SparqlExpression::Ptr child) { + return std::make_unique(std::move(child)); +} +} // namespace sparqlExpression diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index bcca18c992..5f0f8bac8b 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -78,16 +78,14 @@ ExpressionPtr Visitor::processIriFunctionCall( iriView.remove_suffix(1); if (iriView == "distance") { checkNumArgs("geof:", iriView, 2); - return createExpression( - std::move(argList[0]), std::move(argList[1])); + return sparqlExpression::makeDistExpression(std::move(argList[0]), + std::move(argList[1])); } else if (iriView == "longitude") { checkNumArgs("geof:", iriView, 1); - return createExpression( - std::move(argList[0])); + return sparqlExpression::makeLongitudeExpression(std::move(argList[0])); } else if (iriView == "latitude") { checkNumArgs("geof:", iriView, 1); - return createExpression( - std::move(argList[0])); + return sparqlExpression::makeLatitudeExpression(std::move(argList[0])); } } reportNotSupported(ctx, "Function \"" + iri + "\" is"); @@ -1303,11 +1301,10 @@ ExpressionPtr Visitor::visit(Parser::ConditionalOrExpressionContext* ctx) { auto children = visitVector(ctx->conditionalAndExpression()); AD_CONTRACT_CHECK(!children.empty()); auto result = std::move(children.front()); - using C = sparqlExpression::OrExpression::Children; std::for_each(children.begin() + 1, children.end(), [&result](ExpressionPtr& ptr) { - result = std::make_unique( - C{std::move(result), std::move(ptr)}); + result = 
sparqlExpression::makeOrExpression(std::move(result), + std::move(ptr)); }); result->descriptor() = ctx->getText(); return result; @@ -1318,11 +1315,10 @@ ExpressionPtr Visitor::visit(Parser::ConditionalAndExpressionContext* ctx) { auto children = visitVector(ctx->valueLogical()); AD_CONTRACT_CHECK(!children.empty()); auto result = std::move(children.front()); - using C = sparqlExpression::AndExpression::Children; std::for_each(children.begin() + 1, children.end(), [&result](ExpressionPtr& ptr) { - result = std::make_unique( - C{std::move(result), std::move(ptr)}); + result = sparqlExpression::makeAndExpression( + std::move(result), std::move(ptr)); }); result->descriptor() = ctx->getText(); return result; @@ -1379,11 +1375,11 @@ ExpressionPtr Visitor::visit(Parser::AdditiveExpressionContext* ctx) { visitVector(ctx->multiplicativeExpressionWithSign())) { switch (signAndExpression.operator_) { case Operator::Plus: - result = createExpression( + result = sparqlExpression::makeAddExpression( std::move(result), std::move(signAndExpression.expression_)); break; case Operator::Minus: - result = createExpression( + result = sparqlExpression::makeSubtractExpression( std::move(result), std::move(signAndExpression.expression_)); break; default: @@ -1446,11 +1442,11 @@ Visitor::OperatorAndExpression Visitor::visit( visitVector(ctx->multiplyOrDivideExpression())) { switch (opAndExp.operator_) { case Operator::Multiply: - expression = createExpression( + expression = sparqlExpression::makeMultiplyExpression( std::move(expression), std::move(opAndExp.expression_)); break; case Operator::Divide: - expression = createExpression( + expression = sparqlExpression::makeDivideExpression( std::move(expression), std::move(opAndExp.expression_)); break; default: @@ -1468,11 +1464,11 @@ ExpressionPtr Visitor::visit(Parser::MultiplicativeExpressionContext* ctx) { visitVector(ctx->multiplyOrDivideExpression())) { switch (opAndExp.operator_) { case Operator::Multiply: - result = 
createExpression( + result = sparqlExpression::makeMultiplyExpression( std::move(result), std::move(opAndExp.expression_)); break; case Operator::Divide: - result = createExpression( + result = sparqlExpression::makeDivideExpression( std::move(result), std::move(opAndExp.expression_)); break; default: @@ -1505,11 +1501,9 @@ Visitor::OperatorAndExpression Visitor::visit( ExpressionPtr Visitor::visit(Parser::UnaryExpressionContext* ctx) { auto child = visit(ctx->primaryExpression()); if (ctx->children[0]->getText() == "-") { - return createExpression( - std::move(child)); + return sparqlExpression::makeUnaryMinusExpression(std::move(child)); } else if (ctx->children[0]->getText() == "!") { - return createExpression( - std::move(child)); + return sparqlExpression::makeUnaryNegateExpression(std::move(child)); } else { // no sign or an explicit '+' return child; @@ -1575,24 +1569,34 @@ ExpressionPtr Visitor::visit([[maybe_unused]] Parser::BuiltInCallContext* ctx) { auto functionName = ad_utility::getLowercase(ctx->children[0]->getText()); auto argList = visitVector(ctx->expression()); using namespace sparqlExpression; - // Create the expression using the matching lambda from `NaryExpression.h`. - auto createUnaryExpression = [this, &argList]() { + // Create the expression using the matching factory function from + // `NaryExpression.h`. 
+ auto createUnary = [&argList](Function function) + requires std::is_invocable_r_v { AD_CONTRACT_CHECK(argList.size() == 1); - return createExpression(std::move(argList[0])); + return function(std::move(argList[0])); }; if (functionName == "str") { - return createUnaryExpression.template operator()(); + return createUnary(&makeStrExpression); } else if (functionName == "strlen") { - return createUnaryExpression.template operator()(); + return createUnary(&makeStrlenExpression); } else if (functionName == "year") { - return createUnaryExpression.template operator()(); + return createUnary(&makeYearExpression); } else if (functionName == "month") { - return createUnaryExpression.template operator()(); + return createUnary(&makeMonthExpression); } else if (functionName == "day") { - return createUnaryExpression.template operator()(); + return createUnary(&makeDayExpression); } else if (functionName == "rand") { AD_CONTRACT_CHECK(argList.empty()); return std::make_unique(); + } else if (functionName == "ceil") { + return createUnary(&makeCeilExpression); + } else if (functionName == "abs") { + return createUnary(&makeAbsExpression); + } else if (functionName == "round") { + return createUnary(&makeRoundExpression); + } else if (functionName == "floor") { + return createUnary(&makeFloorExpression); } else { reportError( ctx, diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h index 402d9f7819..089882460e 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.h +++ b/src/parser/sparqlParser/SparqlQleverVisitor.h @@ -452,7 +452,7 @@ class SparqlQleverVisitor { // Process an IRI function call. This is used in both `visitFunctionCall` and // `visitIriOrFunction`. 
- [[nodiscard]] ExpressionPtr processIriFunctionCall( + [[nodiscard]] static ExpressionPtr processIriFunctionCall( const std::string& iri, std::vector argList, antlr4::ParserRuleContext*); diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index abb66a0b4e..6ea0946b3e 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -708,14 +708,14 @@ TEST(GroupBy, GroupedVariableInExpressions) { using namespace sparqlExpression; // Create `Alias` object for `(AVG(?a + ?b) AS ?x)`. - auto sum = make(make(varA), - make(varB)); + auto sum = makeAddExpression(make(varA), + make(varB)); auto avg = make(false, std::move(sum)); auto alias1 = Alias{SparqlExpressionPimpl{std::move(avg), "avg(?a + ?b"}, Variable{"?x"}}; // Create `Alias` object for `(?a + COUNT(?b) AS ?y)`. - auto expr2 = make( + auto expr2 = makeAddExpression( make(varA), make(false, make(varB))); auto alias2 = Alias{SparqlExpressionPimpl{std::move(expr2), "?a + COUNT(?b)"}, @@ -770,14 +770,14 @@ TEST(GroupBy, AliasResultReused) { using namespace sparqlExpression; // Create `Alias` object for `(AVG(?a + ?b) AS ?x)`. - auto sum = make(make(varA), - make(varB)); + auto sum = makeAddExpression(make(varA), + make(varB)); auto avg = make(false, std::move(sum)); auto alias1 = Alias{SparqlExpressionPimpl{std::move(avg), "avg(?a + ?b"}, Variable{"?x"}}; // Create `Alias` object for `(?a + COUNT(?b) AS ?y)`. 
- auto expr2 = make( + auto expr2 = makeAddExpression( make(Variable{"?x"}), make(false, make(varB))); auto alias2 = Alias{SparqlExpressionPimpl{std::move(expr2), "?x + COUNT(?b)"}, diff --git a/test/RegexExpressionTest.cpp b/test/RegexExpressionTest.cpp index 2884d3168d..1baa193dc0 100644 --- a/test/RegexExpressionTest.cpp +++ b/test/RegexExpressionTest.cpp @@ -32,8 +32,7 @@ RegexExpression makeRegexExpression( SparqlExpression::Ptr variableExpression = std::make_unique(Variable{std::move(variable)}); if (childAsStr) { - variableExpression = - std::make_unique(std::move(variableExpression)); + variableExpression = makeStrExpression(std::move(variableExpression)); } auto regexExpression = std::make_unique(lit(regex)); std::optional flagsExpression = std::nullopt; diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp index a3b22dbd5c..a4ba809028 100644 --- a/test/SparqlAntlrParserTest.cpp +++ b/test/SparqlAntlrParserTest.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include "./SparqlExpressionTestHelpers.h" @@ -539,32 +540,6 @@ TEST(SparqlParser, GroupCondition) { "(?test)")); } -TEST(SparqlParser, FunctionCall) { - auto expectFunctionCall = ExpectCompleteParse<&Parser::functionCall>{}; - auto expectFunctionCallFails = ExpectParseFails<&Parser::functionCall>{}; - - // Correct function calls. Check that the parser picks the correct expression. - expectFunctionCall( - "(?a)", - m::ExpressionWithType()); - expectFunctionCall( - "(?a)", - m::ExpressionWithType()); - expectFunctionCall( - "(?a, ?b)", - m::ExpressionWithType()); - - // Wrong number of arguments. - expectFunctionCallFails( - "(?a)"); - // Unknown function with the `geof:` prefix. - expectFunctionCallFails( - "()"); - // Prefix for which no function is known. 
- expectFunctionCallFails( - "()"); -} - TEST(SparqlParser, GroupClause) { expectCompleteParse( parse<&Parser::groupClause>( @@ -1227,8 +1202,6 @@ TEST(SparqlParser, Query) { } // Some helper matchers for the `builtInCall` test below. -// TODO The first of these matchers can probably also be used to -// test the parsing of other expressions more cleanly. namespace builtInCallTestHelpers { // Return a matcher that checks whether a given `SparqlExpression::Ptr` actually // (via `dynamic_cast`) points to an object of type `Expression`, and that this @@ -1241,17 +1214,48 @@ auto matchPtr(Matcher matcher = Matcher{}) } // Return a matcher that checks whether a given `SparqlExpression::Ptr` points -// (via `dynamic_cast`) to an object of type `UnaryExpression` that has a single -// child expression that is the variable `x`. (e.g. "COUNT(?x)" or -// "STRLEN(?x)". -template -auto matchUnaryX() +// (checked via `typeid`) to an object of the same type that a call to the +// `makeFunction` yields. The matcher also checks that the expression's children +// match the `childrenMatchers`. +auto matchNaryWithChildrenMatchers(auto makeFunction, + auto&&... childrenMatchers) + -> ::testing::Matcher { + using namespace sparqlExpression; + auto typeIdLambda = [](const auto& ptr) { + return std::type_index{typeid(*ptr)}; + }; + + auto makeDummyChild = [](auto&&) -> SparqlExpression::Ptr { + return std::make_unique(Variable{"?x"}); + }; + auto expectedTypeIndex = + typeIdLambda(makeFunction(makeDummyChild(childrenMatchers)...)); + ::testing::Matcher typeIdMatcher = + ::testing::ResultOf(typeIdLambda, ::testing::Eq(expectedTypeIndex)); + return ::testing::AllOf(typeIdMatcher, + ::testing::Pointee(AD_PROPERTY( + SparqlExpression, childrenForTesting, + ::testing::ElementsAre(childrenMatchers...)))); +} + +// Return a matcher that checks whether a given `SparqlExpression::Ptr` points +// (checked via `typeid`) to an object of the same type that a call to the +// `makeFunction` yields.
The matcher also checks that the expression's children +// are the `variables`. +auto matchNary(auto makeFunction, + ad_utility::SimilarTo auto&&... variables) -> ::testing::Matcher { using namespace sparqlExpression; - auto varX = matchPtr( - AD_PROPERTY(VariableExpression, value, testing::Eq(Variable("?x")))); - return matchPtr(AD_PROPERTY( - SparqlExpression, childrenForTesting, ::testing::ElementsAre(varX))); + auto variableMatcher = [](const Variable& var) { + return matchPtr( + AD_PROPERTY(VariableExpression, value, testing::Eq(var))); + }; + return matchNaryWithChildrenMatchers(makeFunction, + variableMatcher(variables)...); +} +auto matchUnary(auto makeFunction) + -> ::testing::Matcher { + return matchNary(makeFunction, Variable{"?x"}); } } // namespace builtInCallTestHelpers @@ -1261,10 +1265,15 @@ TEST(SparqlParser, builtInCall) { using namespace builtInCallTestHelpers; auto expectBuiltInCall = ExpectCompleteParse<&Parser::builtInCall>{}; auto expectFails = ExpectParseFails<&Parser::builtInCall>{}; - expectBuiltInCall("StrLEN(?x)", matchUnaryX()); - expectBuiltInCall("year(?x)", matchUnaryX()); - expectBuiltInCall("month(?x)", matchUnaryX()); - expectBuiltInCall("day(?x)", matchUnaryX()); + expectBuiltInCall("StrLEN(?x)", matchUnary(&makeStrlenExpression)); + expectBuiltInCall("StR(?x)", matchUnary(&makeStrExpression)); + expectBuiltInCall("year(?x)", matchUnary(&makeYearExpression)); + expectBuiltInCall("month(?x)", matchUnary(&makeMonthExpression)); + expectBuiltInCall("day(?x)", matchUnary(&makeDayExpression)); + expectBuiltInCall("abs(?x)", matchUnary(&makeAbsExpression)); + expectBuiltInCall("ceil(?x)", matchUnary(&makeCeilExpression)); + expectBuiltInCall("floor(?x)", matchUnary(&makeFloorExpression)); + expectBuiltInCall("round(?x)", matchUnary(&makeRoundExpression)); expectBuiltInCall("RAND()", matchPtr()); // The following three cases delegate to a separate parsing function, so we @@ -1274,3 +1283,129 @@ TEST(SparqlParser, builtInCall) { 
expectBuiltInCall("LANG(?x)", matchPtr()); expectFails("SHA512(?x)"); } + +TEST(SparqlParser, unaryExpression) { + using namespace sparqlExpression; + using namespace builtInCallTestHelpers; + auto expectUnary = ExpectCompleteParse<&Parser::unaryExpression>{}; + + expectUnary("-?x", matchUnary(&makeUnaryMinusExpression)); + expectUnary("!?x", matchUnary(&makeUnaryNegateExpression)); +} + +TEST(SparqlParser, multiplicativeExpression) { + using namespace sparqlExpression; + using namespace builtInCallTestHelpers; + Variable x{"?x"}; + Variable y{"?y"}; + Variable z{"?z"}; + auto expectMultiplicative = + ExpectCompleteParse<&Parser::multiplicativeExpression>{}; + expectMultiplicative("?x * ?y", matchNary(&makeMultiplyExpression, x, y)); + expectMultiplicative("?y / ?x", matchNary(&makeDivideExpression, y, x)); + expectMultiplicative( + "?z * ?y / abs(?x)", + matchNaryWithChildrenMatchers(&makeDivideExpression, + matchNary(&makeMultiplyExpression, z, y), + matchUnary(&makeAbsExpression))); + expectMultiplicative( + "?y / ?z * abs(?x)", + matchNaryWithChildrenMatchers(&makeMultiplyExpression, + matchNary(&makeDivideExpression, y, z), + matchUnary(&makeAbsExpression))); +} + +// Return a matcher for an `OperatorAndExpression`. 
+::testing::Matcher +matchOperatorAndExpression( + SparqlQleverVisitor::Operator op, + const ::testing::Matcher& + expressionMatcher) { + using OpAndExp = SparqlQleverVisitor::OperatorAndExpression; + return ::testing::AllOf(AD_FIELD(OpAndExp, operator_, ::testing::Eq(op)), + AD_FIELD(OpAndExp, expression_, expressionMatcher)); +} + +TEST(SparqlParser, multiplicativeExpressionLeadingSignButNoSpaceContext) { + using namespace sparqlExpression; + using namespace builtInCallTestHelpers; + Variable x{"?x"}; + Variable y{"?y"}; + Variable z{"?z"}; + using Op = SparqlQleverVisitor::Operator; + auto expectMultiplicative = ExpectCompleteParse< + &Parser::multiplicativeExpressionWithLeadingSignButNoSpace>{}; + auto matchVariableExpression = [](Variable var) { + return matchPtr( + AD_PROPERTY(VariableExpression, value, ::testing::Eq(var))); + }; + auto matchIdExpression = [](Id id) { + return matchPtr( + AD_PROPERTY(IdExpression, value, ::testing::Eq(id))); + }; + + expectMultiplicative("-3 * ?y", + matchOperatorAndExpression( + Op::Minus, matchNaryWithChildrenMatchers( + &makeMultiplyExpression, + matchIdExpression(Id::makeFromInt(3)), + matchVariableExpression(y)))); + expectMultiplicative( + "-3.7 / ?y", + matchOperatorAndExpression( + Op::Minus, + matchNaryWithChildrenMatchers( + &makeDivideExpression, matchIdExpression(Id::makeFromDouble(3.7)), + matchVariableExpression(y)))); + + expectMultiplicative("+5 * ?y", + matchOperatorAndExpression( + Op::Plus, matchNaryWithChildrenMatchers( + &makeMultiplyExpression, + matchIdExpression(Id::makeFromInt(5)), + matchVariableExpression(y)))); + expectMultiplicative( + "+3.9 / ?y", matchOperatorAndExpression( + Op::Plus, matchNaryWithChildrenMatchers( + &makeDivideExpression, + matchIdExpression(Id::makeFromDouble(3.9)), + matchVariableExpression(y)))); + expectMultiplicative( + "-3.2 / abs(?x) * ?y", + matchOperatorAndExpression( + Op::Minus, matchNaryWithChildrenMatchers( + &makeMultiplyExpression, + 
matchNaryWithChildrenMatchers( + &makeDivideExpression, + matchIdExpression(Id::makeFromDouble(3.2)), + matchUnary(&makeAbsExpression)), + matchVariableExpression(y)))); +} + +TEST(SparqlParser, FunctionCall) { + using namespace sparqlExpression; + using namespace builtInCallTestHelpers; + auto expectFunctionCall = ExpectCompleteParse<&Parser::functionCall>{}; + auto expectFunctionCallFails = ExpectParseFails<&Parser::functionCall>{}; + + // Correct function calls. Check that the parser picks the correct expression. + expectFunctionCall( + "(?x)", + matchUnary(&makeLatitudeExpression)); + expectFunctionCall( + "(?x)", + matchUnary(&makeLongitudeExpression)); + expectFunctionCall( + "(?a, ?b)", + matchNary(&makeDistExpression, Variable{"?a"}, Variable{"?b"})); + + // Wrong number of arguments. + expectFunctionCallFails( + "(?a)"); + // Unknown function with the `geof:` prefix. + expectFunctionCallFails( + "()"); + // Prefix for which no function is known. + expectFunctionCallFails( + "()"); +} diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index 2acb727e11..98dd81c01d 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -35,6 +35,9 @@ auto I = ad_utility::testing::IntId; auto Voc = ad_utility::testing::VocabId; auto U = Id::makeUndefined(); +using Ids = std::vector; +using Strings = std::vector; + // Test allocator (the inputs to our `SparqlExpression`s are // `VectorWithMemoryLimit`s, and these require an `AllocatorWithLimit`). // @@ -90,8 +93,8 @@ auto checkResultsEqual = []( // Assert that the given `NaryExpression` with the given `operands` has the // `expected` result. -template -auto testNaryExpression = [](SingleExpressionResult auto& expected, +auto testNaryExpression = [](auto&& makeExpression, + SingleExpressionResult auto&& expected, SingleExpressionResult auto&&... 
operands) { ad_utility::AllocatorWithLimit alloc{ ad_utility::makeAllocationMemoryLeftThreadsafeObject(1000)}; @@ -125,7 +128,8 @@ auto testNaryExpression = [](SingleExpressionResult auto& expected, std::array children{ std::make_unique(ExpressionResult{clone(operands)})...}; - auto expression = NaryExpression{std::move(children)}; + auto expressionPtr = std::apply(makeExpression, std::move(children)); + auto& expression = *expressionPtr; ExpressionResult result = expression.evaluate(&context); @@ -137,32 +141,35 @@ auto testNaryExpression = [](SingleExpressionResult auto& expected, // Assert that the given commutative binary expression has the `expected` result // in both orders of the operands `op1` and `op2`. -template auto testBinaryExpressionCommutative = - [](const SingleExpressionResult auto& expected, + [](auto makeFunction, const SingleExpressionResult auto& expected, const SingleExpressionResult auto& op1, const SingleExpressionResult auto& op2, source_location l = source_location::current()) { auto t = generateLocationTrace(l); - testNaryExpression(expected, op1, op2); - testNaryExpression(expected, op2, op1); + testNaryExpression(makeFunction, expected, op1, op2); + testNaryExpression(makeFunction, expected, op2, op1); }; -template -auto testBinaryExpression = [](const SingleExpressionResult auto& expected, +auto testBinaryExpression = [](auto makeExpression, + const SingleExpressionResult auto& expected, const SingleExpressionResult auto& op1, const SingleExpressionResult auto& op2, source_location l = source_location::current()) { auto t = generateLocationTrace(l); - testNaryExpression(expected, op1, op2); + testNaryExpression(makeExpression, expected, op1, op2); }; -auto testOr = testBinaryExpressionCommutative; -auto testAnd = testBinaryExpressionCommutative; -auto testPlus = testBinaryExpressionCommutative; -auto testMultiply = testBinaryExpressionCommutative; -auto testMinus = testBinaryExpression; -auto testDivide = testBinaryExpression; +auto 
testOr = + std::bind_front(testBinaryExpressionCommutative, &makeOrExpression); +auto testAnd = + std::bind_front(testBinaryExpressionCommutative, &makeAndExpression); +auto testPlus = + std::bind_front(testBinaryExpressionCommutative, &makeAddExpression); +auto testMultiply = + std::bind_front(testBinaryExpressionCommutative, &makeMultiplyExpression); +auto testMinus = std::bind_front(testBinaryExpression, &makeSubtractExpression); +auto testDivide = std::bind_front(testBinaryExpression, &makeDivideExpression); } // namespace // Test `AndExpression` and `OrExpression`. @@ -210,6 +217,9 @@ TEST(SparqlExpression, logicalOperators) { testOr(dOrS, d, s); testOr(sOrI, i, s); + using S = ad_utility::SetOfIntervals; + testOr(S{{{0, 6}}}, S{{{0, 4}}}, S{{{3, 6}}}); + testAnd(b, b, allTrue); testAnd(dAsBool, d, allTrue); testAnd(allFalse, b, allFalse); @@ -219,6 +229,7 @@ TEST(SparqlExpression, logicalOperators) { testAnd(dAndI, d, i); testAnd(dAndS, d, s); testAnd(sAndI, s, i); + testAnd(S{{{3, 4}}}, S{{{0, 4}}}, S{{{3, 6}}}); testOr(allTrue, b, B(true)); testOr(b, b, B(false)); @@ -324,26 +335,26 @@ TEST(SparqlExpression, arithmeticOperators) { // // TODO: The tests above could also be simplified (and made much more readable) // in this vein. 
-template -auto testUnaryExpression = [](std::vector&& operand, - std::vector&& expected, - source_location l = source_location::current()) { - auto trace = generateLocationTrace(l); - V operandV{std::make_move_iterator(operand.begin()), - std::make_move_iterator(operand.end()), alloc}; - V expectedV{std::make_move_iterator(expected.begin()), - std::make_move_iterator(expected.end()), alloc}; - testNaryExpression(expectedV, operandV); -}; +auto testUnaryExpression = + []( + auto makeFunction, std::vector operand, + std::vector expected, + source_location l = source_location::current()) { + auto trace = generateLocationTrace(l); + V operandV{std::make_move_iterator(operand.begin()), + std::make_move_iterator(operand.end()), alloc}; + V expectedV{std::make_move_iterator(expected.begin()), + std::make_move_iterator(expected.end()), alloc}; + testNaryExpression(makeFunction, expectedV, operandV); + }; // Test `YearExpression`, `MonthExpression`, and `DayExpression`. TEST(SparqlExpression, dateOperators) { // Helper function that asserts that the date operators give the expected // result on the given date. 
- auto checkYear = testUnaryExpression; - auto checkMonth = testUnaryExpression; - auto checkDay = testUnaryExpression; + auto checkYear = std::bind_front(testUnaryExpression, &makeYearExpression); + auto checkMonth = std::bind_front(testUnaryExpression, &makeMonthExpression); + auto checkDay = std::bind_front(testUnaryExpression, &makeDayExpression); auto check = [&checkYear, &checkMonth, &checkDay]( const DateOrLargeYear& date, std::optional expectedYear, std::optional expectedMonth, @@ -357,9 +368,9 @@ TEST(SparqlExpression, dateOperators) { return Id::makeUndefined(); } }; - checkYear({Id::makeFromDate(date)}, {optToId(expectedYear)}); - checkMonth({Id::makeFromDate(date)}, {optToId(expectedMonth)}); - checkDay({Id::makeFromDate(date)}, {optToId(expectedDay)}); + checkYear(Ids{Id::makeFromDate(date)}, Ids{optToId(expectedYear)}); + checkMonth(Ids{Id::makeFromDate(date)}, Ids{optToId(expectedMonth)}); + checkDay(Ids{Id::makeFromDate(date)}, Ids{optToId(expectedDay)}); }; using D = DateOrLargeYear; @@ -383,70 +394,89 @@ TEST(SparqlExpression, dateOperators) { check(D::parseXsdDate("-12345-03-04"), -12345, 1, 1); // Invalid inputs for date expressions. 
- checkYear({Id::makeFromInt(42)}, {Id::makeUndefined()}); - checkMonth({Id::makeFromInt(42)}, {Id::makeUndefined()}); - checkDay({Id::makeFromInt(42)}, {Id::makeUndefined()}); - testUnaryExpression({Id::makeFromDouble(42.0)}, - {Id::makeUndefined()}); - testUnaryExpression({Id::makeFromBool(false)}, - {Id::makeUndefined()}); - testUnaryExpression({"noDate"}, - {Id::makeUndefined()}); + checkYear(Ids{Id::makeFromInt(42)}, Ids{Id::makeUndefined()}); + checkMonth(Ids{Id::makeFromInt(42)}, Ids{Id::makeUndefined()}); + checkDay(Ids{Id::makeFromInt(42)}, Ids{Id::makeUndefined()}); + auto testYear = std::bind_front(testUnaryExpression, &makeYearExpression); + testYear(Ids{Id::makeFromDouble(42.0)}, Ids{U}); + testYear(Ids{Id::makeFromBool(false)}, Ids{U}); + testYear(Strings{"noDate"}, Ids{U}); } // Test `StrlenExpression` and `StrExpression`. -auto checkStrlen = testUnaryExpression; -template -auto checkStr = [](std::vector&& operand, - std::vector&& expected) { - testUnaryExpression( - std::move(operand), std::move(expected)); -}; +auto checkStrlen = std::bind_front(testUnaryExpression, &makeStrlenExpression); +auto checkStr = std::bind_front(testUnaryExpression, &makeStrExpression); TEST(SparqlExpression, stringOperators) { - checkStrlen({"one", "two", "three", ""}, {I(3), I(3), I(5), I(0)}); - checkStr({I(1), I(2), I(3)}, {"1", "2", "3"}); - checkStr({D(-1.0), D(1.0), D(2.34)}, {"-1", "1", "2.34"}); - checkStr({B(true), B(false), B(true)}, {"true", "false", "true"}); - checkStr({"one", "two", "three"}, {"one", "two", "three"}); + checkStrlen(Strings{"one", "two", "three", ""}, Ids{I(3), I(3), I(5), I(0)}); + checkStr(Ids{I(1), I(2), I(3)}, Strings{"1", "2", "3"}); + checkStr(Ids{D(-1.0), D(1.0), D(2.34)}, Strings{"-1", "1", "2.34"}); + checkStr(Ids{B(true), B(false), B(true)}, Strings{"true", "false", "true"}); + checkStr(Strings{"one", "two", "three"}, Strings{"one", "two", "three"}); } // 
_____________________________________________________________________________________ TEST(SparqlExpression, unaryNegate) { - auto checkNegate = testUnaryExpression; - auto checkNegateStr = - testUnaryExpression; + auto checkNegate = + std::bind_front(testUnaryExpression, &makeUnaryNegateExpression); // Zero and NaN are considered to be false, so their negation is true checkNegate( - {B(true), B(false), I(0), I(3), D(0), D(12), D(naN), U}, - {B(false), B(true), B(true), B(false), B(true), B(false), B(true), U}); + Ids{B(true), B(false), I(0), I(3), D(0), D(12), D(naN), U}, + Ids{B(false), B(true), B(true), B(false), B(true), B(false), B(true), U}); // Empty strings are considered to be true. - checkNegateStr({"true", "false", "", "blibb"}, - {B(false), B(false), B(true), B(false)}); - - // Complete the test coverage for normally unreachable code. - ASSERT_ANY_THROW(sparqlExpression::detail::unaryNegate( - static_cast(42))); + checkNegate(Strings{"true", "false", "", "blibb"}, + Ids{B(false), B(false), B(true), B(false)}); } // _____________________________________________________________________________________ TEST(SparqlExpression, unaryMinus) { - auto checkMinus = testUnaryExpression; - auto checkMinusStr = - testUnaryExpression; - checkMinus({B(true), B(false), I(0), I(3), D(0), D(12.8), D(naN), U, Voc(6)}, - {I(-1), I(0), I(0), I(-3), D(-0.0), D(-12.8), D(-naN), U, U}); - checkMinusStr({"true", "false", "", ""}, {U, U, U, U}); + auto checkMinus = + std::bind_front(testUnaryExpression, &makeUnaryMinusExpression); + // Booleans act as the integers 1 and 0; non-numeric inputs yield `U`. + checkMinus( + Ids{B(true), B(false), I(0), I(3), D(0), D(12.8), D(naN), U, Voc(6)}, + Ids{I(-1), I(0), I(0), I(-3), D(-0.0), D(-12.8), D(-naN), U, U}); + checkMinus(Strings{"true", "false", "", ""}, Ids{U, U, U, U}); +} + +TEST(SparqlExpression, ceilFloorAbsRound) { + auto bindUnary = [](auto f) { + return std::bind_front(testUnaryExpression, f); + }; + auto checkFloor =
bindUnary(&makeFloorExpression); + auto checkAbs = bindUnary(&makeAbsExpression); + auto checkRound = bindUnary(&makeRoundExpression); + auto checkCeil = bindUnary(&makeCeilExpression); + + std::vector input{B(true), B(false), I(-3), I(0), I(3), + D(-13.6), D(-0.5), D(-0.0), D(0.0), D(0.5), + D(1.8), Voc(6), U}; + std::vector abs{I(1), I(0), I(3), I(0), I(3), D(13.6), D(0.5), + D(0.0), D(0.0), D(0.5), D(1.8), U, U}; + std::vector floor{I(1), I(0), I(-3), I(0), I(3), + D(-14.0), D(-1.0), D(-0.0), D(0.0), D(0.0), + D(1.0), U, U}; + std::vector ceil{I(1), I(0), I(-3), I(0), I(3), D(-13.0), D(-0.0), + D(-0.0), D(0.0), D(1.0), D(2.0), U, U}; + std::vector round{I(1), I(0), I(-3), I(0), I(3), + D(-14.0), D(-0.0), D(-0.0), D(0.0), D(1.0), + D(2.0), U, U}; + + checkAbs(input, abs); + checkFloor(input, floor); + checkCeil(input, ceil); + checkRound(input, round); } // ________________________________________________________________________________________ TEST(SparqlExpression, geoSparqlExpressions) { - auto checkLat = testUnaryExpression; - auto checkLong = testUnaryExpression; - auto checkDist = testBinaryExpressionCommutative; + auto checkLat = std::bind_front(testUnaryExpression, &makeLatitudeExpression); + auto checkLong = + std::bind_front(testUnaryExpression, &makeLongitudeExpression); + auto checkDist = std::bind_front(testNaryExpression, &makeDistExpression); - checkLat({"POINT(24.3 26.8)", "NotAPoint"}, {D(26.8), U}); - checkLong({"POINT(24.3 26.8)", "NotAPoint"}, {D(24.3), U}); + checkLat(Strings{"POINT(24.3 26.8)", "NotAPoint"}, Ids{D(26.8), U}); + checkLong(Strings{"POINT(24.3 26.8)", "NotAPoint"}, Ids{D(24.3), U}); checkDist(D(0.0), "POINT(24.3 26.8)"s, "POINT(24.3 26.8)"s); checkDist(U, "POINT(24.3 26.8)"s, "NotAPoint"s); checkDist(U, "NotAPoint"s, "POINT(24.3 26.8)"s);