Skip to content

Commit

Permalink
implement built-in function call/xsd:datatype() conversion to xsd:dat…
Browse files Browse the repository at this point in the history
…eTime/date
  • Loading branch information
realHannes committed Feb 20, 2025
1 parent caaf76c commit 87a1b72
Show file tree
Hide file tree
Showing 9 changed files with 209 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/engine/sparqlExpressions/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ add_library(sparqlExpressions
ConditionalExpressions.cpp
SparqlExpressionTypes.cpp
SparqlExpression.cpp
ConvertToNumericExpression.cpp
ConvertToDtypeConstructor.cpp
RdfTermExpressions.cpp
LangExpression.cpp
CountStarExpression.cpp
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2024, University of Freiburg,
// Copyright 2024 - 2025, University of Freiburg,
// Chair of Algorithms and Data Structures
// Author: Hannes Baumann <[email protected]>

Expand All @@ -7,7 +7,28 @@

#include "engine/sparqlExpressions/NaryExpressionImpl.h"

/*
The SparqlExpressions specified in the following namespace sections enable
xsd:datatype casting/mapping for XML-schema-datatype values.
For more details regarding the casting/mapping definition see:
https://www.w3.org/TR/sparql11-query/#FunctionMapping
EXAMPLES
(1) BIND(xsd:dateTime(?var) as ?dateTimeValue) will try to convert the
date-time provided (bound to ?var) as an xsd:string value to an actual
xsd:dateTime value and bind it to ?dateTimeValue under the condition the
string was appropriately formatted.
(2) BIND(xsd:integer(?var) as ?integerValue) attempts to convert ?var to an
xsd:integer value and bind it to variable ?integerValue, given the datatype
casting can be successfully performed.
*/

namespace sparqlExpression {

//______________________________________________________________________________
// CONVERT TO NUMERIC
namespace detail::to_numeric {

// class that converts an input `int64_t`, `double` or `std::string`
Expand All @@ -16,7 +37,7 @@ CPP_template(typename T, bool AllowExponentialNotation = true)(
requires(concepts::same_as<int64_t, T> ||
concepts::same_as<double, T>)) class ToNumericImpl {
private:
Id getFromString(const std::string& input) const {
ValueId getFromString(const std::string& input) const {
auto str = absl::StripAsciiWhitespace(input);
// Abseil and the standard library don't match leading + signs, so we skip
// them.
Expand Down Expand Up @@ -57,7 +78,7 @@ CPP_template(typename T, bool AllowExponentialNotation = true)(
};

public:
Id operator()(IntDoubleStr value) const {
ValueId operator()(IntDoubleStr value) const {
if (std::holds_alternative<std::string>(value)) {
return getFromString(std::get<std::string>(value));
} else if (std::holds_alternative<int64_t>(value)) {
Expand All @@ -77,6 +98,8 @@ using ToDecimal =
NARY<1, FV<ToNumericImpl<double, false>, ToNumericValueGetter>>;
} // namespace detail::to_numeric

//______________________________________________________________________________
// CONVERT TO BOOLEAN
namespace detail::to_boolean {
class ToBooleanImpl {
public:
Expand All @@ -103,8 +126,55 @@ class ToBooleanImpl {
using ToBoolean = NARY<1, FV<ToBooleanImpl, ToNumericValueGetter>>;
} // namespace detail::to_boolean

//______________________________________________________________________________
// CONVERT TO DATE(TIME)
namespace detail::to_datetime {

// Conversion function behind the `xsd:dateTime` / `xsd:date` casts. It maps
// the `OptIdOrString` produced by the value-getter to a `ValueId`:
// with `ToJustXsdDate == true` string inputs are parsed as `xsd:date`,
// otherwise as `xsd:dateTime`. Whenever no conversion is possible the result
// is `Id::makeUndefined()`.
template <bool ToJustXsdDate>
inline auto convertStringToDateTimeValueId =
    [](OptIdOrString input) -> ValueId {
  // The value-getter did not yield anything for this input.
  if (!input.has_value()) {
    return Id::makeUndefined();
  }
  const auto& idOrString = input.value();

  // The input already is a `ValueId`: hand it back unchanged if its date
  // payload reports `isDate()`, otherwise the result is undefined.
  // NOTE(review): this branch does not depend on `ToJustXsdDate` - confirm
  // that passing the id through unchanged is intended for both casts.
  if (const auto* id = std::get_if<ValueId>(&idOrString)) {
    if (id->getDate().isDate()) {
      return *id;
    }
    return Id::makeUndefined();
  }

  // The only remaining alternative of the variant is the string.
  const auto* literal = std::get_if<std::string>(&idOrString);
  AD_CORRECTNESS_CHECK(literal != nullptr);

  // Remark: If parsing the string with `parseXsdDatetimeGetOptDate` /
  // `parseXsdDateGetOptDate` fails (empty optional), the result is
  // `Id::makeUndefined()` as well.
  const auto toValueId = [](std::optional<DateYearOrDuration> parsed) {
    return parsed.has_value() ? Id::makeFromDate(parsed.value())
                              : Id::makeUndefined();
  };

  if constexpr (ToJustXsdDate) {
    return toValueId(DateYearOrDuration::parseXsdDateGetOptDate(*literal));
  } else {
    return toValueId(
        DateYearOrDuration::parseXsdDatetimeGetOptDate(*literal));
  }
};

// Unary expressions for the casts to `xsd:dateTime` (`ToXsdDateTime`) and
// `xsd:date` (`ToXsdDate`). Both obtain their argument through
// `DateIdOrLiteralValueGetter` and differ only in the `ToJustXsdDate` flag
// passed to `convertStringToDateTimeValueId`.
NARY_EXPRESSION(ToXsdDateTime, 1,
FV<decltype(convertStringToDateTimeValueId<false>),
DateIdOrLiteralValueGetter>);
NARY_EXPRESSION(ToXsdDate, 1,
FV<decltype(convertStringToDateTimeValueId<true>),
DateIdOrLiteralValueGetter>);

} // namespace detail::to_datetime

using namespace detail::to_numeric;
using namespace detail::to_boolean;
using namespace detail::to_datetime;
using Expr = SparqlExpression::Ptr;

Expr makeConvertToIntExpression(Expr child) {
Expand All @@ -122,4 +192,13 @@ Expr makeConvertToDecimalExpression(Expr child) {
// Create a `ToBoolean` expression that takes ownership of `child` as its
// single argument.
Expr makeConvertToBooleanExpression(Expr child) {
return std::make_unique<ToBoolean>(std::move(child));
}

// Create a `ToXsdDateTime` expression that takes ownership of `child` as its
// single argument.
Expr makeConvertToDateTimeExpression(Expr child) {
return std::make_unique<ToXsdDateTime>(std::move(child));
}

// Create a `ToXsdDate` expression that takes ownership of `child` as its
// single argument.
Expr makeConvertToDateExpression(Expr child) {
return std::make_unique<ToXsdDate>(std::move(child));
}

} // namespace sparqlExpression
5 changes: 4 additions & 1 deletion src/engine/sparqlExpressions/NaryExpression.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,17 @@ SparqlExpression::Ptr makeIfExpression(SparqlExpression::Ptr child1,
SparqlExpression::Ptr child2,
SparqlExpression::Ptr child3);

// Implemented in ConvertToNumeric.cpp
// Implemented in ConvertToDtypeConstructor.cpp
SparqlExpression::Ptr makeConvertToIntExpression(SparqlExpression::Ptr child);
SparqlExpression::Ptr makeConvertToDoubleExpression(
SparqlExpression::Ptr child);
SparqlExpression::Ptr makeConvertToDecimalExpression(
SparqlExpression::Ptr child);
SparqlExpression::Ptr makeConvertToBooleanExpression(
SparqlExpression::Ptr child);
// Factories for the expressions that cast their single argument `child` to
// a dateTime value and to a date value, respectively.
SparqlExpression::Ptr makeConvertToDateTimeExpression(
SparqlExpression::Ptr child);
SparqlExpression::Ptr makeConvertToDateExpression(SparqlExpression::Ptr child);

// Implemented in RdfTermExpressions.cpp
SparqlExpression::Ptr makeDatatypeExpression(SparqlExpression::Ptr child);
Expand Down
21 changes: 21 additions & 0 deletions src/engine/sparqlExpressions/SparqlExpressionValueGetters.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,4 +350,25 @@ struct IriOrUriValueGetter : Mixin<IriOrUriValueGetter> {
const EvaluationContext* context) const;
};

// Result type of the value-getter `DateIdOrLiteralValueGetter`: either empty
// or holding a `ValueId` or a `std::string`.
using OptIdOrString = std::optional<std::variant<ValueId, std::string>>;

// Value-getter that forwards a `ValueId` with datatype `Date` as it is and
// delegates every other input to `LiteralFromIdGetter`.
struct DateIdOrLiteralValueGetter : Mixin<DateIdOrLiteralValueGetter> {
  using Mixin<DateIdOrLiteralValueGetter>::operator();

  // Remark: Only `LiteralFromIdGetter` is used, because Iri values should
  // never contain date-related string values.
  OptIdOrString operator()(ValueId id, const EvaluationContext* context) const {
    // Everything that is not a `Date` id goes through the literal getter.
    if (id.getDatatype() != Datatype::Date) {
      return LiteralFromIdGetter{}(id, context);
    }
    return id;
  }

  // A `LiteralOrIri` is always handed to `LiteralFromIdGetter`.
  OptIdOrString operator()(const LiteralOrIri& litOrIri,
                           const EvaluationContext* context) const {
    return LiteralFromIdGetter{}(litOrIri, context);
  }
};

} // namespace sparqlExpression::detail
10 changes: 10 additions & 0 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,16 @@ ExpressionPtr Visitor::processIriFunctionCall(
return sparqlExpression::makeConvertToStringExpression(
std::move(argList[0]));
}
if (functionName == "dateTime") {
checkNumArgs(1);
return sparqlExpression::makeConvertToDateTimeExpression(
std::move(argList[0]));
}
if (functionName == "date") {
checkNumArgs(1);
return sparqlExpression::makeConvertToDateExpression(
std::move(argList[0]));
}
}

// QLever-internal functions.
Expand Down
42 changes: 36 additions & 6 deletions src/util/DateYearDuration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,13 @@ static DateYearOrDuration makeDateOrLargeYear(std::string_view fullInput,
}

// _____________________________________________________________________________
DateYearOrDuration DateYearOrDuration::parseXsdDatetime(
static std::optional<DateYearOrDuration> parseXsdDatetimeImpl(
std::string_view dateString) {
constexpr static ctll::fixed_string dateTime =
dateRegex + "T" + timeRegex + grp(timeZoneRegex) + "?";
auto match = ctre::match<dateTime>(dateString);
if (!match) {
throw DateParseException{absl::StrCat(
"The value ", dateString, " cannot be parsed as an `xsd:dateTime`.")};
return std::nullopt;
}
int64_t year = match.template get<"year">().to_number<int64_t>();
int month = match.template get<"month">().to_number();
Expand All @@ -150,14 +149,29 @@ DateYearOrDuration DateYearOrDuration::parseXsdDatetime(
}

// _____________________________________________________________________________
DateYearOrDuration DateYearOrDuration::parseXsdDate(
// Throwing front end of `parseXsdDatetimeImpl`: returns the parsed value, or
// throws a `DateParseException` if the implementation yields no value.
DateYearOrDuration DateYearOrDuration::parseXsdDatetime(
    std::string_view dateString) {
  const auto parsed = parseXsdDatetimeImpl(dateString);
  if (!parsed.has_value()) {
    throw DateParseException{absl::StrCat(
        "The value ", dateString, " cannot be parsed as an `xsd:dateTime`.")};
  }
  return parsed.value();
}

// _____________________________________________________________________________
// Returns the optional result of `parseXsdDatetimeImpl` as is, i.e. without
// the `DateParseException` that `parseXsdDatetime` throws for an empty result.
std::optional<DateYearOrDuration>
DateYearOrDuration::parseXsdDatetimeGetOptDate(std::string_view dateString) {
return parseXsdDatetimeImpl(dateString);
}

// _____________________________________________________________________________
static std::optional<DateYearOrDuration> parseXsdDateImpl(
std::string_view dateString) {
constexpr static ctll::fixed_string dateTime =
dateRegex + grp(timeZoneRegex) + "?";
auto match = ctre::match<dateTime>(dateString);
if (!match) {
throw DateParseException{absl::StrCat(
"The value ", dateString, " cannot be parsed as an `xsd:date`.")};
return std::nullopt;
}
int64_t year = match.template get<"year">().to_number<int64_t>();
int month = match.template get<"month">().to_number();
Expand All @@ -166,6 +180,22 @@ DateYearOrDuration DateYearOrDuration::parseXsdDate(
parseTimeZone(match));
}

// _____________________________________________________________________________
// Throwing front end of `parseXsdDateImpl`: returns the parsed value, or
// throws a `DateParseException` if the implementation yields no value.
DateYearOrDuration DateYearOrDuration::parseXsdDate(
    std::string_view dateString) {
  const auto parsed = parseXsdDateImpl(dateString);
  if (!parsed.has_value()) {
    throw DateParseException{absl::StrCat(
        "The value ", dateString, " cannot be parsed as an `xsd:date`.")};
  }
  return parsed.value();
}

// _____________________________________________________________________________
// Returns the optional result of `parseXsdDateImpl` as is, i.e. without the
// `DateParseException` that `parseXsdDate` throws for an empty result.
std::optional<DateYearOrDuration> DateYearOrDuration::parseXsdDateGetOptDate(
std::string_view dateString) {
return parseXsdDateImpl(dateString);
}

// _____________________________________________________________________________
DateYearOrDuration DateYearOrDuration::parseGYear(std::string_view dateString) {
constexpr static ctll::fixed_string yearRegex = "(?<year>-?\\d{4,})";
Expand Down
26 changes: 19 additions & 7 deletions src/util/DateYearDuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,19 +154,31 @@ class DateYearOrDuration {
// 2. If the year is outside the range [-9999, 9999], then the date must be
// January 1, 00:00 hours.

// Parse from xsd:dateTime (e.g. 1900-12-13T03:12:00.33Z)
// Parse from `xsd:dateTime` (e.g. `1900-12-13T03:12:00.33Z`)
static DateYearOrDuration parseXsdDatetime(std::string_view dateString);

// Parse from xsd:date (e.g. 1900-12-13)
// Parse from `xsd:dateTime` (e.g. `1900-12-13T03:12:00.33Z`). Returns a
// `DateYearOrDuration` value under the condition that `dateString` adheres to
// the correct datetime string format (is parsable). If the parsing procedure
// fails `std::nullopt` is returned.
static std::optional<DateYearOrDuration> parseXsdDatetimeGetOptDate(
std::string_view dateString);

// Parse from `xsd:date` (e.g. `1900-12-13`)
static DateYearOrDuration parseXsdDate(std::string_view dateString);

// Parse from xsd:gYearMonth (e.g. 1900-03)
// Parse from `xsd:date` (e.g. `1900-12-13`). Returns a `DateYearOrDuration`
// value under the condition that `dateString` adheres to the correct date
// string format (is parsable). If the parsing procedure fails `std::nullopt`
// is returned.
static std::optional<DateYearOrDuration> parseXsdDateGetOptDate(
std::string_view dateString);

// Parse from `xsd:gYearMonth` (e.g. `1900-03`)
static DateYearOrDuration parseGYearMonth(std::string_view dateString);

// Parse from xsd:gYear (e.g. 1900)
// Parse from `xsd:gYear` (e.g. `1900`)
static DateYearOrDuration parseGYear(std::string_view dateString);

// Parse from xsd:dayTimeDuration (e.g. P2DT3H59M59.99S)
// Parse from `xsd:dayTimeDuration` (e.g. `P2DT3H59M59.99S`)
static DateYearOrDuration parseXsdDayTimeDuration(
std::string_view dayTimeDurationString);

Expand Down
6 changes: 6 additions & 0 deletions test/SparqlAntlrParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1862,13 +1862,19 @@ TEST(SparqlParser, FunctionCall) {
matchUnary(&makeConvertToDecimalExpression));
expectFunctionCall(absl::StrCat(xsd, "boolean>(?x)"),
matchUnary(&makeConvertToBooleanExpression));
expectFunctionCall(absl::StrCat(xsd, "date>(?x)"),
matchUnary(&makeConvertToDateExpression));
expectFunctionCall(absl::StrCat(xsd, "dateTime>(?x)"),
matchUnary(&makeConvertToDateTimeExpression));

expectFunctionCall(absl::StrCat(xsd, "string>(?x)"),
matchUnary(&makeConvertToStringExpression));

// Wrong number of arguments.
expectFunctionCallFails(absl::StrCat(geof, "distance>(?a)"));
expectFunctionCallFails(absl::StrCat(geof, "distance>(?a, ?b, ?c)"));
expectFunctionCallFails(absl::StrCat(xsd, "date>(?varYear, ?varMonth)"));
expectFunctionCallFails(absl::StrCat(xsd, "dateTime>(?varYear, ?varMonth)"));

// Unknown function with `geof:`, `math:`, `xsd:`, or `ql` prefix.
expectFunctionCallFails(absl::StrCat(geof, "nada>(?x)"));
Expand Down
30 changes: 30 additions & 0 deletions test/SparqlExpressionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,36 @@ TEST(SparqlExpression, testToNumericExpression) {
Ids{U, U, I(1), I(0), U, I(1), I(-33)});
}

// ____________________________________________________________________________
// Checks the expressions created by `makeConvertToDateExpression` and
// `makeConvertToDateTimeExpression`. Inputs and expected results correspond
// by position; `U`, `I`, `D` and `DateId` are helpers defined outside this
// test (`U` is presumably the undefined Id - verify against the helpers).
TEST(SparqlExpression, testToDateOrDateTimeExpression) {
using namespace ad_utility::testing;
Id T = Id::makeFromBool(true);
Id F = Id::makeFromBool(false);
Id G = Id::makeFromGeoPoint(GeoPoint(50.0, 50.0));
// The throwing parsers are used to build the expected (and pre-built) ids.
auto parserDate = DateYearOrDuration::parseXsdDate;
auto parserDateTime = DateYearOrDuration::parseXsdDatetime;
auto checkGetDate = testUnaryExpression<&makeConvertToDateExpression>;
auto checkGetDateTime = testUnaryExpression<&makeConvertToDateTimeExpression>;

// Cast to date: only the strings "-2025-02-20" and "2025-02-20" and the
// pre-built date id are expected to yield a date; the malformed strings, the
// booleans, the geo point and the numeric ids are expected to yield `U`.
checkGetDate(idOrLitOrStringVec({"---", T, F, G, "2025-02", I(10), D(0.01),
"-2025-02-20", "2025-02-20", "2025-1-1",
DateId(parserDate, "0000-01-01")}),
Ids{U, U, U, U, U, U, U, DateId(parserDate, "-2025-02-20"),
DateId(parserDate, "2025-02-20"), U,
DateId(parserDate, "0000-01-01")});
// Cast to dateTime: the same first ten inputs, including the date-only
// strings, are expected to yield `U`; the three full dateTime strings and
// the pre-built dateTime id are expected to yield the corresponding ids.
checkGetDateTime(
idOrLitOrStringVec({"---", T, F, G, "2025-02", I(10), D(0.01),
"-2025-02-20", "2025-02-20", "2025-1-1",
"1900-12-13T03:12:00.33Z", "-1900-12-13T03:12:00.33Z",
"2025-02-20T17:12:00.01-05:00",
DateId(parserDateTime, "2025-02-20T17:12:00.01Z")}),
Ids{U, U, U, U, U, U, U, U, U, U,
DateId(parserDateTime, "1900-12-13T03:12:00.33Z"),
DateId(parserDateTime, "-1900-12-13T03:12:00.33Z"),
DateId(parserDateTime, "2025-02-20T17:12:00.01-05:00"),
DateId(parserDateTime, "2025-02-20T17:12:00.01Z")});
}

// ____________________________________________________________________________
TEST(SparqlExpression, testToBooleanExpression) {
Id T = Id::makeFromBool(true);
Expand Down

0 comments on commit 87a1b72

Please sign in to comment.