Skip to content

Commit

Permalink
align null with spark
Browse files Browse the repository at this point in the history
  • Loading branch information
marin-ma committed Feb 23, 2024
1 parent 50d6027 commit 06fec8f
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 58 deletions.
91 changes: 60 additions & 31 deletions velox/functions/sparksql/DateTimeFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,34 +21,66 @@
namespace facebook::velox::functions::sparksql {
namespace {

Timestamp makeTimeStampFromDecodedArgs(
std::optional<Timestamp> makeTimeStampFromDecodedArgs(
vector_size_t row,
DecodedVector* year,
DecodedVector* month,
DecodedVector* day,
DecodedVector* hour,
DecodedVector* minute,
DecodedVector* micros) {
auto totalMicros = micros->valueAt<int64_t>(row);
auto seconds = totalMicros / util::kMicrosPerSec;
VELOX_USER_CHECK(
seconds <= 60,
"Invalid value for SecondOfMinute (valid values 0 - 59): {}.",
seconds);
if (seconds == 60) {
VELOX_USER_CHECK(
totalMicros % util::kMicrosPerSec == 0,
"The fraction of sec must be zero. Valid range is [0, 60].");
DecodedVector* yearVector,
DecodedVector* monthVector,
DecodedVector* dayVector,
DecodedVector* hourVector,
DecodedVector* minuteVector,
DecodedVector* microsVector) {
// Check hour.
auto hour = hourVector->valueAt<int32_t>(row);
if (hour < 0 || hour > 24) {
return std::nullopt;
}
// Check miniute.
auto minute = minuteVector->valueAt<int32_t>(row);
if (minute < 0 || minute > 60) {
return std::nullopt;
}
// Check microseconds.
auto micros = microsVector->valueAt<int64_t>(row);
if (micros < 0) {
return std::nullopt;
}
auto seconds = micros / util::kMicrosPerSec;
if (seconds > 60 || seconds == 60 && micros % util::kMicrosPerSec != 0) {
// Invalid microsecond.
return std::nullopt;
}

auto daysSinceEpoch = util::daysSinceEpochFromDate(
year->valueAt<int32_t>(row),
month->valueAt<int32_t>(row),
day->valueAt<int32_t>(row));
auto localMicros = hour->valueAt<int32_t>(row) * util::kMicrosPerHour +
minute->valueAt<int32_t>(row) * util::kMicrosPerMinute +
micros->valueAt<int64_t>(row);
return util::fromDatetime(daysSinceEpoch, localMicros);
// year, month, day will be checked in utils::daysSinceEpochFromDate;
try {
auto daysSinceEpoch = util::daysSinceEpochFromDate(
yearVector->valueAt<int32_t>(row),
monthVector->valueAt<int32_t>(row),
dayVector->valueAt<int32_t>(row));
auto localMicros =
hourVector->valueAt<int32_t>(row) * util::kMicrosPerHour +
minuteVector->valueAt<int32_t>(row) * util::kMicrosPerMinute + micros;
return util::fromDatetime(daysSinceEpoch, localMicros);
} catch (const VeloxException& e) {
if (!e.isUserError()) {
throw;
}
return std::nullopt;
} catch (const std::exception&) {
throw;
}
}

void setTimestampOrNull(
int32_t row,
std::optional<Timestamp> timestamp,
int64_t tzID,
FlatVector<Timestamp>* result) {
if (timestamp.has_value()) {
(*timestamp).toGMT(tzID);
result->set(row, *timestamp);
} else {
result->setNull(row, true);
}
}

class MakeTimestampFunction : public exec::VectorFunction {
Expand Down Expand Up @@ -84,8 +116,7 @@ class MakeTimestampFunction : public exec::VectorFunction {
rows.applyToSelected([&](vector_size_t row) {
auto timestamp = makeTimeStampFromDecodedArgs(
row, year, month, day, hour, minute, micros);
timestamp.toGMT(constantTzID);
resultFlatVector->set(row, timestamp);
setTimestampOrNull(row, timestamp, constantTzID, resultFlatVector);
});
} else {
auto timeZone = decodedArgs.at(6);
Expand All @@ -94,8 +125,7 @@ class MakeTimestampFunction : public exec::VectorFunction {
row, year, month, day, hour, minute, micros);
auto tzID =
util::getTimeZoneID(timeZone->valueAt<StringView>(row).str());
timestamp.toGMT(tzID);
resultFlatVector->set(row, timestamp);
setTimestampOrNull(row, timestamp, tzID, resultFlatVector);
});
}
} else {
Expand All @@ -104,8 +134,7 @@ class MakeTimestampFunction : public exec::VectorFunction {
rows.applyToSelected([&](vector_size_t row) {
auto timestamp = makeTimeStampFromDecodedArgs(
row, year, month, day, hour, minute, micros);
timestamp.toGMT(sessionTzID_);
resultFlatVector->set(row, timestamp);
setTimestampOrNull(row, timestamp, sessionTzID_, resultFlatVector);
});
}
}
Expand Down
73 changes: 46 additions & 27 deletions velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,7 @@ TEST_F(DateTimeFunctionsTest, makeTimestamp) {

const auto microsType = DECIMAL(16, 6);

// Valid cases w/o timezone.
// Valid cases w/o time zone argument.
{
const auto year = makeFlatVector<int32_t>({2021, 2021, 2021, 2021, 2021});
const auto month = makeFlatVector<int32_t>({7, 7, 7, 7, 7});
Expand All @@ -790,7 +790,7 @@ TEST_F(DateTimeFunctionsTest, makeTimestamp) {
{45678000, 1e6, 6e7, 59999999, std::nullopt}, microsType);
auto data = makeRowVector({year, month, day, hour, minute, micros});

// Test w/o session timezone.
// Test w/o session time zone.
setQueryTimeZone("");
auto expectedGMT = makeNullableFlatVector<Timestamp>(
{util::fromTimestampString("2021-07-11 06:30:45.678"),
Expand All @@ -801,7 +801,7 @@ TEST_F(DateTimeFunctionsTest, makeTimestamp) {
testMakeTimestamp(data, expectedGMT, false);
testConstantTimezone(data, "GMT", expectedGMT);

// Test w/ session timezone.
// Test w/ session time zone.
setQueryTimeZone("Asia/Shanghai");
auto expectedSessionTimezone = makeNullableFlatVector<Timestamp>(
{util::fromTimestampString("2021-07-10 22:30:45.678"),
Expand All @@ -813,7 +813,7 @@ TEST_F(DateTimeFunctionsTest, makeTimestamp) {
testConstantTimezone(data, "GMT", expectedGMT);
}

// Valid cases w/ timezone.
// Valid cases w/ time zone argument.
{
const auto year = makeFlatVector<int32_t>({2021, 2021, 1});
const auto month = makeFlatVector<int32_t>({07, 07, 1});
Expand All @@ -822,10 +822,10 @@ TEST_F(DateTimeFunctionsTest, makeTimestamp) {
const auto minute = makeFlatVector<int32_t>({30, 30, 1});
const auto micros =
makeNullableFlatVector<int64_t>({45678000, 45678000, 1e6}, microsType);
const auto timezone =
const auto timeZone =
makeNullableFlatVector<StringView>({"GMT", "CET", std::nullopt});
auto data =
makeRowVector({year, month, day, hour, minute, micros, timezone});
makeRowVector({year, month, day, hour, minute, micros, timeZone});
{
setQueryTimeZone("");
auto expected = makeNullableFlatVector<Timestamp>(
Expand All @@ -835,6 +835,8 @@ TEST_F(DateTimeFunctionsTest, makeTimestamp) {
testMakeTimestamp(data, expected, true);
}
{
// Session time zone will be ignored if time zone is specified in
// argument.
setQueryTimeZone("Asia/Shanghai");
auto expected = makeNullableFlatVector<Timestamp>(
{util::fromTimestampString("2021-07-11 06:30:45.678"),
Expand All @@ -846,11 +848,44 @@ TEST_F(DateTimeFunctionsTest, makeTimestamp) {

// Invalid cases.
{
const auto year = makeFlatVector<int32_t>(std::vector<int32_t>{1});
const auto month = makeFlatVector<int32_t>(std::vector<int32_t>{1});
const auto day = makeFlatVector<int32_t>(std::vector<int32_t>{1});
const auto hour = makeFlatVector<int32_t>(std::vector<int32_t>{1});
const auto minute = makeFlatVector<int32_t>(std::vector<int32_t>{1});
const auto year = makeFlatVector<int32_t>(
{facebook::velox::util::kMinYear - 1,
facebook::velox::util::kMaxYear + 1,
1,
1,
1,
1,
1,
1});
const auto month = makeFlatVector<int32_t>({1, 1, 0, 13, 1, 1, 1, 1});
const auto day = makeFlatVector<int32_t>({1, 1, 1, 1, 0, 32, 1, 1});
const auto hour = makeFlatVector<int32_t>({1, 1, 1, 1, 1, 1, 25, 1});
const auto minute = makeFlatVector<int32_t>({1, 1, 1, 1, 1, 1, 1, 61});
const auto micros =
makeFlatVector<int64_t>({1, 1, 1, 1, 1, 1, 1, 1}, microsType);
auto expected = makeNullableFlatVector<Timestamp>(
{std::nullopt,
std::nullopt,
std::nullopt,
std::nullopt,
std::nullopt,
std::nullopt,
std::nullopt,
std::nullopt});
auto data = makeRowVector({year, month, day, hour, minute, micros});
testMakeTimestamp(data, expected, false);

const auto testInvalidMicros = [&](std::optional<int64_t> microsec) {
auto result = evaluateOnce<Timestamp, int64_t>(
"make_timestamp(c0, c1, c2, c3, c4, c5)",
{1, 1, 1, 1, 1, microsec},
{INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType});
EXPECT_EQ(result, std::nullopt);
};
testInvalidMicros(61e6);
testInvalidMicros(99999999);
testInvalidMicros(999999999);
testInvalidMicros(60007000);

const auto testMicrosError = [&](int64_t microsec,
const TypePtr& microsType,
Expand All @@ -862,22 +897,6 @@ TEST_F(DateTimeFunctionsTest, makeTimestamp) {
evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data),
expectedError);
};
testMicrosError(
61e6,
microsType,
"Invalid value for SecondOfMinute (valid values 0 - 59): 61.");
testMicrosError(
99999999,
microsType,
"Invalid value for SecondOfMinute (valid values 0 - 59): 99.");
testMicrosError(
999999999,
microsType,
"Invalid value for SecondOfMinute (valid values 0 - 59): 999.");
testMicrosError(
60007000,
microsType,
"The fraction of sec must be zero. Valid range is [0, 60].");
testMicrosError(
60007000,
DECIMAL(20, 8),
Expand Down

0 comments on commit 06fec8f

Please sign in to comment.