Skip to content

Commit

Permalink
DX-90582: move 3 commits from gerrit (#4)
Browse files Browse the repository at this point in the history
Moving old commits with history

---------

Co-authored-by: Paul Nienaber <[email protected]>
  • Loading branch information
Antropovi and Paul Nienaber authored Jan 14, 2025
1 parent 156ac3d commit 42aa3df
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 102 deletions.
4 changes: 1 addition & 3 deletions flight_sql/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ if (MSVC)
-DCMAKE_DEPENDS_USE_COMPILER=FALSE
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow/cpp
)
elseif(APPLE)
set(ARROW_CMAKE_ARGS
Expand All @@ -90,7 +89,6 @@ elseif(APPLE)
-DCMAKE_DEPENDS_USE_COMPILER=FALSE
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow/cpp
)
if (DEFINED CMAKE_TOOLCHAIN_FILE)
list(APPEND ARROW_CMAKE_ARGS -DARROW_DEPENDENCY_SOURCE=VCPKG)
Expand All @@ -109,7 +107,6 @@ else()
-DCMAKE_DEPENDS_USE_COMPILER=FALSE
-DOPENSSL_INCLUDE_DIR=${OPENSSL_INCLUDE_DIR}
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install
${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow/cpp
)
endif()

Expand All @@ -120,6 +117,7 @@ message("Using Arrow from ${ARROW_GIT_REPOSITORY} on tag ${ARROW_GIT_TAG}")
ExternalProject_Add(ApacheArrow
GIT_REPOSITORY ${ARROW_GIT_REPOSITORY}
GIT_TAG ${ARROW_GIT_TAG}
SOURCE_SUBDIR "cpp"
CMAKE_ARGS ${ARROW_CMAKE_ARGS})

include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install/include)
Expand Down
50 changes: 34 additions & 16 deletions flight_sql/accessors/date_array_accessor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,23 @@ using namespace arrow;
using namespace odbcabstraction;

TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) {
std::vector<int32_t> values = {7589, 12320, 18980, 19095};
std::vector<int32_t> values = {7589, 12320, 18980, 19095, -1, 0};
std::vector<DATE_STRUCT> expected = {
{1990, 10, 12},
{2003, 9, 25},
{2021, 12, 19},
{2022, 4, 13},
{1969, 12, 31},
{1970, 1, 1},
};

std::shared_ptr<Array> array;
ArrayFromVector<Date32Type, int32_t>(values, &array);

DateArrayFlightSqlAccessor<CDataType_DATE, Date32Array> accessor(
dynamic_cast<NumericArray<Date32Type> *>(array.get()));

std::vector<tagDATE_STRUCT> buffer(values.size());
std::vector<DATE_STRUCT> buffer(values.size());
std::vector<ssize_t> strlen_buffer(values.size());

ColumnBinding binding(CDataType_DATE, 0, 0, buffer.data(), 0, strlen_buffer.data());
Expand All @@ -37,27 +45,39 @@ TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) {

for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]);
tm date{};

int64_t converted_time = values[i] * 86400;
GetTimeForSecondsSinceEpoch(date, converted_time);
ASSERT_EQ((date.tm_year + 1900), buffer[i].year);
ASSERT_EQ(date.tm_mon + 1, buffer[i].month);
ASSERT_EQ(date.tm_mday, buffer[i].day);
ASSERT_EQ(expected[i].year, buffer[i].year);
ASSERT_EQ(expected[i].month, buffer[i].month);
ASSERT_EQ(expected[i].day, buffer[i].day);
}
}

TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) {
std::vector<int64_t> values = {86400000, 172800000, 259200000, 1649793238110,
345600000, 432000000, 518400000};
std::vector<int64_t> values = {86400000, 172800000, 259200000, 1649793238110, 0,
345600000, 432000000, 518400000, -86400000, -17987443200000};
std::vector<DATE_STRUCT> expected = {
/* year(16), month(u16), day(u16) */
{1970, 1, 2},
{1970, 1, 3},
{1970, 1, 4},
{2022, 4, 12},
{1970, 1, 1},
{1970, 1, 5},
{1970, 1, 6},
{1970, 1, 7},
{1969, 12, 31},
// This is the documented lower limit of supported Gregorian dates for some parts of Boost,
// however boost::posix_time may go lower?
{1400, 1, 1},
};

std::shared_ptr<Array> array;
ArrayFromVector<Date64Type, int64_t>(values, &array);

DateArrayFlightSqlAccessor<CDataType_DATE, Date64Array> accessor(
dynamic_cast<NumericArray<Date64Type> *>(array.get()));

std::vector<tagDATE_STRUCT> buffer(values.size());
std::vector<DATE_STRUCT> buffer(values.size());
std::vector<ssize_t> strlen_buffer(values.size());

ColumnBinding binding(CDataType_DATE, 0, 0, buffer.data(), 0, strlen_buffer.data());
Expand All @@ -71,11 +91,9 @@ TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) {
ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]);
tm date{};

int64_t converted_time = values[i] / 1000;
GetTimeForSecondsSinceEpoch(date, converted_time);
ASSERT_EQ((date.tm_year + 1900), buffer[i].year);
ASSERT_EQ(date.tm_mon + 1, buffer[i].month);
ASSERT_EQ(date.tm_mday, buffer[i].day);
ASSERT_EQ(expected[i].year, buffer[i].year);
ASSERT_EQ(expected[i].month, buffer[i].month);
ASSERT_EQ(expected[i].day, buffer[i].day);
}
}

Expand Down
54 changes: 34 additions & 20 deletions flight_sql/accessors/timestamp_array_accessor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@
#include "timestamp_array_accessor.h"
#include "odbcabstraction/calendar_utils.h"

#include <cmath>
#include <limits>

using namespace arrow;

namespace {
int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
inline int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
int64_t divisor = 1;
switch (unit) {
case TimeUnit::SECOND:
Expand All @@ -32,27 +35,25 @@ int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
return divisor;
}

uint32_t CalculateFraction(TimeUnit::type unit, uint64_t units_since_epoch) {
uint32_t CalculateFraction(TimeUnit::type unit, int64_t units_since_epoch) {
// Convert the given remainder and time unit to nanoseconds
// since the fraction field on TIMESTAMP_STRUCT is in nanoseconds.
switch (unit) {
case TimeUnit::SECOND:
if (unit == TimeUnit::SECOND)
return 0;
case TimeUnit::MILLI:
// 1000000 nanoseconds = 1 millisecond.
return (units_since_epoch %
driver::odbcabstraction::MILLI_TO_SECONDS_DIVISOR) *
1000000;
case TimeUnit::MICRO:
// 1000 nanoseconds = 1 microsecond.
return (units_since_epoch %
driver::odbcabstraction::MICRO_TO_SECONDS_DIVISOR) * 1000;
case TimeUnit::NANO:
// 1000 nanoseconds = 1 microsecond.
return (units_since_epoch %
driver::odbcabstraction::NANO_TO_SECONDS_DIVISOR);
}
return 0;

const int64_t divisor = GetConversionToSecondsDivisor(unit);
const int64_t nano_divisor = GetConversionToSecondsDivisor(TimeUnit::NANO);

if (units_since_epoch < 0)
if (units_since_epoch <= (std::numeric_limits<decltype(units_since_epoch)>::min() + divisor))
// Prevent trying to derive and add a value larger than INT64_MAX (i.e. the time value at the start of
// the second which is used to shift the value positive before the modulo operation)) in next statement.
units_since_epoch += divisor;
// See below regarding floor division; here we want ceiling division.
// FIXME this goes poorly (trying to use a value > INT64_MAX when units_since_epoch is
// less than the smallest multiple of divisor greater than INT64_MIN.
units_since_epoch += divisor * std::abs((units_since_epoch - (divisor - 1)) / divisor);
return (units_since_epoch % divisor) * (nano_divisor / divisor);
}
} // namespace

Expand All @@ -72,11 +73,24 @@ TimestampArrayFlightSqlAccessor<TARGET_TYPE, UNIT>::MoveSingleCell_impl(
ColumnBinding *binding, int64_t arrow_row, int64_t cell_counter,
int64_t &value_offset, bool update_value_offset,
odbcabstraction::Diagnostics &diagnostics) {
// Times less than the minimum integer number of seconds that can be represented
// for each time unit will not convert correctly. This is mostly interesting for
// nanoseconds as timestamps in other units are outside of the accepted range of
// Gregorian dates.
auto *buffer = static_cast<TIMESTAMP_STRUCT *>(binding->buffer);

int64_t value = this->GetArray()->Value(arrow_row);
const auto divisor = GetConversionToSecondsDivisor(UNIT);
const auto converted_result_seconds = value / divisor;
const auto converted_result_seconds =
// We want floor division here; C++ will round towards zero
(value < 0)
// Floor division: Shift all "fractional" (not a multiple of divisor) values so they round towards
// zero (and to the same value) along with the "floor" less than them, then add 1 to get back to
// the floor. Althernative we could shift negatively by (divisor - 1) but this breaks near
// INT64_MIN causing underflow..
? ((value + 1) / divisor) - 1
// Towards zero is already floor
: value / divisor;
tm timestamp = {0};

GetTimeForSecondsSinceEpoch(timestamp, converted_result_seconds);
Expand Down
141 changes: 86 additions & 55 deletions flight_sql/accessors/timestamp_array_accessor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,27 @@ using namespace arrow;
using namespace odbcabstraction;

TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) {
std::vector<int64_t> values = {86400370, 172800000, 259200000, 1649793238110LL,
345600000, 432000000, 518400000};

std::vector<int64_t> values = {86400370, 172800000, 259200000, 1649793238110LL, 345600000,
432000000, 518400000, -86399000, 0, -86399999, -86399001,
86400001, 86400999};
std::vector<TIMESTAMP_STRUCT> expected = {
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
{1970, 1, 2, 0, 0, 0, 370000000},
{1970, 1, 3, 0, 0, 0, 0},
{1970, 1, 4, 0, 0, 0, 0},
{2022, 4, 12, 19, 53, 58, 110000000},
{1970, 1, 5, 0, 0, 0, 0},
{1970, 1, 6, 0, 0, 0, 0},
{1970, 1, 7, 0, 0, 0, 0},
{1969, 12, 31, 0, 0, 1, 0},
{1970, 1, 1, 0, 0, 0, 0},
/* Tests both ends of the fraction rounding range to ensure we don't tip the wrong way */
{1969, 12, 31, 0, 0, 0, 1000000},
{1969, 12, 31, 0, 0, 0, 999000000},
{1970, 1, 2, 0, 0, 0, 1000000},
{1970, 1, 2, 0, 0, 0, 999000000},
};

std::shared_ptr<Array> timestamp_array;

auto timestamp_field = field("timestamp_field", timestamp(TimeUnit::MILLI));
Expand All @@ -40,26 +58,31 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) {
for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);

tm date{};

auto converted_time = values[i] / MILLI_TO_SECONDS_DIVISOR;
GetTimeForSecondsSinceEpoch(date, converted_time);

ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
ASSERT_EQ(buffer[i].day, date.tm_mday);
ASSERT_EQ(buffer[i].hour, date.tm_hour);
ASSERT_EQ(buffer[i].minute, date.tm_min);
ASSERT_EQ(buffer[i].second, date.tm_sec);

constexpr uint32_t NANOSECONDS_PER_MILLI = 1000000;
ASSERT_EQ(buffer[i].fraction, (values[i] % MILLI_TO_SECONDS_DIVISOR) * NANOSECONDS_PER_MILLI);
ASSERT_EQ(buffer[i].year, expected[i].year);
ASSERT_EQ(buffer[i].month, expected[i].month);
ASSERT_EQ(buffer[i].day, expected[i].day);
ASSERT_EQ(buffer[i].hour, expected[i].hour);
ASSERT_EQ(buffer[i].minute, expected[i].minute);
ASSERT_EQ(buffer[i].second, expected[i].second);
ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
}
}

TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_SECONDS) {
std::vector<int64_t> values = {86400, 172800, 259200, 1649793238,
345600, 432000, 518400};
345600, 432000, 518400, -86399, 0};
std::vector<TIMESTAMP_STRUCT> expected = {
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
{1970, 1, 2, 0, 0, 0, 0},
{1970, 1, 3, 0, 0, 0, 0},
{1970, 1, 4, 0, 0, 0, 0},
{2022, 4, 12, 19, 53, 58, 0},
{1970, 1, 5, 0, 0, 0, 0},
{1970, 1, 6, 0, 0, 0, 0},
{1970, 1, 7, 0, 0, 0, 0},
{1969, 12, 31, 0, 0, 1, 0},
{1970, 1, 1, 0, 0, 0, 0},
};

std::shared_ptr<Array> timestamp_array;

Expand All @@ -81,23 +104,27 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_SECONDS) {

for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);
tm date{};

auto converted_time = values[i];
GetTimeForSecondsSinceEpoch(date, converted_time);

ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
ASSERT_EQ(buffer[i].day, date.tm_mday);
ASSERT_EQ(buffer[i].hour, date.tm_hour);
ASSERT_EQ(buffer[i].minute, date.tm_min);
ASSERT_EQ(buffer[i].second, date.tm_sec);
ASSERT_EQ(buffer[i].year, expected[i].year);
ASSERT_EQ(buffer[i].month, expected[i].month);
ASSERT_EQ(buffer[i].day, expected[i].day);
ASSERT_EQ(buffer[i].hour, expected[i].hour);
ASSERT_EQ(buffer[i].minute, expected[i].minute);
ASSERT_EQ(buffer[i].second, expected[i].second);
ASSERT_EQ(buffer[i].fraction, 0);
}
}

TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MICRO) {
std::vector<int64_t> values = {86400000000, 1649793238000000};
std::vector<int64_t> values = {0, 86400000000, 1649793238000000, -86399999999, -86399000001};
std::vector<TIMESTAMP_STRUCT> expected = {
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
{1970, 1, 1, 0, 0, 0, 0},
{1970, 1, 2, 0, 0, 0, 0},
{2022, 4, 12, 19, 53, 58, 0},
{1969, 12, 31, 0, 0, 0, 1000},
{1969, 12, 31, 0, 0, 0, 999999000},
};

std::shared_ptr<Array> timestamp_array;

Expand All @@ -120,24 +147,31 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MICRO) {
for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);

tm date{};

auto converted_time = values[i] / MICRO_TO_SECONDS_DIVISOR;
GetTimeForSecondsSinceEpoch(date, converted_time);

ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
ASSERT_EQ(buffer[i].day, date.tm_mday);
ASSERT_EQ(buffer[i].hour, date.tm_hour);
ASSERT_EQ(buffer[i].minute, date.tm_min);
ASSERT_EQ(buffer[i].second, date.tm_sec);
constexpr uint32_t MICROS_PER_NANO = 1000;
ASSERT_EQ(buffer[i].fraction, (values[i] % MICRO_TO_SECONDS_DIVISOR) * MICROS_PER_NANO);
ASSERT_EQ(buffer[i].year, expected[i].year);
ASSERT_EQ(buffer[i].month, expected[i].month);
ASSERT_EQ(buffer[i].day, expected[i].day);
ASSERT_EQ(buffer[i].hour, expected[i].hour);
ASSERT_EQ(buffer[i].minute, expected[i].minute);
ASSERT_EQ(buffer[i].second, expected[i].second);
ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
}
}

TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_NANO) {
std::vector<int64_t> values = {86400000010000, 1649793238000000000};
std::vector<int64_t> values = {86400000010000, 1649793238000000000, -86399999999999, -86399000000001,
86400000000001, 86400999999999, 0, -9223372036000000001};
std::vector<TIMESTAMP_STRUCT> expected = {
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
{1970, 1, 2, 0, 0, 0, 10000},
{2022, 4, 12, 19, 53, 58, 0},
{1969, 12, 31, 0, 0, 0, 1},
{1969, 12, 31, 0, 0, 0, 999999999},
{1970, 1, 2, 0, 0, 0, 1},
{1970, 1, 2, 0, 0, 0, 999999999},
{1970, 1, 1, 0, 0, 0, 0},
/* Test within range where floor (seconds) value is below INT64_MIN in nanoseconds */
{1677, 9, 21, 0, 12, 43, 999999999},
};

std::shared_ptr<Array> timestamp_array;

Expand All @@ -159,19 +193,16 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_NANO) {

for (size_t i = 0; i < values.size(); ++i) {
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);
tm date{};

auto converted_time = values[i] / NANO_TO_SECONDS_DIVISOR;
GetTimeForSecondsSinceEpoch(date, converted_time);

ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
ASSERT_EQ(buffer[i].day, date.tm_mday);
ASSERT_EQ(buffer[i].hour, date.tm_hour);
ASSERT_EQ(buffer[i].minute, date.tm_min);
ASSERT_EQ(buffer[i].second, date.tm_sec);
ASSERT_EQ(buffer[i].fraction, (values[i] % NANO_TO_SECONDS_DIVISOR));

ASSERT_EQ(buffer[i].year, expected[i].year);
ASSERT_EQ(buffer[i].month, expected[i].month);
ASSERT_EQ(buffer[i].day, expected[i].day);
ASSERT_EQ(buffer[i].hour, expected[i].hour);
ASSERT_EQ(buffer[i].minute, expected[i].minute);
ASSERT_EQ(buffer[i].second, expected[i].second);
ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
}
}

} // namespace flight_sql
} // namespace driver
Loading

0 comments on commit 42aa3df

Please sign in to comment.