Skip to content

Commit 42aa3df

Browse files
AntropoviPaul Nienaber
andauthored
DX-90582: move 3 commits from gerrit (#4)
Moving old commits with history --------- Co-authored-by: Paul Nienaber <[email protected]>
1 parent 156ac3d commit 42aa3df

File tree

6 files changed

+159
-102
lines changed

6 files changed

+159
-102
lines changed

flight_sql/CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ if (MSVC)
7070
-DCMAKE_DEPENDS_USE_COMPILER=FALSE
7171
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install
7272
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
73-
${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow/cpp
7473
)
7574
elseif(APPLE)
7675
set(ARROW_CMAKE_ARGS
@@ -90,7 +89,6 @@ elseif(APPLE)
9089
-DCMAKE_DEPENDS_USE_COMPILER=FALSE
9190
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install
9291
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
93-
${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow/cpp
9492
)
9593
if (DEFINED CMAKE_TOOLCHAIN_FILE)
9694
list(APPEND ARROW_CMAKE_ARGS -DARROW_DEPENDENCY_SOURCE=VCPKG)
@@ -109,7 +107,6 @@ else()
109107
-DCMAKE_DEPENDS_USE_COMPILER=FALSE
110108
-DOPENSSL_INCLUDE_DIR=${OPENSSL_INCLUDE_DIR}
111109
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install
112-
${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow/cpp
113110
)
114111
endif()
115112

@@ -120,6 +117,7 @@ message("Using Arrow from ${ARROW_GIT_REPOSITORY} on tag ${ARROW_GIT_TAG}")
120117
ExternalProject_Add(ApacheArrow
121118
GIT_REPOSITORY ${ARROW_GIT_REPOSITORY}
122119
GIT_TAG ${ARROW_GIT_TAG}
120+
SOURCE_SUBDIR "cpp"
123121
CMAKE_ARGS ${ARROW_CMAKE_ARGS})
124122

125123
include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install/include)

flight_sql/accessors/date_array_accessor_test.cc

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,23 @@ using namespace arrow;
1717
using namespace odbcabstraction;
1818

1919
TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) {
20-
std::vector<int32_t> values = {7589, 12320, 18980, 19095};
20+
std::vector<int32_t> values = {7589, 12320, 18980, 19095, -1, 0};
21+
std::vector<DATE_STRUCT> expected = {
22+
{1990, 10, 12},
23+
{2003, 9, 25},
24+
{2021, 12, 19},
25+
{2022, 4, 13},
26+
{1969, 12, 31},
27+
{1970, 1, 1},
28+
};
2129

2230
std::shared_ptr<Array> array;
2331
ArrayFromVector<Date32Type, int32_t>(values, &array);
2432

2533
DateArrayFlightSqlAccessor<CDataType_DATE, Date32Array> accessor(
2634
dynamic_cast<NumericArray<Date32Type> *>(array.get()));
2735

28-
std::vector<tagDATE_STRUCT> buffer(values.size());
36+
std::vector<DATE_STRUCT> buffer(values.size());
2937
std::vector<ssize_t> strlen_buffer(values.size());
3038

3139
ColumnBinding binding(CDataType_DATE, 0, 0, buffer.data(), 0, strlen_buffer.data());
@@ -37,27 +45,39 @@ TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) {
3745

3846
for (size_t i = 0; i < values.size(); ++i) {
3947
ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]);
40-
tm date{};
4148

42-
int64_t converted_time = values[i] * 86400;
43-
GetTimeForSecondsSinceEpoch(date, converted_time);
44-
ASSERT_EQ((date.tm_year + 1900), buffer[i].year);
45-
ASSERT_EQ(date.tm_mon + 1, buffer[i].month);
46-
ASSERT_EQ(date.tm_mday, buffer[i].day);
49+
ASSERT_EQ(expected[i].year, buffer[i].year);
50+
ASSERT_EQ(expected[i].month, buffer[i].month);
51+
ASSERT_EQ(expected[i].day, buffer[i].day);
4752
}
4853
}
4954

5055
TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) {
51-
std::vector<int64_t> values = {86400000, 172800000, 259200000, 1649793238110,
52-
345600000, 432000000, 518400000};
56+
std::vector<int64_t> values = {86400000, 172800000, 259200000, 1649793238110, 0,
57+
345600000, 432000000, 518400000, -86400000, -17987443200000};
58+
std::vector<DATE_STRUCT> expected = {
59+
/* year(16), month(u16), day(u16) */
60+
{1970, 1, 2},
61+
{1970, 1, 3},
62+
{1970, 1, 4},
63+
{2022, 4, 12},
64+
{1970, 1, 1},
65+
{1970, 1, 5},
66+
{1970, 1, 6},
67+
{1970, 1, 7},
68+
{1969, 12, 31},
69+
// This is the documented lower limit of supported Gregorian dates for some parts of Boost,
70+
// however boost::posix_time may go lower?
71+
{1400, 1, 1},
72+
};
5373

5474
std::shared_ptr<Array> array;
5575
ArrayFromVector<Date64Type, int64_t>(values, &array);
5676

5777
DateArrayFlightSqlAccessor<CDataType_DATE, Date64Array> accessor(
5878
dynamic_cast<NumericArray<Date64Type> *>(array.get()));
5979

60-
std::vector<tagDATE_STRUCT> buffer(values.size());
80+
std::vector<DATE_STRUCT> buffer(values.size());
6181
std::vector<ssize_t> strlen_buffer(values.size());
6282

6383
ColumnBinding binding(CDataType_DATE, 0, 0, buffer.data(), 0, strlen_buffer.data());
@@ -71,11 +91,9 @@ TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) {
7191
ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]);
7292
tm date{};
7393

74-
int64_t converted_time = values[i] / 1000;
75-
GetTimeForSecondsSinceEpoch(date, converted_time);
76-
ASSERT_EQ((date.tm_year + 1900), buffer[i].year);
77-
ASSERT_EQ(date.tm_mon + 1, buffer[i].month);
78-
ASSERT_EQ(date.tm_mday, buffer[i].day);
94+
ASSERT_EQ(expected[i].year, buffer[i].year);
95+
ASSERT_EQ(expected[i].month, buffer[i].month);
96+
ASSERT_EQ(expected[i].day, buffer[i].day);
7997
}
8098
}
8199

flight_sql/accessors/timestamp_array_accessor.cc

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@
77
#include "timestamp_array_accessor.h"
88
#include "odbcabstraction/calendar_utils.h"
99

10+
#include <cmath>
11+
#include <limits>
12+
1013
using namespace arrow;
1114

1215
namespace {
13-
int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
16+
inline int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
1417
int64_t divisor = 1;
1518
switch (unit) {
1619
case TimeUnit::SECOND:
@@ -32,27 +35,25 @@ int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) {
3235
return divisor;
3336
}
3437

35-
uint32_t CalculateFraction(TimeUnit::type unit, uint64_t units_since_epoch) {
38+
uint32_t CalculateFraction(TimeUnit::type unit, int64_t units_since_epoch) {
3639
// Convert the given remainder and time unit to nanoseconds
3740
// since the fraction field on TIMESTAMP_STRUCT is in nanoseconds.
38-
switch (unit) {
39-
case TimeUnit::SECOND:
41+
if (unit == TimeUnit::SECOND)
4042
return 0;
41-
case TimeUnit::MILLI:
42-
// 1000000 nanoseconds = 1 millisecond.
43-
return (units_since_epoch %
44-
driver::odbcabstraction::MILLI_TO_SECONDS_DIVISOR) *
45-
1000000;
46-
case TimeUnit::MICRO:
47-
// 1000 nanoseconds = 1 microsecond.
48-
return (units_since_epoch %
49-
driver::odbcabstraction::MICRO_TO_SECONDS_DIVISOR) * 1000;
50-
case TimeUnit::NANO:
51-
// 1000 nanoseconds = 1 microsecond.
52-
return (units_since_epoch %
53-
driver::odbcabstraction::NANO_TO_SECONDS_DIVISOR);
54-
}
55-
return 0;
43+
44+
const int64_t divisor = GetConversionToSecondsDivisor(unit);
45+
const int64_t nano_divisor = GetConversionToSecondsDivisor(TimeUnit::NANO);
46+
47+
if (units_since_epoch < 0)
48+
if (units_since_epoch <= (std::numeric_limits<decltype(units_since_epoch)>::min() + divisor))
49+
// Prevent trying to derive and add a value larger than INT64_MAX (i.e. the time value at the start of
50+
// the second which is used to shift the value positive before the modulo operation)) in next statement.
51+
units_since_epoch += divisor;
52+
// See below regarding floor division; here we want ceiling division.
53+
// FIXME this goes poorly (trying to use a value > INT64_MAX when units_since_epoch is
54+
// less than the smallest multiple of divisor greater than INT64_MIN.
55+
units_since_epoch += divisor * std::abs((units_since_epoch - (divisor - 1)) / divisor);
56+
return (units_since_epoch % divisor) * (nano_divisor / divisor);
5657
}
5758
} // namespace
5859

@@ -72,11 +73,24 @@ TimestampArrayFlightSqlAccessor<TARGET_TYPE, UNIT>::MoveSingleCell_impl(
7273
ColumnBinding *binding, int64_t arrow_row, int64_t cell_counter,
7374
int64_t &value_offset, bool update_value_offset,
7475
odbcabstraction::Diagnostics &diagnostics) {
76+
// Times less than the minimum integer number of seconds that can be represented
77+
// for each time unit will not convert correctly. This is mostly interesting for
78+
// nanoseconds as timestamps in other units are outside of the accepted range of
79+
// Gregorian dates.
7580
auto *buffer = static_cast<TIMESTAMP_STRUCT *>(binding->buffer);
7681

7782
int64_t value = this->GetArray()->Value(arrow_row);
7883
const auto divisor = GetConversionToSecondsDivisor(UNIT);
79-
const auto converted_result_seconds = value / divisor;
84+
const auto converted_result_seconds =
85+
// We want floor division here; C++ will round towards zero
86+
(value < 0)
87+
// Floor division: Shift all "fractional" (not a multiple of divisor) values so they round towards
88+
// zero (and to the same value) along with the "floor" less than them, then add 1 to get back to
89+
// the floor. Althernative we could shift negatively by (divisor - 1) but this breaks near
90+
// INT64_MIN causing underflow..
91+
? ((value + 1) / divisor) - 1
92+
// Towards zero is already floor
93+
: value / divisor;
8094
tm timestamp = {0};
8195

8296
GetTimeForSecondsSinceEpoch(timestamp, converted_result_seconds);

flight_sql/accessors/timestamp_array_accessor_test.cc

Lines changed: 86 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,27 @@ using namespace arrow;
1717
using namespace odbcabstraction;
1818

1919
TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) {
20-
std::vector<int64_t> values = {86400370, 172800000, 259200000, 1649793238110LL,
21-
345600000, 432000000, 518400000};
22-
20+
std::vector<int64_t> values = {86400370, 172800000, 259200000, 1649793238110LL, 345600000,
21+
432000000, 518400000, -86399000, 0, -86399999, -86399001,
22+
86400001, 86400999};
23+
std::vector<TIMESTAMP_STRUCT> expected = {
24+
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
25+
{1970, 1, 2, 0, 0, 0, 370000000},
26+
{1970, 1, 3, 0, 0, 0, 0},
27+
{1970, 1, 4, 0, 0, 0, 0},
28+
{2022, 4, 12, 19, 53, 58, 110000000},
29+
{1970, 1, 5, 0, 0, 0, 0},
30+
{1970, 1, 6, 0, 0, 0, 0},
31+
{1970, 1, 7, 0, 0, 0, 0},
32+
{1969, 12, 31, 0, 0, 1, 0},
33+
{1970, 1, 1, 0, 0, 0, 0},
34+
/* Tests both ends of the fraction rounding range to ensure we don't tip the wrong way */
35+
{1969, 12, 31, 0, 0, 0, 1000000},
36+
{1969, 12, 31, 0, 0, 0, 999000000},
37+
{1970, 1, 2, 0, 0, 0, 1000000},
38+
{1970, 1, 2, 0, 0, 0, 999000000},
39+
};
40+
2341
std::shared_ptr<Array> timestamp_array;
2442

2543
auto timestamp_field = field("timestamp_field", timestamp(TimeUnit::MILLI));
@@ -40,26 +58,31 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) {
4058
for (size_t i = 0; i < values.size(); ++i) {
4159
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);
4260

43-
tm date{};
44-
45-
auto converted_time = values[i] / MILLI_TO_SECONDS_DIVISOR;
46-
GetTimeForSecondsSinceEpoch(date, converted_time);
47-
48-
ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
49-
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
50-
ASSERT_EQ(buffer[i].day, date.tm_mday);
51-
ASSERT_EQ(buffer[i].hour, date.tm_hour);
52-
ASSERT_EQ(buffer[i].minute, date.tm_min);
53-
ASSERT_EQ(buffer[i].second, date.tm_sec);
54-
55-
constexpr uint32_t NANOSECONDS_PER_MILLI = 1000000;
56-
ASSERT_EQ(buffer[i].fraction, (values[i] % MILLI_TO_SECONDS_DIVISOR) * NANOSECONDS_PER_MILLI);
61+
ASSERT_EQ(buffer[i].year, expected[i].year);
62+
ASSERT_EQ(buffer[i].month, expected[i].month);
63+
ASSERT_EQ(buffer[i].day, expected[i].day);
64+
ASSERT_EQ(buffer[i].hour, expected[i].hour);
65+
ASSERT_EQ(buffer[i].minute, expected[i].minute);
66+
ASSERT_EQ(buffer[i].second, expected[i].second);
67+
ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
5768
}
5869
}
5970

6071
TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_SECONDS) {
6172
std::vector<int64_t> values = {86400, 172800, 259200, 1649793238,
62-
345600, 432000, 518400};
73+
345600, 432000, 518400, -86399, 0};
74+
std::vector<TIMESTAMP_STRUCT> expected = {
75+
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
76+
{1970, 1, 2, 0, 0, 0, 0},
77+
{1970, 1, 3, 0, 0, 0, 0},
78+
{1970, 1, 4, 0, 0, 0, 0},
79+
{2022, 4, 12, 19, 53, 58, 0},
80+
{1970, 1, 5, 0, 0, 0, 0},
81+
{1970, 1, 6, 0, 0, 0, 0},
82+
{1970, 1, 7, 0, 0, 0, 0},
83+
{1969, 12, 31, 0, 0, 1, 0},
84+
{1970, 1, 1, 0, 0, 0, 0},
85+
};
6386

6487
std::shared_ptr<Array> timestamp_array;
6588

@@ -81,23 +104,27 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_SECONDS) {
81104

82105
for (size_t i = 0; i < values.size(); ++i) {
83106
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);
84-
tm date{};
85-
86-
auto converted_time = values[i];
87-
GetTimeForSecondsSinceEpoch(date, converted_time);
88107

89-
ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
90-
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
91-
ASSERT_EQ(buffer[i].day, date.tm_mday);
92-
ASSERT_EQ(buffer[i].hour, date.tm_hour);
93-
ASSERT_EQ(buffer[i].minute, date.tm_min);
94-
ASSERT_EQ(buffer[i].second, date.tm_sec);
108+
ASSERT_EQ(buffer[i].year, expected[i].year);
109+
ASSERT_EQ(buffer[i].month, expected[i].month);
110+
ASSERT_EQ(buffer[i].day, expected[i].day);
111+
ASSERT_EQ(buffer[i].hour, expected[i].hour);
112+
ASSERT_EQ(buffer[i].minute, expected[i].minute);
113+
ASSERT_EQ(buffer[i].second, expected[i].second);
95114
ASSERT_EQ(buffer[i].fraction, 0);
96115
}
97116
}
98117

99118
TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MICRO) {
100-
std::vector<int64_t> values = {86400000000, 1649793238000000};
119+
std::vector<int64_t> values = {0, 86400000000, 1649793238000000, -86399999999, -86399000001};
120+
std::vector<TIMESTAMP_STRUCT> expected = {
121+
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
122+
{1970, 1, 1, 0, 0, 0, 0},
123+
{1970, 1, 2, 0, 0, 0, 0},
124+
{2022, 4, 12, 19, 53, 58, 0},
125+
{1969, 12, 31, 0, 0, 0, 1000},
126+
{1969, 12, 31, 0, 0, 0, 999999000},
127+
};
101128

102129
std::shared_ptr<Array> timestamp_array;
103130

@@ -120,24 +147,31 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MICRO) {
120147
for (size_t i = 0; i < values.size(); ++i) {
121148
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);
122149

123-
tm date{};
124-
125-
auto converted_time = values[i] / MICRO_TO_SECONDS_DIVISOR;
126-
GetTimeForSecondsSinceEpoch(date, converted_time);
127-
128-
ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
129-
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
130-
ASSERT_EQ(buffer[i].day, date.tm_mday);
131-
ASSERT_EQ(buffer[i].hour, date.tm_hour);
132-
ASSERT_EQ(buffer[i].minute, date.tm_min);
133-
ASSERT_EQ(buffer[i].second, date.tm_sec);
134-
constexpr uint32_t MICROS_PER_NANO = 1000;
135-
ASSERT_EQ(buffer[i].fraction, (values[i] % MICRO_TO_SECONDS_DIVISOR) * MICROS_PER_NANO);
150+
ASSERT_EQ(buffer[i].year, expected[i].year);
151+
ASSERT_EQ(buffer[i].month, expected[i].month);
152+
ASSERT_EQ(buffer[i].day, expected[i].day);
153+
ASSERT_EQ(buffer[i].hour, expected[i].hour);
154+
ASSERT_EQ(buffer[i].minute, expected[i].minute);
155+
ASSERT_EQ(buffer[i].second, expected[i].second);
156+
ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
136157
}
137158
}
138159

139160
TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_NANO) {
140-
std::vector<int64_t> values = {86400000010000, 1649793238000000000};
161+
std::vector<int64_t> values = {86400000010000, 1649793238000000000, -86399999999999, -86399000000001,
162+
86400000000001, 86400999999999, 0, -9223372036000000001};
163+
std::vector<TIMESTAMP_STRUCT> expected = {
164+
/* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */
165+
{1970, 1, 2, 0, 0, 0, 10000},
166+
{2022, 4, 12, 19, 53, 58, 0},
167+
{1969, 12, 31, 0, 0, 0, 1},
168+
{1969, 12, 31, 0, 0, 0, 999999999},
169+
{1970, 1, 2, 0, 0, 0, 1},
170+
{1970, 1, 2, 0, 0, 0, 999999999},
171+
{1970, 1, 1, 0, 0, 0, 0},
172+
/* Test within range where floor (seconds) value is below INT64_MIN in nanoseconds */
173+
{1677, 9, 21, 0, 12, 43, 999999999},
174+
};
141175

142176
std::shared_ptr<Array> timestamp_array;
143177

@@ -159,19 +193,16 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_NANO) {
159193

160194
for (size_t i = 0; i < values.size(); ++i) {
161195
ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]);
162-
tm date{};
163-
164-
auto converted_time = values[i] / NANO_TO_SECONDS_DIVISOR;
165-
GetTimeForSecondsSinceEpoch(date, converted_time);
166-
167-
ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year));
168-
ASSERT_EQ(buffer[i].month, date.tm_mon + 1);
169-
ASSERT_EQ(buffer[i].day, date.tm_mday);
170-
ASSERT_EQ(buffer[i].hour, date.tm_hour);
171-
ASSERT_EQ(buffer[i].minute, date.tm_min);
172-
ASSERT_EQ(buffer[i].second, date.tm_sec);
173-
ASSERT_EQ(buffer[i].fraction, (values[i] % NANO_TO_SECONDS_DIVISOR));
196+
197+
ASSERT_EQ(buffer[i].year, expected[i].year);
198+
ASSERT_EQ(buffer[i].month, expected[i].month);
199+
ASSERT_EQ(buffer[i].day, expected[i].day);
200+
ASSERT_EQ(buffer[i].hour, expected[i].hour);
201+
ASSERT_EQ(buffer[i].minute, expected[i].minute);
202+
ASSERT_EQ(buffer[i].second, expected[i].second);
203+
ASSERT_EQ(buffer[i].fraction, expected[i].fraction);
174204
}
175205
}
206+
176207
} // namespace flight_sql
177208
} // namespace driver

0 commit comments

Comments
 (0)