Skip to content

Commit

Permalink
feat: add Decimal32/Decimal64 support (#683)
Browse files Browse the repository at this point in the history
Initial implementation of Decimal32/Decimal64 support in nanoarrow.
  • Loading branch information
zeroshade authored Nov 20, 2024
1 parent 253b7ec commit e54b7df
Show file tree
Hide file tree
Showing 16 changed files with 482 additions and 43 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-and-test-device.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ jobs:
if: steps.cache-arrow-build.outputs.cache-hit != 'true'
shell: bash
run: |
ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow
ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow
- name: Build
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-and-test-ipc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ jobs:
if: steps.cache-arrow-build.outputs.cache-hit != 'true'
shell: bash
run: |
ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow
ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow
- name: Build
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/build-and-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
if: steps.cache-arrow-build.outputs.cache-hit != 'true'
shell: bash
run: |
ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow
ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow
- name: Build nanoarrow
run: |
Expand Down Expand Up @@ -154,7 +154,7 @@ jobs:
if: steps.cache-arrow-build.outputs.cache-hit != 'true'
shell: bash
run: |
ci/scripts/build-arrow-cpp-minimal.sh 16.0.0 arrow
ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow
- name: Run meson testing script
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/clang-tidy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
if: steps.cache-arrow-build.outputs.cache-hit != 'true'
shell: bash
run: |
ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow
ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow
- name: Build nanoarrow
run: |
Expand Down
2 changes: 1 addition & 1 deletion ci/docker/alpine.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ RUN apk add bash linux-headers git cmake R R-dev g++ gfortran gnupg curl py3-vir

# For Arrow C++
COPY ci/scripts/build-arrow-cpp-minimal.sh /
RUN /build-arrow-cpp-minimal.sh 15.0.2 /arrow
RUN /build-arrow-cpp-minimal.sh 18.0.0 /arrow

# There's a missing define that numpy's build needs on s390x and there is no wheel
RUN (grep -e "S390" /usr/include/bits/hwcap.h && echo "#define HWCAP_S390_VX HWCAP_S390_VXRS" >> /usr/include/bits/hwcap.h) || true
Expand Down
2 changes: 2 additions & 0 deletions src/nanoarrow/common/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
case NANOARROW_TYPE_HALF_FLOAT:
case NANOARROW_TYPE_FLOAT:
case NANOARROW_TYPE_DOUBLE:
case NANOARROW_TYPE_DECIMAL32:
case NANOARROW_TYPE_DECIMAL64:
case NANOARROW_TYPE_DECIMAL128:
case NANOARROW_TYPE_DECIMAL256:
case NANOARROW_TYPE_INTERVAL_MONTHS:
Expand Down
164 changes: 164 additions & 0 deletions src/nanoarrow/common/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,94 @@ TEST(ArrayTest, ArrayTestAppendToIntervalArrayMonthDayNano) {
#endif
}

// Appends [12345, null, null, -67890] (unscaled integers for a decimal32 with
// precision 8 and scale 3) through the nanoarrow builder API, then checks the
// resulting length, null count, validity bitmap and raw 4-byte data slots.
// When built against Arrow >= 18 the result is also compared with an array
// produced by arrow::Decimal32Builder.
TEST(ArrayTest, ArrayTestAppendToDecimal32Array) {
  struct ArrowArray array;
  struct ArrowDecimal decimal;

  ArrowDecimalInit(&decimal, 32, 8, 3);
  ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_DECIMAL32), NANOARROW_OK);
  EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);

  ArrowDecimalSetInt(&decimal, 12345);
  EXPECT_EQ(ArrowArrayAppendDecimal(&array, &decimal), NANOARROW_OK);

  EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);

  ArrowDecimalSetInt(&decimal, -67890);
  EXPECT_EQ(ArrowArrayAppendDecimal(&array, &decimal), NANOARROW_OK);

  // These checks are fatal because the raw buffer reads below are only valid
  // when the array was finished successfully and really holds four elements.
  ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
  ASSERT_EQ(array.length, 4);
  EXPECT_EQ(array.null_count, 2);
  auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
  auto data_buffer = reinterpret_cast<const uint8_t*>(array.buffers[1]);
  ASSERT_NE(validity_buffer, nullptr);
  ASSERT_NE(data_buffer, nullptr);
  // Bits 0 and 3 set: the first and last elements are the non-null ones.
  EXPECT_EQ(validity_buffer[0], 0b00001001);

  // Each decimal32 slot is 4 bytes wide; compare slots 0 and 3 with the bytes
  // that ArrowDecimalSetInt() produces for the same values.
  ArrowDecimalSetInt(&decimal, 12345);
  EXPECT_EQ(memcmp(data_buffer, decimal.words, 4), 0);
  ArrowDecimalSetInt(&decimal, -67890);
  EXPECT_EQ(memcmp(data_buffer + 3 * 4, decimal.words, 4), 0);

#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18
  // NOTE(review): ImportArray() presumably takes over `array`, which is why only
  // the #else branch releases it explicitly -- confirm against the Arrow C bridge.
  auto arrow_array = ImportArray(&array, decimal32(8, 3));
  ARROW_EXPECT_OK(arrow_array);
  // Stop here on failure instead of calling ValueUnsafe() on an error Result.
  ASSERT_TRUE(arrow_array.ok());

  auto builder = Decimal32Builder(decimal32(8, 3));
  ARROW_EXPECT_OK(builder.Append(*Decimal32::FromString("12.345")));
  ARROW_EXPECT_OK(builder.AppendNulls(2));
  ARROW_EXPECT_OK(builder.Append(*Decimal32::FromString("-67.890")));
  auto expected_array = builder.Finish();
  ARROW_EXPECT_OK(expected_array);
  ASSERT_TRUE(expected_array.ok());

  EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
#else
  ArrowArrayRelease(&array);
#endif
}

// Appends [12345, null, null, -67890] (unscaled integers for a decimal64 with
// precision 10 and scale 3) through the nanoarrow builder API, then checks the
// resulting length, null count, validity bitmap and raw 8-byte data slots.
// When built against Arrow >= 18 the result is also compared with an array
// produced by arrow::Decimal64Builder.
TEST(ArrayTest, ArrayTestAppendToDecimal64Array) {
  struct ArrowArray array;
  struct ArrowDecimal decimal;

  ArrowDecimalInit(&decimal, 64, 10, 3);
  ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_DECIMAL64), NANOARROW_OK);
  EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);

  ArrowDecimalSetInt(&decimal, 12345);
  EXPECT_EQ(ArrowArrayAppendDecimal(&array, &decimal), NANOARROW_OK);

  EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);

  ArrowDecimalSetInt(&decimal, -67890);
  EXPECT_EQ(ArrowArrayAppendDecimal(&array, &decimal), NANOARROW_OK);

  // These checks are fatal because the raw buffer reads below are only valid
  // when the array was finished successfully and really holds four elements.
  ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
  ASSERT_EQ(array.length, 4);
  EXPECT_EQ(array.null_count, 2);
  auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
  auto data_buffer = reinterpret_cast<const uint8_t*>(array.buffers[1]);
  ASSERT_NE(validity_buffer, nullptr);
  ASSERT_NE(data_buffer, nullptr);
  // Bits 0 and 3 set: the first and last elements are the non-null ones.
  EXPECT_EQ(validity_buffer[0], 0b00001001);

  // Each decimal64 slot is 8 bytes wide; compare slots 0 and 3 with the bytes
  // that ArrowDecimalSetInt() produces for the same values.
  ArrowDecimalSetInt(&decimal, 12345);
  EXPECT_EQ(memcmp(data_buffer, decimal.words, 8), 0);
  ArrowDecimalSetInt(&decimal, -67890);
  EXPECT_EQ(memcmp(data_buffer + 3 * 8, decimal.words, 8), 0);

#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18
  // NOTE(review): ImportArray() presumably takes over `array`, which is why only
  // the #else branch releases it explicitly -- confirm against the Arrow C bridge.
  auto arrow_array = ImportArray(&array, decimal64(10, 3));
  ARROW_EXPECT_OK(arrow_array);
  // Stop here on failure instead of calling ValueUnsafe() on an error Result.
  ASSERT_TRUE(arrow_array.ok());

  auto builder = Decimal64Builder(decimal64(10, 3));
  ARROW_EXPECT_OK(builder.Append(*Decimal64::FromString("12.345")));
  ARROW_EXPECT_OK(builder.AppendNulls(2));
  ARROW_EXPECT_OK(builder.Append(*Decimal64::FromString("-67.890")));
  auto expected_array = builder.Finish();
  ARROW_EXPECT_OK(expected_array);
  ASSERT_TRUE(expected_array.ok());

  EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
#else
  ArrowArrayRelease(&array);
#endif
}

TEST(ArrayTest, ArrayTestAppendToDecimal128Array) {
struct ArrowArray array;
struct ArrowDecimal decimal;
Expand Down Expand Up @@ -3821,6 +3909,82 @@ TEST(ArrayViewTest, ArrayViewTestGetIntervalMonthDayNano) {
ArrowArrayRelease(&array);
}

#if ARROW_VERSION_MAJOR >= 18
// Builds a decimal32(8, 3) array [1.234, null, null, -5.678] with Arrow C++,
// exports it through the C data interface, wraps it in an ArrowArrayView and
// checks that ArrowArrayViewGetDecimalUnsafe() yields the unscaled integers
// 1234 and -5678 for the two non-null elements.
TEST(ArrayViewTest, ArrayViewTestGetDecimal32) {
  struct ArrowArray array;
  struct ArrowSchema schema;
  struct ArrowArrayView array_view;
  struct ArrowError error;

  auto type = decimal32(8, 3);

  // Array with nulls
  auto builder = Decimal32Builder(type);
  ARROW_EXPECT_OK(builder.Append(*Decimal32::FromReal(1.234, 8, 3)));
  ARROW_EXPECT_OK(builder.AppendNulls(2));
  ARROW_EXPECT_OK(builder.Append(*Decimal32::FromReal(-5.678, 8, 3)));
  auto maybe_arrow_array = builder.Finish();
  ARROW_EXPECT_OK(maybe_arrow_array);
  // Stop here on failure instead of dereferencing an error Result below.
  ASSERT_TRUE(maybe_arrow_array.ok());
  auto arrow_array = maybe_arrow_array.ValueUnsafe();

  // `array` and `schema` are only safe to hand to nanoarrow if the export
  // succeeded, so a failed export must end the test.
  const auto export_status = ExportArray(*arrow_array, &array, &schema);
  ARROW_EXPECT_OK(export_status);
  ASSERT_TRUE(export_status.ok());
  ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
  ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
  EXPECT_EQ(ArrowArrayViewValidate(&array_view, NANOARROW_VALIDATION_LEVEL_FULL, &error),
            NANOARROW_OK);

  ArrowDecimal decimal;
  ArrowDecimalInit(&decimal, 32, 8, 3);

  // Only the non-null slots (indices 0 and 3) are read back.
  ArrowArrayViewGetDecimalUnsafe(&array_view, 0, &decimal);
  EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), 1234);

  ArrowArrayViewGetDecimalUnsafe(&array_view, 3, &decimal);
  EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), -5678);

  ArrowArrayViewReset(&array_view);
  ArrowSchemaRelease(&schema);
  ArrowArrayRelease(&array);
}

// Builds a decimal64(10, 3) array [1.234, null, null, -5.678] with Arrow C++,
// exports it through the C data interface, wraps it in an ArrowArrayView and
// checks that ArrowArrayViewGetDecimalUnsafe() yields the unscaled integers
// 1234 and -5678 for the two non-null elements.
TEST(ArrayViewTest, ArrayViewTestGetDecimal64) {
  struct ArrowArray array;
  struct ArrowSchema schema;
  struct ArrowArrayView array_view;
  struct ArrowError error;

  auto type = decimal64(10, 3);

  // Array with nulls
  auto builder = Decimal64Builder(type);
  ARROW_EXPECT_OK(builder.Append(*Decimal64::FromReal(1.234, 10, 3)));
  ARROW_EXPECT_OK(builder.AppendNulls(2));
  ARROW_EXPECT_OK(builder.Append(*Decimal64::FromReal(-5.678, 10, 3)));
  auto maybe_arrow_array = builder.Finish();
  ARROW_EXPECT_OK(maybe_arrow_array);
  // Stop here on failure instead of dereferencing an error Result below.
  ASSERT_TRUE(maybe_arrow_array.ok());
  auto arrow_array = maybe_arrow_array.ValueUnsafe();

  // `array` and `schema` are only safe to hand to nanoarrow if the export
  // succeeded, so a failed export must end the test.
  const auto export_status = ExportArray(*arrow_array, &array, &schema);
  ARROW_EXPECT_OK(export_status);
  ASSERT_TRUE(export_status.ok());
  ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
  ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
  EXPECT_EQ(ArrowArrayViewValidate(&array_view, NANOARROW_VALIDATION_LEVEL_FULL, &error),
            NANOARROW_OK);

  ArrowDecimal decimal;
  ArrowDecimalInit(&decimal, 64, 10, 3);

  // Only the non-null slots (indices 0 and 3) are read back.
  ArrowArrayViewGetDecimalUnsafe(&array_view, 0, &decimal);
  EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), 1234);

  ArrowArrayViewGetDecimalUnsafe(&array_view, 3, &decimal);
  EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), -5678);

  ArrowArrayViewReset(&array_view);
  ArrowSchemaRelease(&schema);
  ArrowArrayRelease(&array);
}
#endif

TEST(ArrayViewTest, ArrayViewTestGetDecimal128) {
struct ArrowArray array;
struct ArrowSchema schema;
Expand Down
22 changes: 22 additions & 0 deletions src/nanoarrow/common/inline_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,22 @@ static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array,
struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1);

switch (private_data->storage_type) {
case NANOARROW_TYPE_DECIMAL32:
if (value->n_words != 0) {
return EINVAL;
} else {
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppend(data_buffer, value->words, sizeof(uint32_t)));
break;
}
case NANOARROW_TYPE_DECIMAL64:
if (value->n_words != 1) {
return EINVAL;
} else {
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppend(data_buffer, value->words, sizeof(uint64_t)));
break;
}
case NANOARROW_TYPE_DECIMAL128:
if (value->n_words != 2) {
return EINVAL;
Expand Down Expand Up @@ -1267,6 +1283,12 @@ static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView* a
i += array_view->offset;
const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8;
switch (array_view->storage_type) {
case NANOARROW_TYPE_DECIMAL32:
ArrowDecimalSetBytes(out, data_view + (i * 4));
break;
case NANOARROW_TYPE_DECIMAL64:
ArrowDecimalSetBytes(out, data_view + (i * 8));
break;
case NANOARROW_TYPE_DECIMAL128:
ArrowDecimalSetBytes(out, data_view + (i * 16));
break;
Expand Down
Loading

0 comments on commit e54b7df

Please sign in to comment.