diff --git a/.github/workflows/build-and-test-device.yaml b/.github/workflows/build-and-test-device.yaml index 3304d8fb1..7fd2c388f 100644 --- a/.github/workflows/build-and-test-device.yaml +++ b/.github/workflows/build-and-test-device.yaml @@ -85,7 +85,7 @@ jobs: if: steps.cache-arrow-build.outputs.cache-hit != 'true' shell: bash run: | - ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow + ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow - name: Build run: | diff --git a/.github/workflows/build-and-test-ipc.yaml b/.github/workflows/build-and-test-ipc.yaml index cd28908a8..6defa4bd0 100644 --- a/.github/workflows/build-and-test-ipc.yaml +++ b/.github/workflows/build-and-test-ipc.yaml @@ -78,7 +78,7 @@ jobs: if: steps.cache-arrow-build.outputs.cache-hit != 'true' shell: bash run: | - ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow + ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow - name: Build run: | diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 9432c359a..ab9b8e8e7 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -70,7 +70,7 @@ jobs: if: steps.cache-arrow-build.outputs.cache-hit != 'true' shell: bash run: | - ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow + ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow - name: Build nanoarrow run: | @@ -154,7 +154,7 @@ jobs: if: steps.cache-arrow-build.outputs.cache-hit != 'true' shell: bash run: | - ci/scripts/build-arrow-cpp-minimal.sh 16.0.0 arrow + ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow - name: Run meson testing script run: | diff --git a/.github/workflows/clang-tidy.yaml b/.github/workflows/clang-tidy.yaml index f7fbdfbcb..a5e4d1a32 100644 --- a/.github/workflows/clang-tidy.yaml +++ b/.github/workflows/clang-tidy.yaml @@ -54,7 +54,7 @@ jobs: if: steps.cache-arrow-build.outputs.cache-hit != 'true' shell: bash run: | - ci/scripts/build-arrow-cpp-minimal.sh 15.0.2 arrow + 
ci/scripts/build-arrow-cpp-minimal.sh 18.0.0 arrow - name: Build nanoarrow run: | diff --git a/ci/docker/alpine.dockerfile b/ci/docker/alpine.dockerfile index 1c9d51374..9aeb0aa78 100644 --- a/ci/docker/alpine.dockerfile +++ b/ci/docker/alpine.dockerfile @@ -23,7 +23,7 @@ RUN apk add bash linux-headers git cmake R R-dev g++ gfortran gnupg curl py3-vir # For Arrow C++ COPY ci/scripts/build-arrow-cpp-minimal.sh / -RUN /build-arrow-cpp-minimal.sh 15.0.2 /arrow +RUN /build-arrow-cpp-minimal.sh 18.0.0 /arrow # There's a missing define that numpy's build needs on s390x and there is no wheel RUN (grep -e "S390" /usr/include/bits/hwcap.h && echo "#define HWCAP_S390_VX HWCAP_S390_VXRS" >> /usr/include/bits/hwcap.h) || true diff --git a/src/nanoarrow/common/array.c b/src/nanoarrow/common/array.c index 3d04d0bc0..53cd4c659 100644 --- a/src/nanoarrow/common/array.c +++ b/src/nanoarrow/common/array.c @@ -104,6 +104,8 @@ static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, case NANOARROW_TYPE_HALF_FLOAT: case NANOARROW_TYPE_FLOAT: case NANOARROW_TYPE_DOUBLE: + case NANOARROW_TYPE_DECIMAL32: + case NANOARROW_TYPE_DECIMAL64: case NANOARROW_TYPE_DECIMAL128: case NANOARROW_TYPE_DECIMAL256: case NANOARROW_TYPE_INTERVAL_MONTHS: diff --git a/src/nanoarrow/common/array_test.cc b/src/nanoarrow/common/array_test.cc index 04d3b1d25..dddc779d8 100644 --- a/src/nanoarrow/common/array_test.cc +++ b/src/nanoarrow/common/array_test.cc @@ -1245,6 +1245,94 @@ TEST(ArrayTest, ArrayTestAppendToIntervalArrayMonthDayNano) { #endif } +TEST(ArrayTest, ArrayTestAppendToDecimal32Array) { + struct ArrowArray array; + struct ArrowDecimal decimal; + + ArrowDecimalInit(&decimal, 32, 8, 3); + ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_DECIMAL32), NANOARROW_OK); + EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK); + + ArrowDecimalSetInt(&decimal, 12345); + EXPECT_EQ(ArrowArrayAppendDecimal(&array, &decimal), NANOARROW_OK); + + EXPECT_EQ(ArrowArrayAppendNull(&array, 2), 
NANOARROW_OK); + + ArrowDecimalSetInt(&decimal, -67890); + EXPECT_EQ(ArrowArrayAppendDecimal(&array, &decimal), NANOARROW_OK); + + EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK); + EXPECT_EQ(array.length, 4); + EXPECT_EQ(array.null_count, 2); + auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]); + auto data_buffer = reinterpret_cast<const uint8_t*>(array.buffers[1]); + EXPECT_EQ(validity_buffer[0], 0b00001001); + + ArrowDecimalSetInt(&decimal, 12345); + EXPECT_EQ(memcmp(data_buffer, decimal.words, 4), 0); + ArrowDecimalSetInt(&decimal, -67890); + EXPECT_EQ(memcmp(data_buffer + 3 * 4, decimal.words, 4), 0); + +#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18 + auto arrow_array = ImportArray(&array, decimal32(8, 3)); + ARROW_EXPECT_OK(arrow_array); + + auto builder = Decimal32Builder(decimal32(8, 3)); + ARROW_EXPECT_OK(builder.Append(*Decimal32::FromString("12.345"))); + ARROW_EXPECT_OK(builder.AppendNulls(2)); + ARROW_EXPECT_OK(builder.Append(*Decimal32::FromString("-67.890"))); + auto expected_array = builder.Finish(); + + EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe())); +#else + ArrowArrayRelease(&array); +#endif +} + +TEST(ArrayTest, ArrayTestAppendToDecimal64Array) { + struct ArrowArray array; + struct ArrowDecimal decimal; + + ArrowDecimalInit(&decimal, 64, 10, 3); + ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_DECIMAL64), NANOARROW_OK); + EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK); + + ArrowDecimalSetInt(&decimal, 12345); + EXPECT_EQ(ArrowArrayAppendDecimal(&array, &decimal), NANOARROW_OK); + + EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK); + + ArrowDecimalSetInt(&decimal, -67890); + EXPECT_EQ(ArrowArrayAppendDecimal(&array, &decimal), NANOARROW_OK); + + EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK); + EXPECT_EQ(array.length, 4); + EXPECT_EQ(array.null_count, 2); + auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]); + 
auto data_buffer = reinterpret_cast<const uint8_t*>(array.buffers[1]); + EXPECT_EQ(validity_buffer[0], 0b00001001); + + ArrowDecimalSetInt(&decimal, 12345); + EXPECT_EQ(memcmp(data_buffer, decimal.words, 8), 0); + ArrowDecimalSetInt(&decimal, -67890); + EXPECT_EQ(memcmp(data_buffer + 3 * 8, decimal.words, 8), 0); + +#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18 + auto arrow_array = ImportArray(&array, decimal64(10, 3)); + ARROW_EXPECT_OK(arrow_array); + + auto builder = Decimal64Builder(decimal64(10, 3)); + ARROW_EXPECT_OK(builder.Append(*Decimal64::FromString("12.345"))); + ARROW_EXPECT_OK(builder.AppendNulls(2)); + ARROW_EXPECT_OK(builder.Append(*Decimal64::FromString("-67.890"))); + auto expected_array = builder.Finish(); + + EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe())); +#else + ArrowArrayRelease(&array); +#endif +} + TEST(ArrayTest, ArrayTestAppendToDecimal128Array) { struct ArrowArray array; struct ArrowDecimal decimal; @@ -3821,6 +3909,82 @@ TEST(ArrayViewTest, ArrayViewTestGetIntervalMonthDayNano) { ArrowArrayRelease(&array); } +#if ARROW_VERSION_MAJOR >= 18 +TEST(ArrayViewTest, ArrayViewTestGetDecimal32) { + struct ArrowArray array; + struct ArrowSchema schema; + struct ArrowArrayView array_view; + struct ArrowError error; + + auto type = decimal32(8, 3); + + // Array with nulls + auto builder = Decimal32Builder(type); + ARROW_EXPECT_OK(builder.Append(*Decimal32::FromReal(1.234, 8, 3))); + ARROW_EXPECT_OK(builder.AppendNulls(2)); + ARROW_EXPECT_OK(builder.Append(*Decimal32::FromReal(-5.678, 8, 3))); + auto maybe_arrow_array = builder.Finish(); + ARROW_EXPECT_OK(maybe_arrow_array); + auto arrow_array = maybe_arrow_array.ValueUnsafe(); + + ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema)); + ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK); + ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK); + EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error), + NANOARROW_OK); + + ArrowDecimal decimal; + ArrowDecimalInit(&decimal, 32, 8, 3); + + ArrowArrayViewGetDecimalUnsafe(&array_view, 0, &decimal); + EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), 1234); + + ArrowArrayViewGetDecimalUnsafe(&array_view, 3, &decimal); + EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), -5678); + + ArrowArrayViewReset(&array_view); + ArrowSchemaRelease(&schema); + ArrowArrayRelease(&array); +} + +TEST(ArrayViewTest, ArrayViewTestGetDecimal64) { + struct ArrowArray array; + struct ArrowSchema schema; + struct ArrowArrayView array_view; + struct ArrowError error; + + auto type = decimal64(10, 3); + + // Array with nulls + auto builder = Decimal64Builder(type); + ARROW_EXPECT_OK(builder.Append(*Decimal64::FromReal(1.234, 10, 3))); + ARROW_EXPECT_OK(builder.AppendNulls(2)); + ARROW_EXPECT_OK(builder.Append(*Decimal64::FromReal(-5.678, 10, 3))); + auto maybe_arrow_array = builder.Finish(); + ARROW_EXPECT_OK(maybe_arrow_array); + auto arrow_array = maybe_arrow_array.ValueUnsafe(); + + ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema)); + ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK); + ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK); + EXPECT_EQ(ArrowArrayViewValidate(&array_view, NANOARROW_VALIDATION_LEVEL_FULL, &error), + NANOARROW_OK); + + ArrowDecimal decimal; + ArrowDecimalInit(&decimal, 64, 10, 3); + + ArrowArrayViewGetDecimalUnsafe(&array_view, 0, &decimal); + EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), 1234); + + ArrowArrayViewGetDecimalUnsafe(&array_view, 3, &decimal); + EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), -5678); + + ArrowArrayViewReset(&array_view); + ArrowSchemaRelease(&schema); + ArrowArrayRelease(&array); +} +#endif + TEST(ArrayViewTest, ArrayViewTestGetDecimal128) { struct ArrowArray array; struct ArrowSchema schema; diff --git a/src/nanoarrow/common/inline_array.h b/src/nanoarrow/common/inline_array.h 
index e85228d9e..9fe5e0b70 100644 --- a/src/nanoarrow/common/inline_array.h +++ b/src/nanoarrow/common/inline_array.h @@ -700,6 +700,22 @@ static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); switch (private_data->storage_type) { + case NANOARROW_TYPE_DECIMAL32: + if (value->n_words != 0) { + return EINVAL; + } else { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value->words, sizeof(uint32_t))); + break; + } + case NANOARROW_TYPE_DECIMAL64: + if (value->n_words != 1) { + return EINVAL; + } else { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value->words, sizeof(uint64_t))); + break; + } case NANOARROW_TYPE_DECIMAL128: if (value->n_words != 2) { return EINVAL; @@ -1267,6 +1283,12 @@ static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView* a i += array_view->offset; const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; switch (array_view->storage_type) { + case NANOARROW_TYPE_DECIMAL32: + ArrowDecimalSetBytes(out, data_view + (i * 4)); + break; + case NANOARROW_TYPE_DECIMAL64: + ArrowDecimalSetBytes(out, data_view + (i * 8)); + break; case NANOARROW_TYPE_DECIMAL128: ArrowDecimalSetBytes(out, data_view + (i * 16)); break; diff --git a/src/nanoarrow/common/inline_types.h b/src/nanoarrow/common/inline_types.h index 0a6026188..d4fdfba97 100644 --- a/src/nanoarrow/common/inline_types.h +++ b/src/nanoarrow/common/inline_types.h @@ -453,7 +453,9 @@ enum ArrowType { NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO, NANOARROW_TYPE_RUN_END_ENCODED, NANOARROW_TYPE_BINARY_VIEW, - NANOARROW_TYPE_STRING_VIEW + NANOARROW_TYPE_STRING_VIEW, + NANOARROW_TYPE_DECIMAL32, + NANOARROW_TYPE_DECIMAL64 }; /// \brief Get a string value of an enum ArrowType value @@ -510,6 +512,10 @@ static inline const char* ArrowTypeString(enum ArrowType type) { return "interval_months"; case NANOARROW_TYPE_INTERVAL_DAY_TIME: return "interval_day_time"; + case 
NANOARROW_TYPE_DECIMAL32: + return "decimal32"; + case NANOARROW_TYPE_DECIMAL64: + return "decimal64"; case NANOARROW_TYPE_DECIMAL128: return "decimal128"; case NANOARROW_TYPE_DECIMAL256: @@ -890,7 +896,8 @@ static inline void ArrowIntervalInit(struct ArrowInterval* interval, /// values set using ArrowDecimalSetInt(), ArrowDecimalSetBytes128(), /// or ArrowDecimalSetBytes256(). struct ArrowDecimal { - /// \brief An array of 64-bit integers of n_words length defined in native-endian order + /// \brief An array of 64-bit integers of n_words length defined in native-endian order. + /// For a 32-bit decimal value, index 0 will be a 32-bit integer value. uint64_t words[4]; /// \brief The number of significant digits this decimal number can represent @@ -899,7 +906,8 @@ struct ArrowDecimal { /// \brief The number of digits after the decimal point. This can be negative. int32_t scale; - /// \brief The number of words in the words array + /// \brief The number of 64-bit words in the words array. For the special case of a + /// 32-bit decimal value, this will be 0. int n_words; /// \brief Cached value used by the implementation @@ -916,13 +924,14 @@ static inline void ArrowDecimalInit(struct ArrowDecimal* decimal, int32_t bitwid memset(decimal->words, 0, sizeof(decimal->words)); decimal->precision = precision; decimal->scale = scale; + // n_words will be 0 for bitwidth == 32 decimal->n_words = (int)(bitwidth / 8 / sizeof(uint64_t)); if (_ArrowIsLittleEndian()) { decimal->low_word_index = 0; - decimal->high_word_index = decimal->n_words - 1; + decimal->high_word_index = decimal->n_words > 0 ? decimal->n_words - 1 : 0; } else { - decimal->low_word_index = decimal->n_words - 1; + decimal->low_word_index = decimal->n_words > 0 ? 
decimal->n_words - 1 : 0; decimal->high_word_index = 0; } } @@ -933,6 +942,9 @@ static inline void ArrowDecimalInit(struct ArrowDecimal* decimal, int32_t bitwid /// within the signed 64-bit integer range (A precision less than or equal /// to 18 is sufficiently small). static inline int64_t ArrowDecimalGetIntUnsafe(const struct ArrowDecimal* decimal) { + if (decimal->n_words == 0) { + return (int32_t)decimal->words[0]; + } return (int64_t)decimal->words[decimal->low_word_index]; } @@ -940,18 +952,28 @@ static inline int64_t ArrowDecimalGetIntUnsafe(const struct ArrowDecimal* decima /// \ingroup nanoarrow-utils static inline void ArrowDecimalGetBytes(const struct ArrowDecimal* decimal, uint8_t* out) { - memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t)); + memcpy(out, decimal->words, + decimal->n_words > 0 ? decimal->n_words * sizeof(uint64_t) : sizeof(int32_t)); } /// \brief Returns 1 if the value represented by decimal is >= 0 or -1 otherwise /// \ingroup nanoarrow-utils static inline int64_t ArrowDecimalSign(const struct ArrowDecimal* decimal) { + if (decimal->n_words == 0) { + return 1 | ((int32_t)(decimal->words[0]) >> 31); + } + return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63); } /// \brief Sets the integer value of this decimal /// \ingroup nanoarrow-utils static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t value) { + if (decimal->n_words == 0) { + decimal->words[0] = (int32_t)value; + return; + } + if (value < 0) { memset(decimal->words, 0xff, decimal->n_words * sizeof(uint64_t)); } else { @@ -964,6 +986,13 @@ static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t valu /// \brief Negate the value of this decimal in place /// \ingroup nanoarrow-utils static inline void ArrowDecimalNegate(struct ArrowDecimal* decimal) { + if (decimal->n_words == 0) { + uint32_t elem = (uint32_t)decimal->words[0]; + elem = ~elem + 1; + decimal->words[0] = (int32_t)elem; + return; + } + uint64_t carry = 1; if 
(decimal->low_word_index == 0) { @@ -987,7 +1016,11 @@ static inline void ArrowDecimalNegate(struct ArrowDecimal* decimal) { /// \ingroup nanoarrow-utils static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal, const uint8_t* value) { - memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t)); + if (decimal->n_words == 0) { + memcpy(decimal->words, value, sizeof(int32_t)); + } else { + memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t)); + } } #ifdef __cplusplus diff --git a/src/nanoarrow/common/schema.c b/src/nanoarrow/common/schema.c index 28fb33803..b0e538c08 100644 --- a/src/nanoarrow/common/schema.c +++ b/src/nanoarrow/common/schema.c @@ -271,6 +271,14 @@ ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowT char buffer[64]; int n_chars; switch (type) { + case NANOARROW_TYPE_DECIMAL32: + n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,32", decimal_precision, + decimal_scale); + break; + case NANOARROW_TYPE_DECIMAL64: + n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,64", decimal_precision, + decimal_scale); + break; case NANOARROW_TYPE_DECIMAL128: n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d", decimal_precision, decimal_scale); @@ -721,6 +729,12 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view, *format_end_out = parse_end; switch (schema_view->decimal_bitwidth) { + case 32: + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL32); + return NANOARROW_OK; + case 64: + ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL64); + return NANOARROW_OK; case 128: ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL128); return NANOARROW_OK; @@ -1157,6 +1171,8 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_vie case NANOARROW_TYPE_HALF_FLOAT: case NANOARROW_TYPE_FLOAT: case NANOARROW_TYPE_DOUBLE: + case NANOARROW_TYPE_DECIMAL32: + case NANOARROW_TYPE_DECIMAL64: case NANOARROW_TYPE_DECIMAL128: 
case NANOARROW_TYPE_DECIMAL256: case NANOARROW_TYPE_STRING: @@ -1316,6 +1332,8 @@ static int64_t ArrowSchemaTypeToStringInternal(struct ArrowSchemaView* schema_vi char* out, int64_t n) { const char* type_string = ArrowTypeString(schema_view->type); switch (schema_view->type) { + case NANOARROW_TYPE_DECIMAL32: + case NANOARROW_TYPE_DECIMAL64: case NANOARROW_TYPE_DECIMAL128: case NANOARROW_TYPE_DECIMAL256: return snprintf(out, n, "%s(%" PRId32 ", %" PRId32 ")", type_string, diff --git a/src/nanoarrow/common/schema_test.cc b/src/nanoarrow/common/schema_test.cc index f05f5e08f..d620a4742 100644 --- a/src/nanoarrow/common/schema_test.cc +++ b/src/nanoarrow/common/schema_test.cc @@ -248,6 +248,30 @@ TEST(SchemaTest, SchemaInitDecimal) { arrow_type = ImportType(&schema); ARROW_EXPECT_OK(arrow_type); EXPECT_TRUE(arrow_type.ValueUnsafe()->Equals(decimal256(3, 4))); + + ArrowSchemaInit(&schema); + EXPECT_EQ(ArrowSchemaSetTypeDecimal(&schema, NANOARROW_TYPE_DECIMAL32, 3, 4), + NANOARROW_OK); + EXPECT_STREQ(schema.format, "d:3,4,32"); +#if ARROW_VERSION_MAJOR >= 18 + arrow_type = ImportType(&schema); + ARROW_EXPECT_OK(arrow_type); + EXPECT_TRUE(arrow_type.ValueUnsafe()->Equals(decimal32(3, 4))); +#else + ArrowSchemaRelease(&schema); +#endif + + ArrowSchemaInit(&schema); + EXPECT_EQ(ArrowSchemaSetTypeDecimal(&schema, NANOARROW_TYPE_DECIMAL64, 3, 4), + NANOARROW_OK); + EXPECT_STREQ(schema.format, "d:3,4,64"); +#if ARROW_VERSION_MAJOR >= 18 + arrow_type = ImportType(&schema); + ARROW_EXPECT_OK(arrow_type); + EXPECT_TRUE(arrow_type.ValueUnsafe()->Equals(decimal64(3, 4))); +#else + ArrowSchemaRelease(&schema); +#endif #else ArrowSchemaRelease(&schema); #endif @@ -785,6 +809,46 @@ TEST(SchemaViewTest, SchemaViewInitDecimal) { struct ArrowSchemaView schema_view; struct ArrowError error; +#if ARROW_VERSION_MAJOR >= 18 + ARROW_EXPECT_OK(ExportType(*decimal32(5, 6), &schema)); + EXPECT_EQ(ArrowSchemaViewInit(&schema_view, &schema, &error), NANOARROW_OK); + EXPECT_EQ(schema_view.type, 
NANOARROW_TYPE_DECIMAL32); + EXPECT_EQ(schema_view.storage_type, NANOARROW_TYPE_DECIMAL32); + EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY); + EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA); + EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE); + EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL); + EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_DECIMAL32); + EXPECT_EQ(schema_view.layout.buffer_data_type[2], NANOARROW_TYPE_UNINITIALIZED); + EXPECT_EQ(schema_view.layout.element_size_bits[0], 1); + EXPECT_EQ(schema_view.layout.element_size_bits[1], 32); + EXPECT_EQ(schema_view.layout.element_size_bits[2], 0); + EXPECT_EQ(schema_view.decimal_bitwidth, 32); + EXPECT_EQ(schema_view.decimal_precision, 5); + EXPECT_EQ(schema_view.decimal_scale, 6); + EXPECT_EQ(ArrowSchemaToStdString(&schema), "decimal32(5, 6)"); + ArrowSchemaRelease(&schema); + + ARROW_EXPECT_OK(ExportType(*decimal64(5, 6), &schema)); + EXPECT_EQ(ArrowSchemaViewInit(&schema_view, &schema, &error), NANOARROW_OK); + EXPECT_EQ(schema_view.type, NANOARROW_TYPE_DECIMAL64); + EXPECT_EQ(schema_view.storage_type, NANOARROW_TYPE_DECIMAL64); + EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY); + EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA); + EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE); + EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL); + EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_DECIMAL64); + EXPECT_EQ(schema_view.layout.buffer_data_type[2], NANOARROW_TYPE_UNINITIALIZED); + EXPECT_EQ(schema_view.layout.element_size_bits[0], 1); + EXPECT_EQ(schema_view.layout.element_size_bits[1], 64); + EXPECT_EQ(schema_view.layout.element_size_bits[2], 0); + EXPECT_EQ(schema_view.decimal_bitwidth, 64); + EXPECT_EQ(schema_view.decimal_precision, 5); + EXPECT_EQ(schema_view.decimal_scale, 
6); + EXPECT_EQ(ArrowSchemaToStdString(&schema), "decimal64(5, 6)"); + ArrowSchemaRelease(&schema); +#endif + ARROW_EXPECT_OK(ExportType(*decimal128(5, 6), &schema)); EXPECT_EQ(ArrowSchemaViewInit(&schema_view, &schema, &error), NANOARROW_OK); EXPECT_EQ(schema_view.type, NANOARROW_TYPE_DECIMAL128); diff --git a/src/nanoarrow/common/utils.c b/src/nanoarrow/common/utils.c index 7be65ea4f..400625f29 100644 --- a/src/nanoarrow/common/utils.c +++ b/src/nanoarrow/common/utils.c @@ -111,6 +111,7 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) { case NANOARROW_TYPE_UINT32: case NANOARROW_TYPE_INT32: case NANOARROW_TYPE_FLOAT: + case NANOARROW_TYPE_DECIMAL32: layout->element_size_bits[1] = 32; break; case NANOARROW_TYPE_INTERVAL_MONTHS: @@ -122,6 +123,7 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) { case NANOARROW_TYPE_INT64: case NANOARROW_TYPE_DOUBLE: case NANOARROW_TYPE_INTERVAL_DAY_TIME: + case NANOARROW_TYPE_DECIMAL64: layout->element_size_bits[1] = 64; break; @@ -326,7 +328,7 @@ ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, // Use 32-bit words for portability uint32_t words32[8]; - int n_words32 = decimal->n_words * 2; + int n_words32 = decimal->n_words > 0 ? 
decimal->n_words * 2 : 1; NANOARROW_DCHECK(n_words32 <= 8); memset(words32, 0, sizeof(words32)); @@ -356,11 +358,14 @@ ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal* decimal, // https://github.com/apache/arrow/blob/cd3321b28b0c9703e5d7105d6146c1270bbadd7f/cpp/src/arrow/util/decimal.cc#L365 ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decimal, struct ArrowBuffer* buffer) { - NANOARROW_DCHECK(decimal->n_words == 2 || decimal->n_words == 4); + NANOARROW_DCHECK(decimal->n_words == 0 || decimal->n_words == 1 || + decimal->n_words == 2 || decimal->n_words == 4); int is_negative = ArrowDecimalSign(decimal) < 0; uint64_t words_little_endian[4]; - if (decimal->low_word_index == 0) { + if (decimal->n_words == 0) { + words_little_endian[0] = (uint32_t)decimal->words[0]; + } else if (decimal->low_word_index == 0) { memcpy(words_little_endian, decimal->words, decimal->n_words * sizeof(uint64_t)); } else { for (int i = 0; i < decimal->n_words; i++) { @@ -370,21 +375,33 @@ ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal* decim // We've already made a copy, so negate that if needed if (is_negative) { - uint64_t carry = 1; - for (int i = 0; i < decimal->n_words; i++) { - uint64_t elem = words_little_endian[i]; - elem = ~elem + carry; - carry &= (elem == 0); - words_little_endian[i] = elem; + if (decimal->n_words == 0) { + uint32_t elem = (uint32_t)words_little_endian[0]; + elem = ~elem + 1; + words_little_endian[0] = elem; + } else { + uint64_t carry = 1; + for (int i = 0; i < decimal->n_words; i++) { + uint64_t elem = words_little_endian[i]; + elem = ~elem + carry; + carry &= (elem == 0); + words_little_endian[i] = elem; + } } } // Find the most significant word that is non-zero int most_significant_elem_idx = -1; - for (int i = decimal->n_words - 1; i >= 0; i--) { - if (words_little_endian[i] != 0) { - most_significant_elem_idx = i; - break; + if (decimal->n_words == 0) { + if 
(words_little_endian[0] != 0) { + most_significant_elem_idx = 0; + } + } else { + for (int i = decimal->n_words - 1; i >= 0; i--) { + if (words_little_endian[i] != 0) { + most_significant_elem_idx = i; + break; + } } } diff --git a/src/nanoarrow/common/utils_test.cc b/src/nanoarrow/common/utils_test.cc index be7ff7582..7c86ca51f 100644 --- a/src/nanoarrow/common/utils_test.cc +++ b/src/nanoarrow/common/utils_test.cc @@ -256,6 +256,86 @@ TEST(AllocatorTest, AllocatorTestMemoryPool) { #endif } +TEST(DecimalTest, Decimal32Test) { + struct ArrowDecimal decimal; + ArrowDecimalInit(&decimal, 32, 8, 3); + + EXPECT_EQ(decimal.n_words, 0); + EXPECT_EQ(decimal.precision, 8); + EXPECT_EQ(decimal.scale, 3); + +#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18 + auto dec_pos = *Decimal32::FromString("12.345"); + uint8_t bytes_pos[4]; + dec_pos.ToBytes(bytes_pos); + + auto dec_neg = *Decimal32::FromString("-34.567"); + uint8_t bytes_neg[4]; + dec_neg.ToBytes(bytes_neg); +#endif + + ArrowDecimalSetInt(&decimal, 12345); + EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), 12345); + EXPECT_EQ(ArrowDecimalSign(&decimal), 1); +#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18 + EXPECT_EQ(memcmp(decimal.words, bytes_pos, sizeof(bytes_pos)), 0); + ArrowDecimalSetBytes(&decimal, bytes_pos); + EXPECT_EQ(memcmp(decimal.words, bytes_pos, sizeof(bytes_pos)), 0); +#endif + + ArrowDecimalSetInt(&decimal, -34567); + EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), -34567); + EXPECT_EQ(ArrowDecimalSign(&decimal), -1); +#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18 + EXPECT_EQ(memcmp(decimal.words, bytes_neg, sizeof(bytes_neg)), 0); + ArrowDecimalSetBytes(&decimal, bytes_neg); + EXPECT_EQ(memcmp(decimal.words, bytes_neg, sizeof(bytes_neg)), 0); +#endif +} + +TEST(DecimalTest, Decimal64Test) { + struct ArrowDecimal decimal; + ArrowDecimalInit(&decimal, 64, 10, 3); + + EXPECT_EQ(decimal.n_words, 1); + 
EXPECT_EQ(decimal.precision, 10); + EXPECT_EQ(decimal.scale, 3); + + if (_ArrowIsLittleEndian()) { + EXPECT_EQ(decimal.high_word_index - decimal.low_word_index + 1, decimal.n_words); + } else { + EXPECT_EQ(decimal.low_word_index - decimal.high_word_index + 1, decimal.n_words); + } + +#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18 + auto dec_pos = *Decimal64::FromString("12.345"); + uint8_t bytes_pos[8]; + dec_pos.ToBytes(bytes_pos); + + auto dec_neg = *Decimal64::FromString("-34.567"); + uint8_t bytes_neg[8]; + dec_neg.ToBytes(bytes_neg); +#endif + + ArrowDecimalSetInt(&decimal, 12345); + EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), 12345); + EXPECT_EQ(ArrowDecimalSign(&decimal), 1); +#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18 + EXPECT_EQ(memcmp(decimal.words, bytes_pos, sizeof(bytes_pos)), 0); + ArrowDecimalSetBytes(&decimal, bytes_pos); + EXPECT_EQ(memcmp(decimal.words, bytes_pos, sizeof(bytes_pos)), 0); +#endif + + ArrowDecimalSetInt(&decimal, -34567); + EXPECT_EQ(ArrowDecimalGetIntUnsafe(&decimal), -34567); + EXPECT_EQ(ArrowDecimalSign(&decimal), -1); +#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) && ARROW_VERSION_MAJOR >= 18 + EXPECT_EQ(memcmp(decimal.words, bytes_neg, sizeof(bytes_neg)), 0); + ArrowDecimalSetBytes(&decimal, bytes_neg); + EXPECT_EQ(memcmp(decimal.words, bytes_neg, sizeof(bytes_neg)), 0); +#endif +} + TEST(DecimalTest, Decimal128Test) { struct ArrowDecimal decimal; ArrowDecimalInit(&decimal, 128, 10, 3); @@ -302,8 +382,12 @@ TEST(DecimalTest, DecimalNegateTest) { struct ArrowBuffer buffer; ArrowBufferInit(&buffer); - for (auto bitwidth : {128, 256}) { - ArrowDecimalInit(&decimal, bitwidth, 39, 0); + for (auto bitwidth : {32, 64, 128, 256}) { + if (bitwidth > 64) { + ArrowDecimalInit(&decimal, bitwidth, 39, 0); + } else { + ArrowDecimalInit(&decimal, bitwidth, 8, 3); + } // Check with a value whose value is contained entirely in the least significant digit ArrowDecimalSetInt(&decimal, 
12345); @@ -314,25 +398,41 @@ TEST(DecimalTest, DecimalNegateTest) { // Check with a value whose negative value will carry into a more significant digit memset(decimal.words, 0, sizeof(decimal.words)); - decimal.words[decimal.low_word_index] = std::numeric_limits<uint64_t>::max(); + if (bitwidth > 64) { + decimal.words[decimal.low_word_index] = std::numeric_limits<uint64_t>::max(); + } else if (bitwidth == 64) { + decimal.words[decimal.low_word_index] = std::numeric_limits<int64_t>::max(); + } else { + decimal.words[decimal.low_word_index] = std::numeric_limits<int32_t>::max(); + } ASSERT_EQ(ArrowDecimalSign(&decimal), 1); ArrowDecimalNegate(&decimal); ASSERT_EQ(ArrowDecimalSign(&decimal), -1); ArrowDecimalNegate(&decimal); ASSERT_EQ(ArrowDecimalSign(&decimal), 1); - EXPECT_EQ(decimal.words[decimal.low_word_index], - std::numeric_limits<uint64_t>::max()); + if (bitwidth > 64) { + EXPECT_EQ(decimal.words[decimal.low_word_index], + std::numeric_limits<uint64_t>::max()); + } else if (bitwidth == 64) { + EXPECT_EQ(decimal.words[decimal.low_word_index], + std::numeric_limits<int64_t>::max()); + } else { + EXPECT_EQ(decimal.words[decimal.low_word_index], + std::numeric_limits<int32_t>::max()); + } - // Check with a large value that fits in the 128 bit size - ASSERT_EQ( - ArrowDecimalSetDigits(&decimal, "123456789012345678901234567890123456789"_asv), - NANOARROW_OK); - ArrowDecimalNegate(&decimal); + if (bitwidth > 64) { + // Check with a large value that fits in the 128 bit size + ASSERT_EQ( + ArrowDecimalSetDigits(&decimal, "123456789012345678901234567890123456789"_asv), + NANOARROW_OK); + ArrowDecimalNegate(&decimal); - buffer.size_bytes = 0; - ASSERT_EQ(ArrowDecimalAppendDigitsToBuffer(&decimal, &buffer), NANOARROW_OK); - EXPECT_EQ(std::string(reinterpret_cast<char*>(buffer.data), buffer.size_bytes), - "-123456789012345678901234567890123456789"); + buffer.size_bytes = 0; + ASSERT_EQ(ArrowDecimalAppendDigitsToBuffer(&decimal, &buffer), NANOARROW_OK); + EXPECT_EQ(std::string(reinterpret_cast<char*>(buffer.data), buffer.size_bytes), + 
"-123456789012345678901234567890123456789"); + } } // Check with a large value that only fits in the 256 bit range diff --git a/src/nanoarrow/ipc/decoder.c b/src/nanoarrow/ipc/decoder.c index d2bad28ad..b74b4e0e6 100644 --- a/src/nanoarrow/ipc/decoder.c +++ b/src/nanoarrow/ipc/decoder.c @@ -429,6 +429,14 @@ static int ArrowIpcDecoderSetTypeDecimal(struct ArrowSchema* schema, int result; switch (bitwidth) { + case 32: + result = + ArrowSchemaSetTypeDecimal(schema, NANOARROW_TYPE_DECIMAL32, precision, scale); + break; + case 64: + result = + ArrowSchemaSetTypeDecimal(schema, NANOARROW_TYPE_DECIMAL64, precision, scale); + break; case 128: result = ArrowSchemaSetTypeDecimal(schema, NANOARROW_TYPE_DECIMAL128, precision, scale); @@ -1538,13 +1546,21 @@ static int ArrowIpcDecoderSwapEndian(struct ArrowIpcBufferSource* src, } switch (src->data_type) { + case NANOARROW_TYPE_DECIMAL32: { + uint32_t* ptr = (uint32_t*)dst->data; + for (int64_t i = 0; i < (dst->size_bytes / 4); i++) { + ptr[i] = bswap32(out_view->data.as_uint32[i]); + } + break; + } + case NANOARROW_TYPE_DECIMAL64: case NANOARROW_TYPE_DECIMAL128: case NANOARROW_TYPE_DECIMAL256: { const uint64_t* ptr_src = out_view->data.as_uint64; uint64_t* ptr_dst = (uint64_t*)dst->data; uint64_t words[4]; int n_words = (int)(src->element_size_bits / 64); - NANOARROW_DCHECK(n_words == 2 || n_words == 4); + NANOARROW_DCHECK(n_words == 1 || n_words == 2 || n_words == 4); for (int64_t i = 0; i < (dst->size_bytes / n_words / 8); i++) { for (int j = 0; j < n_words; j++) { diff --git a/src/nanoarrow/ipc/encoder.c b/src/nanoarrow/ipc/encoder.c index 6e920cddd..d58761432 100644 --- a/src/nanoarrow/ipc/encoder.c +++ b/src/nanoarrow/ipc/encoder.c @@ -184,6 +184,8 @@ static ArrowErrorCode ArrowIpcEncodeFieldType(flatcc_builder_t* builder, Field_type_FloatingPoint_create(builder, ns(Precision_DOUBLE)), error); return NANOARROW_OK; + case NANOARROW_TYPE_DECIMAL32: + case NANOARROW_TYPE_DECIMAL64: case NANOARROW_TYPE_DECIMAL128: case 
NANOARROW_TYPE_DECIMAL256: FLATCC_RETURN_UNLESS_0( diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h index 312ecbecd..3ca294f06 100644 --- a/src/nanoarrow/nanoarrow.h +++ b/src/nanoarrow/nanoarrow.h @@ -369,8 +369,9 @@ ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema, /// \brief Set the format field of a decimal schema /// /// Returns EINVAL for scale <= 0 or for type that is not -/// NANOARROW_TYPE_DECIMAL128 or NANOARROW_TYPE_DECIMAL256. Schema must have been -/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +/// NANOARROW_TYPE_DECIMAL32, NANOARROW_TYPE_DECIMAL64, NANOARROW_TYPE_DECIMAL128 or +/// NANOARROW_TYPE_DECIMAL256. Schema must have been initialized using +/// ArrowSchemaInit() or ArrowSchemaDeepCopy(). ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type, int32_t decimal_precision, int32_t decimal_scale);