From bd49dedde60cd7f2dc1abe19ff5adce4de19a9a7 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 31 Jan 2025 16:17:58 -0500 Subject: [PATCH] feat: Implement LIST_VIEW and LARGE_LIST_VIEW support --- src/nanoarrow/common/array.c | 76 +++++++++++++-- src/nanoarrow/common/array_test.cc | 145 +++++++++++++++++++++++++++- src/nanoarrow/common/inline_array.h | 66 ++++++++++--- src/nanoarrow/common/inline_types.h | 11 ++- src/nanoarrow/common/schema.c | 28 ++++++ src/nanoarrow/common/utils.c | 18 ++++ src/nanoarrow/testing/testing.cc | 11 +++ 7 files changed, 331 insertions(+), 24 deletions(-) diff --git a/src/nanoarrow/common/array.c b/src/nanoarrow/common/array.c index 53cd4c659..17b5e75e6 100644 --- a/src/nanoarrow/common/array.c +++ b/src/nanoarrow/common/array.c @@ -123,6 +123,8 @@ static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, case NANOARROW_TYPE_LARGE_STRING: case NANOARROW_TYPE_BINARY: case NANOARROW_TYPE_LARGE_BINARY: + case NANOARROW_TYPE_LIST_VIEW: + case NANOARROW_TYPE_LARGE_LIST_VIEW: array->n_buffers = 3; break; @@ -687,6 +689,9 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) case NANOARROW_BUFFER_TYPE_VALIDITY: array_view->buffer_views[i].size_bytes = _ArrowBytesForBits(length); continue; + case NANOARROW_BUFFER_TYPE_SIZE: + array_view->buffer_views[i].size_bytes = element_size_bytes * length; + continue; case NANOARROW_BUFFER_TYPE_DATA_OFFSET: // Probably don't want/need to rely on the producer to have allocated an // offsets buffer of length 1 for a zero-size array @@ -856,11 +861,20 @@ static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view, min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length); break; + case NANOARROW_BUFFER_TYPE_SIZE: + min_buffer_size_bytes = element_size_bytes * offset_plus_length; + break; case NANOARROW_BUFFER_TYPE_DATA_OFFSET: - // Probably don't want/need to rely on the producer to have allocated an - // offsets buffer of length 1 for a zero-size array - min_buffer_size_bytes = - (offset_plus_length != 0) * element_size_bytes * (offset_plus_length + 1); + if (array_view->storage_type == NANOARROW_TYPE_LIST_VIEW || + array_view->storage_type == NANOARROW_TYPE_LARGE_LIST_VIEW) { + min_buffer_size_bytes = + (offset_plus_length != 0) * element_size_bytes * offset_plus_length; + } else { + // Probably don't want/need to rely on the producer to have allocated an + // offsets buffer of length 1 for a zero-size array + min_buffer_size_bytes = + (offset_plus_length != 0) * element_size_bytes * (offset_plus_length + 1); + } break; case NANOARROW_BUFFER_TYPE_DATA: min_buffer_size_bytes = @@ -898,6 +912,8 @@ static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view, case NANOARROW_TYPE_LARGE_LIST: case NANOARROW_TYPE_FIXED_SIZE_LIST: case NANOARROW_TYPE_MAP: + case NANOARROW_TYPE_LIST_VIEW: + case NANOARROW_TYPE_LARGE_LIST_VIEW: if (array_view->n_children != 1) { ArrowErrorSet(error, "Expected 1 child of %s array but found %" PRId64 " child arrays", @@ -1132,6 +1148,10 @@ static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view, case NANOARROW_TYPE_LIST: case NANOARROW_TYPE_MAP: + case NANOARROW_TYPE_LIST_VIEW: { + const size_t idx = array_view->storage_type == NANOARROW_TYPE_LIST_VIEW + ? offset_plus_length - 1 + : offset_plus_length; if (array_view->buffer_views[1].size_bytes != 0) { first_offset = array_view->buffer_views[1].data.as_int32[array_view->offset]; if (first_offset < 0) { @@ -1140,7 +1160,7 @@ static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view, return EINVAL; } - last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length]; + last_offset = array_view->buffer_views[1].data.as_int32[idx]; if (last_offset < 0) { ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64, last_offset); @@ -1157,9 +1177,31 @@ static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view, return EINVAL; } } + + if ((array_view->storage_type == NANOARROW_TYPE_LIST_VIEW) && + (array_view->buffer_views[2].size_bytes != 0)) { + const int64_t first_size = + array_view->buffer_views[2].data.as_int32[array_view->offset]; + if (first_size < 0) { + ArrowErrorSet(error, "Expected first size >= 0 but found %" PRId64, first_size); + return EINVAL; + } + + const int64_t last_size = + array_view->buffer_views[2].data.as_int32[offset_plus_length - 1]; + if (last_size < 0) { + ArrowErrorSet(error, "Expected last size >= 0 but found %" PRId64, last_size); + return EINVAL; + } + } break; + } case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_LARGE_LIST_VIEW: { + const size_t idx = array_view->storage_type == NANOARROW_TYPE_LARGE_LIST_VIEW + ? offset_plus_length - 1 + : offset_plus_length; if (array_view->buffer_views[1].size_bytes != 0) { first_offset = array_view->buffer_views[1].data.as_int64[array_view->offset]; if (first_offset < 0) { @@ -1168,7 +1210,7 @@ static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view, return EINVAL; } - last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length]; + last_offset = array_view->buffer_views[1].data.as_int64[idx]; if (last_offset < 0) { ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64, last_offset); @@ -1177,14 +1219,32 @@ static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view, if (array_view->children[0]->length < last_offset) { ArrowErrorSet(error, - "Expected child of large list array to have length >= %" PRId64 + "Expected child of %s array to have length >= %" PRId64 " but found array " "with length %" PRId64, - last_offset, array_view->children[0]->length); + ArrowTypeString(array_view->storage_type), last_offset, + array_view->children[0]->length); + return EINVAL; + } + } + + if ((array_view->storage_type == NANOARROW_TYPE_LARGE_LIST_VIEW) && + (array_view->buffer_views[2].size_bytes != 0)) { + const int64_t first_size = + array_view->buffer_views[2].data.as_int64[array_view->offset]; + if (first_size < 0) { + ArrowErrorSet(error, "Expected first size >= 0 but found %" PRId64, first_size); + return EINVAL; + } + + const int64_t last_size = array_view->buffer_views[2].data.as_int64[idx]; + if (last_size < 0) { + ArrowErrorSet(error, "Expected last size >= 0 but found %" PRId64, last_size); return EINVAL; } } break; + } case NANOARROW_TYPE_RUN_END_ENCODED: { struct ArrowArrayView* run_ends_view = array_view->children[0]; diff --git a/src/nanoarrow/common/array_test.cc b/src/nanoarrow/common/array_test.cc index dddc779d8..25d68a7de 100644 --- a/src/nanoarrow/common/array_test.cc +++ b/src/nanoarrow/common/array_test.cc @@ -1525,7 +1525,7 @@ TEST(ArrayTest, ArrayTestAppendToLargeListArray) { EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL); EXPECT_STREQ( ArrowErrorMessage(&error), - "Expected child of large list array to have length >= 3 but found array with " + "Expected child of large_list array to have length >= 3 but found array with " "length 2"); array.children[0]->length = array.children[0]->length + 1; @@ -1555,6 +1555,149 @@ TEST(ArrayTest, ArrayTestAppendToLargeListArray) { #endif } +TEST(ArrayTest, ArrayTestAppendToListViewArray) { + struct ArrowArray array; + struct ArrowSchema schema; + struct ArrowError error; + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LIST_VIEW), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT64), NANOARROW_OK); + ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK); + + ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK); + + // Check that we can reserve recursively without erroring + ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK); + EXPECT_EQ(ArrowArrayBuffer(array.children[0], 1)->capacity_bytes, 0); + + ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 123), NANOARROW_OK); + EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK); + + ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK); + + ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 456), NANOARROW_OK); + ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 789), NANOARROW_OK); + EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK); + + EXPECT_EQ(ArrowArrayAppendEmpty(&array, 1), NANOARROW_OK); + + // Make sure number of children is checked at finish + array.n_children = 0; + EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL); + EXPECT_STREQ(ArrowErrorMessage(&error), + "Expected 1 child of list_view array but found 0 child arrays"); + array.n_children = 1; + + // Make sure final child size is checked at finish + // TODO: this may be an expensive check with LIST_VIEW types + /* + array.children[0]->length = array.children[0]->length - 1; + EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL); + EXPECT_STREQ( + ArrowErrorMessage(&error), + "Expected child of list_view array to have length >= 3 but found array with " + "length 2"); + + array.children[0]->length = array.children[0]->length + 1; + */ + EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK); + +#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) + auto arrow_array = ImportArray(&array, &schema); + ARROW_EXPECT_OK(arrow_array); + + constexpr size_t nelems = 4; + const std::array offsets = {0, 0, 1, 0}; + const std::array sizes = {1, 0, 2, 0}; + const std::array valid_bytes = {1, 0, 1, 1}; + + auto child_builder = std::make_shared(); + auto builder = + ListViewBuilder(default_memory_pool(), child_builder, list_view(int64())); + ARROW_EXPECT_OK( + builder.AppendValues(offsets.data(), sizes.data(), nelems, valid_bytes.data())); + auto expected_array = builder.Finish(); + ARROW_EXPECT_OK(expected_array); + + EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe())); +#else + ArrowSchemaRelease(&schema); + ArrowArrayRelease(&array); +#endif +} + +TEST(ArrayTest, ArrayTestAppendToLargeListViewArray) { + struct ArrowArray array; + struct ArrowSchema schema; + struct ArrowError error; + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LARGE_LIST_VIEW), + NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT64), NANOARROW_OK); + ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK); + + ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK); + + // Check that we can reserve recursively without erroring + ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK); + EXPECT_EQ(ArrowArrayBuffer(array.children[0], 1)->capacity_bytes, 0); + + ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 123), NANOARROW_OK); + EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK); + + ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK); + + ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 456), NANOARROW_OK); + ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 789), NANOARROW_OK); + EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK); + + EXPECT_EQ(ArrowArrayAppendEmpty(&array, 1), NANOARROW_OK); + + // Make sure number of children is checked at finish + array.n_children = 0; + EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL); + EXPECT_STREQ(ArrowErrorMessage(&error), + "Expected 1 child of large_list_view array but found 0 child arrays"); + array.n_children = 1; + + // Make sure final child size is checked at finish + // TODO: this may be an expensive check with LIST_VIEW types + /* + array.children[0]->length = array.children[0]->length - 1; + EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL); + EXPECT_STREQ( + ArrowErrorMessage(&error), + "Expected child of list_view array to have length >= 3 but found array with " + "length 2"); + + array.children[0]->length = array.children[0]->length + 1; + */ + EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK); + +#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW) + auto arrow_array = ImportArray(&array, &schema); + ARROW_EXPECT_OK(arrow_array); + + constexpr size_t nelems = 4; + const std::array offsets = {0, 0, 1, 0}; + const std::array sizes = {1, 0, 2, 0}; + const std::array valid_bytes = {1, 0, 1, 1}; + + auto child_builder = std::make_shared(); + auto builder = + LargeListViewBuilder(default_memory_pool(), child_builder, list_view(int64())); + ARROW_EXPECT_OK( + builder.AppendValues(offsets.data(), sizes.data(), nelems, valid_bytes.data())); + auto expected_array = builder.Finish(); + ARROW_EXPECT_OK(expected_array); + + EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe())); +#else + ArrowSchemaRelease(&schema); + ArrowArrayRelease(&array); +#endif +} + TEST(ArrayTest, ArrayTestAppendToMapArray) { struct ArrowArray array; struct ArrowSchema schema; diff --git a/src/nanoarrow/common/inline_array.h b/src/nanoarrow/common/inline_array.h index 9fe5e0b70..902126e80 100644 --- a/src/nanoarrow/common/inline_array.h +++ b/src/nanoarrow/common/inline_array.h @@ -143,11 +143,14 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array) // Initialize any data offset buffer with a single zero for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + const int is_list_view = + (private_data->storage_type == NANOARROW_TYPE_LIST_VIEW) || + (private_data->storage_type == NANOARROW_TYPE_LARGE_LIST_VIEW); if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && - private_data->layout.element_size_bits[i] == 64) { + !is_list_view && private_data->layout.element_size_bits[i] == 64) { NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0)); } else if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && - private_data->layout.element_size_bits[i] == 32) { + !is_list_view && private_data->layout.element_size_bits[i] == 32) { NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, i), 0)); } } @@ -290,18 +293,27 @@ static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* a case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE: case NANOARROW_BUFFER_TYPE_VALIDITY: continue; - case NANOARROW_BUFFER_TYPE_DATA_OFFSET: - // Append the current value at the end of the offset buffer for each element - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n)); - - for (int64_t j = 0; j < n; j++) { - ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * (array->length + j), - size_bytes); + case NANOARROW_BUFFER_TYPE_SIZE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n)); + continue; + case NANOARROW_BUFFER_TYPE_DATA_OFFSET: { + if (private_data->storage_type == NANOARROW_TYPE_LIST_VIEW || + private_data->storage_type == NANOARROW_TYPE_LARGE_LIST_VIEW) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n)); + } else { + // Append the current value at the end of the offset buffer for each element + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n)); + + for (int64_t j = 0; j < n; j++) { + ArrowBufferAppendUnsafe( + buffer, buffer->data + size_bytes * (array->length + j), size_bytes); + } + // Skip the data buffer + i++; } - // Skip the data buffer - i++; continue; + } case NANOARROW_BUFFER_TYPE_DATA: // Zero out the next bit of memory if (private_data->layout.element_size_bits[i] % 8 == 0) { @@ -310,7 +322,6 @@ static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* a NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n)); } continue; - case NANOARROW_BUFFER_TYPE_TYPE_ID: case NANOARROW_BUFFER_TYPE_UNION_OFFSET: // These cases return above @@ -752,19 +763,46 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { switch (private_data->storage_type) { case NANOARROW_TYPE_LIST: - case NANOARROW_TYPE_MAP: + case NANOARROW_TYPE_LIST_VIEW: + case NANOARROW_TYPE_MAP: { child_length = array->children[0]->length; if (child_length > INT32_MAX) { return EOVERFLOW; } NANOARROW_RETURN_NOT_OK( ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), (int32_t)child_length)); + + if (private_data->storage_type == NANOARROW_TYPE_LIST_VIEW) { + struct ArrowBufferView buf_view; + buf_view.data.data = ArrowArrayBuffer(array, 1)->data; + const int64_t array_len = array->length; + if (array_len > INT32_MAX) { + return EOVERFLOW; + } + const int32_t prev_offset = + array_len > 0 ? buf_view.data.as_int32[array_len - 1] : 0; + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( + ArrowArrayBuffer(array, 2), (int32_t)child_length - prev_offset)); + } break; + } case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_LARGE_LIST_VIEW: { child_length = array->children[0]->length; NANOARROW_RETURN_NOT_OK( ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), child_length)); + if (private_data->storage_type == NANOARROW_TYPE_LARGE_LIST_VIEW) { + struct ArrowBufferView buf_view; + buf_view.data.data = ArrowArrayBuffer(array, 1)->data; + const int64_t array_len = array->length; + const int64_t prev_offset = + array_len > 0 ? buf_view.data.as_int64[array_len - 1] : 0; + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, 2), + child_length - prev_offset)); + } + break; + } case NANOARROW_TYPE_FIXED_SIZE_LIST: child_length = array->children[0]->length; if (child_length != @@ -1046,8 +1084,10 @@ static inline int64_t ArrowArrayViewListChildOffset( const struct ArrowArrayView* array_view, int64_t i) { switch (array_view->storage_type) { case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LIST_VIEW: return array_view->buffer_views[1].data.as_int32[i]; case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_LARGE_LIST_VIEW: return array_view->buffer_views[1].data.as_int64[i]; default: return -1; diff --git a/src/nanoarrow/common/inline_types.h b/src/nanoarrow/common/inline_types.h index d4fdfba97..1c2047580 100644 --- a/src/nanoarrow/common/inline_types.h +++ b/src/nanoarrow/common/inline_types.h @@ -455,7 +455,9 @@ enum ArrowType { NANOARROW_TYPE_BINARY_VIEW, NANOARROW_TYPE_STRING_VIEW, NANOARROW_TYPE_DECIMAL32, - NANOARROW_TYPE_DECIMAL64 + NANOARROW_TYPE_DECIMAL64, + NANOARROW_TYPE_LIST_VIEW, + NANOARROW_TYPE_LARGE_LIST_VIEW, }; /// \brief Get a string value of an enum ArrowType value @@ -552,6 +554,10 @@ static inline const char* ArrowTypeString(enum ArrowType type) { return "binary_view"; case NANOARROW_TYPE_STRING_VIEW: return "string_view"; + case NANOARROW_TYPE_LIST_VIEW: + return "list_view"; + case NANOARROW_TYPE_LARGE_LIST_VIEW: + return "large_list_view"; default: return NULL; } @@ -630,7 +636,8 @@ enum ArrowBufferType { NANOARROW_BUFFER_TYPE_DATA_OFFSET, NANOARROW_BUFFER_TYPE_DATA, NANOARROW_BUFFER_TYPE_VARIADIC_DATA, - NANOARROW_BUFFER_TYPE_VARIADIC_SIZE + NANOARROW_BUFFER_TYPE_VARIADIC_SIZE, + NANOARROW_BUFFER_TYPE_SIZE, }; /// \brief The maximum number of fixed buffers in an ArrowArrayView or ArrowLayout diff --git a/src/nanoarrow/common/schema.c b/src/nanoarrow/common/schema.c index b0e538c08..881ba41f7 100644 --- a/src/nanoarrow/common/schema.c +++ b/src/nanoarrow/common/schema.c @@ -125,6 +125,10 @@ static const char* ArrowSchemaFormatTemplate(enum ArrowType type) { return "+l"; case NANOARROW_TYPE_LARGE_LIST: return "+L"; + case NANOARROW_TYPE_LIST_VIEW: + return "+vl"; + case NANOARROW_TYPE_LARGE_LIST_VIEW: + return "+vL"; case NANOARROW_TYPE_STRUCT: return "+s"; case NANOARROW_TYPE_MAP: @@ -143,6 +147,8 @@ static int ArrowSchemaInitChildrenIfNeeded(struct ArrowSchema* schema, case NANOARROW_TYPE_LIST: case NANOARROW_TYPE_LARGE_LIST: case NANOARROW_TYPE_FIXED_SIZE_LIST: + case NANOARROW_TYPE_LIST_VIEW: + case NANOARROW_TYPE_LARGE_LIST_VIEW: NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 1)); ArrowSchemaInit(schema->children[0]); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "item")); @@ -871,6 +877,26 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view, return EINVAL; } + // views + case 'v': + switch (format[2]) { + case 'l': { + schema_view->storage_type = NANOARROW_TYPE_LIST_VIEW; + schema_view->type = NANOARROW_TYPE_LIST_VIEW; + *format_end_out = format + 3; + return NANOARROW_OK; + } + case 'L': { + schema_view->storage_type = NANOARROW_TYPE_LARGE_LIST_VIEW; + schema_view->type = NANOARROW_TYPE_LARGE_LIST_VIEW; + *format_end_out = format + 3; + return NANOARROW_OK; + } + default: + ArrowErrorSet( + error, "Expected view format string +vl or +vL but found '%s'", format); + return EINVAL; + } default: ArrowErrorSet(error, "Expected nested type format string but found '%s'", format); @@ -1201,7 +1227,9 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_vie return ArrowSchemaViewValidateNChildren(schema_view, 0, error); case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_LIST_VIEW: case NANOARROW_TYPE_LARGE_LIST: + case NANOARROW_TYPE_LARGE_LIST_VIEW: case NANOARROW_TYPE_FIXED_SIZE_LIST: return ArrowSchemaViewValidateNChildren(schema_view, 1, error); diff --git a/src/nanoarrow/common/utils.c b/src/nanoarrow/common/utils.c index 400625f29..eca878d3a 100644 --- a/src/nanoarrow/common/utils.c +++ b/src/nanoarrow/common/utils.c @@ -190,6 +190,24 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) { layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA; layout->buffer_data_type[1] = NANOARROW_TYPE_STRING_VIEW; layout->element_size_bits[1] = 128; + break; + + case NANOARROW_TYPE_LIST_VIEW: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; + layout->element_size_bits[1] = 32; + layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_SIZE; + layout->buffer_data_type[2] = NANOARROW_TYPE_INT32; + layout->element_size_bits[2] = 32; + break; + case NANOARROW_TYPE_LARGE_LIST_VIEW: + layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; + layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; + layout->element_size_bits[1] = 64; + layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_SIZE; + layout->buffer_data_type[2] = NANOARROW_TYPE_INT64; + layout->element_size_bits[2] = 64; + break; default: break; diff --git a/src/nanoarrow/testing/testing.cc b/src/nanoarrow/testing/testing.cc index aba4b11f5..bb5305d32 100644 --- a/src/nanoarrow/testing/testing.cc +++ b/src/nanoarrow/testing/testing.cc @@ -1969,6 +1969,17 @@ ArrowErrorCode SetArrayColumnBuffers(const json& value, ArrowArrayView* array_vi } break; } + case NANOARROW_BUFFER_TYPE_SIZE: { + NANOARROW_RETURN_NOT_OK(Check(value.contains("SIZE"), error, "missing key 'SIZE'")); + const auto& offset = value["SIZE"]; + + if (array_view->layout.element_size_bits[buffer_i] == 32) { + NANOARROW_RETURN_NOT_OK(SetBufferInt(offset, buffer, error)); + } else { + NANOARROW_RETURN_NOT_OK(SetBufferInt(offset, buffer, error)); + } + break; + } case NANOARROW_BUFFER_TYPE_DATA: { NANOARROW_RETURN_NOT_OK(Check(value.contains("DATA"), error, "missing key 'DATA'"));