Skip to content

Commit b48bcda

Browse files
authored
fix!: Change ArrowBufferResize() and ArrowBitmapResize() to update size_bytes (#459)
Currently, `ArrowBufferResize()` and `ArrowBitmapResize()` update the *capacity* of the underlying buffer but do not update the *size*. This was documented and is consistent with Arrow C++'s `BufferBuilder` but inconsistent with `std::vector`, Arrow C++'s `PoolBuffer`, and user expectations given the name of the function. After this PR, `ArrowBufferResize()` and `ArrowBitmapResize()` update the `size_bytes` of the underlying buffer. This is a breaking change.
1 parent a451a95 commit b48bcda

File tree

3 files changed

+100
-40
lines changed

3 files changed

+100
-40
lines changed

src/nanoarrow/buffer_inline.h

+42-25
Original file line numberDiff line numberDiff line change
@@ -91,29 +91,29 @@ static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer*
9191
}
9292

9393
static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
94-
int64_t new_capacity_bytes,
94+
int64_t new_size_bytes,
9595
char shrink_to_fit) {
96-
if (new_capacity_bytes < 0) {
96+
if (new_size_bytes < 0) {
9797
return EINVAL;
9898
}
9999

100-
if (new_capacity_bytes > buffer->capacity_bytes || shrink_to_fit) {
101-
buffer->data = buffer->allocator.reallocate(
102-
&buffer->allocator, buffer->data, buffer->capacity_bytes, new_capacity_bytes);
103-
if (buffer->data == NULL && new_capacity_bytes > 0) {
100+
int needs_reallocation = new_size_bytes > buffer->capacity_bytes ||
101+
(shrink_to_fit && new_size_bytes < buffer->capacity_bytes);
102+
103+
if (needs_reallocation) {
104+
buffer->data = buffer->allocator.reallocate(&buffer->allocator, buffer->data,
105+
buffer->capacity_bytes, new_size_bytes);
106+
107+
if (buffer->data == NULL && new_size_bytes > 0) {
104108
buffer->capacity_bytes = 0;
105109
buffer->size_bytes = 0;
106110
return ENOMEM;
107111
}
108112

109-
buffer->capacity_bytes = new_capacity_bytes;
110-
}
111-
112-
// Ensures that when shrinking that size <= capacity
113-
if (new_capacity_bytes < buffer->size_bytes) {
114-
buffer->size_bytes = new_capacity_bytes;
113+
buffer->capacity_bytes = new_size_bytes;
115114
}
116115

116+
buffer->size_bytes = new_size_bytes;
117117
return NANOARROW_OK;
118118
}
119119

@@ -124,8 +124,19 @@ static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer,
124124
return NANOARROW_OK;
125125
}
126126

127-
return ArrowBufferResize(
128-
buffer, _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes), 0);
127+
int64_t new_capacity_bytes =
128+
_ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes);
129+
buffer->data = buffer->allocator.reallocate(&buffer->allocator, buffer->data,
130+
buffer->capacity_bytes, new_capacity_bytes);
131+
132+
if (buffer->data == NULL && new_capacity_bytes > 0) {
133+
buffer->capacity_bytes = 0;
134+
buffer->size_bytes = 0;
135+
return ENOMEM;
136+
}
137+
138+
buffer->capacity_bytes = new_capacity_bytes;
139+
return NANOARROW_OK;
129140
}
130141

131142
static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data,
@@ -468,32 +479,38 @@ static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap*
468479
static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
469480
int64_t additional_size_bits) {
470481
int64_t min_capacity_bits = bitmap->size_bits + additional_size_bits;
471-
if (min_capacity_bits <= (bitmap->buffer.capacity_bytes * 8)) {
482+
int64_t min_capacity_bytes = _ArrowBytesForBits(min_capacity_bits);
483+
int64_t current_size_bytes = bitmap->buffer.size_bytes;
484+
int64_t current_capacity_bytes = bitmap->buffer.capacity_bytes;
485+
486+
if (min_capacity_bytes <= current_capacity_bytes) {
472487
return NANOARROW_OK;
473488
}
474489

475-
NANOARROW_RETURN_NOT_OK(
476-
ArrowBufferReserve(&bitmap->buffer, _ArrowBytesForBits(additional_size_bits)));
490+
int64_t additional_capacity_bytes = min_capacity_bytes - current_size_bytes;
491+
NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(&bitmap->buffer, additional_capacity_bytes));
477492

493+
// Zero out the last byte for deterministic output in the common case
494+
// of reserving a known remaining size. We should have returned above
495+
// if there was not at least one additional byte to allocate; however,
496+
// DCHECK() just to be sure.
497+
NANOARROW_DCHECK(bitmap->buffer.capacity_bytes > current_capacity_bytes);
478498
bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0;
479499
return NANOARROW_OK;
480500
}
481501

482502
static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
483-
int64_t new_capacity_bits,
503+
int64_t new_size_bits,
484504
char shrink_to_fit) {
485-
if (new_capacity_bits < 0) {
505+
if (new_size_bits < 0) {
486506
return EINVAL;
487507
}
488508

489-
int64_t new_capacity_bytes = _ArrowBytesForBits(new_capacity_bits);
509+
int64_t new_size_bytes = _ArrowBytesForBits(new_size_bits);
490510
NANOARROW_RETURN_NOT_OK(
491-
ArrowBufferResize(&bitmap->buffer, new_capacity_bytes, shrink_to_fit));
492-
493-
if (new_capacity_bits < bitmap->size_bits) {
494-
bitmap->size_bits = new_capacity_bits;
495-
}
511+
ArrowBufferResize(&bitmap->buffer, new_size_bytes, shrink_to_fit));
496512

513+
bitmap->size_bits = new_size_bits;
497514
return NANOARROW_OK;
498515
}
499516

src/nanoarrow/buffer_test.cc

+50-2
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,22 @@ TEST(BufferTest, BufferTestBasic) {
8484
EXPECT_EQ(buffer.size_bytes, 12);
8585
EXPECT_STREQ(reinterpret_cast<char*>(buffer.data), "12345678901");
8686

87+
// Resize larger without a reallocation
88+
uint8_t* second_data = buffer.data;
89+
EXPECT_EQ(ArrowBufferResize(&buffer, 13, false), NANOARROW_OK);
90+
EXPECT_EQ(buffer.data, second_data);
91+
EXPECT_EQ(buffer.capacity_bytes, 20);
92+
EXPECT_EQ(buffer.size_bytes, 13);
93+
94+
// Resize larger with a reallocation
95+
EXPECT_EQ(ArrowBufferResize(&buffer, 21, false), NANOARROW_OK);
96+
EXPECT_NE(buffer.data, second_data);
97+
EXPECT_EQ(buffer.capacity_bytes, 21);
98+
EXPECT_EQ(buffer.size_bytes, 21);
99+
87100
// Resize smaller without shrinking
88101
EXPECT_EQ(ArrowBufferResize(&buffer, 5, false), NANOARROW_OK);
89-
EXPECT_EQ(buffer.capacity_bytes, 20);
102+
EXPECT_EQ(buffer.capacity_bytes, 21);
90103
EXPECT_EQ(buffer.size_bytes, 5);
91104
EXPECT_EQ(strncmp(reinterpret_cast<char*>(buffer.data), "12345", 5), 0);
92105

@@ -509,13 +522,48 @@ TEST(BitmapTest, BitmapTestMove) {
509522
ArrowBitmapReset(&bitmap2);
510523
}
511524

525+
TEST(BitmapTest, BitmapTestReserve) {
526+
struct ArrowBitmap bitmap;
527+
ArrowBitmapInit(&bitmap);
528+
529+
// Reserve zero bits
530+
ASSERT_EQ(ArrowBitmapReserve(&bitmap, 0), NANOARROW_OK);
531+
EXPECT_EQ(bitmap.buffer.size_bytes, 0);
532+
EXPECT_EQ(bitmap.buffer.capacity_bytes, 0);
533+
EXPECT_EQ(bitmap.size_bits, 0);
534+
535+
// Reserve <=8 bits
536+
ASSERT_EQ(ArrowBitmapReserve(&bitmap, 2), NANOARROW_OK);
537+
EXPECT_EQ(bitmap.buffer.size_bytes, 0);
538+
EXPECT_EQ(bitmap.buffer.capacity_bytes, 1);
539+
EXPECT_EQ(bitmap.size_bits, 0);
540+
541+
// Reserve <=8 bits and ensure last byte is not zeroed
542+
bitmap.buffer.data[0] = 0xff;
543+
ASSERT_EQ(ArrowBitmapReserve(&bitmap, 8), NANOARROW_OK);
544+
EXPECT_EQ(bitmap.buffer.size_bytes, 0);
545+
EXPECT_EQ(bitmap.buffer.capacity_bytes, 1);
546+
EXPECT_EQ(bitmap.size_bits, 0);
547+
EXPECT_EQ(bitmap.buffer.data[0], 0xff);
548+
549+
// Reserve >8 bits and ensure last byte *is* zeroed
550+
ASSERT_EQ(ArrowBitmapReserve(&bitmap, 9), NANOARROW_OK);
551+
EXPECT_EQ(bitmap.buffer.size_bytes, 0);
552+
EXPECT_EQ(bitmap.buffer.capacity_bytes, 2);
553+
EXPECT_EQ(bitmap.size_bits, 0);
554+
EXPECT_EQ(bitmap.buffer.data[0], 0xff);
555+
EXPECT_EQ(bitmap.buffer.data[1], 0x00);
556+
557+
ArrowBitmapReset(&bitmap);
558+
}
559+
512560
TEST(BitmapTest, BitmapTestResize) {
513561
struct ArrowBitmap bitmap;
514562
ArrowBitmapInit(&bitmap);
515563

516564
// Check normal usage, which is resize to the final length
517565
// after appending a bunch of values
518-
ASSERT_EQ(ArrowBitmapResize(&bitmap, 200, false), NANOARROW_OK);
566+
ASSERT_EQ(ArrowBitmapReserve(&bitmap, 200), NANOARROW_OK);
519567
EXPECT_EQ(bitmap.buffer.size_bytes, 0);
520568
EXPECT_EQ(bitmap.buffer.capacity_bytes, 200 / 8);
521569
EXPECT_EQ(bitmap.size_bits, 0);

src/nanoarrow/nanoarrow.h

+8-13
Original file line numberDiff line numberDiff line change
@@ -620,14 +620,12 @@ static inline void ArrowBufferReset(struct ArrowBuffer* buffer);
620620
/// address and resets buffer.
621621
static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst);
622622

623-
/// \brief Grow or shrink a buffer to a given capacity
623+
/// \brief Grow or shrink a buffer to a given size
624624
///
625-
/// When shrinking the capacity of the buffer, the buffer is only reallocated
626-
/// if shrink_to_fit is non-zero. Calling ArrowBufferResize() does not
627-
/// adjust the buffer's size member except to ensure that the invariant
628-
/// capacity >= size remains true.
625+
/// When shrinking the size of the buffer, the buffer is only reallocated
626+
/// if shrink_to_fit is non-zero.
629627
static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
630-
int64_t new_capacity_bytes,
628+
int64_t new_size_bytes,
631629
char shrink_to_fit);
632630

633631
/// \brief Ensure a buffer has at least a given additional capacity
@@ -757,15 +755,12 @@ static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap*
757755
static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
758756
int64_t additional_size_bits);
759757

760-
/// \brief Grow or shrink a bitmap to a given capacity
758+
/// \brief Grow or shrink a bitmap to a given size
761759
///
762-
/// When shrinking the capacity of the bitmap, the bitmap is only reallocated
763-
/// if shrink_to_fit is non-zero. Calling ArrowBitmapResize() does not
764-
/// adjust the buffer's size member except when shrinking new_capacity_bits
765-
/// to a value less than the current number of bits in the bitmap.
760+
/// When shrinking the size of the bitmap, the bitmap is only reallocated
761+
/// if shrink_to_fit is non-zero.
766762
static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
767-
int64_t new_capacity_bits,
768-
char shrink_to_fit);
763+
int64_t new_size_bits, char shrink_to_fit);
769764

770765
/// \brief Reserve space for and append zero or more of the same boolean value to a bitmap
771766
static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,

0 commit comments

Comments
 (0)