Skip to content

Commit c651e84

Browse files
authored
fix: Ensure we don't call cuMemAlloc with 0 bytesize (#534)
According to the CUDA docs for [cuMemAlloc](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gb82d2a09844a58dd9e744dc31e8aa467):

> If bytesize is 0, [cuMemAlloc()](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gb82d2a09844a58dd9e744dc31e8aa467) returns [CUDA_ERROR_INVALID_VALUE](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1ggc6c391505e117393cc2558fff6bfc2e990696c86fcee1f536a1ec7d25867feeb).

We end up calling `cuMemAlloc()` with a `bytesize` of `0` when allocating device buffers with no null mask. Thus, the following change was causing `nanoarrow_device_cuda_test` to fail:

```diff
@@ -207,7 +207,8 @@ TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceCudaArrayViewString) {
   ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
   ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("abc")), NANOARROW_OK);
   ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("defg")), NANOARROW_OK);
-  ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("defg")), NANOARROW_OK);
+  // ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);

   ASSERT_EQ(ArrowDeviceArrayInit(cpu, &device_array, &array, nullptr), NANOARROW_OK);
```

In this PR, I've fixed this by simply skipping the call to `cuMemAlloc`. The resulting buffer will have `nullptr` as its `data` member and `0` as its `size_bytes`, which I believe is the desired outcome. I also modified the test above to include cases with no nulls.
1 parent 05cdc1b commit c651e84

File tree

2 files changed

+54
-24
lines changed

2 files changed

+54
-24
lines changed

src/nanoarrow/nanoarrow_device_cuda.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,11 @@ static ArrowErrorCode ArrowDeviceCudaAllocateBuffer(struct ArrowDevice* device,
124124
switch (device->device_type) {
125125
case ARROW_DEVICE_CUDA: {
126126
CUdeviceptr dptr = 0;
127-
err = cuMemAlloc(&dptr, (size_t)size_bytes);
127+
if (size_bytes > 0) { // cuMemAlloc requires non-zero size_bytes
128+
err = cuMemAlloc(&dptr, (size_t)size_bytes);
129+
} else {
130+
err = CUDA_SUCCESS;
131+
}
128132
ptr = (void*)dptr;
129133
op = "cuMemAlloc";
130134
break;

src/nanoarrow/nanoarrow_device_cuda_test.cc

Lines changed: 49 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
#include <errno.h>
19-
2018
#include <cuda.h>
19+
#include <errno.h>
2120
#include <gtest/gtest.h>
21+
#include <tuple>
2222

2323
#include "nanoarrow/nanoarrow_device.h"
2424
#include "nanoarrow/nanoarrow_device_cuda.h"
@@ -185,29 +185,38 @@ TEST(NanoarrowDeviceCuda, DeviceCudaBufferCopy) {
185185
}
186186

187187
class StringTypeParameterizedTestFixture
188-
: public ::testing::TestWithParam<std::pair<ArrowDeviceType, enum ArrowType>> {
188+
: public ::testing::TestWithParam<std::tuple<ArrowDeviceType, enum ArrowType, bool>> {
189189
protected:
190190
std::pair<ArrowDeviceType, enum ArrowType> info;
191191
};
192192

193-
std::pair<ArrowDeviceType, enum ArrowType> DeviceAndType(ArrowDeviceType device_type,
194-
enum ArrowType arrow_type) {
195-
return {device_type, arrow_type};
193+
std::tuple<ArrowDeviceType, enum ArrowType, bool> TestParams(ArrowDeviceType device_type,
194+
enum ArrowType arrow_type,
195+
bool include_null) {
196+
return {device_type, arrow_type, include_null};
196197
}
197198

198199
TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceCudaArrayViewString) {
199200
struct ArrowDevice* cpu = ArrowDeviceCpu();
200-
struct ArrowDevice* gpu = ArrowDeviceCuda(GetParam().first, 0);
201+
struct ArrowDevice* gpu = ArrowDeviceCuda(std::get<0>(GetParam()), 0);
201202
struct ArrowArray array;
202203
struct ArrowDeviceArray device_array;
203204
struct ArrowDeviceArrayView device_array_view;
204-
enum ArrowType string_type = GetParam().second;
205+
enum ArrowType string_type = std::get<1>(GetParam());
206+
bool include_null = std::get<2>(GetParam());
207+
int64_t expected_data_size; // expected size_bytes of buffer_views[2]; set below based on include_null
205208

206209
ASSERT_EQ(ArrowArrayInitFromType(&array, string_type), NANOARROW_OK);
207210
ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
208211
ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("abc")), NANOARROW_OK);
209212
ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("defg")), NANOARROW_OK);
210-
ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
213+
if (include_null) {
214+
ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
215+
expected_data_size = 7;
216+
} else {
217+
ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("hjk")), NANOARROW_OK);
218+
expected_data_size = 10;
219+
}
211220
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
212221

213222
ASSERT_EQ(ArrowDeviceArrayInit(cpu, &device_array, &array, nullptr), NANOARROW_OK);
@@ -217,7 +226,7 @@ TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceCudaArrayViewString) {
217226
ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array, nullptr),
218227
NANOARROW_OK);
219228

220-
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
229+
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, expected_data_size);
221230
EXPECT_EQ(device_array.array.length, 3);
222231

223232
// Copy required to Cuda
@@ -232,7 +241,7 @@ TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceCudaArrayViewString) {
232241
ASSERT_EQ(device_array2.device_id, gpu->device_id);
233242
ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array2, nullptr),
234243
NANOARROW_OK);
235-
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
244+
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, expected_data_size);
236245
EXPECT_EQ(device_array_view.array_view.length, 3);
237246
EXPECT_EQ(device_array2.array.length, 3);
238247

@@ -251,22 +260,39 @@ TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceCudaArrayViewString) {
251260
ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array, nullptr),
252261
NANOARROW_OK);
253262

254-
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
255-
EXPECT_EQ(memcmp(device_array_view.array_view.buffer_views[2].data.data, "abcdefg", 7),
256-
0);
263+
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, expected_data_size);
264+
265+
if (include_null) {
266+
EXPECT_EQ(
267+
memcmp(device_array_view.array_view.buffer_views[2].data.data, "abcdefg", 7), 0);
268+
} else {
269+
EXPECT_EQ(
270+
memcmp(device_array_view.array_view.buffer_views[2].data.data, "abcdefghjk", 7),
271+
0);
272+
}
257273

258274
ArrowArrayRelease(&device_array.array);
259275
ArrowDeviceArrayViewReset(&device_array_view);
260276
}
261277

262278
INSTANTIATE_TEST_SUITE_P(
263279
NanoarrowDeviceCuda, StringTypeParameterizedTestFixture,
264-
::testing::Values(DeviceAndType(ARROW_DEVICE_CUDA, NANOARROW_TYPE_STRING),
265-
DeviceAndType(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_STRING),
266-
DeviceAndType(ARROW_DEVICE_CUDA, NANOARROW_TYPE_BINARY),
267-
DeviceAndType(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_BINARY),
268-
DeviceAndType(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_STRING),
269-
DeviceAndType(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_STRING),
270-
DeviceAndType(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_BINARY),
271-
DeviceAndType(ARROW_DEVICE_CUDA_HOST,
272-
NANOARROW_TYPE_LARGE_BINARY)));
280+
::testing::Values(
281+
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_STRING, true),
282+
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_STRING, false),
283+
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_STRING, true),
284+
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_STRING, false),
285+
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_BINARY, true),
286+
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_BINARY, false),
287+
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_BINARY, true),
288+
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_BINARY, false),
289+
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_STRING, true),
290+
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_STRING, false),
291+
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_STRING, true),
292+
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_STRING, false),
293+
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_BINARY, true),
294+
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_BINARY, false),
295+
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_BINARY, true),
296+
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_BINARY, false)
297+
298+
));

0 commit comments

Comments
 (0)