Skip to content

Commit

Permalink
fix: Ensure we don't call cuMemAlloc with 0 bytesize (#534)
Browse files Browse the repository at this point in the history
According to the CUDA docs for
[cuMemAlloc](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gb82d2a09844a58dd9e744dc31e8aa467):

> If bytesize is 0,
[cuMemAlloc()](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gb82d2a09844a58dd9e744dc31e8aa467)
returns
[CUDA_ERROR_INVALID_VALUE](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1ggc6c391505e117393cc2558fff6bfc2e990696c86fcee1f536a1ec7d25867feeb).

We end up calling `cuMemAlloc()` with `0` bytesize when allocating
device buffers with no null mask. Thus, the following change was causing
`nanoarrow_device_cuda_test` to fail:

```diff
@@ -207,7 +207,8 @@ TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceCudaArrayViewString) {
   ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
   ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("abc")), NANOARROW_OK);
   ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("defg")), NANOARROW_OK);
-  ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("defg")), NANOARROW_OK);
+  // ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
 
   ASSERT_EQ(ArrowDeviceArrayInit(cpu, &device_array, &array, nullptr), NANOARROW_OK);
```

In this PR, I've fixed this by simply skipping the call to `cuMemAlloc`.
The resulting buffer will have `nullptr` as its `data` member and `0` as
its `size_bytes`, which I believe is the desired outcome.

I also modified the test above to include cases with no nulls.
  • Loading branch information
shwina authored Jun 20, 2024
1 parent 05cdc1b commit c651e84
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 24 deletions.
6 changes: 5 additions & 1 deletion src/nanoarrow/nanoarrow_device_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,11 @@ static ArrowErrorCode ArrowDeviceCudaAllocateBuffer(struct ArrowDevice* device,
switch (device->device_type) {
case ARROW_DEVICE_CUDA: {
CUdeviceptr dptr = 0;
err = cuMemAlloc(&dptr, (size_t)size_bytes);
if (size_bytes > 0) { // cuMemalloc requires non-zero size_bytes
err = cuMemAlloc(&dptr, (size_t)size_bytes);
} else {
err = CUDA_SUCCESS;
}
ptr = (void*)dptr;
op = "cuMemAlloc";
break;
Expand Down
72 changes: 49 additions & 23 deletions src/nanoarrow/nanoarrow_device_cuda_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
// specific language governing permissions and limitations
// under the License.

#include <errno.h>

#include <cuda.h>
#include <errno.h>
#include <gtest/gtest.h>
#include <tuple>

#include "nanoarrow/nanoarrow_device.h"
#include "nanoarrow/nanoarrow_device_cuda.h"
Expand Down Expand Up @@ -185,29 +185,38 @@ TEST(NanoarrowDeviceCuda, DeviceCudaBufferCopy) {
}

class StringTypeParameterizedTestFixture
: public ::testing::TestWithParam<std::pair<ArrowDeviceType, enum ArrowType>> {
: public ::testing::TestWithParam<std::tuple<ArrowDeviceType, enum ArrowType, bool>> {
protected:
std::pair<ArrowDeviceType, enum ArrowType> info;
};

std::pair<ArrowDeviceType, enum ArrowType> DeviceAndType(ArrowDeviceType device_type,
enum ArrowType arrow_type) {
return {device_type, arrow_type};
std::tuple<ArrowDeviceType, enum ArrowType, bool> TestParams(ArrowDeviceType device_type,
enum ArrowType arrow_type,
bool include_null) {
return {device_type, arrow_type, include_null};
}

TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceCudaArrayViewString) {
struct ArrowDevice* cpu = ArrowDeviceCpu();
struct ArrowDevice* gpu = ArrowDeviceCuda(GetParam().first, 0);
struct ArrowDevice* gpu = ArrowDeviceCuda(std::get<0>(GetParam()), 0);
struct ArrowArray array;
struct ArrowDeviceArray device_array;
struct ArrowDeviceArrayView device_array_view;
enum ArrowType string_type = GetParam().second;
enum ArrowType string_type = std::get<1>(GetParam());
bool include_null = std::get<2>(GetParam());
int64_t expected_data_size; // expected

ASSERT_EQ(ArrowArrayInitFromType(&array, string_type), NANOARROW_OK);
ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("abc")), NANOARROW_OK);
ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("defg")), NANOARROW_OK);
ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
if (include_null) {
ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
expected_data_size = 7;
} else {
ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("hjk")), NANOARROW_OK);
expected_data_size = 10;
}
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);

ASSERT_EQ(ArrowDeviceArrayInit(cpu, &device_array, &array, nullptr), NANOARROW_OK);
Expand All @@ -217,7 +226,7 @@ TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceCudaArrayViewString) {
ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array, nullptr),
NANOARROW_OK);

EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, expected_data_size);
EXPECT_EQ(device_array.array.length, 3);

// Copy required to Cuda
Expand All @@ -232,7 +241,7 @@ TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceCudaArrayViewString) {
ASSERT_EQ(device_array2.device_id, gpu->device_id);
ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array2, nullptr),
NANOARROW_OK);
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, expected_data_size);
EXPECT_EQ(device_array_view.array_view.length, 3);
EXPECT_EQ(device_array2.array.length, 3);

Expand All @@ -251,22 +260,39 @@ TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceCudaArrayViewString) {
ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array, nullptr),
NANOARROW_OK);

EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
EXPECT_EQ(memcmp(device_array_view.array_view.buffer_views[2].data.data, "abcdefg", 7),
0);
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, expected_data_size);

if (include_null) {
EXPECT_EQ(
memcmp(device_array_view.array_view.buffer_views[2].data.data, "abcdefg", 7), 0);
} else {
EXPECT_EQ(
memcmp(device_array_view.array_view.buffer_views[2].data.data, "abcdefghjk", 7),
0);
}

ArrowArrayRelease(&device_array.array);
ArrowDeviceArrayViewReset(&device_array_view);
}

INSTANTIATE_TEST_SUITE_P(
NanoarrowDeviceCuda, StringTypeParameterizedTestFixture,
::testing::Values(DeviceAndType(ARROW_DEVICE_CUDA, NANOARROW_TYPE_STRING),
DeviceAndType(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_STRING),
DeviceAndType(ARROW_DEVICE_CUDA, NANOARROW_TYPE_BINARY),
DeviceAndType(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_BINARY),
DeviceAndType(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_STRING),
DeviceAndType(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_STRING),
DeviceAndType(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_BINARY),
DeviceAndType(ARROW_DEVICE_CUDA_HOST,
NANOARROW_TYPE_LARGE_BINARY)));
::testing::Values(
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_STRING, true),
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_STRING, false),
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_STRING, true),
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_STRING, false),
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_BINARY, true),
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_BINARY, false),
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_BINARY, true),
TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_BINARY, false),
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_STRING, true),
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_STRING, false),
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_STRING, true),
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_STRING, false),
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_BINARY, true),
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_BINARY, false),
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_BINARY, true),
TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_BINARY, false)

));

0 comments on commit c651e84

Please sign in to comment.