From 119218739cd80c54c7063d2fa612a9dee3cc2c60 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 19 Jun 2024 13:33:08 +0000 Subject: [PATCH] fix: Ensure ArrowDeviceArray implementation for AppleMetal passes tests on newer MacOS (#527) This PR fixes the Metal build (which was broken after a previous change), adds it to CI (thanks to the new GitHub M1 runners!) and fixes a failure I see locally (but not on CI) whereby the Metal runtime on some (newer? different?) MacOS environments can wrap arbitrary bytes as an `MTL::Buffer()`. If true, this is awesome! But it does mean that in some cases we can "move" CPU arrays to the GPU without a copy and in some cases we have to copy into page-aligned memory (and that this can only be tested at runtime). --- .github/workflows/build-and-test-device.yaml | 31 ++----------- src/nanoarrow/nanoarrow_device.c | 2 +- src/nanoarrow/nanoarrow_device_metal.cc | 15 +++--- src/nanoarrow/nanoarrow_device_metal.h | 8 ++-- src/nanoarrow/nanoarrow_device_metal_test.cc | 48 ++++++++++++++------ 5 files changed, 49 insertions(+), 55 deletions(-) diff --git a/.github/workflows/build-and-test-device.yaml b/.github/workflows/build-and-test-device.yaml index d1247c305..8362bfc7e 100644 --- a/.github/workflows/build-and-test-device.yaml +++ b/.github/workflows/build-and-test-device.yaml @@ -35,7 +35,7 @@ permissions: jobs: test-c-device: - runs-on: ubuntu-latest + runs-on: ${{ matrix.config.runner }} name: ${{ matrix.config.label }} @@ -43,23 +43,15 @@ jobs: fail-fast: false matrix: config: - - {label: default-build} - - {label: namespaced-build, cmake_args: "-DNANOARROW_NAMESPACE=SomeUserNamespace"} - - {label: bundled-build, cmake_args: "-DNANOARROW_BUNDLE=ON"} + - {runner: ubuntu-latest, label: default-build} + - {runner: ubuntu-latest, label: namespaced-build, cmake_args: "-DNANOARROW_NAMESPACE=SomeUserNamespace"} + - {runner: ubuntu-latest, label: bundled-build, cmake_args: "-DNANOARROW_DEVICE_BUNDLE=ON"} + - {runner: macOS-latest, label: with-metal, cmake_args: "-DNANOARROW_DEVICE_WITH_METAL=ON"} - env: - SUBDIR: '${{ github.workspace }}' - NANOARROW_ARROW_TESTING_DIR: '${{ github.workspace }}/arrow-testing' steps: - uses: actions/checkout@v4 - - name: Checkout arrow-testing - uses: actions/checkout@v4 - with: - repository: apache/arrow-testing - path: arrow-testing - - name: Install memcheck dependencies if: matrix.config.label == 'default-build' run: | @@ -82,9 +74,6 @@ jobs: - name: Build run: | ARROW_PATH="$(pwd)/arrow" - cd $SUBDIR - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib - sudo ldconfig mkdir build cd build cmake .. -DCMAKE_BUILD_TYPE=Debug -DNANOARROW_DEVICE=ON \ @@ -96,8 +85,6 @@ jobs: - name: Check for non-namespaced symbols in namespaced build if: matrix.config.label == 'namespaced-build' run: | - cd $SUBDIR - # Dump all symbols nm --extern-only build/libnanoarrow_device.a @@ -113,20 +100,12 @@ jobs: - name: Run tests run: | - cd $SUBDIR - - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib - sudo ldconfig cd build ctest -T test --output-on-failure . - name: Run tests with valgrind if: matrix.config.label == 'default-build' || matrix.config.label == 'default-noatomics' run: | - cd $SUBDIR - - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib - sudo ldconfig cd build ctest -T memcheck . diff --git a/src/nanoarrow/nanoarrow_device.c b/src/nanoarrow/nanoarrow_device.c index a2ce46655..e5a3e624a 100644 --- a/src/nanoarrow/nanoarrow_device.c +++ b/src/nanoarrow/nanoarrow_device.c @@ -493,5 +493,5 @@ ArrowErrorCode ArrowDeviceArrayMoveToDevice(struct ArrowDeviceArray* src, NANOARROW_RETURN_NOT_OK(device_dst->array_move(device_src, src, device_dst, dst)); } - return ENOTSUP; + return NANOARROW_OK; } diff --git a/src/nanoarrow/nanoarrow_device_metal.cc b/src/nanoarrow/nanoarrow_device_metal.cc index b0d31f1b0..1a2cb29d9 100644 --- a/src/nanoarrow/nanoarrow_device_metal.cc +++ b/src/nanoarrow/nanoarrow_device_metal.cc @@ -144,7 +144,9 @@ struct ArrowDeviceMetalArrayPrivate { static void ArrowDeviceMetalArrayRelease(struct ArrowArray* array) { struct ArrowDeviceMetalArrayPrivate* private_data = (struct ArrowDeviceMetalArrayPrivate*)array->private_data; - private_data->event->release(); + if (private_data->event != nullptr) { + private_data->event->release(); + } ArrowArrayRelease(&private_data->parent); ArrowFree(private_data); array->release = NULL; @@ -224,15 +226,10 @@ static ArrowErrorCode ArrowDeviceMetalBufferMove(struct ArrowDevice* device_src, mtl_buffer->release(); ArrowBufferMove(src, dst); return NANOARROW_OK; + } else { + // Otherwise, return ENOTSUP to signal that a move is not possible + return ENOTSUP; } - - // Otherwise, initialize a new buffer and copy - struct ArrowBuffer tmp; - ArrowDeviceMetalInitBuffer(&tmp); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&tmp, src->data, src->size_bytes)); - ArrowBufferMove(&tmp, dst); - ArrowBufferReset(src); - return NANOARROW_OK; } else if (device_src->device_type == ARROW_DEVICE_METAL && device_dst->device_type == ARROW_DEVICE_METAL) { // Metal -> Metal is always just a move diff --git a/src/nanoarrow/nanoarrow_device_metal.h b/src/nanoarrow/nanoarrow_device_metal.h index 52cda74c1..cc29fa0f0 100644 --- a/src/nanoarrow/nanoarrow_device_metal.h +++ b/src/nanoarrow/nanoarrow_device_metal.h @@ -26,10 +26,10 @@ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalDefaultDevice) #define ArrowDeviceMetalInitDefaultDevice \ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitDefaultDevice) -#define ArrowDeviceMetalInitCpuBuffer \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitCpuBuffer) -#define ArrowDeviceMetalInitCpuArrayBuffers \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitCpuArrayBuffers) +#define ArrowDeviceMetalInitBuffer \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitBuffer) +#define ArrowDeviceMetalAlignArrayBuffers \ + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalAlignArrayBuffers) #endif diff --git a/src/nanoarrow/nanoarrow_device_metal_test.cc b/src/nanoarrow/nanoarrow_device_metal_test.cc index 0f4cff1b7..10a9e83f4 100644 --- a/src/nanoarrow/nanoarrow_device_metal_test.cc +++ b/src/nanoarrow/nanoarrow_device_metal_test.cc @@ -99,17 +99,26 @@ TEST(NanoarrowDeviceMetal, DeviceGpuBufferMove) { EXPECT_EQ(buffer.data, nullptr); ArrowBufferReset(&buffer2); - // CPU -> GPU without alignment should trigger a copy and release the input + // CPU -> GPU without alignment may require a copy ArrowBufferInit(&buffer); ASSERT_EQ(ArrowBufferAppend(&buffer, data, sizeof(data)), NANOARROW_OK); old_ptr = buffer.data; - ASSERT_EQ(ArrowDeviceBufferMove(cpu, &buffer, gpu, &buffer2), NANOARROW_OK); - EXPECT_EQ(buffer2.size_bytes, 5); - EXPECT_NE(buffer2.data, old_ptr); - EXPECT_EQ(memcmp(buffer2.data, data, sizeof(data)), 0); - EXPECT_EQ(buffer.data, nullptr); - ArrowBufferReset(&buffer2); + int code = ArrowDeviceBufferMove(cpu, &buffer, gpu, &buffer2); + if (code == NANOARROW_OK) { + // If the move was reported as a success, ensure it happened + ASSERT_EQ(buffer2.data, old_ptr); + EXPECT_EQ(buffer.data, nullptr); + ASSERT_EQ(buffer2.size_bytes, 5); + EXPECT_EQ(memcmp(buffer2.data, data, sizeof(data)), 0); + ArrowBufferReset(&buffer2); + } else { + // Otherwise, ensure the old buffer was left intact + ASSERT_EQ(buffer.data, old_ptr); + EXPECT_EQ(buffer2.data, nullptr); + ASSERT_EQ(buffer.size_bytes, 5); + EXPECT_EQ(memcmp(buffer.data, data, sizeof(data)), 0); + } } TEST(NanoarrowDeviceMetal, DeviceGpuBufferCopy) { @@ -236,23 +245,32 @@ TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceMetalArrayViewString) { EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7); - // Copy required to Metal + // In some MacOS environments, ArrowDeviceArrayMoveToDevice() with buffers allocated + // using ArrowMalloc() will work (i.e., apparently MTL::Buffer can wrap + // arbitrary bytes, but only sometimes) struct ArrowDeviceArray device_array2; device_array2.array.release = nullptr; - ASSERT_EQ(ArrowDeviceArrayMoveToDevice(&device_array, metal, &device_array2), ENOTSUP); - ASSERT_EQ(ArrowDeviceArrayViewCopy(&device_array_view, metal, &device_array2), - NANOARROW_OK); - ArrowArrayRelease(&device_array.array); - ASSERT_NE(device_array2.array.release, nullptr); - ASSERT_EQ(device_array2.device_id, metal->device_id); + int code = ArrowDeviceArrayMoveToDevice(&device_array, metal, &device_array2); + if (code == NANOARROW_OK) { + // If the move was successful, ensure it actually happened + ASSERT_EQ(device_array.array.release, nullptr); + ASSERT_NE(device_array2.array.release, nullptr); + } else { + // If the move was not successful, ensure we can copy to a metal device array + ASSERT_EQ(code, ENOTSUP); + ASSERT_EQ(ArrowDeviceArrayViewCopy(&device_array_view, metal, &device_array2), + NANOARROW_OK); + } + + // Either way, ensure that the device array is reporting correct values ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array2, nullptr), NANOARROW_OK); EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7); EXPECT_EQ(memcmp(device_array_view.array_view.buffer_views[2].data.data, "abcdefg", 7), 0); - // Copy shouldn't be required to the CPU + // Copy shouldn't be required back to the CPU ASSERT_EQ(ArrowDeviceArrayMoveToDevice(&device_array2, cpu, &device_array), NANOARROW_OK); ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array, nullptr),