Skip to content

Commit

Permalink
fix: Ensure ArrowDeviceArray implementation for AppleMetal passes tes…
Browse files Browse the repository at this point in the history
…ts on newer MacOS (#527)

This PR fixes the Metal build (which was broken after a previous
change), adds it to CI (thanks to the new GitHub M1 runners!) and fixes
a failure I see locally (but not on CI) whereby the Metal runtime on
some (newer? different?) MacOS environments can wrap arbitrary bytes as
an `MTL::Buffer()`.

If true, this is awesome! But it does mean that in some cases we can
"move" CPU arrays to the GPU without a copy and in some cases we have to
copy into page-aligned memory (and that this can only be tested at
runtime).
  • Loading branch information
paleolimbot authored Jun 19, 2024
1 parent 97410e2 commit 1192187
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 55 deletions.
31 changes: 5 additions & 26 deletions .github/workflows/build-and-test-device.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,31 +35,23 @@ permissions:
jobs:
test-c-device:

runs-on: ubuntu-latest
runs-on: ${{ matrix.config.runner }}

name: ${{ matrix.config.label }}

strategy:
fail-fast: false
matrix:
config:
- {label: default-build}
- {label: namespaced-build, cmake_args: "-DNANOARROW_NAMESPACE=SomeUserNamespace"}
- {label: bundled-build, cmake_args: "-DNANOARROW_BUNDLE=ON"}
- {runner: ubuntu-latest, label: default-build}
- {runner: ubuntu-latest, label: namespaced-build, cmake_args: "-DNANOARROW_NAMESPACE=SomeUserNamespace"}
- {runner: ubuntu-latest, label: bundled-build, cmake_args: "-DNANOARROW_DEVICE_BUNDLE=ON"}
- {runner: macOS-latest, label: with-metal, cmake_args: "-DNANOARROW_DEVICE_WITH_METAL=ON"}

env:
SUBDIR: '${{ github.workspace }}'
NANOARROW_ARROW_TESTING_DIR: '${{ github.workspace }}/arrow-testing'

steps:
- uses: actions/checkout@v4

- name: Checkout arrow-testing
uses: actions/checkout@v4
with:
repository: apache/arrow-testing
path: arrow-testing

- name: Install memcheck dependencies
if: matrix.config.label == 'default-build'
run: |
Expand All @@ -82,9 +74,6 @@ jobs:
- name: Build
run: |
ARROW_PATH="$(pwd)/arrow"
cd $SUBDIR
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
sudo ldconfig
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug -DNANOARROW_DEVICE=ON \
Expand All @@ -96,8 +85,6 @@ jobs:
- name: Check for non-namespaced symbols in namespaced build
if: matrix.config.label == 'namespaced-build'
run: |
cd $SUBDIR
# Dump all symbols
nm --extern-only build/libnanoarrow_device.a
Expand All @@ -113,20 +100,12 @@ jobs:
- name: Run tests
run: |
cd $SUBDIR
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
sudo ldconfig
cd build
ctest -T test --output-on-failure .
- name: Run tests with valgrind
if: matrix.config.label == 'default-build' || matrix.config.label == 'default-noatomics'
run: |
cd $SUBDIR
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
sudo ldconfig
cd build
ctest -T memcheck .
Expand Down
2 changes: 1 addition & 1 deletion src/nanoarrow/nanoarrow_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -493,5 +493,5 @@ ArrowErrorCode ArrowDeviceArrayMoveToDevice(struct ArrowDeviceArray* src,
NANOARROW_RETURN_NOT_OK(device_dst->array_move(device_src, src, device_dst, dst));
}

return ENOTSUP;
return NANOARROW_OK;
}
15 changes: 6 additions & 9 deletions src/nanoarrow/nanoarrow_device_metal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,9 @@ struct ArrowDeviceMetalArrayPrivate {
static void ArrowDeviceMetalArrayRelease(struct ArrowArray* array) {
struct ArrowDeviceMetalArrayPrivate* private_data =
(struct ArrowDeviceMetalArrayPrivate*)array->private_data;
private_data->event->release();
if (private_data->event != nullptr) {
private_data->event->release();
}
ArrowArrayRelease(&private_data->parent);
ArrowFree(private_data);
array->release = NULL;
Expand Down Expand Up @@ -224,15 +226,10 @@ static ArrowErrorCode ArrowDeviceMetalBufferMove(struct ArrowDevice* device_src,
mtl_buffer->release();
ArrowBufferMove(src, dst);
return NANOARROW_OK;
} else {
// Otherwise, return ENOTSUP to signal that a move is not possible
return ENOTSUP;
}

// Otherwise, initialize a new buffer and copy
struct ArrowBuffer tmp;
ArrowDeviceMetalInitBuffer(&tmp);
NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(&tmp, src->data, src->size_bytes));
ArrowBufferMove(&tmp, dst);
ArrowBufferReset(src);
return NANOARROW_OK;
} else if (device_src->device_type == ARROW_DEVICE_METAL &&
device_dst->device_type == ARROW_DEVICE_METAL) {
// Metal -> Metal is always just a move
Expand Down
8 changes: 4 additions & 4 deletions src/nanoarrow/nanoarrow_device_metal.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalDefaultDevice)
#define ArrowDeviceMetalInitDefaultDevice \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitDefaultDevice)
#define ArrowDeviceMetalInitCpuBuffer \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitCpuBuffer)
#define ArrowDeviceMetalInitCpuArrayBuffers \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitCpuArrayBuffers)
#define ArrowDeviceMetalInitBuffer \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalInitBuffer)
#define ArrowDeviceMetalAlignArrayBuffers \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDeviceMetalAlignArrayBuffers)

#endif

Expand Down
48 changes: 33 additions & 15 deletions src/nanoarrow/nanoarrow_device_metal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,17 +99,26 @@ TEST(NanoarrowDeviceMetal, DeviceGpuBufferMove) {
EXPECT_EQ(buffer.data, nullptr);
ArrowBufferReset(&buffer2);

// CPU -> GPU without alignment should trigger a copy and release the input
// CPU -> GPU without alignment may require a copy
ArrowBufferInit(&buffer);
ASSERT_EQ(ArrowBufferAppend(&buffer, data, sizeof(data)), NANOARROW_OK);
old_ptr = buffer.data;
ASSERT_EQ(ArrowDeviceBufferMove(cpu, &buffer, gpu, &buffer2), NANOARROW_OK);
EXPECT_EQ(buffer2.size_bytes, 5);
EXPECT_NE(buffer2.data, old_ptr);
EXPECT_EQ(memcmp(buffer2.data, data, sizeof(data)), 0);
EXPECT_EQ(buffer.data, nullptr);

ArrowBufferReset(&buffer2);
int code = ArrowDeviceBufferMove(cpu, &buffer, gpu, &buffer2);
if (code == NANOARROW_OK) {
// If the move was reported as a success, ensure it happened
ASSERT_EQ(buffer2.data, old_ptr);
EXPECT_EQ(buffer.data, nullptr);
ASSERT_EQ(buffer2.size_bytes, 5);
EXPECT_EQ(memcmp(buffer2.data, data, sizeof(data)), 0);
ArrowBufferReset(&buffer2);
} else {
// Otherwise, ensure the old buffer was left intact
ASSERT_EQ(buffer.data, old_ptr);
EXPECT_EQ(buffer2.data, nullptr);
ASSERT_EQ(buffer.size_bytes, 5);
EXPECT_EQ(memcmp(buffer.data, data, sizeof(data)), 0);
}
}

TEST(NanoarrowDeviceMetal, DeviceGpuBufferCopy) {
Expand Down Expand Up @@ -236,23 +245,32 @@ TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceMetalArrayViewString) {

EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);

// Copy required to Metal
// In some MacOS environments, ArrowDeviceArrayMoveToDevice() with buffers allocated
// using ArrowMalloc() will work (i.e., apparently MTL::Buffer can wrap
// arbitrary bytes, but only sometimes)
struct ArrowDeviceArray device_array2;
device_array2.array.release = nullptr;
ASSERT_EQ(ArrowDeviceArrayMoveToDevice(&device_array, metal, &device_array2), ENOTSUP);
ASSERT_EQ(ArrowDeviceArrayViewCopy(&device_array_view, metal, &device_array2),
NANOARROW_OK);
ArrowArrayRelease(&device_array.array);

ASSERT_NE(device_array2.array.release, nullptr);
ASSERT_EQ(device_array2.device_id, metal->device_id);
int code = ArrowDeviceArrayMoveToDevice(&device_array, metal, &device_array2);
if (code == NANOARROW_OK) {
// If the move was successful, ensure it actually happened
ASSERT_EQ(device_array.array.release, nullptr);
ASSERT_NE(device_array2.array.release, nullptr);
} else {
// If the move was not successful, ensure we can copy to a metal device array
ASSERT_EQ(code, ENOTSUP);
ASSERT_EQ(ArrowDeviceArrayViewCopy(&device_array_view, metal, &device_array2),
NANOARROW_OK);
}

// Either way, ensure that the device array is reporting correct values
ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array2, nullptr),
NANOARROW_OK);
EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
EXPECT_EQ(memcmp(device_array_view.array_view.buffer_views[2].data.data, "abcdefg", 7),
0);

// Copy shouldn't be required to the CPU
// Copy shouldn't be required back to the CPU
ASSERT_EQ(ArrowDeviceArrayMoveToDevice(&device_array2, cpu, &device_array),
NANOARROW_OK);
ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array, nullptr),
Expand Down

0 comments on commit 1192187

Please sign in to comment.