Skip to content

Commit 7485c93

Browse files
Refactor the Arrow tests and simplify importing pybind11. (#5247)
[SC-52967](https://app.shortcut.com/tiledb-inc/story/52967/implement-better-solution-for-pybind11-failures) Continuation of #5238. I updated the build system to import pybind11 using regular CMake methods, and also moved the Arrow tests to a separate executable, to be executed in the standalone tests. Some test failures were fixed by updating the GCS emulator. --- TYPE: NO_HISTORY
1 parent 07d7cad commit 7485c93

File tree

7 files changed

+81
-127
lines changed

7 files changed

+81
-127
lines changed

.github/workflows/build-windows.yml

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -24,27 +24,23 @@ jobs:
2424
include:
2525
- environ: 'azure'
2626
TILEDB_AZURE: ON
27-
TILEDB_ARROW_TESTS: ON
2827
TILEDB_SERIALIZATION: OFF
2928
TILEDB_S3: OFF
3029
TILEDB_WEBP: OFF
3130
- environ: 's3'
3231
TILEDB_S3: ON
33-
TILEDB_ARROW_TESTS: ON
3432
TILEDB_SERIALIZATION: OFF
3533
TILEDB_AZURE: OFF
3634
TILEDB_WEBP: OFF
3735
- environ: 'gcs'
3836
TILEDB_GCS: ON
39-
TILEDB_ARROW_TESTS: ON
4037
TILEDB_SERIALIZATION: OFF
4138
TILEDB_AZURE: OFF
4239
TILEDB_S3: OFF
4340
TILEDB_WEBP: OFF
4441
- environ: 'serialization'
4542
TILEDB_S3: OFF
4643
TILEDB_AZURE: OFF
47-
TILEDB_ARROW_TESTS: OFF
4844
TILEDB_SERIALIZATION: ON
4945
TILEDB_WEBP: ON
5046

@@ -57,7 +53,6 @@ jobs:
5753
TILEDB_AZURE: ${{ matrix.TILEDB_AZURE }} #azure }}
5854
TILEDB_GCS: ${{ matrix.TILEDB_GCS }} #gcs }}
5955
TILEDB_SERIALIZATION: ${{ matrix.TILEDB_SERIALIZATION }} #serialization }}
60-
TILEDB_ARROW_TESTS: ${{ matrix.TILEDB_ARROW_TESTS }}
6156
TILEDB_WEBP: ${{ matrix.TILEDB_WEBP }}
6257
TILEDB_CMAKE_BUILD_TYPE: 'Release'
6358
VCPKG_BINARY_SOURCES: 'clear;x-gha,readwrite'
@@ -110,15 +105,6 @@ jobs:
110105
#but, it seems *we* can populate our own var from our actions, and access it in file upload...
111106
echo TDBLOCALAPPDATA=$env:LOCALAPPDATA >> "$env:GITHUB_ENV"
112107
113-
- name: ARROW python needs
114-
shell: bash
115-
if: ${{ matrix.TILEDB_ARROW_TESTS == 'ON' }}
116-
run: |
117-
set -e pipefail
118-
if [[ "$TILEDB_ARROW_TESTS" == "ON" ]]; then
119-
pip install pyarrow pybind11 numpy
120-
fi
121-
122108
- name: Prepare git
123109
run: git config --global core.autocrlf false
124110

@@ -136,8 +122,6 @@ jobs:
136122
uses: seanmiddleditch/gha-setup-ninja@v4
137123
- name: Prevent vcpkg from building debug variants
138124
run: python $env:GITHUB_WORKSPACE/scripts/ci/patch_vcpkg_triplets.py
139-
- name: Install dependencies from pip
140-
run: python -m pip install pybind11[global]
141125

142126
- name: Configure TileDB
143127
shell: pwsh
@@ -169,6 +153,9 @@ jobs:
169153
if ($env:TILEDB_TOOLS -eq "ON") {
170154
$bootstrapOptions = $bootstrapOptions + " -EnableTools"
171155
}
156+
if ($env:TILEDB_ARROW_TESTS -eq "ON") {
157+
$bootstrapOptions = $bootstrapOptions + " -EnableArrowTests"
158+
}
172159
$CMakeBuildType = $env:TILEDB_CMAKE_BUILD_TYPE
173160
if ($env:TILEDB_DEBUG -eq "ON") {
174161
$bootstrapOptions = $bootstrapOptions + " -EnableDebug"
@@ -185,10 +172,6 @@ jobs:
185172
# if ($env:TILEDB_CI_TSAN -eq "ON") {
186173
# $bootstrapOptions = $bootstrapOptions + " -EnableSanitizer thread -EnableDebug"
187174
# }
188-
# static already added above as initial default
189-
# if ($env:TILEDB_FORCE_BUILD_DEPS" -eq "ON") {
190-
# $bootstrapOptions = $bootstrapOptions + " -EnableBuildDeps"
191-
# }
192175
if ($env:TILEDB_WERROR -eq "OFF") {
193176
$bootstrapOptions = $bootstrapOptions + " -DisableWerror"
194177
}
@@ -258,7 +241,7 @@ jobs:
258241
}
259242
else { #using the node/npm already present in vm
260243
Write-Host "azurite: using vm install nodejs"
261-
#this code path avoids overhead of download/expand/install of alternate nodejs/azurite.
244+
#this code path avoids overhead of download/expand/install of alternate nodejs/azurite.
262245
npm install -g azurite
263246
Write-Host "done with 'npm install -g azurite'"
264247
$azuriteDataPath = (Join-Path $env:TEMP "azuriteData")
@@ -283,23 +266,23 @@ jobs:
283266
Write-Host "cmds: '$cmds'"
284267
Invoke-Expression $cmds
285268
if ($LastExitCode -ne 0) {
286-
Write-Host "Tests failed. tiledb_unit exit status: " $LastExitCocde
287-
$host.SetShouldExit($LastExitCode)
269+
Write-Host "Tests failed. tiledb_unit exit status: " $LastExitCocde
270+
$host.SetShouldExit($LastExitCode)
288271
}
289-
272+
290273
$cmds = "$env:BUILD_BUILDDIRECTORY\tiledb\sm\filesystem\test\unit_vfs -d=yes"
291274
Write-Host "cmds: '$cmds'"
292275
Invoke-Expression $cmds
293276
if ($LastExitCode -ne 0) {
294-
Write-Host "Tests failed. tiledb_vfs exit status: " $LastExitCocde
295-
$host.SetShouldExit($LastExitCode)
277+
Write-Host "Tests failed. tiledb_vfs exit status: " $LastExitCocde
278+
$host.SetShouldExit($LastExitCode)
296279
}
297280
298281
$cmds = "$env:BUILD_BUILDDIRECTORY\test\ci\test_assert.exe -d=yes"
299282
Invoke-Expression $cmds
300283
if ($LastExitCode -ne 0) {
301-
Write-Host "Tests failed. test_assert exit status: " $LastExitCocde
302-
$host.SetShouldExit($LastExitCode)
284+
Write-Host "Tests failed. test_assert exit status: " $LastExitCocde
285+
$host.SetShouldExit($LastExitCode)
303286
}
304287
- name: Build examples
305288
shell: pwsh

.github/workflows/ci-linux_mac.yml

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,6 @@ env:
7272
CMAKE_GENERATOR: ${{ inputs.cmake_generator }}
7373
TILEDB_CI_BACKEND: ${{ inputs.ci_backend }}
7474
TILEDB_CI_OS: ${{ startsWith(inputs.matrix_image, 'ubuntu-') && 'Linux' || 'macOS' }}
75-
# Installing Python does not work on manylinux.
76-
TILEDB_ARROW_TESTS: ${{ !inputs.manylinux && 'ON' || 'OFF' }}
7775
TILEDB_MANYLINUX: ${{ !inputs.manylinux && 'ON' || 'OFF' }}
7876
CXX: ${{ inputs.matrix_compiler_cxx }}
7977
CC: ${{ inputs.matrix_compiler_cc }}
@@ -130,26 +128,11 @@ jobs:
130128
yum install -y redhat-lsb-core centos-release-scl devtoolset-7 perl-IPC-Cmd
131129
echo "source /opt/rh/devtoolset-7/enable" >> ~/.bashrc
132130
133-
# Need this for virtualenv and arrow tests if enabled
134-
- name: 'Install Python'
135-
uses: actions/setup-python@v4
136-
if: ${{ !inputs.manylinux }}
137-
with:
138-
python-version: '3.9'
139-
cache: 'pip'
140-
141131
# This must happen after checkout, because checkout would remove the directory.
142132
- name: Install Ninja
143133
if: inputs.cmake_generator == 'Ninja'
144134
uses: seanmiddleditch/gha-setup-ninja@v4
145135

146-
- name: 'Set up Python dependencies'
147-
if: ${{ !inputs.manylinux }}
148-
run: |
149-
set -e pipefail
150-
python -m pip install --upgrade pip virtualenv
151-
pip install pyarrow pybind11 numpy
152-
153136
- name: 'Brew setup on macOS' # x-ref c8e49ba8f8b9ce
154137
if: ${{ startsWith(matrix.os, 'macos-') == true }}
155138
run: |
@@ -217,7 +200,7 @@ jobs:
217200
./test/ci/test_assert -d yes
218201
./test/tiledb_unit -d yes | awk '/1: ::set-output/{sub(/.*1: /, ""); print; next} 1'
219202
./tiledb/sm/filesystem/test/unit_vfs -d yes | awk '/1: ::set-output/{sub(/.*1: /, ""); print; next} 1'
220-
203+
221204
ctest -R tiledb_timing_unit | awk '/1: ::set-output/{sub(/.*1: /, ""); print; next} 1'
222205
223206
###################################################

.github/workflows/unit-test-runs.yml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,19 @@ jobs:
2727
- name: Install Ninja
2828
uses: seanmiddleditch/gha-setup-ninja@v4
2929

30+
# Need this for virtualenv and arrow tests if enabled
31+
- name: 'Install Python'
32+
uses: actions/setup-python@v5
33+
with:
34+
python-version: '3.9'
35+
cache: 'pip'
36+
37+
- name: 'Set up Python dependencies'
38+
run: |
39+
set -e pipefail
40+
python -m pip install --upgrade pip virtualenv
41+
pip install pyarrow pybind11[global] numpy
42+
3043
- name: Setup MSVC toolset
3144
uses: TheMrMilchmann/setup-msvc-dev@v3
3245
if: ${{ startsWith(matrix.os, 'windows-') }}
@@ -60,7 +73,8 @@ jobs:
6073
-G Ninja \
6174
-DCMAKE_BUILD_TYPE=Debug \
6275
-DTILEDB_SERIALIZATION=ON \
63-
-DTILEDB_ASSERTIONS=ON
76+
-DTILEDB_ASSERTIONS=ON \
77+
-DTILEDB_ARROW_TESTS=ON
6478
# Build all unit tests
6579
cmake --build build --target tests -j4
6680

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,9 @@ if (TILEDB_TESTS)
521521
add_dependencies(tests tiledb_unit)
522522
add_dependencies(tests tiledb_regression)
523523
add_dependencies(tests test_assert)
524+
if(TILEDB_ARROW_TESTS)
525+
add_dependencies(tests unit_arrow)
526+
endif()
524527

525528
# C API support
526529
add_dependencies(tests unit_capi_handle unit_capi_exception_wrapper)

scripts/install-gcs-emu.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ die() {
3131
}
3232

3333
install_gcs(){
34-
git clone --branch v0.36.0 --depth 1 https://github.com/googleapis/storage-testbench.git /tmp/storage-testbench
34+
git clone --branch v0.45.0 --depth 1 https://github.com/googleapis/storage-testbench.git /tmp/storage-testbench
3535
# Create a virtual environment and keep it active
3636
python3 -m venv /tmp/storage-testbench-venv
3737
source /tmp/storage-testbench-venv/bin/activate

scripts/install-run-gcs-emu.ps1

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ Param(
2323

2424
$ErrorActionPreference = "Stop"
2525

26-
$version = "v0.39.0"
26+
$version = "v0.45.0"
2727
$testbenchPath = "$env:TEMP\storage-testbench-$version"
2828
$venvPath = "$env:TEMP\storage-testbench-venv"
2929

test/CMakeLists.txt

Lines changed: 49 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -28,62 +28,6 @@
2828

2929
find_package(Catch2 REQUIRED)
3030

31-
# Arrow integration test config and dependencies
32-
# Override from environment if not set in cache
33-
if (NOT DEFINED $CACHE{TILEDB_ARROW_TESTS})
34-
if ($ENV{TILEDB_ARROW_TESTS})
35-
message(STATUS "Enabling Apache Arrow integration test")
36-
# Technically this would be an "option", but we want conditional override from ENV
37-
set(TILEDB_ARROW_TESTS ON CACHE BOOL "Enable Arrow tests (requires Python with pyarrow and numpy)" FORCE)
38-
mark_as_advanced(TILEDB_ARROW_TESTS)
39-
endif()
40-
endif()
41-
42-
if (${TILEDB_ARROW_TESTS})
43-
# Reworked FindPython was introduced in 3.12 with features used below
44-
if (CMAKE_VERSION VERSION_LESS "3.12")
45-
message(FATAL_ERROR "CMake >= 3.12 is required for TileDB Arrow Tests. (found ${CMAKE_VERSION})")
46-
endif()
47-
# Tell CMake to check the Python registry entry last on Windows
48-
set(Python_FIND_REGISTRY "LAST")
49-
# Tell CMake to prefer Python from the PATH
50-
set(Python_FIND_STRATEGY "LOCATION")
51-
find_package(Python COMPONENTS Interpreter Development REQUIRED)
52-
find_package(pybind11)
53-
54-
message(STATUS "Configuring Apache Arrow integration test with Python ${Python_VERSION} (${Python_EXECUTABLE})")
55-
56-
# If we can't find the pybind11 cmake config (not available in pypi yet)
57-
# try to find with the current executable.
58-
if (NOT ${pybind11_FOUND})
59-
# Get the include arguments from the python executable (has "-I" compiler option)
60-
execute_process(COMMAND ${Python_EXECUTABLE} -m pybind11 --includes
61-
OUTPUT_VARIABLE CMD_PYBIND11_INCLUDE
62-
RESULT_VARIABLE CMD_PYBIND11_RESULT
63-
OUTPUT_STRIP_TRAILING_WHITESPACE)
64-
if (${CMD_PYBIND11_RESULT})
65-
message(FATAL_ERROR "Unable to find pybind11 via cmake or 'python3 -m pybind11 --includes'")
66-
endif()
67-
68-
# Convert args to list
69-
separate_arguments(CMD_PARSED_INCLUDES NATIVE_COMMAND ${CMD_PYBIND11_INCLUDE})
70-
# Remove the "-I" from each include
71-
foreach(INCL_PATH IN LISTS CMD_PARSED_INCLUDES)
72-
string(REPLACE "-I" "" INCL_PATH ${INCL_PATH})
73-
list(APPEND PYBIND11_INCLUDE_DIRECTORIES ${INCL_PATH})
74-
endforeach()
75-
76-
file(TO_CMAKE_PATH "${Python_SITELIB}" SAFE_Python_SITELIB)
77-
set(pybind11_FOUND TRUE CACHE BOOL "pybind11 include path found")
78-
add_library(pybind11::embed INTERFACE IMPORTED)
79-
target_include_directories(pybind11::embed INTERFACE ${PYBIND11_INCLUDE_DIRECTORIES})
80-
target_link_libraries(pybind11::embed INTERFACE Python::Python)
81-
target_compile_definitions(pybind11::embed INTERFACE -DTILEDB_PYTHON_SITELIB_PATH="${SAFE_Python_SITELIB}")
82-
endif()
83-
file(TO_CMAKE_PATH ${CMAKE_CURRENT_BINARY_DIR} SAFE_CURRENT_BINARY_DIR)
84-
target_compile_definitions(pybind11::embed INTERFACE -DTILEDB_PYTHON_UNIT_PATH="${SAFE_CURRENT_BINARY_DIR}")
85-
endif()
86-
8731
# Include TileDB core header directories
8832
set(TILEDB_CORE_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/..")
8933
# Include the C API directory so that the C++ 'tiledb' file can directly
@@ -259,17 +203,15 @@ if (TILEDB_SERIALIZATION)
259203
)
260204
endif()
261205

262-
if (TILEDB_ARROW_TESTS)
263-
list(APPEND TILEDB_UNIT_TEST_SOURCES
264-
src/unit-arrow.cc
265-
${CMAKE_SOURCE_DIR}/tiledb/sm/cpp_api/arrow_io_impl.h
266-
)
267-
endif()
268-
269206
if (TILEDB_VERBOSE)
270207
add_definitions(-DTILEDB_VERBOSE)
271208
endif()
272209

210+
# We want tests to continue as normal even as the API is changing,
211+
# so don't warn for deprecations, since they'll be escalated to errors.
212+
if (NOT MSVC)
213+
add_compile_options(-Wno-deprecated-declarations)
214+
endif()
273215

274216
# unit test executable
275217
add_executable(
@@ -279,16 +221,8 @@ add_executable(
279221
"src/unit.cc"
280222
)
281223

282-
add_dependencies(tiledb_unit tiledb_test_support_lib)
283-
284224
target_compile_options(tiledb_unit PRIVATE "$<$<CXX_COMPILER_ID:MSVC>:/utf-8>")
285225

286-
# We want tests to continue as normal even as the API is changing,
287-
# so don't warn for deprecations, since they'll be escalated to errors.
288-
if (NOT MSVC)
289-
target_compile_options(tiledb_unit PRIVATE -Wno-deprecated-declarations)
290-
endif()
291-
292226
target_include_directories(
293227
tiledb_unit BEFORE PRIVATE
294228
${TILEDB_CORE_INCLUDE_DIR}
@@ -329,13 +263,6 @@ if (TILEDB_SERIALIZATION)
329263
target_compile_definitions(tiledb_unit PRIVATE -DTILEDB_SERIALIZATION)
330264
endif()
331265

332-
if (TILEDB_ARROW_TESTS)
333-
target_link_libraries(tiledb_unit PRIVATE pybind11::embed)
334-
335-
# install the python helper next to the executable for import
336-
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/unit_arrow.py" "${CMAKE_CURRENT_BINARY_DIR}" COPYONLY)
337-
endif()
338-
339266
if (TILEDB_WEBP)
340267
target_compile_definitions(tiledb_unit PRIVATE -DTILEDB_WEBP)
341268
find_package(ZLIB) # We need PNG to use our Zlib so that static link works correctly if applicable
@@ -363,6 +290,50 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Linux" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU
363290
)
364291
endif()
365292

293+
if (TILEDB_ARROW_TESTS)
294+
# Tell CMake to check the Python registry entry last on Windows
295+
set(Python_FIND_REGISTRY "LAST")
296+
# Tell CMake to prefer Python from the PATH
297+
set(Python_FIND_STRATEGY "LOCATION")
298+
find_package(Python COMPONENTS Interpreter Development REQUIRED)
299+
find_package(pybind11 REQUIRED)
300+
message(STATUS "Configuring Apache Arrow integration test with Python ${Python_VERSION} (${Python_EXECUTABLE})")
301+
302+
add_executable(
303+
unit_arrow EXCLUDE_FROM_ALL
304+
$<TARGET_OBJECTS:TILEDB_CORE_OBJECTS>
305+
src/unit-arrow.cc
306+
${CMAKE_SOURCE_DIR}/tiledb/sm/cpp_api/arrow_io_impl.h
307+
)
308+
309+
target_link_libraries(unit_arrow
310+
PUBLIC
311+
TILEDB_CORE_OBJECTS_ILIB
312+
TILEDB_CORE_OBJECTS
313+
Catch2::Catch2WithMain
314+
pybind11::embed
315+
tiledb_test_support_lib
316+
configuration_definitions
317+
)
318+
319+
file(TO_CMAKE_PATH ${CMAKE_CURRENT_BINARY_DIR} SAFE_CURRENT_BINARY_DIR)
320+
target_compile_definitions(unit_arrow PRIVATE -DTILEDB_PYTHON_UNIT_PATH="${SAFE_CURRENT_BINARY_DIR}")
321+
# install the python helper next to the executable for import
322+
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/unit_arrow.py" "${CMAKE_CURRENT_BINARY_DIR}" COPYONLY)
323+
324+
target_include_directories(
325+
unit_arrow BEFORE PRIVATE
326+
${TILEDB_CORE_INCLUDE_DIR}
327+
${TILEDB_EXPORT_HEADER_DIR}
328+
)
329+
330+
add_test(
331+
NAME "unit_arrow"
332+
COMMAND unit_arrow --durations=yes
333+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
334+
)
335+
endif()
336+
366337
# Only produce timing tests for UNIX based systems (faketime constraint)
367338
find_library(
368339
LIBFAKETIME

0 commit comments

Comments
 (0)