Skip to content

Commit

Permalink
conda: update to cudf 0.15, arrow 0.17.1, pandas 1.1 (#351)
Browse files Browse the repository at this point in the history
* Replace pandas.util.testing with new pandas.testing

* conda: update to cudf 0.15, arrow 0.17.1, pandas 1.1

* Import Context, IpcMemHandle directly from pyarrow

Also move to pyarrow.ipc.read_schema (from pyarrow.read_schema).

* Replace implicit conversions to arrow with explicit ones

* Bump cuda runtime to 11.0

* Support testing Python 3.6 w/o GPU support.

* Add tests for Python 3.8

* Bump pyarrow to 0.17 in setup.py

* Switch container and test setup for pip

Co-authored-by: Joel Clay <[email protected]>
  • Loading branch information
andrewseidl and jclay authored Dec 2, 2020
1 parent 03f7f5e commit 02fad48
Show file tree
Hide file tree
Showing 10 changed files with 105 additions and 45 deletions.
77 changes: 70 additions & 7 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ def precommit_container_name = "pymapd-precommit-$BUILD_NUMBER"
def db_container_image = "omnisci/core-os-cuda-dev:master"
//def db_container_image = "omnisci/core-os-cuda"
def db_container_name = "pymapd-db-$BUILD_NUMBER"
def testscript_container_image = "rapidsai/rapidsai:0.8-cuda10.0-runtime-ubuntu18.04-gcc7-py3.6"
def testscript_container_image = "rapidsai/rapidsai:0.16-cuda11.0-base-ubuntu18.04-py3.7"
def testscript_container_name = "pymapd-pytest-$BUILD_NUMBER"
def stage_succeeded
def git_commit
Expand Down Expand Up @@ -113,6 +113,9 @@ pipeline {
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
script { stage_succeeded = false }
setBuildStatus("Running tests", "PENDING", "$STAGE_NAME", git_commit);
// GPU support via cudf requires Python 3.7 or above, so for
// Python 3.6 we test against a CPU-only environment by setting
// the CPU_ONLY flag.
sh """
docker run \
-d \
Expand Down Expand Up @@ -142,8 +145,8 @@ pipeline {
--name $testscript_container_name \
$testscript_container_image \
bash -c '\
PYTHON=3.6 ./ci/install-test-deps-conda.sh && \
source activate /conda/envs/omnisci-gpu-dev && \
PYTHON=3.6 CPU_ONLY=true ./ci/install-test-deps-conda.sh && \
source activate /conda/envs/omnisci-dev && \
pytest tests'
docker rm -f $testscript_container_name || true
Expand Down Expand Up @@ -228,7 +231,7 @@ pipeline {
}
}
}
stage('Pytest - pip python3.6') {
stage('Pytest - conda python3.8') {
steps {
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
script { stage_succeeded = false }
Expand Down Expand Up @@ -262,9 +265,69 @@ pipeline {
--name $testscript_container_name \
$testscript_container_image \
bash -c '\
. ~/.bashrc && \
conda install python=3.6 -y && \
./ci/install-test-deps-pip.sh && \
PYTHON=3.8 ./ci/install-test-deps-conda.sh && \
source activate /conda/envs/omnisci-gpu-dev && \
pytest tests'
docker rm -f $testscript_container_name || true
docker rm -f $db_container_name || true
"""
script { stage_succeeded = true }
}
}
post {
always {
script {
if (stage_succeeded == true) {
setBuildStatus("Build succeeded", "SUCCESS", "$STAGE_NAME", git_commit);
} else {
sh """
docker rm -f $testscript_container_name || true
docker rm -f $db_container_name || true
"""
setBuildStatus("Build failed", "FAILURE", "$STAGE_NAME", git_commit);
}
}
}
}
}
stage('Pytest - pip python3.6') {
steps {
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
script { stage_succeeded = false }
setBuildStatus("Running tests", "PENDING", "$STAGE_NAME", git_commit);
sh """
docker run \
-d \
--rm \
--runtime=nvidia \
--ipc="shareable" \
--network="pytest" \
-p 6273 \
--name $db_container_name \
$db_container_image \
bash -c "/omnisci/startomnisci \
--non-interactive \
--data /omnisci-storage/data \
--config /omnisci-storage/omnisci.conf \
--enable-runtime-udf \
--enable-table-functions \
"
sleep 3
docker run \
--rm \
--runtime=nvidia \
--ipc="container:${db_container_name}" \
--network="pytest" \
--entrypoint="" \
-v $WORKSPACE:/pymapd \
--workdir="/pymapd" \
--name $testscript_container_name \
condaforge/linux-anvil-comp7 \
bash -c '\
PYTHON=3.6 ./ci/install-test-deps-pip.sh && \
source /opt/conda/bin/activate /opt/conda/envs/omnisci-dev-pip && \
pytest tests'
docker rm -f $testscript_container_name || true
Expand Down
15 changes: 11 additions & 4 deletions ci/install-test-deps-conda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,23 @@ echo
echo "[conda build]"
conda install -q conda-build anaconda-client conda-verify --yes

if [ "$CPU_ONLY" = true ] ; then
ENV_FILE=./environment.yml
ENV_NAME=omnisci-dev
else
ENV_FILE=./environment_gpu.yml
ENV_NAME=omnisci-gpu-dev
fi

# Create a copy of the environment file, replacing the python
# version constraint with the python version we specify.
sed -E "s/- python[^[:alpha:]]+$/- python=$PYTHON/" ./environment_gpu.yml > /tmp/environment_gpu_${PYTHON}.yml
sed -E "s/- python[^[:alpha:]]+$/- python=$PYTHON/" ${ENV_FILE} > /tmp/${ENV_NAME}_${PYTHON}.yml

conda env create -f /tmp/environment_gpu_${PYTHON}.yml
conda env create -f /tmp/${ENV_NAME}_${PYTHON}.yml

conda activate omnisci-gpu-dev
conda activate ${ENV_NAME}

pip install -e .
conda list omnisci-gpu-dev
conda list ${ENV_NAME}
echo
exit 0
10 changes: 8 additions & 2 deletions ci/install-test-deps-pip.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
#!/bin/bash

set -e
set -x
# To be run from root of repo

pip install -r ci/requirements.txt
# GCC needed to build thrift wheel
# git needed for setup.py scm version
/opt/conda/bin/conda create -n omnisci-dev-pip python=${PYTHON} git gcc_linux-64 gxx_linux-64 --yes

pip install -e .
source /opt/conda/bin/activate omnisci-dev-pip

pip install -e '.[test]'
16 changes: 0 additions & 16 deletions ci/requirements.txt

This file was deleted.

6 changes: 3 additions & 3 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ channels:
- defaults
dependencies:
- thrift=0.13.0
- pyarrow==0.15.0.*
- pandas>=0.25,<0.26
- python>3.6.6,<3.8
- pyarrow==0.17.1.*
- pandas==1.1.*
- python>3.6.6,<3.9
# related: https://github.com/conda-forge/pillow-feedstock/issues/73
- libtiff=4.0.10
- geopandas
Expand Down
10 changes: 5 additions & 5 deletions environment_gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ channels:
- rapidsai
dependencies:
- thrift=0.13.0
- cudf=0.14
- cudatoolkit=10.1
- pyarrow==0.15.0.*
- pandas>=0.25,<0.26
- python>3.6.6,<3.8
- cudf=0.15
- cudatoolkit=11.0
- pyarrow==0.17.1.*
- pandas==1.1.*
- python>3.6.6,<3.9
# related: https://github.com/conda-forge/pillow-feedstock/issues/73
- libtiff=4.0.10
- geopandas
Expand Down
6 changes: 3 additions & 3 deletions pymapd/_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def _parse_tdf_gpu(tdf):
import pyarrow as pa
from cudf.comm.gpuarrow import GpuArrowReader
from cudf.core.dataframe import DataFrame
from cudf._lib.arrow._cuda import Context, IpcMemHandle
from pyarrow._cuda import Context, IpcMemHandle
from numba import cuda

ipc_handle = IpcMemHandle.from_buffer(pa.py_buffer(tdf.df_handle))
Expand All @@ -189,7 +189,7 @@ def _parse_tdf_gpu(tdf):
schema_buffer, shm_ptr = load_buffer(tdf.sm_handle, tdf.sm_size)

buffer = pa.BufferReader(schema_buffer)
schema = pa.read_schema(buffer)
schema = pa.ipc.read_schema(buffer)

# Dictionary Memo functionality used to
# deserialize on the C++ side is not
Expand Down Expand Up @@ -230,7 +230,7 @@ def _parse_tdf_gpu(tdf):

for k, v in reader.to_dict().items():
if k in dict_memo:
df[k] = pa.DictionaryArray.from_arrays(v, dict_memo[k])
df[k] = pa.DictionaryArray.from_arrays(v.to_arrow(), dict_memo[k])
else:
df[k] = v

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
long_description = f.read()

install_requires = [
'pyarrow == 0.15.0',
'pyarrow >= 0.17,<0.18',
'thrift == 0.13.0',
'shapely',
'sqlalchemy >= 1.3',
Expand Down
6 changes: 3 additions & 3 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import numpy as np
import pyarrow as pa
from pandas.api.types import is_object_dtype, is_categorical_dtype
import pandas.util.testing as tm
import pandas.testing as tm
import shapely
from shapely.geometry import Point, LineString, Polygon, MultiPolygon
import textwrap
Expand Down Expand Up @@ -337,8 +337,8 @@ def test_select_text_ipc_gpu(self, con):
assert isinstance(result, DataFrame)

assert len(result) == 8
assert set(result['trans']) == set(["BUY"])
assert set(result['symbol']) == symbols
assert set(result['trans'].to_arrow()) == set(["BUY"])
assert set(result['symbol'].to_arrow()) == symbols
c.execute('drop table if exists stocks;')

@pytest.mark.skipif(no_gpu(), reason="No GPU available")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_runtime_udf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
import pandas as pd
import numpy as np
import pandas.util.testing as tm
import pandas.testing as tm

pytest.importorskip('rbc')

Expand Down

0 comments on commit 02fad48

Please sign in to comment.