From 1c32a83cb9375ab68145e8825150f0b17f172eba Mon Sep 17 00:00:00 2001
From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com>
Date: Mon, 17 Apr 2023 08:04:13 -0700
Subject: [PATCH 1/2] Pin independent cluster pandas to <2

---
 .github/cluster.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/cluster.yml b/.github/cluster.yml
index 694caceb5..e013070a0 100644
--- a/.github/cluster.yml
+++ b/.github/cluster.yml
@@ -10,7 +10,7 @@ services:
     environment:
       USE_MAMBA: "true"
       # p2p shuffling requires pyarrow>=7.0.0
-      EXTRA_CONDA_PACKAGES: "pyarrow>=7.0.0"
+      EXTRA_CONDA_PACKAGES: "pyarrow>=7.0.0 pandas>=1.4.0,<2"
   dask-worker:
     container_name: dask-worker
     image: daskdev/dask:dev-py3.9
@@ -18,6 +18,6 @@ services:
     environment:
       USE_MAMBA: "true"
       # TODO: remove pandas constraint once Dask images are updated
-      EXTRA_CONDA_PACKAGES: "cloudpickle>=2.1.0 pyarrow>=6.0.1 libstdcxx-ng>=12.1.0 pandas>=1.5.0"
+      EXTRA_CONDA_PACKAGES: "cloudpickle>=2.1.0 pyarrow>=6.0.1 libstdcxx-ng>=12.1.0 pandas>=1.4.0,<2"
     volumes:
       - /tmp:/tmp

From f218ea45cc1e617a0886fbaff9204391d612430a Mon Sep 17 00:00:00 2001
From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com>
Date: Mon, 17 Apr 2023 08:56:50 -0700
Subject: [PATCH 2/2] Refactor cluster config files to make environments a
 little more readable

---
 .github/cluster-upstream.yml                | 23 ------------------
 .github/workflows/test-upstream.yml         |  4 ++--
 .github/workflows/test.yml                  |  4 ++--
 .../cluster/environment.yml                 |  7 ++++++
 .../cluster/stable.yml                      | 11 ++++-----
 continuous_integration/cluster/upstream.yml | 24 +++++++++++++++++++
 6 files changed, 40 insertions(+), 33 deletions(-)
 delete mode 100644 .github/cluster-upstream.yml
 create mode 100644 continuous_integration/cluster/environment.yml
 rename .github/cluster.yml => continuous_integration/cluster/stable.yml (53%)
 create mode 100644 continuous_integration/cluster/upstream.yml

diff --git a/.github/cluster-upstream.yml b/.github/cluster-upstream.yml
deleted file mode 100644
index e23851616..000000000
--- a/.github/cluster-upstream.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Docker-compose setup used during tests
-version: '3'
-services:
-  dask-scheduler:
-    container_name: dask-scheduler
-    image: daskdev/dask:dev-py3.9
-    command: dask-scheduler
-    environment:
-      USE_MAMBA: "true"
-      # TODO: remove pandas constraint once Dask images are updated
-      EXTRA_CONDA_PACKAGES: "dask/label/dev::dask cloudpickle>=2.1.0 pandas>=1.5.0"
-    ports:
-      - "8786:8786"
-  dask-worker:
-    container_name: dask-worker
-    image: daskdev/dask:dev-py3.9
-    command: dask-worker dask-scheduler:8786
-    environment:
-      USE_MAMBA: "true"
-      # TODO: remove pandas constraint once Dask images are updated
-      EXTRA_CONDA_PACKAGES: "dask/label/dev::dask cloudpickle>=2.1.0 pyarrow>=6.0.1 libstdcxx-ng>=12.1.0 pandas>=1.5.0"
-    volumes:
-      - /tmp:/tmp
diff --git a/.github/workflows/test-upstream.yml b/.github/workflows/test-upstream.yml
index 9a482190c..4c0c7f5c3 100644
--- a/.github/workflows/test-upstream.yml
+++ b/.github/workflows/test-upstream.yml
@@ -129,9 +129,9 @@ jobs:
       - name: run a dask cluster
         run: |
           if [[ $which_upstream == "Dask" ]]; then
-            docker-compose -f .github/cluster-upstream.yml up -d
+            docker-compose -f continuous_integration/cluster/upstream.yml up -d
           else
-            docker-compose -f .github/cluster.yml up -d
+            docker-compose -f continuous_integration/cluster/stable.yml up -d
           fi

           # periodically ping logs until a connection has been established; assume failure after 2 minutes
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5a9599f56..17c81a973 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -125,9 +125,9 @@ jobs:
           UPSTREAM: ${{ needs.detect-ci-trigger.outputs.triggered }}
         run: |
           if [[ $UPSTREAM == "true" ]]; then
-            docker-compose -f .github/cluster-upstream.yml up -d
+            docker-compose -f continuous_integration/cluster/upstream.yml up -d
           else
-            docker-compose -f .github/cluster.yml up -d
+            docker-compose -f continuous_integration/cluster/stable.yml up -d
           fi

           # periodically ping logs until a connection has been established; assume failure after 2 minutes
diff --git a/continuous_integration/cluster/environment.yml b/continuous_integration/cluster/environment.yml
new file mode 100644
index 000000000..29d335f68
--- /dev/null
+++ b/continuous_integration/cluster/environment.yml
@@ -0,0 +1,7 @@
+name: base
+channels:
+- conda-forge
+- nodefaults
+dependencies:
+# dask serialization needs core libraries to be consistent on client/cluster
+- pandas>=1.4.0,<2
diff --git a/.github/cluster.yml b/continuous_integration/cluster/stable.yml
similarity index 53%
rename from .github/cluster.yml
rename to continuous_integration/cluster/stable.yml
index e013070a0..947adaae5 100644
--- a/.github/cluster.yml
+++ b/continuous_integration/cluster/stable.yml
@@ -4,20 +4,19 @@ services:
   dask-scheduler:
     container_name: dask-scheduler
     image: daskdev/dask:dev-py3.9
-    command: dask-scheduler
+    command: dask scheduler
     ports:
       - "8786:8786"
     environment:
       USE_MAMBA: "true"
-      # p2p shuffling requires pyarrow>=7.0.0
-      EXTRA_CONDA_PACKAGES: "pyarrow>=7.0.0 pandas>=1.4.0,<2"
+    volumes:
+      - ./environment.yml:/opt/app/environment.yml
   dask-worker:
     container_name: dask-worker
     image: daskdev/dask:dev-py3.9
-    command: dask-worker dask-scheduler:8786
+    command: dask worker dask-scheduler:8786
     environment:
       USE_MAMBA: "true"
-      # TODO: remove pandas constraint once Dask images are updated
-      EXTRA_CONDA_PACKAGES: "cloudpickle>=2.1.0 pyarrow>=6.0.1 libstdcxx-ng>=12.1.0 pandas>=1.4.0,<2"
     volumes:
+      - ./environment.yml:/opt/app/environment.yml
       - /tmp:/tmp
diff --git a/continuous_integration/cluster/upstream.yml b/continuous_integration/cluster/upstream.yml
new file mode 100644
index 000000000..256995ac0
--- /dev/null
+++ b/continuous_integration/cluster/upstream.yml
@@ -0,0 +1,24 @@
+# Docker-compose setup used during tests
+version: '3'
+services:
+  dask-scheduler:
+    container_name: dask-scheduler
+    image: daskdev/dask:dev-py3.9
+    command: dask scheduler
+    ports:
+      - "8786:8786"
+    environment:
+      USE_MAMBA: "true"
+      EXTRA_CONDA_PACKAGES: "dask/label/dev::dask"
+    volumes:
+      - ./environment.yml:/opt/app/environment.yml
+  dask-worker:
+    container_name: dask-worker
+    image: daskdev/dask:dev-py3.9
+    command: dask worker dask-scheduler:8786
+    environment:
+      USE_MAMBA: "true"
+      EXTRA_CONDA_PACKAGES: "dask/label/dev::dask"
+    volumes:
+      - ./environment.yml:/opt/app/environment.yml
+      - /tmp:/tmp
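
The refactored compose files can be exercised locally the same way the workflows above do. This is a minimal sketch, assuming Docker Compose is installed and that the daskdev/dask image's startup script applies the mounted /opt/app/environment.yml (the mount target used in both compose files) and installs any EXTRA_CONDA_PACKAGES; under that assumption the pandas pin comes from the shared continuous_integration/cluster/environment.yml in both the stable and upstream clusters.

    # start the stable cluster, as the test workflow does when no upstream trigger fired
    docker-compose -f continuous_integration/cluster/stable.yml up -d

    # or start the nightly-dask cluster used by the upstream jobs
    docker-compose -f continuous_integration/cluster/upstream.yml up -d

    # tear the containers down once testing is finished
    docker-compose -f continuous_integration/cluster/stable.yml down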