From a03a8fb64f2d46763acd02ef779aafc2186e198a Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 31 Jan 2025 20:46:45 +0800 Subject: [PATCH 1/3] chore: bump pytorch to 2.6.0 --- .devcontainer/download_libtorch.sh | 2 +- .github/workflows/test_cuda.yml | 4 ++-- backend/find_pytorch.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/download_libtorch.sh b/.devcontainer/download_libtorch.sh index d78b559997..ef68a2e615 100755 --- a/.devcontainer/download_libtorch.sh +++ b/.devcontainer/download_libtorch.sh @@ -4,5 +4,5 @@ set -ev SCRIPT_PATH=$(dirname $(realpath -s $0)) cd ${SCRIPT_PATH}/.. -wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.5.0%2Bcpu.zip -O ~/libtorch.zip +wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcpu.zip -O ~/libtorch.zip unzip ~/libtorch.zip diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index 324987f354..cd2761f393 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -47,7 +47,7 @@ jobs: && sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3 if: false # skip as we use nvidia image - run: python -m pip install -U uv - - run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.5.0" "jax[cuda12]" + - run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.6.0" "jax[cuda12]" - run: | export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])') export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') @@ -68,7 +68,7 @@ jobs: run: source/tests/infer/convert-models.sh - name: Download libtorch run: | - wget https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.5.0%2Bcu124.zip -O libtorch.zip + wget https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcu124.zip -O libtorch.zip unzip libtorch.zip - run: | export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch diff --git a/backend/find_pytorch.py b/backend/find_pytorch.py index 125fd6a389..a309e3b9e9 100644 --- a/backend/find_pytorch.py +++ b/backend/find_pytorch.py @@ -116,7 +116,7 @@ def get_pt_requirement(pt_version: str = "") -> dict: cuda_version = os.environ.get("CUDA_VERSION", "12.2") if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"): # CUDA 12.2, cudnn 9 - pt_version = "2.5.0" + pt_version = "2.6.0" elif cuda_version in SpecifierSet(">=11,<12"): # CUDA 11.8, cudnn 8 pt_version = "2.3.1" From 179303e55bc3a43b4c6ba89c2da5171db8c8c9f5 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 31 Jan 2025 21:04:25 +0800 Subject: [PATCH 2/3] import torch._dynamo before set device Signed-off-by: Jinzhe Zeng --- source/tests/pt/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/source/tests/pt/__init__.py b/source/tests/pt/__init__.py index 1a6de0591a..045a752b96 100644 --- a/source/tests/pt/__init__.py +++ b/source/tests/pt/__init__.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import torch +import torch._dynamo torch.set_num_threads(1) torch.set_num_interop_threads(1) From 8925d3ab22e18749b17e7281087f4568ad73ea86 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 2 Feb 2025 23:47:46 +0800 Subject: [PATCH 3/3] always reinstall Xref: https://github.com/astral-sh/uv/issues/2152#issuecomment-1976703830 --- .github/workflows/test_cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index cd2761f393..31c204430e 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -52,7 +52,7 @@ jobs: export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])') export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') source/install/uv_with_retry.sh pip install --system --pre https://paddle-whl.bj.bcebos.com/nightly/cu123/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0.dev20241126-cp311-cp311-linux_x86_64.whl - source/install/uv_with_retry.sh pip install --system -v -e .[gpu,test,lmp,cu12,torch,jax] mpi4py + source/install/uv_with_retry.sh pip install --system -v -e .[gpu,test,lmp,cu12,torch,jax] mpi4py --reinstall-package deepmd-kit env: DP_VARIANT: cuda DP_ENABLE_NATIVE_OPTIMIZATION: 1