From 986ffbbd0e8f659ee5479ec841d9f9203a9a9bbb Mon Sep 17 00:00:00 2001 From: Fedir Zadniprovskyi Date: Sun, 26 Jan 2025 15:11:48 -0800 Subject: [PATCH 1/2] feat: add cuda 12.4.1 support (#276) --- .github/workflows/docker-build-and-push.yaml | 4 +++- Dockerfile | 5 ++++- compose.cuda.yaml | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-build-and-push.yaml b/.github/workflows/docker-build-and-push.yaml index 7d2e550b..06fcb77b 100644 --- a/.github/workflows/docker-build-and-push.yaml +++ b/.github/workflows/docker-build-and-push.yaml @@ -16,10 +16,12 @@ jobs: fail-fast: false matrix: # https://hub.docker.com/r/nvidia/cuda/tags - base-image: ['nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04', 'ubuntu:24.04'] + base-image: ['nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04', 'nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04', 'ubuntu:24.04'] include: - base-image: nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 tag-suffix: -cuda + - base-image: nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 + tag-suffix: -cuda-12.4.1 - base-image: ubuntu:24.04 tag-suffix: -cpu steps: diff --git a/Dockerfile b/Dockerfile index 3a7d7f53..1f61eea3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,18 +6,21 @@ LABEL org.opencontainers.image.licenses="MIT" # `ffmpeg` is installed because without it `gradio` won't work with mp3(possible others as well) files # hadolint ignore=DL3008 RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends curl ffmpeg python3.12 && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends curl ffmpeg && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* # "ubuntu" is the default user on ubuntu images with UID=1000. This user is used for two reasons: # 1. It's generally a good practice to run containers as non-root users. See https://www.docker.com/blog/understanding-the-docker-user-instruction/ # 2. Docker Spaces on HuggingFace don't support running containers as root. See https://huggingface.co/docs/hub/en/spaces-sdks-docker#permissions +# NOTE: the following command was added since nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 doesn't have the `ubuntu` user +RUN useradd --create-home --shell /bin/bash --uid 1000 ubuntu || true USER ubuntu ENV HOME=/home/ubuntu \ PATH=/home/ubuntu/.local/bin:$PATH WORKDIR $HOME/speaches # https://docs.astral.sh/uv/guides/integration/docker/#installing-uv COPY --chown=ubuntu --from=ghcr.io/astral-sh/uv:0.5.18 /uv /bin/uv +# NOTE: per https://docs.astral.sh/uv/guides/install-python, `uv` will automatically install the necessary python version # https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers # https://docs.astral.sh/uv/guides/integration/docker/#compiling-bytecode # TODO: figure out if `/home/ubuntu/.cache/uv` should be used instead of `/root/.cache/uv` diff --git a/compose.cuda.yaml b/compose.cuda.yaml index 39b238de..0d3b8482 100644 --- a/compose.cuda.yaml +++ b/compose.cuda.yaml @@ -5,10 +5,11 @@ services: extends: file: compose.yaml service: speaches + # NOTE: slightly older cuda version is available under 'latest-cuda-12.4.1' tag image: ghcr.io/speaches-ai/speaches:latest-cuda build: args: - BASE_IMAGE: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04 + BASE_IMAGE: nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 environment: - WHISPER__MODEL=Systran/faster-whisper-large-v3 volumes: From cf40aed0162d65cdd64327fc302bc33183330d74 Mon Sep 17 00:00:00 2001 From: Fedir Zadniprovskyi Date: Sun, 26 Jan 2025 15:30:26 -0800 Subject: [PATCH 2/2] fix: onnxruntime-gpu not available on arm --- pyproject.toml | 2 ++ uv.lock | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 329dbfb6..a2101746 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -147,6 +147,8 @@ markers = [ [tool.uv] constraint-dependencies = ["llvmlite>=0.40.0"] +# TODO: create an issue on https://github.com/thewh1teagle/kokoro-onnx regarding this +override-dependencies = ["onnxruntime-gpu>=1.20.1 ; platform_machine == 'x86_64'"] [tool.uv.sources] piper-phonemize = { url = "https://github.com/fedirz/piper-phonemize/raw/refs/heads/master/dist/piper_phonemize-1.2.0-cp312-cp312-manylinux_2_28_x86_64.whl" } diff --git a/uv.lock b/uv.lock index e9102d14..ea5044fb 100644 --- a/uv.lock +++ b/uv.lock @@ -3,6 +3,7 @@ requires-python = "==3.12.*" [manifest] constraints = [{ name = "llvmlite", specifier = ">=0.40.0" }] +overrides = [{ name = "onnxruntime-gpu", marker = "platform_machine == 'x86_64'", specifier = ">=1.20.1" }] [[manifest.dependency-metadata]] name = "piper-tts" @@ -878,7 +879,7 @@ wheels = [ [package.optional-dependencies] gpu = [ - { name = "onnxruntime-gpu", marker = "sys_platform != 'darwin'" }, + { name = "onnxruntime-gpu", marker = "platform_machine == 'x86_64'" }, ] [[package]]