From 74d0c5a880091b1713a68bb39d301e34544146d3 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Mon, 11 Nov 2024 15:03:05 +1100
Subject: [PATCH] Add ffmpeg cuda support and download nltk tokenizer (#27)
---
Dockerfile | 13 ++++-
scripts/install_ffmpeg_cuda.sh | 80 +++++++++++++++++++++++++++++
scripts/install_punkt_tokenizers.sh | 9 +++-
3 files changed, 100 insertions(+), 2 deletions(-)
create mode 100644 scripts/install_ffmpeg_cuda.sh
diff --git a/Dockerfile b/Dockerfile
index 8af6484..d7d575c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -31,9 +31,17 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Setup scripts and execute them
COPY scripts scripts
RUN bash scripts/install_redis.sh && \
- bash scripts/install_ffmpeg.sh && \
bash scripts/install_punkt_tokenizers.sh
+# Install ffmpeg for the target platform (needs `ARG TARGETPLATFORM` declared in this stage): plain build on arm64, CUDA build on amd64
+RUN if [ "${TARGETPLATFORM}" = "linux/arm64" ]; then \
+        bash scripts/install_ffmpeg.sh; \
+    elif [ "${TARGETPLATFORM}" = "linux/amd64" ]; then \
+        bash scripts/install_ffmpeg_cuda.sh; \
+    else \
+        echo "Unsupported platform: ${TARGETPLATFORM}" >&2 && exit 1; \
+    fi
+
# Install Vespa and pin the version. All versions can be found using `dns list vespa`
# This is installed as a separate docker layer since we need to upgrade vespa regularly
RUN dnf config-manager --add-repo https://raw.githubusercontent.com/vespa-engine/vespa/master/dist/vespa-engine.repo && \
@@ -48,3 +56,6 @@ ENV VESPA_LOG_STDOUT="true"
ENV VESPA_LOG_FORMAT="vespa"
ENV VESPA_CLI_HOME=/tmp/.vespa
ENV VESPA_CLI_CACHE_DIR=/tmp/.cache/vespa
+ENV NVIDIA_DRIVER_CAPABILITIES=utility,compute,video
+# NLTK data directory populated by scripts/install_punkt_tokenizers.sh. NOTE(review): it lives under /root - confirm non-root users can read it
+ENV NLTK_DATA=/root/nltk_data
\ No newline at end of file
diff --git a/scripts/install_ffmpeg_cuda.sh b/scripts/install_ffmpeg_cuda.sh
new file mode 100644
index 0000000..6091377
--- /dev/null
+++ b/scripts/install_ffmpeg_cuda.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# Build-time script: installs the CUDA toolkit, then compiles FFmpeg from source with CUDA/NVENC/NPP and x264 enabled.
+set -euo pipefail
+set -x
+
+# Step 1: Install CUDA Toolkit
+# Add the NVIDIA repository for CUDA and install CUDA toolkit version 12.6
+dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
+dnf clean all
+dnf -y install cuda-toolkit-12-6
+# Prepend the CUDA tools and libraries to the search paths; ${VAR:+...} keeps an existing value and is safe under `set -u`
+export PATH=/usr/local/cuda/bin:${PATH}
+export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+# Verify nvcc installation (CUDA compiler); under `set -e` a missing nvcc aborts the build here
+nvcc --version
+# Step 2: Install the toolchain, headers and libraries required to compile FFmpeg (kept in alphabetical order)
+dnf install --assumeyes \
+    automake \
+    gcc \
+    gcc-c++ \
+    git \
+    glibc \
+    glibc-devel \
+    kernel-headers \
+    libtool \
+    make \
+    numactl \
+    numactl-devel \
+    openssl \
+    openssl-devel \
+    pkg-config \
+    yasm
+# Step 3: Install x264
+# Enable the RPM Fusion (free) repository, then install x264 together with its development files
+dnf install --assumeyes https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm
+dnf install --assumeyes x264 x264-devel
+# Step 4: Install NVENC codec headers
+# Clone nv-codec-headers, pin it to a fixed commit and install it; `cd ..` restores the starting directory afterwards
+git clone https://git.videolan.org/git/ffmpeg/nv-codec-headers.git
+cd nv-codec-headers
+git checkout 9934f17316b66ce6de12f3b82203a298bc9351d8 # Fix the version
+make
+make install
+cd ..
+# Set PKG_CONFIG_PATH so pkg-config can find the newly installed nv-codec-headers.
+# ${VAR:+...} appends the previous value only when one exists: no "unbound variable" abort under `set -u`, no trailing ':'
+export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}
+# Add /usr/local/lib to the library search path and update the dynamic linker cache
+echo "/usr/local/lib" | tee -a /etc/ld.so.conf
+ldconfig
+# Step 5: Install FFmpeg
+# Clone the FFmpeg repository into the current working directory
+git clone https://git.ffmpeg.org/ffmpeg.git
+# Pin the checkout, then configure a shared (non-static) build with CUDA/NPP/NVENC, x264 and OpenSSL enabled
+cd ffmpeg
+git checkout faa366003b58ba26484070ca408be4b9d5473a73 # Fix the version
+./configure --enable-nonfree \
+    --enable-cuda-nvcc \
+    --enable-libnpp \
+    --enable-libx264 \
+    --enable-openssl \
+    --enable-nvenc \
+    --enable-gpl \
+    --extra-cflags=-I/usr/local/cuda/include \
+    --extra-ldflags=-L/usr/local/cuda/lib64 \
+    --disable-static \
+    --enable-shared
+# Compile with one job per available CPU, then install
+make -j "$(nproc)"
+make install
+# Clean up: leave the ffmpeg checkout and delete both source trees where they were cloned, not at a hard-coded '/'
+cd ..
+rm -rf nv-codec-headers ffmpeg
+dnf remove -y \
+    make \
+    git \
+    automake
+dnf clean all
+set +x
+
diff --git a/scripts/install_punkt_tokenizers.sh b/scripts/install_punkt_tokenizers.sh
index 684a8a6..0970e6e 100644
--- a/scripts/install_punkt_tokenizers.sh
+++ b/scripts/install_punkt_tokenizers.sh
@@ -1,7 +1,14 @@
#!/bin/bash
# This script is meant to be run at buildtime.
+set -euo pipefail  # stop on the first failing command, on unset variables, and on failures inside pipelines
+set -x  # echo every command as it runs so it shows up in the build log
mkdir -p /root/nltk_data/tokenizers
curl https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip -o /root/nltk_data/tokenizers/punkt.zip
unzip /root/nltk_data/tokenizers/punkt.zip -d /root/nltk_data/tokenizers/
-rm /root/nltk_data/tokenizers/punkt.zip
\ No newline at end of file
+rm /root/nltk_data/tokenizers/punkt.zip
+# Also fetch punkt_tab; --fail makes curl exit non-zero on an HTTP error instead of saving the error page as the zip
+curl --fail https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip -o /root/nltk_data/tokenizers/punkt_tab.zip
+unzip /root/nltk_data/tokenizers/punkt_tab.zip -d /root/nltk_data/tokenizers/
+rm /root/nltk_data/tokenizers/punkt_tab.zip
+set +x
\ No newline at end of file