From 74d0c5a880091b1713a68bb39d301e34544146d3 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Mon, 11 Nov 2024 15:03:05 +1100
Subject: [PATCH] Add ffmpeg cuda support and download nltk tokenizer (#27)

---
Notes (below the three-dash line, not part of the commit message):
- Dockerfile: linux/arm64 runs scripts/install_ffmpeg.sh, linux/amd64 runs
  scripts/install_ffmpeg_cuda.sh, any other TARGETPLATFORM fails the build.
- NOTE(review): TARGETPLATFORM is only set inside a build stage that declares
  `ARG TARGETPLATFORM`; that declaration is outside these hunks - confirm.
- NOTE(review): continuation-line indentation and blank context lines are not
  significant to the change itself; run `git apply --check` against the tree.

 Dockerfile                          | 13 ++++-
 scripts/install_ffmpeg_cuda.sh      | 80 +++++++++++++++++++++++++++++
 scripts/install_punkt_tokenizers.sh |  9 +++-
 3 files changed, 100 insertions(+), 2 deletions(-)
 create mode 100644 scripts/install_ffmpeg_cuda.sh

diff --git a/Dockerfile b/Dockerfile
index 8af6484..d7d575c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -31,9 +31,17 @@ RUN pip3 install --no-cache-dir -r requirements.txt
 # Setup scripts and execute them
 COPY scripts scripts
 RUN bash scripts/install_redis.sh && \
-    bash scripts/install_ffmpeg.sh && \
     bash scripts/install_punkt_tokenizers.sh
 
+# Install ffmpeg for the target platform: plain build on arm64, CUDA-enabled build on amd64
+RUN if [ "${TARGETPLATFORM}" = "linux/arm64" ]; then \
+        bash scripts/install_ffmpeg.sh; \
+    elif [ "${TARGETPLATFORM}" = "linux/amd64" ]; then \
+        bash scripts/install_ffmpeg_cuda.sh; \
+    else \
+        echo "Unsupported platform: ${TARGETPLATFORM}" >&2 && exit 1; \
+    fi
+
 # Install Vespa and pin the version. All versions can be found using `dns list vespa`
 # This is installed as a separate docker layer since we need to upgrade vespa regularly
 RUN dnf config-manager --add-repo https://raw.githubusercontent.com/vespa-engine/vespa/master/dist/vespa-engine.repo && \
@@ -48,3 +56,6 @@ ENV VESPA_LOG_STDOUT="true"
 ENV VESPA_LOG_FORMAT="vespa"
 ENV VESPA_CLI_HOME=/tmp/.vespa
 ENV VESPA_CLI_CACHE_DIR=/tmp/.cache/vespa
+ENV NVIDIA_DRIVER_CAPABILITIES=utility,compute,video
+# expose nltk data to all users
+ENV NLTK_DATA=/root/nltk_data
\ No newline at end of file
diff --git a/scripts/install_ffmpeg_cuda.sh b/scripts/install_ffmpeg_cuda.sh
new file mode 100644
index 0000000..6091377
--- /dev/null
+++ b/scripts/install_ffmpeg_cuda.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+set -euo pipefail
+set -x
+
+# Step 1: Install CUDA Toolkit
+# Add the NVIDIA repository for CUDA and install CUDA toolkit version 12.6
+dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
+dnf clean all
+dnf -y install cuda-toolkit-12-6
+# Put the CUDA binaries and libraries first on the search paths, keeping any existing entries
+export PATH=/usr/local/cuda/bin:${PATH}
+export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+# Verify nvcc installation (CUDA compiler)
+nvcc --version
+# Step 2: Install dependencies required for FFmpeg
+# Install libraries and tools required for FFmpeg compilation
+dnf install -y libtool \
+    glibc \
+    glibc-devel \
+    numactl \
+    numactl-devel \
+    openssl \
+    yasm \
+    pkg-config \
+    openssl-devel \
+    git \
+    gcc \
+    make \
+    gcc-c++ \
+    kernel-headers \
+    automake
+# Step 3: Install x264
+# Add the RPM Fusion repository and install x264 and its development files
+dnf install -y https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm
+dnf install -y x264 x264-devel
+# Step 4: Install NVENC codec headers
+# Clone nv-codec-headers into the current directory, pin it to a known commit and install it
+# (these are the headers the --enable-nvenc FFmpeg build in Step 5 compiles against)
+git clone https://git.videolan.org/git/ffmpeg/nv-codec-headers.git
+cd nv-codec-headers
+git checkout 9934f17316b66ce6de12f3b82203a298bc9351d8 # Pin to a fixed commit
+make
+make install
+cd ..
+# Let pkg-config find the installed nv-codec-headers; PKG_CONFIG_PATH may be unset, so guard it for `set -u`
+export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}
+# Add /usr/local/lib to the library search path and update the dynamic linker cache
+echo "/usr/local/lib" | tee -a /etc/ld.so.conf
+ldconfig
+# Step 5: Install FFmpeg
+# Clone the FFmpeg repository
+git clone https://git.ffmpeg.org/ffmpeg.git
+# Configure and compile FFmpeg with necessary flags for NVIDIA, x264, and other libraries
+cd ffmpeg
+git checkout faa366003b58ba26484070ca408be4b9d5473a73 # Pin to a fixed commit
+./configure --enable-nonfree \
+    --enable-cuda-nvcc \
+    --enable-libnpp \
+    --enable-libx264 \
+    --enable-openssl \
+    --enable-nvenc \
+    --enable-gpl \
+    --extra-cflags=-I/usr/local/cuda/include \
+    --extra-ldflags=-L/usr/local/cuda/lib64 \
+    --disable-static \
+    --enable-shared
+# Compile and install FFmpeg
+make -j "$(nproc)"
+make install
+# Clean up: both source trees were cloned relative to the starting directory, so remove them from there
+cd ..
+rm -rf -- nv-codec-headers ffmpeg
+dnf remove -y \
+    make \
+    git \
+    automake
+dnf clean all
+set +x
+
diff --git a/scripts/install_punkt_tokenizers.sh b/scripts/install_punkt_tokenizers.sh
index 684a8a6..0970e6e 100644
--- a/scripts/install_punkt_tokenizers.sh
+++ b/scripts/install_punkt_tokenizers.sh
@@ -1,7 +1,14 @@
 #!/bin/bash
 # This script is meant to be run at buildtime.
+set -euo pipefail
+set -x
 
 mkdir -p /root/nltk_data/tokenizers
 curl https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip -o /root/nltk_data/tokenizers/punkt.zip
 unzip /root/nltk_data/tokenizers/punkt.zip -d /root/nltk_data/tokenizers/
-rm /root/nltk_data/tokenizers/punkt.zip
\ No newline at end of file
+rm /root/nltk_data/tokenizers/punkt.zip
+
+curl https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip -o /root/nltk_data/tokenizers/punkt_tab.zip
+unzip /root/nltk_data/tokenizers/punkt_tab.zip -d /root/nltk_data/tokenizers/
+rm /root/nltk_data/tokenizers/punkt_tab.zip
+set +x
\ No newline at end of file