Skip to content

Commit

Permalink
Add ffmpeg cuda support and download nltk tokenizer (#27)
Browse files Browse the repository at this point in the history
  • Loading branch information
wanliAlex authored Nov 11, 2024
1 parent 1bd5cf0 commit 74d0c5a
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 2 deletions.
13 changes: 12 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,17 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Setup scripts and execute them.
# ffmpeg is intentionally not installed in this step: the platform-conditional
# RUN step that follows selects either the plain or the CUDA-enabled build, so
# calling install_ffmpeg.sh here as well would install ffmpeg twice.
COPY scripts scripts
RUN bash scripts/install_redis.sh && \
    bash scripts/install_punkt_tokenizers.sh

# Install ffmpeg based on the target platform:
#   linux/arm64 -> plain ffmpeg build (scripts/install_ffmpeg.sh)
#   linux/amd64 -> CUDA/NVENC-enabled build (scripts/install_ffmpeg_cuda.sh)
# Any other platform fails the build with a message on stderr.
# The scripts are referenced relative to the working directory, matching the
# destination of `COPY scripts scripts`.
# NOTE(review): TARGETPLATFORM is only visible inside RUN when
# `ARG TARGETPLATFORM` is declared in this build stage - confirm it is declared
# earlier in the Dockerfile.
RUN if [ "${TARGETPLATFORM}" = "linux/arm64" ]; then \
        bash scripts/install_ffmpeg.sh; \
    elif [ "${TARGETPLATFORM}" = "linux/amd64" ]; then \
        bash scripts/install_ffmpeg_cuda.sh; \
    else \
        echo "Unsupported platform: ${TARGETPLATFORM}" >&2 && exit 1; \
    fi

# Install Vespa and pin the version. All versions can be found using `dnf list vespa`
# This is installed as a separate docker layer since we need to upgrade vespa regularly
RUN dnf config-manager --add-repo https://raw.githubusercontent.com/vespa-engine/vespa/master/dist/vespa-engine.repo && \
Expand All @@ -48,3 +56,6 @@ ENV VESPA_LOG_STDOUT="true"
# Vespa log format setting.
ENV VESPA_LOG_FORMAT="vespa"
# Point the Vespa CLI home and cache directories at locations under /tmp.
ENV VESPA_CLI_HOME=/tmp/.vespa
ENV VESPA_CLI_CACHE_DIR=/tmp/.cache/vespa
# Driver capabilities requested from the NVIDIA container runtime.
# NOTE(review): `video` is presumably needed for the NVENC/NVDEC-enabled ffmpeg
# build installed on amd64 - confirm.
ENV NVIDIA_DRIVER_CAPABILITIES=utility,compute,video
# expose nltk data to all users
# This is the directory scripts/install_punkt_tokenizers.sh unpacks the
# tokenizers into.
# NOTE(review): /root is often not readable by non-root users - verify the
# directory permissions if the container runs as another user.
ENV NLTK_DATA=/root/nltk_data
80 changes: 80 additions & 0 deletions scripts/install_ffmpeg_cuda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/bin/bash
#
# Build and install a CUDA/NVENC-enabled FFmpeg from source.
#
# Meant to be run as root at image build time on an x86_64 system that uses
# dnf (the CUDA repository added below is the rhel8/x86_64 one).
#
# Steps:
#   1. Install CUDA toolkit 12.6 from NVIDIA's dnf repository.
#   2. Install the toolchain and libraries needed to compile FFmpeg.
#   3. Install x264 from RPM Fusion.
#   4. Build and install a pinned revision of nv-codec-headers.
#   5. Build and install a pinned revision of FFmpeg.
#   6. Remove the source checkouts and the build-only packages.
#
# Sources are checked out into a private temporary directory that is removed
# on every exit path, so cleanup does not depend on the caller's working
# directory.

set -euo pipefail
set -x

readonly cuda_root=/usr/local/cuda
readonly local_lib_dir=/usr/local/lib
# Pinned revisions so that image builds are reproducible.
readonly nv_codec_headers_rev=9934f17316b66ce6de12f3b82203a298bc9351d8
readonly ffmpeg_rev=faa366003b58ba26484070ca408be4b9d5473a73

build_dir=$(mktemp -d)
readonly build_dir

# Remove the temporary source checkouts; registered to run on any exit.
cleanup() {
  cd /
  rm -rf -- "${build_dir:?}"
}
trap cleanup EXIT

# Step 1: Install CUDA Toolkit
# Add the NVIDIA repository for CUDA and install CUDA toolkit version 12.6
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
dnf clean all
dnf -y install cuda-toolkit-12-6

# Make the CUDA tools and libraries visible for the rest of this script.
# Existing LD_LIBRARY_PATH entries are preserved; the ${VAR:+...} form also
# keeps `set -u` from aborting when the variable is unset.
export PATH="${cuda_root}/bin:${PATH}"
export LD_LIBRARY_PATH="${cuda_root}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# Verify nvcc installation (CUDA compiler)
nvcc --version

# Step 2: Install libraries and tools required for FFmpeg compilation
dnf install -y \
  libtool \
  glibc \
  glibc-devel \
  numactl \
  numactl-devel \
  openssl \
  yasm \
  pkg-config \
  openssl-devel \
  git \
  gcc \
  make \
  gcc-c++ \
  kernel-headers \
  automake

# Step 3: Install x264
# Add the RPM Fusion repository and install x264 and its development files
dnf install -y https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm
dnf install -y x264 x264-devel

# Step 4: Install NVENC codec headers at the pinned revision
git clone https://git.videolan.org/git/ffmpeg/nv-codec-headers.git "${build_dir}/nv-codec-headers"
git -C "${build_dir}/nv-codec-headers" checkout "${nv_codec_headers_rev}"
make -C "${build_dir}/nv-codec-headers"
make -C "${build_dir}/nv-codec-headers" install

# Let pkg-config find the newly installed nv-codec-headers. PKG_CONFIG_PATH is
# commonly unset, so it must not be expanded bare under `set -u`.
export PKG_CONFIG_PATH="${local_lib_dir}/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"

# Add /usr/local/lib to the library search path (only once) and update the
# dynamic linker cache.
if ! grep -qxF -- "${local_lib_dir}" /etc/ld.so.conf 2>/dev/null; then
  printf '%s\n' "${local_lib_dir}" >>/etc/ld.so.conf
fi
ldconfig

# Step 5: Install FFmpeg at the pinned revision
git clone https://git.ffmpeg.org/ffmpeg.git "${build_dir}/ffmpeg"
cd "${build_dir}/ffmpeg"
git checkout "${ffmpeg_rev}"

# Configure FFmpeg with the flags needed for NVIDIA (nvcc, NPP, NVENC), x264
# and OpenSSL, building shared libraries only.
./configure --enable-nonfree \
  --enable-cuda-nvcc \
  --enable-libnpp \
  --enable-libx264 \
  --enable-openssl \
  --enable-nvenc \
  --enable-gpl \
  "--extra-cflags=-I${cuda_root}/include" \
  "--extra-ldflags=-L${cuda_root}/lib64" \
  --disable-static \
  --enable-shared

# Compile and install FFmpeg
make -j "$(nproc)"
make install

# Step 6: Cleanup
# The source checkouts are removed by the EXIT trap; here we drop the packages
# that were only needed for the build.
cd /
dnf remove -y \
  make \
  git \
  automake
dnf clean all
set +x

9 changes: 8 additions & 1 deletion scripts/install_punkt_tokenizers.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
#!/bin/bash
# This script is meant to be run at buildtime.
#
# Downloads the NLTK "punkt" and "punkt_tab" tokenizer packages from the
# nltk_data repository and unpacks them into /root/nltk_data/tokenizers.
# Each zip archive is deleted after it has been extracted.
set -euo pipefail
set -x

readonly tokenizers_dir=/root/nltk_data/tokenizers
readonly packages_url=https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers

mkdir -p "${tokenizers_dir}"

for package in punkt punkt_tab; do
  archive="${tokenizers_dir}/${package}.zip"
  # --fail makes curl exit non-zero on an HTTP error instead of saving the
  # error page as the archive; --location follows redirects.
  curl --fail --silent --show-error --location \
    "${packages_url}/${package}.zip" -o "${archive}"
  unzip "${archive}" -d "${tokenizers_dir}/"
  # Remove each archive exactly once: a second rm of the same file would
  # fail and abort the script under `set -e`.
  rm -- "${archive}"
done
set +x

0 comments on commit 74d0c5a

Please sign in to comment.