Skip to content

Commit 74d0c5a

Browse files
authored
Add ffmpeg cuda support and download nltk tokenizer (#27)
1 parent 1bd5cf0 commit 74d0c5a

File tree

3 files changed

+100
-2
lines changed

3 files changed

+100
-2
lines changed

Dockerfile

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,17 @@ RUN pip3 install --no-cache-dir -r requirements.txt
3131
# Setup scripts and execute them
3232
COPY scripts scripts
3333
RUN bash scripts/install_redis.sh && \
34-
bash scripts/install_ffmpeg.sh && \
3534
bash scripts/install_punkt_tokenizers.sh
3635

36+
# Install ffmpeg based on the architecture
37+
# NOTE(review): TARGETPLATFORM is only visible inside RUN when `ARG TARGETPLATFORM`
# is declared earlier in this build stage - not visible in this hunk, confirm.
# The scripts are referenced relative to WORKDIR, matching `COPY scripts scripts`
# and the install_redis/install_punkt_tokenizers step above.
RUN if [ "${TARGETPLATFORM}" = "linux/arm64" ]; then \
        bash scripts/install_ffmpeg.sh; \
    elif [ "${TARGETPLATFORM}" = "linux/amd64" ]; then \
        bash scripts/install_ffmpeg_cuda.sh; \
    else \
        echo "Unsupported platform: ${TARGETPLATFORM}" >&2; exit 1; \
    fi
44+
3745
# Install Vespa and pin the version. All versions can be found using `dnf list --showduplicates vespa`
3846
# This is installed as a separate docker layer since we need to upgrade vespa regularly
3947
RUN dnf config-manager --add-repo https://raw.githubusercontent.com/vespa-engine/vespa/master/dist/vespa-engine.repo && \
@@ -48,3 +56,6 @@ ENV VESPA_LOG_STDOUT="true"
4856
ENV VESPA_LOG_FORMAT="vespa"
4957
ENV VESPA_CLI_HOME=/tmp/.vespa
5058
ENV VESPA_CLI_CACHE_DIR=/tmp/.cache/vespa
59+
ENV NVIDIA_DRIVER_CAPABILITIES=utility,compute,video
60+
# expose nltk data to all users
61+
ENV NLTK_DATA=/root/nltk_data

scripts/install_ffmpeg_cuda.sh

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#!/bin/bash
2+
3+
set -euo pipefail
4+
set -x
5+
6+
# Step 1: Install CUDA Toolkit
7+
# Add the NVIDIA repository for CUDA and install CUDA toolkit version 12.6
8+
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
9+
dnf clean all
10+
dnf -y install cuda-toolkit-12-6
11+
# Set CUDA environment variables for PATH and LD_LIBRARY_PATH
12+
export PATH=/usr/local/cuda/bin:${PATH}
13+
export LD_LIBRARY_PATH=/usr/local/cuda/lib64
14+
# Verify nvcc installation (CUDA compiler)
15+
nvcc --version
16+
# Step 2: Install dependencies required for FFmpeg
17+
# Install libraries and tools required for FFmpeg compilation
18+
dnf install -y libtool \
19+
glibc \
20+
glibc-devel \
21+
numactl \
22+
numactl-devel \
23+
openssl \
24+
yasm \
25+
pkg-config \
26+
openssl-devel \
27+
git \
28+
gcc \
29+
make \
30+
gcc-c++ \
31+
kernel-headers \
32+
automake
33+
# Step 3: Install x264
34+
# Add the RPM Fusion repository and install x264 and its development files
35+
dnf install -y https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm
36+
dnf install -y x264 x264-devel
37+
# Step 4: Install NVENC codec headers
38+
# Install the necessary development tools and clone the nv-codec-headers repository
39+
# Clone the nv-codec-headers repository and install it
40+
git clone https://git.videolan.org/git/ffmpeg/nv-codec-headers.git
41+
cd nv-codec-headers
42+
git checkout 9934f17316b66ce6de12f3b82203a298bc9351d8 # Fix the version
43+
make
44+
make install
45+
cd ..
46+
# Set PKG_CONFIG_PATH for pkg-config to find the newly installed nv-codec-headers
47+
# Prepend /usr/local/lib/pkgconfig. The ${var:+...} form keeps this safe under
# `set -u` when PKG_CONFIG_PATH is not set yet, and avoids a trailing ':'.
export PKG_CONFIG_PATH="/usr/local/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
48+
# Add /usr/local/lib to the library search path and update the dynamic linker cache
49+
echo "/usr/local/lib" | tee -a /etc/ld.so.conf
50+
ldconfig
51+
# Step 5: Install FFmpeg
52+
# Clone the FFmpeg repository
53+
git clone https://git.ffmpeg.org/ffmpeg.git
54+
# Configure and compile FFmpeg with necessary flags for NVIDIA, x264, and other libraries
55+
cd ffmpeg
56+
git checkout faa366003b58ba26484070ca408be4b9d5473a73 # Fix the version
57+
./configure --enable-nonfree \
58+
--enable-cuda-nvcc \
59+
--enable-libnpp \
60+
--enable-libx264 \
61+
--enable-openssl \
62+
--enable-nvenc \
63+
--enable-gpl \
64+
--extra-cflags=-I/usr/local/cuda/include \
65+
--extra-ldflags=-L/usr/local/cuda/lib64 \
66+
--disable-static \
67+
--enable-shared
68+
# Compile and install FFmpeg
69+
make -j $(nproc)
70+
make install
71+
# Do some cleanup
72+
# Both repositories were cloned relative to the directory the script started in,
# and we are still inside ffmpeg/ here. Step back out and remove them from there
# rather than assuming the script was launched from /.
cd ..
rm -rf -- nv-codec-headers ffmpeg
74+
dnf remove -y \
75+
make \
76+
git \
77+
automake
78+
dnf clean all
79+
set +x
80+

scripts/install_punkt_tokenizers.sh

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
11
#!/bin/bash
22
# This script is meant to be run at buildtime.
3+
set -euo pipefail
4+
set -x
35

46
mkdir -p /root/nltk_data/tokenizers
57
# --fail: exit non-zero on HTTP errors instead of saving the error page as the zip.
# --location: follow redirects.
curl --fail --location https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip -o /root/nltk_data/tokenizers/punkt.zip
68
unzip /root/nltk_data/tokenizers/punkt.zip -d /root/nltk_data/tokenizers/
7-
rm /root/nltk_data/tokenizers/punkt.zip
9+
rm /root/nltk_data/tokenizers/punkt.zip
10+
11+
# --fail: exit non-zero on HTTP errors instead of saving the error page as the zip.
# --location: follow redirects.
curl --fail --location https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip -o /root/nltk_data/tokenizers/punkt_tab.zip
12+
unzip /root/nltk_data/tokenizers/punkt_tab.zip -d /root/nltk_data/tokenizers/
13+
rm /root/nltk_data/tokenizers/punkt_tab.zip
14+
set +x

0 commit comments

Comments
 (0)