
Commit 3e1f4b5

Added doc for nvdec (#335)
1 parent 91f1a6f commit 3e1f4b5

File tree: 3 files changed (+270 -20 lines)


.github/workflows/docs.yaml

Lines changed: 86 additions & 20 deletions
@@ -5,45 +5,111 @@ on:
     branches: [ main ]
   pull_request:
 
+permissions:
+  id-token: write
+  contents: write
+
 defaults:
   run:
     shell: bash -l -eo pipefail {0}
 
 jobs:
+  generate-matrix:
+    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+    with:
+      package-type: wheel
+      os: linux
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      with-cpu: disable
+      with-xpu: disable
+      with-rocm: disable
+      with-cuda: enable
+      build-python-only: "disable"
   build:
-    runs-on: ubuntu-latest
+    needs: generate-matrix
+    strategy:
+      fail-fast: false
+    name: Build and Upload wheel
+    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
+    with:
+      repository: pytorch/torchcodec
+      ref: ""
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
+      post-script: packaging/post_build_script.sh
+      smoke-test-script: packaging/fake_smoke_test.py
+      package-name: torchcodec
+      trigger-event: ${{ github.event_name }}
+      build-platform: "python-build-package"
+      build-command: "BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 ENABLE_CUDA=1 python -m build --wheel -vvv --no-isolation"
+
+  build-docs:
+    runs-on: linux.4xlarge.nvidia.gpu
     strategy:
       fail-fast: false
+      matrix:
+        # 3.9 corresponds to the minimum python version for which we build
+        # the wheel unless the label ciflow/binaries/all is present in the
+        # PR.
+        python-version: ['3.9']
+        cuda-version: ['12.4']
+        ffmpeg-version-for-tests: ['7']
+    container:
+      image: "pytorch/manylinux-builder:cuda${{ matrix.cuda-version }}"
+      options: "--gpus all -e NVIDIA_DRIVER_CAPABILITIES=video,compute,utility"
+    needs: build
     steps:
-      - name: Check out repo
-        uses: actions/checkout@v3
-      - name: Setup conda env
-        uses: conda-incubator/setup-miniconda@v2
+      - name: Setup env vars
+        run: |
+          cuda_version_without_periods=$(echo "${{ matrix.cuda-version }}" | sed 's/\.//g')
+          echo cuda_version_without_periods=${cuda_version_without_periods} >> $GITHUB_ENV
+      - uses: actions/download-artifact@v3
         with:
-          auto-update-conda: true
-          miniconda-version: "latest"
-          activate-environment: test
-          python-version: '3.12'
+          name: pytorch_torchcodec__3.9_cu${{ env.cuda_version_without_periods }}_x86_64
+          path: pytorch/torchcodec/dist/
+      - name: Setup miniconda using test-infra
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        with:
+          python-version: ${{ matrix.python-version }}
+          #
+          # For some reason nvidia::libnpp=12.4 doesn't install but nvidia/label/cuda-12.4.0::libnpp does.
+          # So we use the latter convention for libnpp.
+          # We install conda packages at the start because otherwise conda may have conflicts with dependencies.
+          default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }}"
+      - name: Check env
+        run: |
+          ${CONDA_RUN} env
+          ${CONDA_RUN} conda info
+          ${CONDA_RUN} nvidia-smi
+          ${CONDA_RUN} conda list
+      - name: Assert ffmpeg exists
+        run: |
+          ${CONDA_RUN} ffmpeg -buildconf
       - name: Update pip
-        run: python -m pip install --upgrade pip
-      - name: Install dependencies and FFmpeg
+        run: ${CONDA_RUN} python -m pip install --upgrade pip
+      - name: Install PyTorch
         run: |
-          # TODO: torchvision and torchaudio shouldn't be needed. They were only added
-          # to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
-          python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
-          conda install "ffmpeg=7.0.1" pkg-config -c conda-forge
-          ffmpeg -version
-      - name: Build and install torchcodec
+          ${CONDA_RUN} python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu${{ env.cuda_version_without_periods }}
+          ${CONDA_RUN} python -c 'import torch; print(f"{torch.__version__}"); print(f"{torch.__file__}"); print(f"{torch.cuda.is_available()=}")'
+      - name: Install torchcodec from the wheel
         run: |
-          python -m pip install -e ".[dev]" --no-build-isolation -vvv
+          wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"`
+          echo Installing $wheel_path
+          ${CONDA_RUN} python -m pip install $wheel_path -vvv
+
+      - name: Check out repo
+        uses: actions/checkout@v3
+
       - name: Install doc dependencies
         run: |
           cd docs
-          python -m pip install -r requirements.txt
+          ${CONDA_RUN} python -m pip install -r requirements.txt
       - name: Build docs
         run: |
           cd docs
-          make html
+          ${CONDA_RUN} make html
       - uses: actions/upload-artifact@v3
         with:
           name: Built-Docs

docs/source/index.rst

Lines changed: 8 additions & 0 deletions
@@ -50,6 +50,14 @@ We achieve these capabilities through:
 
       How to sample video clips
 
+   .. grid-item-card:: :octicon:`file-code;1em`
+      GPU decoding using TorchCodec
+      :img-top: _static/img/card-background.svg
+      :link: generated_examples/basic_cuda_example.html
+      :link-type: url
+
+      A simple example demonstrating CUDA GPU decoding
+
 .. toctree::
    :maxdepth: 1
    :caption: TorchCodec documentation

examples/basic_cuda_example.py

Lines changed: 176 additions & 0 deletions
@@ -0,0 +1,176 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Accelerated video decoding on GPUs with CUDA and NVDEC
================================================================

TorchCodec can use supported Nvidia hardware (see the support matrix
`here <https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new>`_) to speed up
video decoding. This is called "CUDA Decoding" and it uses Nvidia's
`NVDEC hardware decoder <https://developer.nvidia.com/video-codec-sdk>`_
and CUDA kernels to respectively decompress and convert to RGB.
CUDA Decoding can be faster than CPU Decoding for the actual decoding step and also for
subsequent transform steps like scaling, cropping or rotating. This is because the decode step leaves
the decoded tensor in GPU memory, so the GPU doesn't have to fetch from main memory before
running the transform steps. Encoded packets are often much smaller than decoded frames, so
CUDA Decoding also uses less PCI-e bandwidth.

CUDA Decoding can offer a speed-up over CPU Decoding in a few scenarios:

#. You are decoding a large-resolution video
#. You are decoding a large batch of videos that's saturating the CPU
#. You want to do whole-image transforms like scaling or convolutions on the decoded tensors
   after decoding
#. Your CPU is saturated and you want to free it up for other work


Here are situations where CUDA Decoding may not make sense:

#. You want bit-exact results compared to CPU Decoding
#. You have small-resolution videos and the PCI-e transfer latency is large
#. Your GPU is already busy and your CPU is not

It's best to experiment with CUDA Decoding to see if it improves your use-case. With
TorchCodec you can simply pass in a device parameter to the
:class:`~torchcodec.decoders.VideoDecoder` class to use CUDA Decoding.


In order to use CUDA Decoding, you will need the following installed in your environment:

#. An Nvidia GPU that supports decoding the video format you want to decode. See
   the support matrix `here <https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new>`_
#. `CUDA-enabled pytorch <https://pytorch.org/get-started/locally/>`_
#. FFmpeg binaries that support
   `NVDEC-enabled <https://docs.nvidia.com/video-technologies/video-codec-sdk/12.0/ffmpeg-with-nvidia-gpu/index.html>`_
   codecs
#. libnpp and nvrtc (these are usually installed when you install the full cuda-toolkit)


FFmpeg versions 5, 6 and 7 from conda-forge are built with
`NVDEC support <https://docs.nvidia.com/video-technologies/video-codec-sdk/12.0/ffmpeg-with-nvidia-gpu/index.html>`_
and you can install them with conda. For example, to install FFmpeg version 7:


.. code-block:: bash

    conda install ffmpeg=7 -c conda-forge
    conda install libnpp cuda-nvrtc -c nvidia


"""

# %%
# Checking if PyTorch has CUDA enabled
# -------------------------------------
#
# .. note::
#
#    This tutorial requires FFmpeg libraries compiled with CUDA support.
#
#
import torch

print(f"{torch.__version__=}")
print(f"{torch.cuda.is_available()=}")
print(f"{torch.cuda.get_device_properties(0)=}")

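# %%
# As a quick sanity check for the FFmpeg requirement listed above, we can ask
# the ``ffmpeg`` command-line tool which NVDEC (``cuvid``) decoders it was
# built with. This is only a minimal sketch: it assumes the ``ffmpeg`` binary
# on your PATH is the same NVDEC-enabled build (e.g. the conda-forge one
# installed above) that TorchCodec loads its FFmpeg libraries from.
import subprocess

ffmpeg_decoders = subprocess.run(
    ["ffmpeg", "-hide_banner", "-decoders"],
    capture_output=True,
    text=True,
    check=True,
).stdout
nvdec_decoders = [line for line in ffmpeg_decoders.splitlines() if "cuvid" in line]
print("\n".join(nvdec_decoders) if nvdec_decoders else "No NVDEC (cuvid) decoders found")
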
# %%
# Downloading the video
# -------------------------------------
#
# We will use the following video, which has these properties:
#
# - Codec: H.264
# - Resolution: 960x540
# - FPS: 29.97
# - Pixel format: YUV420P
#
# .. raw:: html
#
#    <video style="max-width: 100%" controls>
#      <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4" type="video/mp4">
#    </video>
import urllib.request

video_file = "video.mp4"
urllib.request.urlretrieve(
    "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4",
    video_file,
)

# %%
# CUDA Decoding using VideoDecoder
# -------------------------------------
#
# To use the CUDA decoder, you need to pass in a CUDA device to the decoder.
#
from torchcodec.decoders import VideoDecoder

decoder = VideoDecoder(video_file, device="cuda")
frame = decoder[0]

# %%
#
# The video frames are decoded and returned as tensors in NCHW format.

print(frame.shape, frame.dtype)

# %%
#
# The video frames are left in GPU memory.

print(frame.data.device)

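# %%
# Because the decoded frame is already in GPU memory, further GPU-side work can
# run on it without any host/device copies. As a minimal sketch (using plain
# PyTorch ops here; torchvision transforms on CUDA tensors would work the same
# way), we downscale the frame on the GPU:

# interpolate expects a float NCHW batch, so add a batch dim and convert.
downscaled = torch.nn.functional.interpolate(
    frame.unsqueeze(0).float(), size=(270, 480), mode="bilinear", antialias=True
)
print(downscaled.shape, downscaled.device)
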
# %%
# Visualizing Frames
# -------------------------------------
#
# Let's look at the frames decoded by the CUDA decoder and compare them
# against equivalent results from the CPU decoder.
timestamps = [12, 19, 45, 131, 180]
cpu_decoder = VideoDecoder(video_file, device="cpu")
cuda_decoder = VideoDecoder(video_file, device="cuda")
cpu_frames = cpu_decoder.get_frames_played_at(timestamps).data
cuda_frames = cuda_decoder.get_frames_played_at(timestamps).data


def plot_cpu_and_cuda_frames(cpu_frames: torch.Tensor, cuda_frames: torch.Tensor):
    try:
        import matplotlib.pyplot as plt
        from torchvision.transforms.v2.functional import to_pil_image
    except ImportError:
        print("Cannot plot, please run `pip install torchvision matplotlib`")
        return
    n_rows = len(timestamps)
    fig, axes = plt.subplots(n_rows, 2, figsize=[12.8, 16.0])
    for i in range(n_rows):
        axes[i][0].imshow(to_pil_image(cpu_frames[i].to("cpu")))
        axes[i][1].imshow(to_pil_image(cuda_frames[i].to("cpu")))

    axes[0][0].set_title("CPU decoder", fontsize=24)
    axes[0][1].set_title("CUDA decoder", fontsize=24)
    plt.setp(axes, xticks=[], yticks=[])
    plt.tight_layout()


plot_cpu_and_cuda_frames(cpu_frames, cuda_frames)

# %%
#
# They look visually similar to the human eye, but there may be subtle
# differences because CUDA math is not bit-exact with respect to CPU math.
#
frames_equal = torch.equal(cpu_frames.to("cuda"), cuda_frames)
mean_abs_diff = torch.mean(
    torch.abs(cpu_frames.float().to("cuda") - cuda_frames.float())
)
max_abs_diff = torch.max(torch.abs(cpu_frames.to("cuda").float() - cuda_frames.float()))
print(f"{frames_equal=}")
print(f"{mean_abs_diff=}")
print(f"{max_abs_diff=}")
