From 3ff6292119209868eeeba590a661b6a4e92b2af3 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 12:14:53 -0800
Subject: [PATCH 01/51] Added doc for nvdec

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 545ddf9c..2407915d 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -11,7 +11,7 @@ defaults:
 
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: linux.g5.4xlarge.nvidia.gpu
     strategy:
       fail-fast: false
     steps:

From 1fd5a10d1ec32eae0a5f44d2c063760cb6bc65ef Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 12:18:16 -0800
Subject: [PATCH 02/51] .

---
 .github/workflows/docs.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 2407915d..701bc54a 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -30,8 +30,8 @@ jobs:
         run: |
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
-          python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
-          conda install "ffmpeg=7.0.1" pkg-config -c conda-forge
+          python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
+          conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec
         run: |

From fa3e3b962f381b4497831e760d66bed1ae42f721 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 12:20:47 -0800
Subject: [PATCH 03/51] .

---
 .github/workflows/cpp_tests.yaml        | 2 +-
 .github/workflows/linux_cuda_wheel.yaml | 2 +-
 .github/workflows/linux_wheel.yaml      | 2 +-
 .github/workflows/macos_wheel.yaml      | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/cpp_tests.yaml b/.github/workflows/cpp_tests.yaml
index b2b19a78..453f5bc2 100644
--- a/.github/workflows/cpp_tests.yaml
+++ b/.github/workflows/cpp_tests.yaml
@@ -3,7 +3,7 @@ name: CPP tests
 on:
   push:
     branches: [ main ]
-  pull_request:
+  # pull_request:
 
 concurrency:
   group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
diff --git a/.github/workflows/linux_cuda_wheel.yaml b/.github/workflows/linux_cuda_wheel.yaml
index 915c5236..7bb57f76 100644
--- a/.github/workflows/linux_cuda_wheel.yaml
+++ b/.github/workflows/linux_cuda_wheel.yaml
@@ -1,7 +1,7 @@
 name: Build and test Linux CUDA wheels
 
 on:
-  pull_request:
+  #pull_request:
   push:
     branches:
       - nightly
diff --git a/.github/workflows/linux_wheel.yaml b/.github/workflows/linux_wheel.yaml
index 38f25733..5cc75c9a 100644
--- a/.github/workflows/linux_wheel.yaml
+++ b/.github/workflows/linux_wheel.yaml
@@ -1,7 +1,7 @@
 name: Build and test Linux wheel
 
 on:
-  pull_request:
+  #pull_request:
   push:
     branches:
       - nightly
diff --git a/.github/workflows/macos_wheel.yaml b/.github/workflows/macos_wheel.yaml
index ef637194..45ccdb4d 100644
--- a/.github/workflows/macos_wheel.yaml
+++ b/.github/workflows/macos_wheel.yaml
@@ -1,7 +1,7 @@
 name: Build and test MacOS wheel
 
 on:
-  pull_request:
+  #pull_request:
   push:
     branches:
       - nightly

From 36a54209d7b6223e9366dba1312cdb38e1bd2090 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 12:24:41 -0800
Subject: [PATCH 04/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 701bc54a..1e3ced08 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
-          conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
+          conda install "ffmpeg=7.0.1" pkg-config cmake nvidia/label/cuda-12.4.0::cuda-toolkit -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec
         run: |

From f49baca49a5bea34781d81f2da178ccb89f4393f Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 12:25:30 -0800
Subject: [PATCH 05/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 1e3ced08..6424d508 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
-          conda install "ffmpeg=7.0.1" pkg-config cmake nvidia/label/cuda-12.4.0::cuda-toolkit -c conda-forge
+          conda install "ffmpeg=7.0.1" pkg-config cmake nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-runtime -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec
         run: |

From f087a913830abc9eac762a7d6760fbdcdaf7a10a Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 12:26:15 -0800
Subject: [PATCH 06/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 6424d508..83f462e3 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
-          conda install "ffmpeg=7.0.1" pkg-config cmake nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-runtime -c conda-forge
+          conda install "ffmpeg=7.0.1" pkg-config cmake "nvidia/label/cuda-12.4.0::cuda-toolkit" "nvidia/label/cuda-12.4.0::cuda-runtime" -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec
         run: |

From 5092418ebbdeefd17b086fa5d9480187b34f770f Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 13:59:14 -0800
Subject: [PATCH 07/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 83f462e3..65fb4e7b 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
-          conda install "ffmpeg=7.0.1" pkg-config cmake "nvidia/label/cuda-12.4.0::cuda-toolkit" "nvidia/label/cuda-12.4.0::cuda-runtime" -c conda-forge
+          conda install "ffmpeg=7.0.1" pkg-config cmake cudatoolkit==12.4.0 -c nvidia -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec
         run: |

From 243e2ca09005f5d986019e7b746df256d41a796b Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 14:10:16 -0800
Subject: [PATCH 08/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 65fb4e7b..1e3ced08 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
-          conda install "ffmpeg=7.0.1" pkg-config cmake cudatoolkit==12.4.0 -c nvidia -c conda-forge
+          conda install "ffmpeg=7.0.1" pkg-config cmake nvidia/label/cuda-12.4.0::cuda-toolkit -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec
         run: |

From 7c6c033a9d2d0d6b6b9dcd85f8518b88f4102537 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 14:15:37 -0800
Subject: [PATCH 09/51] .

---
 .github/workflows/docs.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 1e3ced08..2db1d19b 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,8 @@ jobs:
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
-          conda install "ffmpeg=7.0.1" pkg-config cmake nvidia/label/cuda-12.4.0::cuda-toolkit -c conda-forge
+          conda install nvidia/label/cuda-12.4.0::cuda-toolkit
+          conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec
         run: |

From e40ec7a9e973bc5ed16f7f71171a5dafd408110f Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 14:28:57 -0800
Subject: [PATCH 10/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 2db1d19b..e84d36a0 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
-          conda install nvidia/label/cuda-12.4.0::cuda-toolkit
+          conda install nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-runtime
           conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec

From bb4bff96d7ccc203d228193319db61681e8c31fd Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 14:35:33 -0800
Subject: [PATCH 11/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index e84d36a0..24d768eb 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
-          conda install nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-runtime
+          conda install nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-runtime nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::cuda-nvrtc-dev
           conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec

From e8a5b07488871be8ae10bf908b5554e0585a7a2a Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 14:42:05 -0800
Subject: [PATCH 12/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 24d768eb..ed72440e 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
-          conda install nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-runtime nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::cuda-nvrtc-dev
+          conda install nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::libnpp
           conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec

From c9d54a4a9cdc3b37756b5f100b47cb925958caef Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 14:48:47 -0800
Subject: [PATCH 13/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index ed72440e..57c3006d 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -30,9 +30,9 @@ jobs:
         run: |
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
-          python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
           conda install nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::libnpp
           conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
+          python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
           ffmpeg -version
       - name: Build and install torchcodec
         run: |

From fb633e439601a4d953d236550a8243a077a0c48b Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 16:31:17 -0800
Subject: [PATCH 14/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 57c3006d..f9e37858 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -30,7 +30,7 @@ jobs:
         run: |
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
-          conda install nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::libnpp
+          conda install nvidia/label/cuda-12.4.0::cuda nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::libnpp
           conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
           ffmpeg -version

From 9e334cd96783b24ae1ae1630a5ac63894a8ba8b2 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 16:37:24 -0800
Subject: [PATCH 15/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index f9e37858..db2e0717 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -30,7 +30,7 @@ jobs:
         run: |
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
-          conda install nvidia/label/cuda-12.4.0::cuda nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::libnpp
+          conda install nvidia/label/cuda-12.4.1::cuda nvidia/label/cuda-12.4.1::cuda-toolkit nvidia/label/cuda-12.4.1::cuda-cudart nvidia/label/cuda-12.4.1::libnpp
           conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
           ffmpeg -version

From c107e02c9c7b41b357df6e23b9639f1df98a4bf0 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 16:44:46 -0800
Subject: [PATCH 16/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index db2e0717..84d3de8f 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -23,7 +23,7 @@ jobs:
           auto-update-conda: true
           miniconda-version: "latest"
           activate-environment: test
-          python-version: '3.12'
+          python-version: '3.9'
       - name: Update pip
         run: python -m pip install --upgrade pip
       - name: Install dependencies and FFmpeg

From 885c43fc3f41a6aeb42b0c2fc8344697cc147d67 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 16:45:18 -0800
Subject: [PATCH 17/51] .

---
 .github/workflows/docs.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 84d3de8f..4556c419 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -36,6 +36,8 @@ jobs:
           ffmpeg -version
       - name: Build and install torchcodec
         run: |
+          find $CONDA_PREFIX/lib
+          find $CONDA_PREFIX/lib64
           python -m pip install -e ".[dev]" --no-build-isolation -vvv
       - name: Install doc dependencies
         run: |

From dd937c68b9ab1097f30b6138aab444b04b2dda31 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 16:45:37 -0800
Subject: [PATCH 18/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 4556c419..0d413025 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -38,7 +38,7 @@ jobs:
         run: |
           find $CONDA_PREFIX/lib
           find $CONDA_PREFIX/lib64
-          python -m pip install -e ".[dev]" --no-build-isolation -vvv
+          ENABLE_CUDA=1 python -m pip install -e ".[dev]" --no-build-isolation -vvv
       - name: Install doc dependencies
         run: |
           cd docs

From bab07dbc651013803b72bef7eefcb2aa11be062c Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 16:53:46 -0800
Subject: [PATCH 19/51] .

---
 .github/workflows/docs.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 0d413025..f1ef63c0 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -36,8 +36,8 @@ jobs:
           ffmpeg -version
       - name: Build and install torchcodec
         run: |
-          find $CONDA_PREFIX/lib
-          find $CONDA_PREFIX/lib64
+          find $CONDA_PREFIX/lib -type f -iname libnvtoolsext\*.so | xargs ldd || true
+          find $CONDA_PREFIX/lib64 -type f -iname libnvtoolsext\*.so | xargs ldd || true
           ENABLE_CUDA=1 python -m pip install -e ".[dev]" --no-build-isolation -vvv
       - name: Install doc dependencies
         run: |

From 60b06e1f0c9a2b484a7e11d97f8c901300a4899c Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 17:01:58 -0800
Subject: [PATCH 20/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index f1ef63c0..cdd7cd9d 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -30,7 +30,7 @@ jobs:
         run: |
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
-          conda install nvidia/label/cuda-12.4.1::cuda nvidia/label/cuda-12.4.1::cuda-toolkit nvidia/label/cuda-12.4.1::cuda-cudart nvidia/label/cuda-12.4.1::libnpp
+          conda install nvidia/label/cuda-12.4.1::cuda nvidia/label/cuda-12.4.1::cuda-toolkit nvidia/label/cuda-12.4.1::cuda-cudart conda install nvidia/label/cuda-12.4.1::cuda-nvtx nvidia/label/cuda-12.4.1::libnpp
           conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
           ffmpeg -version

From 904bfa33437cb5f934302937f18993f619dccb8f Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 17:28:17 -0800
Subject: [PATCH 21/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index cdd7cd9d..1aa556ff 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -30,7 +30,7 @@ jobs:
         run: |
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
-          conda install nvidia/label/cuda-12.4.1::cuda nvidia/label/cuda-12.4.1::cuda-toolkit nvidia/label/cuda-12.4.1::cuda-cudart conda install nvidia/label/cuda-12.4.1::cuda-nvtx nvidia/label/cuda-12.4.1::libnpp
+          conda install nvidia/label/cuda-12.4.1::cuda nvidia/label/cuda-12.4.1::cuda-toolkit nvidia/label/cuda-12.4.1::cuda-cudart nvidia/label/cuda-12.4.1::cuda-nvtx nvidia/label/cuda-12.4.1::libnpp
           conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
           python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
           ffmpeg -version

From 75e76ee559c7c762d777a71037c0cebdf86bd1e7 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 17:37:31 -0800
Subject: [PATCH 22/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 1aa556ff..87d16b80 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -32,7 +32,7 @@ jobs:
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           conda install nvidia/label/cuda-12.4.1::cuda nvidia/label/cuda-12.4.1::cuda-toolkit nvidia/label/cuda-12.4.1::cuda-cudart nvidia/label/cuda-12.4.1::cuda-nvtx nvidia/label/cuda-12.4.1::libnpp
           conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
-          python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124
+          conda install pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch-nightly -c nvidia
           ffmpeg -version
       - name: Build and install torchcodec
         run: |

From 16218ac5be71abff55007cad2ef4b54df0a42a95 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 17:46:12 -0800
Subject: [PATCH 23/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 87d16b80..b038352d 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,8 +31,8 @@ jobs:
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           conda install nvidia/label/cuda-12.4.1::cuda nvidia/label/cuda-12.4.1::cuda-toolkit nvidia/label/cuda-12.4.1::cuda-cudart nvidia/label/cuda-12.4.1::cuda-nvtx nvidia/label/cuda-12.4.1::libnpp
-          conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
           conda install pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch-nightly -c nvidia
+          conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec
         run: |

From e8f0128c56e82d9b77c10fec2c4641aacefe6128 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 17:55:21 -0800
Subject: [PATCH 24/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index b038352d..7017c121 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -30,8 +30,8 @@ jobs:
         run: |
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
-          conda install nvidia/label/cuda-12.4.1::cuda nvidia/label/cuda-12.4.1::cuda-toolkit nvidia/label/cuda-12.4.1::cuda-cudart nvidia/label/cuda-12.4.1::cuda-nvtx nvidia/label/cuda-12.4.1::libnpp
           conda install pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch-nightly -c nvidia
+          conda install nvidia/label/cuda-12.4.1::cuda nvidia/label/cuda-12.4.1::cuda-toolkit nvidia/label/cuda-12.4.1::cuda-cudart nvidia/label/cuda-12.4.1::cuda-nvtx nvidia/label/cuda-12.4.1::libnpp
           conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec

From 9c36f4ecf095aa9640194010cc156fb63a64d29d Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 18:37:57 -0800
Subject: [PATCH 25/51] .

---
 .github/workflows/docs.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 7017c121..ce20b436 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -38,6 +38,7 @@ jobs:
         run: |
           find $CONDA_PREFIX/lib -type f -iname libnvtoolsext\*.so | xargs ldd || true
           find $CONDA_PREFIX/lib64 -type f -iname libnvtoolsext\*.so | xargs ldd || true
+          find $CONDA_PREFIX -name cuda_cmake_macros.h
           ENABLE_CUDA=1 python -m pip install -e ".[dev]" --no-build-isolation -vvv
       - name: Install doc dependencies
         run: |

From 24064356227fb8f7909d6c78f288e8a03d8d82c3 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 18:46:39 -0800
Subject: [PATCH 26/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index ce20b436..74017863 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -32,7 +32,7 @@ jobs:
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           conda install pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch-nightly -c nvidia
           conda install nvidia/label/cuda-12.4.1::cuda nvidia/label/cuda-12.4.1::cuda-toolkit nvidia/label/cuda-12.4.1::cuda-cudart nvidia/label/cuda-12.4.1::cuda-nvtx nvidia/label/cuda-12.4.1::libnpp
-          conda install "ffmpeg=7.0.1" pkg-config cmake -c conda-forge
+          conda install "ffmpeg=7.0.1" pkg-config cmake compilers -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec
         run: |

From 7b78be320de9d769e799555f228fc2b374b8dc5e Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 18:49:37 -0800
Subject: [PATCH 27/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 74017863..6264b85c 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           conda install pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch-nightly -c nvidia
-          conda install nvidia/label/cuda-12.4.1::cuda nvidia/label/cuda-12.4.1::cuda-toolkit nvidia/label/cuda-12.4.1::cuda-cudart nvidia/label/cuda-12.4.1::cuda-nvtx nvidia/label/cuda-12.4.1::libnpp
+          conda install nvidia/label/cuda-12.4.0::cuda nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::cuda-nvtx nvidia/label/cuda-12.4.0::libnpp
           conda install "ffmpeg=7.0.1" pkg-config cmake compilers -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec

From 20c6fba6e96cce7e6e79d8d7b193e7f21afda32e Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 19:04:47 -0800
Subject: [PATCH 28/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 6264b85c..10c599d8 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
           conda install pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch-nightly -c nvidia
-          conda install nvidia/label/cuda-12.4.0::cuda nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::cuda-nvtx nvidia/label/cuda-12.4.0::libnpp
+          # conda install nvidia/label/cuda-12.4.0::cuda nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::cuda-nvtx nvidia/label/cuda-12.4.0::libnpp
           conda install "ffmpeg=7.0.1" pkg-config cmake compilers -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec

From 7630fddf07a8f7b64b7efd548094fccce6296019 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 19:13:15 -0800
Subject: [PATCH 29/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 10c599d8..32ac23cf 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -30,7 +30,7 @@ jobs:
         run: |
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
-          conda install pytorch torchvision torchaudio pytorch-cuda=12.4 -c pytorch-nightly -c nvidia
+          conda install pytorch torchvision torchaudio pytorch-cuda=12.4 cudatoolkit=12.4 -c pytorch-nightly -c nvidia
           # conda install nvidia/label/cuda-12.4.0::cuda nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::cuda-nvtx nvidia/label/cuda-12.4.0::libnpp
           conda install "ffmpeg=7.0.1" pkg-config cmake compilers -c conda-forge
           ffmpeg -version

From 37bfa5c3935cc512bcd42bcb389db4592380eb2b Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Tue, 5 Nov 2024 19:17:00 -0800
Subject: [PATCH 30/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 32ac23cf..dd384025 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -30,7 +30,7 @@ jobs:
         run: |
           # TODO: torchvision and torchaudio shouldn't be needed. They were only added
           #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
-          conda install pytorch torchvision torchaudio pytorch-cuda=12.4 cudatoolkit=12.4 -c pytorch-nightly -c nvidia
+          conda install pytorch torchvision torchaudio pytorch-cuda=12.4 cuda-toolkit=12.4 -c pytorch-nightly -c nvidia
           # conda install nvidia/label/cuda-12.4.0::cuda nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::cuda-nvtx nvidia/label/cuda-12.4.0::libnpp
           conda install "ffmpeg=7.0.1" pkg-config cmake compilers -c conda-forge
           ffmpeg -version

From 24f28432c589060e4c489d890f7e7fa06e2ef562 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Wed, 6 Nov 2024 12:58:33 -0800
Subject: [PATCH 31/51] .

---
 .github/workflows/docs.yaml |  11 +-
 packaging/cuda12.4.yaml     | 289 ++++++++++++++++++++++++++++++++++++
 2 files changed, 291 insertions(+), 9 deletions(-)
 create mode 100644 packaging/cuda12.4.yaml

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index dd384025..b4eadcac 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -23,22 +23,15 @@ jobs:
           auto-update-conda: true
           miniconda-version: "latest"
           activate-environment: test
-          python-version: '3.9'
+          python-version: '3.12'
       - name: Update pip
         run: python -m pip install --upgrade pip
       - name: Install dependencies and FFmpeg
         run: |
-          # TODO: torchvision and torchaudio shouldn't be needed. They were only added
-          #  to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
-          conda install pytorch torchvision torchaudio pytorch-cuda=12.4 cuda-toolkit=12.4 -c pytorch-nightly -c nvidia
-          # conda install nvidia/label/cuda-12.4.0::cuda nvidia/label/cuda-12.4.0::cuda-toolkit nvidia/label/cuda-12.4.0::cuda-cudart nvidia/label/cuda-12.4.0::cuda-nvtx nvidia/label/cuda-12.4.0::libnpp
-          conda install "ffmpeg=7.0.1" pkg-config cmake compilers -c conda-forge
+          conda env update --file packaging/cuda12.4.yaml
           ffmpeg -version
       - name: Build and install torchcodec
         run: |
-          find $CONDA_PREFIX/lib -type f -iname libnvtoolsext\*.so | xargs ldd || true
-          find $CONDA_PREFIX/lib64 -type f -iname libnvtoolsext\*.so | xargs ldd || true
-          find $CONDA_PREFIX -name cuda_cmake_macros.h
           ENABLE_CUDA=1 python -m pip install -e ".[dev]" --no-build-isolation -vvv
       - name: Install doc dependencies
         run: |
diff --git a/packaging/cuda12.4.yaml b/packaging/cuda12.4.yaml
new file mode 100644
index 00000000..7666f0cb
--- /dev/null
+++ b/packaging/cuda12.4.yaml
@@ -0,0 +1,289 @@
+name: cuda4
+channels:
+  - pytorch-nightly
+  - nvidia
+  - conda-forge
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=2_kmp_llvm
+  - aom=3.9.1=hac33072_0
+  - archspec=0.2.3=pyhd3eb1b0_0
+  - blas=1.0=mkl
+  - boltons=23.0.0=py312h06a4308_0
+  - brotli-python=1.0.9=py312h6a678d5_8
+  - bzip2=1.0.8=h5eee18b_6
+  - c-ares=1.34.2=heb4867d_0
+  - ca-certificates=2024.9.24=h06a4308_0
+  - cairo=1.18.0=hebfffa5_3
+  - certifi=2024.8.30=py312h06a4308_0
+  - cffi=1.17.1=py312h1fdaa30_0
+  - charset-normalizer=3.3.2=pyhd3eb1b0_0
+  - cmake=3.30.5=hf9cb763_0
+  - conda=24.9.2=py312h7900ff3_0
+  - conda-libmamba-solver=24.9.0=pyhd3eb1b0_0
+  - conda-package-handling=2.3.0=py312h06a4308_0
+  - conda-package-streaming=0.10.0=py312h06a4308_0
+  - cuda=12.4.0=0
+  - cuda-cccl=12.4.127=0
+  - cuda-command-line-tools=12.4.1=0
+  - cuda-compiler=12.6.2=0
+  - cuda-cudart=12.4.127=0
+  - cuda-cudart-dev=12.4.127=0
+  - cuda-cudart-static=12.4.127=0
+  - cuda-cuobjdump=12.4.127=0
+  - cuda-cupti=12.4.127=0
+  - cuda-cupti-static=12.4.127=0
+  - cuda-cuxxfilt=12.4.127=0
+  - cuda-demo-suite=12.4.127=0
+  - cuda-documentation=12.4.127=0
+  - cuda-driver-dev=12.4.127=0
+  - cuda-gdb=12.4.127=0
+  - cuda-libraries=12.4.1=0
+  - cuda-libraries-dev=12.6.0=0
+  - cuda-libraries-static=12.4.1=0
+  - cuda-nsight=12.4.127=0
+  - cuda-nvcc=12.4.131=0
+  - cuda-nvdisasm=12.4.127=0
+  - cuda-nvml-dev=12.4.127=0
+  - cuda-nvprof=12.4.127=0
+  - cuda-nvprune=12.4.127=0
+  - cuda-nvrtc=12.4.127=0
+  - cuda-nvrtc-dev=12.4.127=0
+  - cuda-nvrtc-static=12.4.127=0
+  - cuda-nvtx=12.4.127=0
+  - cuda-nvvp=12.4.127=0
+  - cuda-opencl=12.4.127=0
+  - cuda-opencl-dev=12.4.127=0
+  - cuda-profiler-api=12.4.127=0
+  - cuda-runtime=12.4.0=0
+  - cuda-sanitizer-api=12.4.127=0
+  - cuda-toolkit=12.4.0=0
+  - cuda-tools=12.4.1=0
+  - cuda-version=11.8=h70ddcb2_3
+  - cuda-visual-tools=12.6.0=0
+  - cudatoolkit=11.8.0=h4ba93d1_13
+  - cudnn=9.3.0.75=hc149ed2_0
+  - dav1d=1.2.1=hd590300_0
+  - distro=1.9.0=py312h06a4308_0
+  - expat=2.6.3=h6a678d5_0
+  - ffmpeg=7.1.0=gpl_h2e64a5a_503
+  - filelock=3.13.1=py312h06a4308_0
+  - fmt=9.1.0=hdb19cb5_1
+  - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
+  - font-ttf-inconsolata=3.000=h77eed37_0
+  - font-ttf-source-code-pro=2.038=h77eed37_0
+  - font-ttf-ubuntu=0.83=h77eed37_3
+  - fontconfig=2.15.0=h7e30c49_1
+  - fonts-conda-ecosystem=1=0
+  - fonts-conda-forge=1=0
+  - freetype=2.12.1=h267a509_2
+  - fribidi=1.0.10=h36c2ea0_0
+  - frozendict=2.4.2=py312h06a4308_0
+  - fsspec=2024.10.0=pyhff2d567_0
+  - gdk-pixbuf=2.42.12=hb9ae30d_0
+  - gds-tools=1.9.1.3=0
+  - giflib=5.2.2=h5eee18b_0
+  - gmp=6.3.0=hac33072_2
+  - gnutls=3.6.15=he1e5248_0
+  - graphite2=1.3.14=h295c915_1
+  - harfbuzz=9.0.0=hda332d3_1
+  - icu=75.1=he02047a_0
+  - idna=3.7=py312h06a4308_0
+  - intel-openmp=2022.0.1=h06a4308_3633
+  - jinja2=3.1.4=py312h06a4308_1
+  - jsonpatch=1.33=py312h06a4308_1
+  - jsonpointer=2.1=pyhd3eb1b0_0
+  - kaldi=5.5.1112=cpu_hd7b63f8_5
+  - kernel-headers_linux-64=3.10.0=he073ed8_18
+  - keyutils=1.6.1=h166bdaf_0
+  - krb5=1.21.3=h659f571_0
+  - lame=3.100=h7b6447c_0
+  - lcms2=2.16=hb7c19ff_0
+  - ld_impl_linux-64=2.40=h12ee557_0
+  - lerc=4.0.0=h27087fc_0
+  - libabseil=20240722.0=cxx17_h5888daf_1
+  - libarchive=3.7.4=hfca40fe_0
+  - libass=0.17.3=h1dc1e6a_0
+  - libblas=3.9.0=20_linux64_mkl
+  - libcblas=3.9.0=20_linux64_mkl
+  - libcublas=12.4.5.8=0
+  - libcublas-dev=12.4.5.8=0
+  - libcublas-static=12.4.5.8=0
+  - libcufft=11.2.1.3=0
+  - libcufft-dev=11.2.1.3=0
+  - libcufft-static=11.2.1.3=0
+  - libcufile=1.9.1.3=0
+  - libcufile-dev=1.9.1.3=0
+  - libcufile-static=1.9.1.3=0
+  - libcurand=10.3.5.147=0
+  - libcurand-dev=10.3.5.147=0
+  - libcurand-static=10.3.5.147=0
+  - libcurl=8.11.0=hbbe4b11_0
+  - libcusolver=11.6.1.9=0
+  - libcusolver-dev=11.6.1.9=0
+  - libcusolver-static=11.6.1.9=0
+  - libcusparse=12.3.1.170=0
+  - libcusparse-dev=12.3.1.170=0
+  - libcusparse-static=12.3.1.170=0
+  - libdeflate=1.22=hb9d3cd8_0
+  - libdrm=2.4.123=hb9d3cd8_0
+  - libedit=3.1.20230828=h5eee18b_0
+  - libegl=1.7.0=ha4b6fd6_1
+  - libev=4.33=h7f8727e_1
+  - libexpat=2.6.3=h5888daf_0
+  - libffi=3.4.4=h6a678d5_1
+  - libgcc=14.2.0=h77fa898_1
+  - libgcc-ng=14.2.0=h69a702a_1
+  - libgfortran=14.2.0=h69a702a_1
+  - libgfortran5=14.2.0=hd5240d6_1
+  - libgl=1.7.0=ha4b6fd6_1
+  - libglib=2.82.2=h2ff4ddf_0
+  - libglvnd=1.7.0=ha4b6fd6_1
+  - libglx=1.7.0=ha4b6fd6_1
+  - libgomp=14.2.0=h77fa898_1
+  - libhwloc=2.11.1=default_hecaa2ac_1000
+  - libiconv=1.17=hd590300_2
+  - libidn2=2.3.4=h5eee18b_0
+  - libjpeg-turbo=3.0.3=h5eee18b_0
+  - liblapack=3.9.0=20_linux64_mkl
+  - liblapacke=3.9.0=20_linux64_mkl
+  - libmagma=2.8.0=hfdb99dd_0
+  - libmagma_sparse=2.8.0=h9ddd185_0
+  - libmamba=1.5.8=hfe524e5_3
+  - libmambapy=1.5.8=py312h2dafd23_3
+  - libnghttp2=1.64.0=h161d5f1_0
+  - libnpp=12.2.5.30=0
+  - libnpp-dev=12.2.5.30=0
+  - libnpp-static=12.2.5.30=0
+  - libnsl=2.0.1=hd590300_0
+  - libnvfatbin=12.4.127=0
+  - libnvfatbin-dev=12.4.127=0
+  - libnvjitlink=12.4.127=0
+  - libnvjitlink-dev=12.4.127=0
+  - libnvjpeg=12.3.1.117=0
+  - libnvjpeg-dev=12.3.1.117=0
+  - libnvjpeg-static=12.3.1.117=0
+  - libopenvino=2024.4.0=hac27bb2_2
+  - libopenvino-auto-batch-plugin=2024.4.0=h4d9b6c2_2
+  - libopenvino-auto-plugin=2024.4.0=h4d9b6c2_2
+  - libopenvino-hetero-plugin=2024.4.0=h3f63f65_2
+  - libopenvino-intel-cpu-plugin=2024.4.0=hac27bb2_2
+  - libopenvino-intel-gpu-plugin=2024.4.0=hac27bb2_2
+  - libopenvino-intel-npu-plugin=2024.4.0=hac27bb2_2
+  - libopenvino-ir-frontend=2024.4.0=h3f63f65_2
+  - libopenvino-onnx-frontend=2024.4.0=h5c8f2c3_2
+  - libopenvino-paddle-frontend=2024.4.0=h5c8f2c3_2
+  - libopenvino-pytorch-frontend=2024.4.0=h5888daf_2
+  - libopenvino-tensorflow-frontend=2024.4.0=h6481b9d_2
+  - libopenvino-tensorflow-lite-frontend=2024.4.0=h5888daf_2
+  - libopus=1.3.1=h5eee18b_1
+  - libpciaccess=0.18=hd590300_0
+  - libpng=1.6.44=hadc24fc_0
+  - libprotobuf=5.28.2=h5b01275_0
+  - librsvg=2.58.4=hc0ffecb_0
+  - libsolv=0.7.30=h3509ff9_0
+  - libsqlite=3.47.0=hadc24fc_1
+  - libssh2=1.11.0=h0841786_0
+  - libstdcxx=14.2.0=hc0a3c3a_1
+  - libstdcxx-ng=14.2.0=h4852527_1
+  - libtasn1=4.19.0=h5eee18b_0
+  - libtiff=4.7.0=he137b08_1
+  - libtorch=2.4.1=cuda118_h232d35b_303
+  - libunistring=0.9.10=h27cfd23_0
+  - libuuid=2.38.1=h0b41bf4_0
+  - libuv=1.49.2=hb9d3cd8_0
+  - libva=2.22.0=h8a09558_1
+  - libvpx=1.14.1=hac33072_0
+  - libwebp=1.4.0=h2c329e2_0
+  - libwebp-base=1.4.0=hd590300_0
+  - libxcb=1.17.0=h8a09558_0
+  - libxcrypt=4.4.36=hd590300_1
+  - libxml2=2.13.4=hb346dea_2
+  - libzlib=1.3.1=hb9d3cd8_2
+  - llvm-openmp=19.1.3=h024ca30_0
+  - lz4-c=1.9.4=h6a678d5_1
+  - lzo=2.10=hd590300_1001
+  - markupsafe=2.1.3=py312h5eee18b_0
+  - menuinst=2.1.2=py312h06a4308_0
+  - mkl=2023.2.0=h84fe81f_50496
+  - mkl-service=2.4.0=py312h5eee18b_1
+  - mkl_fft=1.3.11=py312h5eee18b_0
+  - mkl_random=1.2.8=py312h526ad5a_0
+  - mpmath=1.3.0=py312h06a4308_0
+  - nccl=2.23.4.1=h03a54cd_2
+  - ncurses=6.5=he02047a_1
+  - nettle=3.7.3=hbbd107a_1
+  - networkx=3.2.1=py312h06a4308_0
+  - nsight-compute=2024.1.1.4=0
+  - numpy=2.1.3=py312hc5e2394_0
+  - numpy-base=2.1.3=py312h0da6c21_0
+  - ocl-icd=2.3.2=hd590300_1
+  - openfst=1.8.3=h84d6215_3
+  - openh264=2.4.1=h59595ed_0
+  - openjpeg=2.5.2=he7f1fd0_0
+  - openssl=3.3.2=hb9d3cd8_0
+  - packaging=24.1=py312h06a4308_0
+  - pango=1.54.0=h4c5309f_1
+  - pcre2=10.44=hba22ea6_2
+  - pillow=11.0.0=py312h7b63e92_0
+  - pip=24.2=py312h06a4308_0
+  - pixman=0.43.2=h59595ed_0
+  - pkg-config=0.29.2=h1bed415_8
+  - platformdirs=3.10.0=py312h06a4308_0
+  - pluggy=1.0.0=py312h06a4308_1
+  - pthread-stubs=0.4=hb9d3cd8_1002
+  - pugixml=1.14=h59595ed_0
+  - pybind11-abi=5=hd3eb1b0_0
+  - pycosat=0.6.6=py312h5eee18b_1
+  - pycparser=2.21=pyhd3eb1b0_0
+  - pysocks=1.7.1=py312h06a4308_0
+  - python=3.12.7=hc5c86c4_0_cpython
+  - python_abi=3.12=5_cp312
+  - pytorch=2.4.1=cuda118_py312h02e3f75_303
+  - pytorch-cuda=12.4=hc786d27_7
+  - pytorch-mutex=1.0=cpu
+  - pyyaml=6.0.2=py312h5eee18b_0
+  - readline=8.2=h5eee18b_0
+  - reproc=14.2.4=h6a678d5_2
+  - reproc-cpp=14.2.4=h6a678d5_2
+  - requests=2.32.3=py312h06a4308_0
+  - rhash=1.4.5=hb9d3cd8_0
+  - ruamel.yaml=0.18.6=py312h5eee18b_0
+  - ruamel.yaml.clib=0.2.8=py312h5eee18b_0
+  - setuptools=72.1.0=py312h06a4308_0
+  - sleef=3.7=h1b44611_0
+  - snappy=1.2.1=ha2e4443_0
+  - sqlite=3.47.0=h9eae976_1
+  - svt-av1=2.3.0=h5888daf_0
+  - sympy=1.13.2=py312h06a4308_0
+  - sysroot_linux-64=2.17=h4a8ded7_18
+  - tbb=2021.13.0=h84d6215_0
+  - tk=8.6.13=noxft_h4845f30_101
+  - torchaudio=2.4.1=cuda_118py312h3b1587d_1
+  - torchvision=0.19.1=cuda118py312h9250042_1
+  - tqdm=4.66.5=py312he106c6f_0
+  - truststore=0.8.0=py312h06a4308_0
+  - typing_extensions=4.11.0=py312h06a4308_0
+  - tzdata=2024b=h04d1e81_0
+  - urllib3=2.2.3=py312h06a4308_0
+  - wayland=1.23.1=h3e06ad9_0
+  - wayland-protocols=1.37=hd8ed1ab_0
+  - wheel=0.44.0=py312h06a4308_0
+  - x264=1!164.3095=h166bdaf_2
+  - x265=3.5=h924138e_3
+  - xorg-libice=1.1.1=hb9d3cd8_1
+  - xorg-libsm=1.2.4=he73a12e_1
+  - xorg-libx11=1.8.10=h4f16b4b_0
+  - xorg-libxau=1.0.11=hb9d3cd8_1
+  - xorg-libxdmcp=1.1.5=hb9d3cd8_0
+  - xorg-libxext=1.3.6=hb9d3cd8_0
+  - xorg-libxfixes=6.0.1=hb9d3cd8_0
+  - xorg-libxrender=0.9.11=hb9d3cd8_1
+  - xorg-xorgproto=2024.1=hb9d3cd8_1
+  - xz=5.4.6=h5eee18b_1
+  - yaml=0.2.5=h7b6447c_0
+  - yaml-cpp=0.8.0=h6a678d5_1
+  - zlib=1.3.1=hb9d3cd8_2
+  - zstandard=0.23.0=py312h2c38b39_0
+  - zstd=1.5.6=ha6fb4c9_0

From 4cb95a24fba6e42a36f2131bdcacd3b2db95dbec Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Wed, 6 Nov 2024 12:59:35 -0800
Subject: [PATCH 32/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index b4eadcac..e3bd02c9 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -24,11 +24,11 @@ jobs:
           miniconda-version: "latest"
           activate-environment: test
           python-version: '3.12'
+          environment-file: packaging/cuda12.4.yaml
       - name: Update pip
         run: python -m pip install --upgrade pip
       - name: Install dependencies and FFmpeg
         run: |
-          conda env update --file packaging/cuda12.4.yaml
           ffmpeg -version
       - name: Build and install torchcodec
         run: |

From 4055346774de7f0f149b0f5dcb04ae4e4b789571 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Wed, 6 Nov 2024 13:09:41 -0800
Subject: [PATCH 33/51] .

---
 examples/cuda_example.py | 175 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 175 insertions(+)
 create mode 100644 examples/cuda_example.py

diff --git a/examples/cuda_example.py b/examples/cuda_example.py
new file mode 100644
index 00000000..4fd72e72
--- /dev/null
+++ b/examples/cuda_example.py
@@ -0,0 +1,175 @@
+"""
+Accelerated video decoding with NVDEC
+=====================================
+
+.. _nvdec_tutorial:
+
+**Author**: `Ahmad Sharif <ahmads@meta.com>`__
+
+This tutorial shows how to use NVIDIA’s hardware video decoder (NVDEC)
+with TorchCodec, and how it improves the performance of video decoding.
+"""
+
+######################################################################
+#
+# .. note::
+#
+#    This tutorial requires FFmpeg libraries compiled with HW
+#    acceleration enabled.
+#
+#    Please refer to
+#    :ref:`Enabling GPU video decoder/encoder <enabling_hw_decoder>`
+#    for how to build FFmpeg with HW acceleration.
+#
+
+import torch
+
+print(torch.__version__)
+print(torchaudio.__version__)
+
+######################################################################
+#
+
+import matplotlib.pyplot as plt
+from torchcodec import VideoDecoder
+
+print("Avaialbe GPU:")
+print(torch.cuda.get_device_properties(0))
+
+######################################################################
+#
+# We will use the following video which has the following properties;
+#
+# - Codec: H.264
+# - Resolution: 960x540
+# - FPS: 29.97
+# - Pixel format: YUV420P
+#
+# .. raw:: html
+#
+#    <video style="max-width: 100%" controls>
+#      <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4" type="video/mp4">
+#    </video>
+
+######################################################################
+#
+
+src = torchaudio.utils.download_asset(
+    "tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4"
+)
+
+######################################################################
+# Decoding videos with NVDEC
+# --------------------------
+#
+# To use HW video decoder, you need to specify the HW decoder when
+# defining the output video stream by passing ``decoder`` option to
+# :py:meth:`~torchaudio.io.StreamReader.add_video_stream` method.
+#
+
+vd = VideoDecoder(src)
+vd.add_video_stream(0, device="cuda:0")
+frame = vd[0]
+
+######################################################################
+#
+# The video frames are decoded and returned as tensor of NCHW format.
+
+print(frame.data.shape, frame.data.dtype)
+
+######################################################################
+#
+# By default, the decoded frames are sent back to CPU memory, and
+# CPU tensors are created.
+
+print(frame.data.device)
+
+
+######################################################################
+# .. note::
+#
+#    When there are multiple of GPUs available, ``StreamReader`` by
+#    default uses the first GPU. You can change this by providing
+#    ``"gpu"`` option.
+#
+# .. code::
+#
+#    # Video data is sent to CUDA device 0, decoded and
+#    # converted on the same device.
+#    s.add_video_stream(
+#        ...,
+#        decoder="h264_cuvid",
+#        decoder_option={"gpu": "0"},
+#        hw_accel="cuda:0",
+#    )
+#
+# .. note::
+#
+#    ``"gpu"`` option and ``hw_accel`` option can be specified
+#    independently. If they do not match, decoded frames are
+#    transfered to the device specified by ``hw_accell``
+#    automatically.
+#
+# .. code::
+#
+#    # Video data is sent to CUDA device 0, and decoded there.
+#    # Then it is transfered to CUDA device 1, and converted to
+#    # CUDA tensor.
+#    s.add_video_stream(
+#        ...,
+#        decoder="h264_cuvid",
+#        decoder_option={"gpu": "0"},
+#        hw_accel="cuda:1",
+#    )
+
+######################################################################
+# Visualization
+# -------------
+#
+# Let's look at the frames decoded by HW decoder and compare them
+# against equivalent results from software decoders.
+#
+# The following function seeks into the given timestamp and decode one
+# frame with the specificed decoder.
+
+
+def test_decode(decoder: str, seek: float):
+    vd = VideoDecoder(src)
+    return vd.get_frame_played_at(seek)
+
+
+######################################################################
+#
+
+timestamps = [12, 19, 45, 131, 180]
+
+cpu_frames = [test_decode(decoder="h264", seek=ts) for ts in timestamps]
+cuda_frames = [test_decode(decoder="h264_cuvid", seek=ts) for ts in timestamps]
+
+
+######################################################################
+#
+# Now we visualize the resutls.
+#
+
+
+def plot_cpu_and_cuda():
+    n_rows = len(timestamps)
+    fig, axes = plt.subplots(n_rows, 2, figsize=[12.8, 16.0])
+    for i in range(n_rows):
+        axes[i][0].imshow(cpu_frames[i])
+        axes[i][1].imshow(cuda_frames[i])
+
+    axes[0][0].set_title("Software decoder")
+    axes[0][1].set_title("HW decoder")
+    plt.setp(axes, xticks=[], yticks=[])
+    plt.tight_layout()
+
+
+plot_cpu_and_cuda()
+
+######################################################################
+#
+# They are indistinguishable to the eyes of the author.
+# Feel free to let us know if you spot something. :)
+#

From 63bbb9e581eb27b2953e9b4b39eae909ece31762 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Wed, 6 Nov 2024 14:30:49 -0800
Subject: [PATCH 34/51] .

---
 examples/cuda_example.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/examples/cuda_example.py b/examples/cuda_example.py
index 4fd72e72..7a339bbd 100644
--- a/examples/cuda_example.py
+++ b/examples/cuda_example.py
@@ -17,9 +17,6 @@
 #    This tutorial requires FFmpeg libraries compiled with HW
 #    acceleration enabled.
 #
-#    Please refer to
-#    :ref:`Enabling GPU video decoder/encoder <enabling_hw_decoder>`
-#    for how to build FFmpeg with HW acceleration.
 #
 
 import torch

From 51e2308861abca4bbf0bfaa3a495d04faba7202a Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Wed, 6 Nov 2024 14:47:30 -0800
Subject: [PATCH 35/51] .

---
 examples/cuda_example.py | 46 ++++++++++++++++++----------------------
 1 file changed, 21 insertions(+), 25 deletions(-)

diff --git a/examples/cuda_example.py b/examples/cuda_example.py
index 7a339bbd..9d2b2aed 100644
--- a/examples/cuda_example.py
+++ b/examples/cuda_example.py
@@ -10,6 +10,7 @@
 with TorchCodec, and how it improves the performance of video decoding.
 """
 
+# %%
 ######################################################################
 #
 # .. note::
@@ -18,21 +19,14 @@
 #    acceleration enabled.
 #
 #
-
 import torch
 
 print(torch.__version__)
-print(torchaudio.__version__)
-
-######################################################################
-#
-
-import matplotlib.pyplot as plt
-from torchcodec import VideoDecoder
-
-print("Avaialbe GPU:")
+print(torch.cuda.is_available())
 print(torch.cuda.get_device_properties(0))
 
+
+# %%
 ######################################################################
 #
 # We will use the following video which has the following properties;
@@ -50,11 +44,16 @@
 
 ######################################################################
 #
+import urllib.request
 
-src = torchaudio.utils.download_asset(
-    "tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4"
+video_file = "video.mp4"
+urllib.request.urlretrieve(
+    "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4",
+    video_file,
 )
 
+
+# %%
 ######################################################################
 # Decoding videos with NVDEC
 # --------------------------
@@ -63,25 +62,28 @@
 # defining the output video stream by passing ``decoder`` option to
 # :py:meth:`~torchaudio.io.StreamReader.add_video_stream` method.
 #
+from torchcodec import VideoDecoder
 
-vd = VideoDecoder(src)
+vd = VideoDecoder(video_file)
 vd.add_video_stream(0, device="cuda:0")
 frame = vd[0]
 
+# %%
 ######################################################################
 #
 # The video frames are decoded and returned as tensor of NCHW format.
 
 print(frame.data.shape, frame.data.dtype)
 
+# %%
 ######################################################################
 #
-# By default, the decoded frames are sent back to CPU memory, and
-# CPU tensors are created.
+# The video frames are left on the GPU memory.
 
 print(frame.data.device)
 
 
+# %%
 ######################################################################
 # .. note::
 #
@@ -119,6 +121,7 @@
 #        hw_accel="cuda:1",
 #    )
 
+
 ######################################################################
 # Visualization
 # -------------
@@ -128,28 +131,20 @@
 #
 # The following function seeks into the given timestamp and decode one
 # frame with the specificed decoder.
+import matplotlib.pyplot as plt
 
 
 def test_decode(decoder: str, seek: float):
-    vd = VideoDecoder(src)
+    vd = VideoDecoder(video_file)
     return vd.get_frame_played_at(seek)
 
 
-######################################################################
-#
-
 timestamps = [12, 19, 45, 131, 180]
 
 cpu_frames = [test_decode(decoder="h264", seek=ts) for ts in timestamps]
 cuda_frames = [test_decode(decoder="h264_cuvid", seek=ts) for ts in timestamps]
 
 
-######################################################################
-#
-# Now we visualize the resutls.
-#
-
-
 def plot_cpu_and_cuda():
     n_rows = len(timestamps)
     fig, axes = plt.subplots(n_rows, 2, figsize=[12.8, 16.0])
@@ -165,6 +160,7 @@ def plot_cpu_and_cuda():
 
 plot_cpu_and_cuda()
 
+# %%
 ######################################################################
 #
 # They are indistinguishable to the eyes of the author.

From a9269341e8ec71543533fc0869a944e160d1fdfd Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Wed, 6 Nov 2024 15:12:12 -0800
Subject: [PATCH 36/51] .

---
 docs/source/index.rst     |   8 ++
 examples/basic_example.py |  20 +++--
 examples/cuda_example.py  | 168 --------------------------------------
 3 files changed, 19 insertions(+), 177 deletions(-)
 delete mode 100644 examples/cuda_example.py

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 1ce569f3..22024888 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -42,6 +42,14 @@ We achieve these capabilities through:
 
         A simple video decoding example
 
+     .. grid-item-card:: :octicon:`file-code;1em`
+        GPU decoding using TorchCodec
+        :img-top: _static/img/card-background.svg
+        :link: generated_examples/basic_cuda_example.html
+        :link-type: url
+
+        A GPU decoding example
+
      .. grid-item-card:: :octicon:`file-code;1em`
         API Reference
         :img-top: _static/img/card-background.svg
diff --git a/examples/basic_example.py b/examples/basic_example.py
index ba85b32f..645df5b0 100644
--- a/examples/basic_example.py
+++ b/examples/basic_example.py
@@ -19,8 +19,9 @@
 # :ref:`creating_decoder`.
 
 from typing import Optional
-import torch
+
 import requests
+import torch
 
 
 # Video source: https://www.pexels.com/video/dog-eating-854132/
@@ -33,16 +34,16 @@
 raw_video_bytes = response.content
 
 
-def plot(frames: torch.Tensor, title : Optional[str] = None):
+def plot(frames: torch.Tensor, title: Optional[str] = None):
     try:
-        from torchvision.utils import make_grid
-        from torchvision.transforms.v2.functional import to_pil_image
         import matplotlib.pyplot as plt
+        from torchvision.transforms.v2.functional import to_pil_image
+        from torchvision.utils import make_grid
     except ImportError:
         print("Cannot plot, please run `pip install torchvision matplotlib`")
         return
 
-    plt.rcParams["savefig.bbox"] = 'tight'
+    plt.rcParams["savefig.bbox"] = "tight"
     fig, ax = plt.subplots()
     ax.imshow(to_pil_image(make_grid(frames)))
     ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
@@ -76,7 +77,7 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
 # ---------------------------------------
 
 first_frame = decoder[0]  # using a single int index
-every_twenty_frame = decoder[0 : -1 : 20]  # using slices
+every_twenty_frame = decoder[0:-1:20]  # using slices
 
 print(f"{first_frame.shape = }")
 print(f"{first_frame.dtype = }")
@@ -106,9 +107,10 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
 # The decoder is a normal iterable object and can be iterated over like so:
 
 for frame in decoder:
-    assert (
-        isinstance(frame, torch.Tensor)
-        and frame.shape == (3, decoder.metadata.height, decoder.metadata.width)
+    assert isinstance(frame, torch.Tensor) and frame.shape == (
+        3,
+        decoder.metadata.height,
+        decoder.metadata.width,
     )
 
 # %%
diff --git a/examples/cuda_example.py b/examples/cuda_example.py
deleted file mode 100644
index 9d2b2aed..00000000
--- a/examples/cuda_example.py
+++ /dev/null
@@ -1,168 +0,0 @@
-"""
-Accelerated video decoding with NVDEC
-=====================================
-
-.. _nvdec_tutorial:
-
-**Author**: `Ahmad Sharif <ahmads@meta.com>`__
-
-This tutorial shows how to use NVIDIA’s hardware video decoder (NVDEC)
-with TorchCodec, and how it improves the performance of video decoding.
-"""
-
-# %%
-######################################################################
-#
-# .. note::
-#
-#    This tutorial requires FFmpeg libraries compiled with HW
-#    acceleration enabled.
-#
-#
-import torch
-
-print(torch.__version__)
-print(torch.cuda.is_available())
-print(torch.cuda.get_device_properties(0))
-
-
-# %%
-######################################################################
-#
-# We will use the following video which has the following properties;
-#
-# - Codec: H.264
-# - Resolution: 960x540
-# - FPS: 29.97
-# - Pixel format: YUV420P
-#
-# .. raw:: html
-#
-#    <video style="max-width: 100%" controls>
-#      <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4" type="video/mp4">
-#    </video>
-
-######################################################################
-#
-import urllib.request
-
-video_file = "video.mp4"
-urllib.request.urlretrieve(
-    "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4",
-    video_file,
-)
-
-
-# %%
-######################################################################
-# Decoding videos with NVDEC
-# --------------------------
-#
-# To use HW video decoder, you need to specify the HW decoder when
-# defining the output video stream by passing ``decoder`` option to
-# :py:meth:`~torchaudio.io.StreamReader.add_video_stream` method.
-#
-from torchcodec import VideoDecoder
-
-vd = VideoDecoder(video_file)
-vd.add_video_stream(0, device="cuda:0")
-frame = vd[0]
-
-# %%
-######################################################################
-#
-# The video frames are decoded and returned as tensor of NCHW format.
-
-print(frame.data.shape, frame.data.dtype)
-
-# %%
-######################################################################
-#
-# The video frames are left on the GPU memory.
-
-print(frame.data.device)
-
-
-# %%
-######################################################################
-# .. note::
-#
-#    When there are multiple of GPUs available, ``StreamReader`` by
-#    default uses the first GPU. You can change this by providing
-#    ``"gpu"`` option.
-#
-# .. code::
-#
-#    # Video data is sent to CUDA device 0, decoded and
-#    # converted on the same device.
-#    s.add_video_stream(
-#        ...,
-#        decoder="h264_cuvid",
-#        decoder_option={"gpu": "0"},
-#        hw_accel="cuda:0",
-#    )
-#
-# .. note::
-#
-#    ``"gpu"`` option and ``hw_accel`` option can be specified
-#    independently. If they do not match, decoded frames are
-#    transfered to the device specified by ``hw_accell``
-#    automatically.
-#
-# .. code::
-#
-#    # Video data is sent to CUDA device 0, and decoded there.
-#    # Then it is transfered to CUDA device 1, and converted to
-#    # CUDA tensor.
-#    s.add_video_stream(
-#        ...,
-#        decoder="h264_cuvid",
-#        decoder_option={"gpu": "0"},
-#        hw_accel="cuda:1",
-#    )
-
-
-######################################################################
-# Visualization
-# -------------
-#
-# Let's look at the frames decoded by HW decoder and compare them
-# against equivalent results from software decoders.
-#
-# The following function seeks into the given timestamp and decode one
-# frame with the specificed decoder.
-import matplotlib.pyplot as plt
-
-
-def test_decode(decoder: str, seek: float):
-    vd = VideoDecoder(video_file)
-    return vd.get_frame_played_at(seek)
-
-
-timestamps = [12, 19, 45, 131, 180]
-
-cpu_frames = [test_decode(decoder="h264", seek=ts) for ts in timestamps]
-cuda_frames = [test_decode(decoder="h264_cuvid", seek=ts) for ts in timestamps]
-
-
-def plot_cpu_and_cuda():
-    n_rows = len(timestamps)
-    fig, axes = plt.subplots(n_rows, 2, figsize=[12.8, 16.0])
-    for i in range(n_rows):
-        axes[i][0].imshow(cpu_frames[i])
-        axes[i][1].imshow(cuda_frames[i])
-
-    axes[0][0].set_title("Software decoder")
-    axes[0][1].set_title("HW decoder")
-    plt.setp(axes, xticks=[], yticks=[])
-    plt.tight_layout()
-
-
-plot_cpu_and_cuda()
-
-# %%
-######################################################################
-#
-# They are indistinguishable to the eyes of the author.
-# Feel free to let us know if you spot something. :)
-#

From 400001a4ff10878d2e0b36cb5dc4a4b3b32ca9df Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Wed, 6 Nov 2024 15:46:17 -0800
Subject: [PATCH 37/51] .

---
 examples/basic_cuda_example.py | 152 +++++++++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)
 create mode 100644 examples/basic_cuda_example.py

diff --git a/examples/basic_cuda_example.py b/examples/basic_cuda_example.py
new file mode 100644
index 00000000..c84372d1
--- /dev/null
+++ b/examples/basic_cuda_example.py
@@ -0,0 +1,152 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Accelerated video decoding with NVDEC
+=====================================
+
+.. _nvdec_tutorial:
+
+**Author**: `Ahmad Sharif <ahmads@meta.com>`__
+
+This tutorial shows how to use NVIDIA’s hardware video decoder (NVDEC)
+with TorchCodec. This decoder is called CUDA decoder in the documentation
+and APIs.
+
+To use the CUDA decoder, you have to have the following installed in your
+environment:
+* NVDEC-enabled FFMPEG
+* libnpp
+* CUDA-enabled pytorch
+
+FFMPEG versions 5, 6 and 7 from conda-forge are built with NVDEC support and
+you can install them by running (for example to install ffmpeg version 7):
+
+.. code-block:: bash
+
+   conda install ffmpeg=7 -c conda-forge
+   conda install libnpp -c nvidia
+"""
+
+# %%
+#
+# .. note::
+#
+#    This tutorial requires FFmpeg libraries compiled with CUDA support.
+#
+#
+import torch
+
+print(f"{torch.__version__=}")
+print(f"{torch.cuda.is_available()=}")
+print(f"{torch.cuda.get_device_properties(0)=}")
+
+
+# %%
+######################################################################
+# Downloading the video
+######################################################################
+#
+# We will use the following video, which has these properties:
+#
+# - Codec: H.264
+# - Resolution: 960x540
+# - FPS: 29.97
+# - Pixel format: YUV420P
+#
+# .. raw:: html
+#
+#    <video style="max-width: 100%" controls>
+#      <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4" type="video/mp4">
+#    </video>
+import urllib.request
+
+video_file = "video.mp4"
+urllib.request.urlretrieve(
+    "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4",
+    video_file,
+)
+
+
+# %%
+######################################################################
+# Decoding with CUDA
+######################################################################
+#
+# To use the CUDA decoder, you need to pass a CUDA device to the decoder.
+#
+from torchcodec.decoders import VideoDecoder
+
+vd = VideoDecoder(video_file, device="cuda:0")
+frame = vd[0]
+
+# %%
+#
+# Each video frame is decoded and returned as a tensor in CHW format.
+
+print(frame.data.shape, frame.data.dtype)
+
+# %%
+#
+# The video frames are left in GPU memory.
+
+print(frame.data.device)
+
+
+# %%
+######################################################################
+# Visualizing Frames
+######################################################################
+#
+# Let's look at the frames decoded by the CUDA decoder and compare them
+# against equivalent results from the CPU decoder.
+import matplotlib.pyplot as plt
+
+
+def get_frames(timestamps: list[float], device: str):
+    decoder = VideoDecoder(video_file, device=device)
+    return [decoder.get_frame_played_at(ts) for ts in timestamps]
+
+
+def get_numpy_images(frames):
+    numpy_images = []
+    for frame in frames:
+        # We transfer to the CPU so they can be visualized by matplotlib.
+        numpy_image = frame.data.to("cpu").permute(1, 2, 0).numpy()
+        numpy_images.append(numpy_image)
+    return numpy_images
+
+
+timestamps = [12, 19, 45, 131, 180]
+cpu_frames = get_frames(timestamps, device="cpu")
+cuda_frames = get_frames(timestamps, device="cuda:0")
+cpu_numpy_images = get_numpy_images(cpu_frames)
+cuda_numpy_images = get_numpy_images(cuda_frames)
+
+
+def plot_cpu_and_cuda():
+    n_rows = len(timestamps)
+    fig, axes = plt.subplots(n_rows, 2, figsize=[12.8, 16.0])
+    for i in range(n_rows):
+        axes[i][0].imshow(cpu_numpy_images[i])
+        axes[i][1].imshow(cuda_numpy_images[i])
+
+    axes[0][0].set_title("CPU decoder")
+    axes[0][1].set_title("CUDA decoder")
+    plt.setp(axes, xticks=[], yticks=[])
+    plt.tight_layout()
+
+
+plot_cpu_and_cuda()
+
+# %%
+#
+# They look similar to the human eye, but there may be subtle
+# differences because CUDA math is not bit-exact with CPU math.
+#
+first_cpu_frame = cpu_frames[0].data.to("cpu")
+first_cuda_frame = cuda_frames[0].data.to("cpu")
+frames_equal = torch.equal(first_cpu_frame, first_cuda_frame)
+print(f"{frames_equal=}")

From ccf95daabb89375fe55947273d412a94afea3c4b Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 08:21:03 -0800
Subject: [PATCH 38/51] .

---
 examples/basic_cuda_example.py | 38 +++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/examples/basic_cuda_example.py b/examples/basic_cuda_example.py
index c84372d1..9088d069 100644
--- a/examples/basic_cuda_example.py
+++ b/examples/basic_cuda_example.py
@@ -11,15 +11,33 @@
 
 **Author**: `Ahmad Sharif <ahmads@meta.com>`__
 
-This tutorial shows how to use NVIDIA’s hardware video decoder (NVDEC)
-with TorchCodec. This decoder is called CUDA decoder in the documentation
-and APIs.
+TorchCodec can use Nvidia hardware to speed-up video decoding. An additional benefit
+of doing decoding on the GPU is that the decoded tensor is left on GPU memory to
+benefit from subsequent GPU transforms like scaling or cropping. In this tutorial this
+Nvidia-GPU-accelerated decoding is called "CUDA Decoding".
+
+CUDA Decoding can offer speed-up over CPU Decoding in a few scenarios:
+
+#. You are deocding a batch of videos that is saturating the CPU
+#. You want to do heavy transforms on the decoded tensors after decoding
+#. You want to free up the CPU to do other work
+
+In some scenarios CUDA Decoding can be slower than CPU Decoding, example:
+
+#. If your GPU is already busy and CPU is not
+#. If you have small resolution videos and the PCI-e transfer latency is large
+#. You want bit-exact results compared to CPU Decoding
+
+It's best to experiment with CUDA Decoding to see if it improves your use-case. With
+TorchCodec you can simply pass in a device parameter to the VideoDecoder class to
+use CUDA Decoding.
+
+In order to use CUDA Decoding, you will need the following installed in your environment:
+
+#. CUDA-enabled pytorch
+#. FFMPEG binaries that support NVDEC-enabled codecs
+#. libnpp
 
-To use the CUDA decoder, you have to have the following installed in your
-environment:
-* NVDEC-enabled FFMPEG
-* libnpp
-* CUDA-enabled pytorch
 
 FFMPEG versions 5, 6 and 7 from conda-forge are built with NVDEC support and
 you can install them by running (for example to install ffmpeg version 7):
@@ -126,7 +144,7 @@ def get_numpy_images(frames):
 cuda_numpy_images = get_numpy_images(cuda_frames)
 
 
-def plot_cpu_and_cuda():
+def plot_cpu_and_cuda_images():
     n_rows = len(timestamps)
     fig, axes = plt.subplots(n_rows, 2, figsize=[12.8, 16.0])
     for i in range(n_rows):
@@ -139,7 +157,7 @@ def plot_cpu_and_cuda():
     plt.tight_layout()
 
 
-plot_cpu_and_cuda()
+plot_cpu_and_cuda_images()
 
 # %%
 #

From 209e746b6c8daaa2d349984a9bd71927aae881e6 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 08:28:05 -0800
Subject: [PATCH 39/51] .

---
 examples/basic_cuda_example.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/examples/basic_cuda_example.py b/examples/basic_cuda_example.py
index 9088d069..81b28adb 100644
--- a/examples/basic_cuda_example.py
+++ b/examples/basic_cuda_example.py
@@ -11,16 +11,20 @@
 
 **Author**: `Ahmad Sharif <ahmads@meta.com>`__
 
-TorchCodec can use Nvidia hardware to speed-up video decoding. An additional benefit
-of doing decoding on the GPU is that the decoded tensor is left on GPU memory to
-benefit from subsequent GPU transforms like scaling or cropping. In this tutorial this
-Nvidia-GPU-accelerated decoding is called "CUDA Decoding".
+TorchCodec can use Nvidia hardware to speed up video decoding. This is called "CUDA Decoding".
+CUDA Decoding can be faster than CPU Decoding for the actual decoding step and for
+subsequent transform steps like scaling, cropping or rotating. This is because the decode step leaves
+the decoded tensor in GPU memory so the GPU doesn't have to fetch from main memory before
+running the transform steps. Encoded packets are often much smaller than decoded frames so
+CUDA decoding also uses less PCI-e bandwidth.
 
 CUDA Decoding can offer speed-up over CPU Decoding in a few scenarios:
 
-#. You are deocding a batch of videos that is saturating the CPU
-#. You want to do heavy transforms on the decoded tensors after decoding
-#. You want to free up the CPU to do other work
+#. You are decoding a large resolution video
+#. You are decoding a large batch of videos that's saturating the CPU
+#. You want to do whole-image transforms like scaling or convolutions on the decoded tensors
+   after decoding
+#. Your CPU is saturated and you want to free it up for other work
 
 In some scenarios CUDA Decoding can be slower than CPU Decoding, example:
 

From 0a8ae5fcd538897336b6692811dea760a89ffefd Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 10:05:37 -0800
Subject: [PATCH 40/51] .

---
 .github/workflows/docs.yaml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index e3bd02c9..16295292 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -24,15 +24,15 @@ jobs:
           miniconda-version: "latest"
           activate-environment: test
           python-version: '3.12'
-          environment-file: packaging/cuda12.4.yaml
       - name: Update pip
         run: python -m pip install --upgrade pip
-      - name: Install dependencies and FFmpeg
+      - name: Install torchcodec from nightly
         run: |
-          ffmpeg -version
-      - name: Build and install torchcodec
+          pip3 install --pre torch torchvision torchaudio torchcodec --index-url https://download.pytorch.org/whl/nightly/cu124
+      - name: Install FFMPEG
         run: |
-          ENABLE_CUDA=1 python -m pip install -e ".[dev]" --no-build-isolation -vvv
+          conda install ffmpeg=7 -c conda-forge
+          ffmpeg -version
       - name: Install doc dependencies
         run: |
           cd docs

From 8864b30e8a66dc93f2189d4d81202feb08418272 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 10:13:16 -0800
Subject: [PATCH 41/51] .

---
 .github/workflows/docs.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 16295292..a4884456 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -29,8 +29,9 @@ jobs:
       - name: Install torchcodec from nightly
         run: |
           pip3 install --pre torch torchvision torchaudio torchcodec --index-url https://download.pytorch.org/whl/nightly/cu124
-      - name: Install FFMPEG
+      - name: Install FFMPEG and other deps
         run: |
+          conda install cuda-nvrtc=12.4 -c nvidia
           conda install ffmpeg=7 -c conda-forge
           ffmpeg -version
       - name: Install doc dependencies

From 936cbd10d71237f5587eb9a66ba0ce98c99c43d4 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 10:16:35 -0800
Subject: [PATCH 42/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index a4884456..4a98fe8f 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
           pip3 install --pre torch torchvision torchaudio torchcodec --index-url https://download.pytorch.org/whl/nightly/cu124
       - name: Install FFMPEG and other deps
         run: |
-          conda install cuda-nvrtc=12.4 -c nvidia
+          conda install cuda-nvrtc=12.4 libnpp -c nvidia
           conda install ffmpeg=7 -c conda-forge
           ffmpeg -version
       - name: Install doc dependencies

From 49197b5324a41da5dbf06912bac1dcfce7dacdec Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 10:23:28 -0800
Subject: [PATCH 43/51] .

---
 docs/source/index.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index d26827ce..3e8ed8e7 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -50,11 +50,11 @@ We achieve these capabilities through:
 
         How to sample video clips
 
-      .. grid-item-card:: :octicon:`file-code;1em`
-         GPU decoding using TorchCodec
-         :img-top: _static/img/card-background.svg
-         :link: generated_examples/basic_cuda_example.html
-         :link-type: url
+     .. grid-item-card:: :octicon:`file-code;1em`
+        GPU decoding using TorchCodec
+        :img-top: _static/img/card-background.svg
+        :link: generated_examples/basic_cuda_example.html
+        :link-type: url
 
         A simple example demonstrating Nvidia GPU decoding
 

From 8291aa6e18689a77a5a706662886ec7374f20b3b Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 10:23:46 -0800
Subject: [PATCH 44/51] .

---
 packaging/cuda12.4.yaml | 289 ----------------------------------------
 1 file changed, 289 deletions(-)
 delete mode 100644 packaging/cuda12.4.yaml

diff --git a/packaging/cuda12.4.yaml b/packaging/cuda12.4.yaml
deleted file mode 100644
index 7666f0cb..00000000
--- a/packaging/cuda12.4.yaml
+++ /dev/null
@@ -1,289 +0,0 @@
-name: cuda4
-channels:
-  - pytorch-nightly
-  - nvidia
-  - conda-forge
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=conda_forge
-  - _openmp_mutex=4.5=2_kmp_llvm
-  - aom=3.9.1=hac33072_0
-  - archspec=0.2.3=pyhd3eb1b0_0
-  - blas=1.0=mkl
-  - boltons=23.0.0=py312h06a4308_0
-  - brotli-python=1.0.9=py312h6a678d5_8
-  - bzip2=1.0.8=h5eee18b_6
-  - c-ares=1.34.2=heb4867d_0
-  - ca-certificates=2024.9.24=h06a4308_0
-  - cairo=1.18.0=hebfffa5_3
-  - certifi=2024.8.30=py312h06a4308_0
-  - cffi=1.17.1=py312h1fdaa30_0
-  - charset-normalizer=3.3.2=pyhd3eb1b0_0
-  - cmake=3.30.5=hf9cb763_0
-  - conda=24.9.2=py312h7900ff3_0
-  - conda-libmamba-solver=24.9.0=pyhd3eb1b0_0
-  - conda-package-handling=2.3.0=py312h06a4308_0
-  - conda-package-streaming=0.10.0=py312h06a4308_0
-  - cuda=12.4.0=0
-  - cuda-cccl=12.4.127=0
-  - cuda-command-line-tools=12.4.1=0
-  - cuda-compiler=12.6.2=0
-  - cuda-cudart=12.4.127=0
-  - cuda-cudart-dev=12.4.127=0
-  - cuda-cudart-static=12.4.127=0
-  - cuda-cuobjdump=12.4.127=0
-  - cuda-cupti=12.4.127=0
-  - cuda-cupti-static=12.4.127=0
-  - cuda-cuxxfilt=12.4.127=0
-  - cuda-demo-suite=12.4.127=0
-  - cuda-documentation=12.4.127=0
-  - cuda-driver-dev=12.4.127=0
-  - cuda-gdb=12.4.127=0
-  - cuda-libraries=12.4.1=0
-  - cuda-libraries-dev=12.6.0=0
-  - cuda-libraries-static=12.4.1=0
-  - cuda-nsight=12.4.127=0
-  - cuda-nvcc=12.4.131=0
-  - cuda-nvdisasm=12.4.127=0
-  - cuda-nvml-dev=12.4.127=0
-  - cuda-nvprof=12.4.127=0
-  - cuda-nvprune=12.4.127=0
-  - cuda-nvrtc=12.4.127=0
-  - cuda-nvrtc-dev=12.4.127=0
-  - cuda-nvrtc-static=12.4.127=0
-  - cuda-nvtx=12.4.127=0
-  - cuda-nvvp=12.4.127=0
-  - cuda-opencl=12.4.127=0
-  - cuda-opencl-dev=12.4.127=0
-  - cuda-profiler-api=12.4.127=0
-  - cuda-runtime=12.4.0=0
-  - cuda-sanitizer-api=12.4.127=0
-  - cuda-toolkit=12.4.0=0
-  - cuda-tools=12.4.1=0
-  - cuda-version=11.8=h70ddcb2_3
-  - cuda-visual-tools=12.6.0=0
-  - cudatoolkit=11.8.0=h4ba93d1_13
-  - cudnn=9.3.0.75=hc149ed2_0
-  - dav1d=1.2.1=hd590300_0
-  - distro=1.9.0=py312h06a4308_0
-  - expat=2.6.3=h6a678d5_0
-  - ffmpeg=7.1.0=gpl_h2e64a5a_503
-  - filelock=3.13.1=py312h06a4308_0
-  - fmt=9.1.0=hdb19cb5_1
-  - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
-  - font-ttf-inconsolata=3.000=h77eed37_0
-  - font-ttf-source-code-pro=2.038=h77eed37_0
-  - font-ttf-ubuntu=0.83=h77eed37_3
-  - fontconfig=2.15.0=h7e30c49_1
-  - fonts-conda-ecosystem=1=0
-  - fonts-conda-forge=1=0
-  - freetype=2.12.1=h267a509_2
-  - fribidi=1.0.10=h36c2ea0_0
-  - frozendict=2.4.2=py312h06a4308_0
-  - fsspec=2024.10.0=pyhff2d567_0
-  - gdk-pixbuf=2.42.12=hb9ae30d_0
-  - gds-tools=1.9.1.3=0
-  - giflib=5.2.2=h5eee18b_0
-  - gmp=6.3.0=hac33072_2
-  - gnutls=3.6.15=he1e5248_0
-  - graphite2=1.3.14=h295c915_1
-  - harfbuzz=9.0.0=hda332d3_1
-  - icu=75.1=he02047a_0
-  - idna=3.7=py312h06a4308_0
-  - intel-openmp=2022.0.1=h06a4308_3633
-  - jinja2=3.1.4=py312h06a4308_1
-  - jsonpatch=1.33=py312h06a4308_1
-  - jsonpointer=2.1=pyhd3eb1b0_0
-  - kaldi=5.5.1112=cpu_hd7b63f8_5
-  - kernel-headers_linux-64=3.10.0=he073ed8_18
-  - keyutils=1.6.1=h166bdaf_0
-  - krb5=1.21.3=h659f571_0
-  - lame=3.100=h7b6447c_0
-  - lcms2=2.16=hb7c19ff_0
-  - ld_impl_linux-64=2.40=h12ee557_0
-  - lerc=4.0.0=h27087fc_0
-  - libabseil=20240722.0=cxx17_h5888daf_1
-  - libarchive=3.7.4=hfca40fe_0
-  - libass=0.17.3=h1dc1e6a_0
-  - libblas=3.9.0=20_linux64_mkl
-  - libcblas=3.9.0=20_linux64_mkl
-  - libcublas=12.4.5.8=0
-  - libcublas-dev=12.4.5.8=0
-  - libcublas-static=12.4.5.8=0
-  - libcufft=11.2.1.3=0
-  - libcufft-dev=11.2.1.3=0
-  - libcufft-static=11.2.1.3=0
-  - libcufile=1.9.1.3=0
-  - libcufile-dev=1.9.1.3=0
-  - libcufile-static=1.9.1.3=0
-  - libcurand=10.3.5.147=0
-  - libcurand-dev=10.3.5.147=0
-  - libcurand-static=10.3.5.147=0
-  - libcurl=8.11.0=hbbe4b11_0
-  - libcusolver=11.6.1.9=0
-  - libcusolver-dev=11.6.1.9=0
-  - libcusolver-static=11.6.1.9=0
-  - libcusparse=12.3.1.170=0
-  - libcusparse-dev=12.3.1.170=0
-  - libcusparse-static=12.3.1.170=0
-  - libdeflate=1.22=hb9d3cd8_0
-  - libdrm=2.4.123=hb9d3cd8_0
-  - libedit=3.1.20230828=h5eee18b_0
-  - libegl=1.7.0=ha4b6fd6_1
-  - libev=4.33=h7f8727e_1
-  - libexpat=2.6.3=h5888daf_0
-  - libffi=3.4.4=h6a678d5_1
-  - libgcc=14.2.0=h77fa898_1
-  - libgcc-ng=14.2.0=h69a702a_1
-  - libgfortran=14.2.0=h69a702a_1
-  - libgfortran5=14.2.0=hd5240d6_1
-  - libgl=1.7.0=ha4b6fd6_1
-  - libglib=2.82.2=h2ff4ddf_0
-  - libglvnd=1.7.0=ha4b6fd6_1
-  - libglx=1.7.0=ha4b6fd6_1
-  - libgomp=14.2.0=h77fa898_1
-  - libhwloc=2.11.1=default_hecaa2ac_1000
-  - libiconv=1.17=hd590300_2
-  - libidn2=2.3.4=h5eee18b_0
-  - libjpeg-turbo=3.0.3=h5eee18b_0
-  - liblapack=3.9.0=20_linux64_mkl
-  - liblapacke=3.9.0=20_linux64_mkl
-  - libmagma=2.8.0=hfdb99dd_0
-  - libmagma_sparse=2.8.0=h9ddd185_0
-  - libmamba=1.5.8=hfe524e5_3
-  - libmambapy=1.5.8=py312h2dafd23_3
-  - libnghttp2=1.64.0=h161d5f1_0
-  - libnpp=12.2.5.30=0
-  - libnpp-dev=12.2.5.30=0
-  - libnpp-static=12.2.5.30=0
-  - libnsl=2.0.1=hd590300_0
-  - libnvfatbin=12.4.127=0
-  - libnvfatbin-dev=12.4.127=0
-  - libnvjitlink=12.4.127=0
-  - libnvjitlink-dev=12.4.127=0
-  - libnvjpeg=12.3.1.117=0
-  - libnvjpeg-dev=12.3.1.117=0
-  - libnvjpeg-static=12.3.1.117=0
-  - libopenvino=2024.4.0=hac27bb2_2
-  - libopenvino-auto-batch-plugin=2024.4.0=h4d9b6c2_2
-  - libopenvino-auto-plugin=2024.4.0=h4d9b6c2_2
-  - libopenvino-hetero-plugin=2024.4.0=h3f63f65_2
-  - libopenvino-intel-cpu-plugin=2024.4.0=hac27bb2_2
-  - libopenvino-intel-gpu-plugin=2024.4.0=hac27bb2_2
-  - libopenvino-intel-npu-plugin=2024.4.0=hac27bb2_2
-  - libopenvino-ir-frontend=2024.4.0=h3f63f65_2
-  - libopenvino-onnx-frontend=2024.4.0=h5c8f2c3_2
-  - libopenvino-paddle-frontend=2024.4.0=h5c8f2c3_2
-  - libopenvino-pytorch-frontend=2024.4.0=h5888daf_2
-  - libopenvino-tensorflow-frontend=2024.4.0=h6481b9d_2
-  - libopenvino-tensorflow-lite-frontend=2024.4.0=h5888daf_2
-  - libopus=1.3.1=h5eee18b_1
-  - libpciaccess=0.18=hd590300_0
-  - libpng=1.6.44=hadc24fc_0
-  - libprotobuf=5.28.2=h5b01275_0
-  - librsvg=2.58.4=hc0ffecb_0
-  - libsolv=0.7.30=h3509ff9_0
-  - libsqlite=3.47.0=hadc24fc_1
-  - libssh2=1.11.0=h0841786_0
-  - libstdcxx=14.2.0=hc0a3c3a_1
-  - libstdcxx-ng=14.2.0=h4852527_1
-  - libtasn1=4.19.0=h5eee18b_0
-  - libtiff=4.7.0=he137b08_1
-  - libtorch=2.4.1=cuda118_h232d35b_303
-  - libunistring=0.9.10=h27cfd23_0
-  - libuuid=2.38.1=h0b41bf4_0
-  - libuv=1.49.2=hb9d3cd8_0
-  - libva=2.22.0=h8a09558_1
-  - libvpx=1.14.1=hac33072_0
-  - libwebp=1.4.0=h2c329e2_0
-  - libwebp-base=1.4.0=hd590300_0
-  - libxcb=1.17.0=h8a09558_0
-  - libxcrypt=4.4.36=hd590300_1
-  - libxml2=2.13.4=hb346dea_2
-  - libzlib=1.3.1=hb9d3cd8_2
-  - llvm-openmp=19.1.3=h024ca30_0
-  - lz4-c=1.9.4=h6a678d5_1
-  - lzo=2.10=hd590300_1001
-  - markupsafe=2.1.3=py312h5eee18b_0
-  - menuinst=2.1.2=py312h06a4308_0
-  - mkl=2023.2.0=h84fe81f_50496
-  - mkl-service=2.4.0=py312h5eee18b_1
-  - mkl_fft=1.3.11=py312h5eee18b_0
-  - mkl_random=1.2.8=py312h526ad5a_0
-  - mpmath=1.3.0=py312h06a4308_0
-  - nccl=2.23.4.1=h03a54cd_2
-  - ncurses=6.5=he02047a_1
-  - nettle=3.7.3=hbbd107a_1
-  - networkx=3.2.1=py312h06a4308_0
-  - nsight-compute=2024.1.1.4=0
-  - numpy=2.1.3=py312hc5e2394_0
-  - numpy-base=2.1.3=py312h0da6c21_0
-  - ocl-icd=2.3.2=hd590300_1
-  - openfst=1.8.3=h84d6215_3
-  - openh264=2.4.1=h59595ed_0
-  - openjpeg=2.5.2=he7f1fd0_0
-  - openssl=3.3.2=hb9d3cd8_0
-  - packaging=24.1=py312h06a4308_0
-  - pango=1.54.0=h4c5309f_1
-  - pcre2=10.44=hba22ea6_2
-  - pillow=11.0.0=py312h7b63e92_0
-  - pip=24.2=py312h06a4308_0
-  - pixman=0.43.2=h59595ed_0
-  - pkg-config=0.29.2=h1bed415_8
-  - platformdirs=3.10.0=py312h06a4308_0
-  - pluggy=1.0.0=py312h06a4308_1
-  - pthread-stubs=0.4=hb9d3cd8_1002
-  - pugixml=1.14=h59595ed_0
-  - pybind11-abi=5=hd3eb1b0_0
-  - pycosat=0.6.6=py312h5eee18b_1
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pysocks=1.7.1=py312h06a4308_0
-  - python=3.12.7=hc5c86c4_0_cpython
-  - python_abi=3.12=5_cp312
-  - pytorch=2.4.1=cuda118_py312h02e3f75_303
-  - pytorch-cuda=12.4=hc786d27_7
-  - pytorch-mutex=1.0=cpu
-  - pyyaml=6.0.2=py312h5eee18b_0
-  - readline=8.2=h5eee18b_0
-  - reproc=14.2.4=h6a678d5_2
-  - reproc-cpp=14.2.4=h6a678d5_2
-  - requests=2.32.3=py312h06a4308_0
-  - rhash=1.4.5=hb9d3cd8_0
-  - ruamel.yaml=0.18.6=py312h5eee18b_0
-  - ruamel.yaml.clib=0.2.8=py312h5eee18b_0
-  - setuptools=72.1.0=py312h06a4308_0
-  - sleef=3.7=h1b44611_0
-  - snappy=1.2.1=ha2e4443_0
-  - sqlite=3.47.0=h9eae976_1
-  - svt-av1=2.3.0=h5888daf_0
-  - sympy=1.13.2=py312h06a4308_0
-  - sysroot_linux-64=2.17=h4a8ded7_18
-  - tbb=2021.13.0=h84d6215_0
-  - tk=8.6.13=noxft_h4845f30_101
-  - torchaudio=2.4.1=cuda_118py312h3b1587d_1
-  - torchvision=0.19.1=cuda118py312h9250042_1
-  - tqdm=4.66.5=py312he106c6f_0
-  - truststore=0.8.0=py312h06a4308_0
-  - typing_extensions=4.11.0=py312h06a4308_0
-  - tzdata=2024b=h04d1e81_0
-  - urllib3=2.2.3=py312h06a4308_0
-  - wayland=1.23.1=h3e06ad9_0
-  - wayland-protocols=1.37=hd8ed1ab_0
-  - wheel=0.44.0=py312h06a4308_0
-  - x264=1!164.3095=h166bdaf_2
-  - x265=3.5=h924138e_3
-  - xorg-libice=1.1.1=hb9d3cd8_1
-  - xorg-libsm=1.2.4=he73a12e_1
-  - xorg-libx11=1.8.10=h4f16b4b_0
-  - xorg-libxau=1.0.11=hb9d3cd8_1
-  - xorg-libxdmcp=1.1.5=hb9d3cd8_0
-  - xorg-libxext=1.3.6=hb9d3cd8_0
-  - xorg-libxfixes=6.0.1=hb9d3cd8_0
-  - xorg-libxrender=0.9.11=hb9d3cd8_1
-  - xorg-xorgproto=2024.1=hb9d3cd8_1
-  - xz=5.4.6=h5eee18b_1
-  - yaml=0.2.5=h7b6447c_0
-  - yaml-cpp=0.8.0=h6a678d5_1
-  - zlib=1.3.1=hb9d3cd8_2
-  - zstandard=0.23.0=py312h2c38b39_0
-  - zstd=1.5.6=ha6fb4c9_0

From 4e10d0b1e2b0799a204cbdbd52fb959516d21b9b Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 10:26:04 -0800
Subject: [PATCH 45/51] .

---
 examples/basic_cuda_example.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/basic_cuda_example.py b/examples/basic_cuda_example.py
index 81b28adb..ba2f375d 100644
--- a/examples/basic_cuda_example.py
+++ b/examples/basic_cuda_example.py
@@ -40,7 +40,7 @@
 
 #. CUDA-enabled pytorch
 #. FFMPEG binaries that support NVDEC-enabled codecs
-#. libnpp
+#. libnpp and nvrtc (these are usually installed when you install the full cuda-toolkit)
 
 
 FFMPEG versions 5, 6 and 7 from conda-forge are built with NVDEC support and
@@ -49,7 +49,7 @@
 .. code-block:: bash
 
    conda install ffmpeg=7 -c conda-forge
-   conda install libnpp -c nvidia
+   conda install libnpp cuda-nvrtc -c nvidia
 """
 
 # %%

From b90bc7f0d7c9caa663cbf6a2f80ac4ead1b4f5f0 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 10:27:45 -0800
Subject: [PATCH 46/51] .

---
 .github/workflows/cpp_tests.yaml        | 2 +-
 .github/workflows/linux_cuda_wheel.yaml | 2 +-
 .github/workflows/macos_wheel.yaml      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/cpp_tests.yaml b/.github/workflows/cpp_tests.yaml
index 453f5bc2..b2b19a78 100644
--- a/.github/workflows/cpp_tests.yaml
+++ b/.github/workflows/cpp_tests.yaml
@@ -3,7 +3,7 @@ name: CPP tests
 on:
   push:
     branches: [ main ]
-  # pull_request:
+  pull_request:
 
 concurrency:
   group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
diff --git a/.github/workflows/linux_cuda_wheel.yaml b/.github/workflows/linux_cuda_wheel.yaml
index 7bb57f76..915c5236 100644
--- a/.github/workflows/linux_cuda_wheel.yaml
+++ b/.github/workflows/linux_cuda_wheel.yaml
@@ -1,7 +1,7 @@
 name: Build and test Linux CUDA wheels
 
 on:
-  #pull_request:
+  pull_request:
   push:
     branches:
       - nightly
diff --git a/.github/workflows/macos_wheel.yaml b/.github/workflows/macos_wheel.yaml
index 45ccdb4d..ef637194 100644
--- a/.github/workflows/macos_wheel.yaml
+++ b/.github/workflows/macos_wheel.yaml
@@ -1,7 +1,7 @@
 name: Build and test MacOS wheel
 
 on:
-  #pull_request:
+  pull_request:
   push:
     branches:
       - nightly

From 2ae49ac0670c5fd0afe7b95239ae256cb467a83e Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 10:29:55 -0800
Subject: [PATCH 47/51] .

---
 .github/workflows/linux_wheel.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/linux_wheel.yaml b/.github/workflows/linux_wheel.yaml
index 5cc75c9a..38f25733 100644
--- a/.github/workflows/linux_wheel.yaml
+++ b/.github/workflows/linux_wheel.yaml
@@ -1,7 +1,7 @@
 name: Build and test Linux wheel
 
 on:
-  #pull_request:
+  pull_request:
   push:
     branches:
       - nightly

From f0444d48a35b515c936c211eb248bdfd51537030 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 11:52:20 -0800
Subject: [PATCH 48/51] .

---
 examples/basic_example.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/examples/basic_example.py b/examples/basic_example.py
index 645df5b0..ba85b32f 100644
--- a/examples/basic_example.py
+++ b/examples/basic_example.py
@@ -19,9 +19,8 @@
 # :ref:`creating_decoder`.
 
 from typing import Optional
-
-import requests
 import torch
+import requests
 
 
 # Video source: https://www.pexels.com/video/dog-eating-854132/
@@ -34,16 +33,16 @@
 raw_video_bytes = response.content
 
 
-def plot(frames: torch.Tensor, title: Optional[str] = None):
+def plot(frames: torch.Tensor, title : Optional[str] = None):
     try:
-        import matplotlib.pyplot as plt
-        from torchvision.transforms.v2.functional import to_pil_image
         from torchvision.utils import make_grid
+        from torchvision.transforms.v2.functional import to_pil_image
+        import matplotlib.pyplot as plt
     except ImportError:
         print("Cannot plot, please run `pip install torchvision matplotlib`")
         return
 
-    plt.rcParams["savefig.bbox"] = "tight"
+    plt.rcParams["savefig.bbox"] = 'tight'
     fig, ax = plt.subplots()
     ax.imshow(to_pil_image(make_grid(frames)))
     ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
@@ -77,7 +76,7 @@ def plot(frames: torch.Tensor, title: Optional[str] = None):
 # ---------------------------------------
 
 first_frame = decoder[0]  # using a single int index
-every_twenty_frame = decoder[0:-1:20]  # using slices
+every_twenty_frame = decoder[0 : -1 : 20]  # using slices
 
 print(f"{first_frame.shape = }")
 print(f"{first_frame.dtype = }")
@@ -107,10 +106,9 @@ def plot(frames: torch.Tensor, title: Optional[str] = None):
 # The decoder is a normal iterable object and can be iterated over like so:
 
 for frame in decoder:
-    assert isinstance(frame, torch.Tensor) and frame.shape == (
-        3,
-        decoder.metadata.height,
-        decoder.metadata.width,
+    assert (
+        isinstance(frame, torch.Tensor)
+        and frame.shape == (3, decoder.metadata.height, decoder.metadata.width)
     )
 
 # %%

From 8d2070a721d2b719ff110e01f95a39bc5bd6ba60 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 12:10:23 -0800
Subject: [PATCH 49/51] .

---
 .github/workflows/docs.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 4a98fe8f..4bd4c48f 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -34,6 +34,9 @@ jobs:
           conda install cuda-nvrtc=12.4 libnpp -c nvidia
           conda install ffmpeg=7 -c conda-forge
           ffmpeg -version
+      - name: Build torchcodec
+        run: |
+          python -m pip install -e ".[dev]" --no-build-isolation -vvv
       - name: Install doc dependencies
         run: |
           cd docs

From 89c380e4ec2a504b423717a5074a9e6503117816 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 12:20:12 -0800
Subject: [PATCH 50/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 4bd4c48f..8c73b513 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -32,7 +32,7 @@ jobs:
       - name: Install FFMPEG and other deps
         run: |
           conda install cuda-nvrtc=12.4 libnpp -c nvidia
-          conda install ffmpeg=7 -c conda-forge
+          conda install ffmpeg=7 cmake pkg-config -c conda-forge
           ffmpeg -version
       - name: Build torchcodec
         run: |

From 150766921b987426d70ae3cf6f12a9f73f15b924 Mon Sep 17 00:00:00 2001
From: Ahmad Sharif <ahmads@fb.com>
Date: Thu, 7 Nov 2024 12:54:57 -0800
Subject: [PATCH 51/51] .

---
 .github/workflows/docs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 8c73b513..0b870a27 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -31,7 +31,7 @@ jobs:
           pip3 install --pre torch torchvision torchaudio torchcodec --index-url https://download.pytorch.org/whl/nightly/cu124
       - name: Install FFMPEG and other deps
         run: |
-          conda install cuda-nvrtc=12.4 libnpp -c nvidia
+          conda install cuda-nvrtc=12.4 libnpp cuda-nvcc=12.4 cuda-cudart=12.4 -c nvidia
           conda install ffmpeg=7 cmake pkg-config -c conda-forge
           ffmpeg -version
       - name: Build torchcodec