Commit a471b2a

TensorRT 10.13.2 OSS Release (#4556)
Signed-off-by: Kevin Chen <[email protected]>
1 parent: b8db91e
File tree: 81 files changed, +1247 -2025 lines changed


.github/workflows/docker-image.yml
Lines changed: 3 additions & 3 deletions

@@ -8,11 +8,11 @@ on:
 
 jobs:
 
-  build-ubuntu2004:
+  build-ubuntu2204:
 
     runs-on: ubuntu-latest
 
     steps:
     - uses: actions/checkout@v3
-    - name: Build TensorRT-OSS ubuntu20.04 container
-      run: docker build . --file docker/ubuntu-20.04.Dockerfile --build-arg uid=1000 --build-arg gid=1000 --tag tensorrt-ubuntu20.04:$(date +%s)
+    - name: Build TensorRT-OSS ubuntu22.04 container
+      run: docker build . --file docker/ubuntu-22.04.Dockerfile --build-arg uid=1000 --build-arg gid=1000 --tag tensorrt-ubuntu22.04:$(date +%s)

CHANGELOG.md
Lines changed: 5 additions & 0 deletions

@@ -1,5 +1,10 @@
 # TensorRT OSS Release Changelog
 
+## 10.13.2 GA - 2025-8-18
+- Added support for CUDA 13.0, dropped support for CUDA 11.X
+- Dropped support for Ubuntu 20.04
+- Dropped support for Python versions < 3.10 for samples and demos
+
 ## 10.13.0 GA - 2025-7-24
 - Plugin changes
   - Fixed a division-by-zero error in geluPlugin that occurred when the bias is omitted.
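Since this release drops Python versions below 3.10 for samples and demos, a minimal guard along these lines (a hypothetical sketch, not part of this commit) could fail fast in a sample's entry point:

```python
import sys

# Hypothetical guard reflecting the 10.13.2 requirement above:
# samples and demos now need Python >= 3.10.
if sys.version_info < (3, 10):
    sys.exit("TensorRT 10.13.2 samples and demos require Python >= 3.10")
```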

CMakeLists.txt
Lines changed: 18 additions & 17 deletions

@@ -66,6 +66,24 @@ endif()
 
 set(CMAKE_SKIP_BUILD_RPATH True)
 
+# Set CUDA architectures before enabling CUDA language to avoid detection issues in containers
+if (DEFINED GPU_ARCHS AND NOT GPU_ARCHS STREQUAL "")
+    message(STATUS "GPU_ARCHS defined as ${GPU_ARCHS}. Setting CUDA architectures for SM ${GPU_ARCHS}")
+    separate_arguments(GPU_ARCHS)
+    foreach(SM IN LISTS GPU_ARCHS)
+        list(APPEND CMAKE_CUDA_ARCHITECTURES "${SM}")
+    endforeach()
+else()
+    # Set default architectures for container builds where auto-detection fails
+    set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90)
+
+    if(CUDA_VERSION VERSION_GREATER_EQUAL 12.8)
+        list(APPEND CMAKE_CUDA_ARCHITECTURES 100 120)
+    endif()
+
+    message(STATUS "Setting default CUDA architectures for container build: ${CMAKE_CUDA_ARCHITECTURES}")
+endif()
+
 project(TensorRT
     LANGUAGES CXX CUDA
     VERSION ${TRT_VERSION}

@@ -177,23 +195,6 @@ endif()
 set(CUDA_LIBRARIES ${CUDART_LIB})
 
 ############################################################################################
-# CUDA targets
-
-if (DEFINED GPU_ARCHS)
-    message(STATUS "GPU_ARCHS defined as ${GPU_ARCHS}. Generating CUDA code for SM ${GPU_ARCHS}")
-    separate_arguments(GPU_ARCHS)
-    foreach(SM IN LISTS GPU_ARCHS)
-        list(APPEND CMAKE_CUDA_ARCHITECTURES "${SM}")
-    endforeach()
-else()
-    list(APPEND CMAKE_CUDA_ARCHITECTURES 72 75 80 86 87 89 90)
-
-    if(CUDA_VERSION VERSION_GREATER_EQUAL 12.8)
-        list(APPEND CMAKE_CUDA_ARCHITECTURES 100 120)
-    endif()
-
-    message(STATUS "GPU_ARCHS is not defined. Generating CUDA code for default SMs: ${CMAKE_CUDA_ARCHITECTURES}")
-endif()
 set(BERT_GENCODES)
 # Generate SASS for each architecture
 foreach(arch ${CMAKE_CUDA_ARCHITECTURES})

README.md
Lines changed: 22 additions & 22 deletions

@@ -32,20 +32,20 @@ To build the TensorRT-OSS components, you will first need the following software
 
 **TensorRT GA build**
 
-- TensorRT v10.13.0.35
+- TensorRT v10.13.2.6
   - Available from direct download links listed below
 
 **System Packages**
 
 - [CUDA](https://developer.nvidia.com/cuda-toolkit)
   - Recommended versions:
+  - cuda-13.0.0
   - cuda-12.9.0
-  - cuda-11.8.0
 - [CUDNN (optional)](https://developer.nvidia.com/cudnn)
   - cuDNN 8.9
 - [GNU make](https://ftp.gnu.org/gnu/make/) >= v4.1
-- [cmake](https://github.com/Kitware/CMake/releases) >= v3.13
-- [python](https://www.python.org/downloads/) >= v3.8, <= v3.10.x
+- [cmake](https://github.com/Kitware/CMake/releases) >= v3.31
+- [python](https://www.python.org/downloads/) >= v3.10, <= v3.13.x
 - [pip](https://pypi.org/project/pip/#history) >= v19.0
 - Essential utilities
   - [git](https://git-scm.com/downloads), [pkg-config](https://www.freedesktop.org/wiki/Software/pkg-config/), [wget](https://www.gnu.org/software/wget/faq.html#download)

@@ -86,24 +86,24 @@ To build the TensorRT-OSS components, you will first need the following software
 
 Else download and extract the TensorRT GA build from [NVIDIA Developer Zone](https://developer.nvidia.com) with the direct links below:
 
-- [TensorRT 10.13.0.35 for CUDA 11.8, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.0/tars/TensorRT-10.13.0.35.Linux.x86_64-gnu.cuda-11.8.tar.gz)
-- [TensorRT 10.13.0.35 for CUDA 12.9, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.0/tars/TensorRT-10.13.0.35.Linux.x86_64-gnu.cuda-12.9.tar.gz)
-- [TensorRT 10.13.0.35 for CUDA 11.8, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.0/zip/TensorRT-10.13.0.35.Windows.win10.cuda-11.8.zip)
-- [TensorRT 10.13.0.35 for CUDA 12.9, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.0/zip/TensorRT-10.13.0.35.Windows.win10.cuda-12.9.zip)
+- [TensorRT 10.13.2.6 for CUDA 13.0, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-10.13.2.6.Linux.x86_64-gnu.cuda-13.0.tar.gz)
+- [TensorRT 10.13.2.6 for CUDA 12.9, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-10.13.2.6.Linux.x86_64-gnu.cuda-12.9.tar.gz)
+- [TensorRT 10.13.2.6 for CUDA 13.0, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/zip/TensorRT-10.13.2.6.Windows.win10.cuda-13.0.zip)
+- [TensorRT 10.13.2.6 for CUDA 12.9, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/zip/TensorRT-10.13.2.6.Windows.win10.cuda-12.9.zip)
 
-**Example: Ubuntu 20.04 on x86-64 with cuda-12.9**
+**Example: Ubuntu 22.04 on x86-64 with cuda-13.0**
 
 ```bash
 cd ~/Downloads
-tar -xvzf TensorRT-10.13.0.35.Linux.x86_64-gnu.cuda-12.9.tar.gz
-export TRT_LIBPATH=`pwd`/TensorRT-10.13.0.35
+tar -xvzf TensorRT-10.13.2.6.Linux.x86_64-gnu.cuda-13.0.tar.gz
+export TRT_LIBPATH=`pwd`/TensorRT-10.13.2.6
 ```
 
 **Example: Windows on x86-64 with cuda-12.9**
 
 ```powershell
-Expand-Archive -Path TensorRT-10.13.0.35.Windows.win10.cuda-12.9.zip
-$env:TRT_LIBPATH="$pwd\TensorRT-10.13.0.35\lib"
+Expand-Archive -Path TensorRT-10.13.2.6.Windows.win10.cuda-12.9.zip
+$env:TRT_LIBPATH="$pwd\TensorRT-10.13.2.6\lib"
 ```
 
 ## Setting Up The Build Environment

@@ -112,10 +112,10 @@ For Linux platforms, we recommend that you generate a docker container for build
 
 1. #### Generate the TensorRT-OSS build container.
 
-   **Example: Ubuntu 20.04 on x86-64 with cuda-12.9 (default)**
+   **Example: Ubuntu 22.04 on x86-64 with cuda-13.0 (default)**
 
    ```bash
-   ./docker/build.sh --file docker/ubuntu-20.04.Dockerfile --tag tensorrt-ubuntu20.04-cuda12.9
+   ./docker/build.sh --file docker/ubuntu-22.04.Dockerfile --tag tensorrt-ubuntu22.04-cuda13.0
   ```
 
   **Example: Rockylinux8 on x86-64 with cuda-12.9**

@@ -137,9 +137,9 @@ For Linux platforms, we recommend that you generate a docker container for build
   ```
 
2. #### Launch the TensorRT-OSS build container.
-   **Example: Ubuntu 20.04 build container**
+   **Example: Ubuntu 22.04 build container**
   ```bash
-   ./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda12.9 --gpus all
+   ./docker/launch.sh --tag tensorrt-ubuntu22.04-cuda13.0 --gpus all
   ```
   > NOTE:
   > <br> 1. Use the `--tag` corresponding to build container generated in Step 1.

@@ -199,21 +199,21 @@ For Linux platforms, we recommend that you generate a docker container for build
   msbuild TensorRT.sln /property:Configuration=Release -m:$env:NUMBER_OF_PROCESSORS
   ```
 
-> NOTE: The default CUDA version used by CMake is 12.9.0. To override this, for example to 11.8, append `-DCUDA_VERSION=11.8` to the cmake command.
+> NOTE: The default CUDA version used by CMake is 13.0. To override this, for example to 12.9, append `-DCUDA_VERSION=12.9` to the cmake command.
 
 - Required CMake build arguments are:
   - `TRT_LIB_DIR`: Path to the TensorRT installation directory containing libraries.
   - `TRT_OUT_DIR`: Output directory where generated build artifacts will be copied.
 - Optional CMake build arguments:
   - `CMAKE_BUILD_TYPE`: Specify if binaries generated are for release or debug (contain debug symbols). Values consist of [`Release`] | `Debug`
-  - `CUDA_VERSION`: The version of CUDA to target, for example [`11.7.1`].
-  - `CUDNN_VERSION`: The version of cuDNN to target, for example [`8.6`].
-  - `PROTOBUF_VERSION`: The version of Protobuf to use, for example [`3.0.0`]. Note: Changing this will not configure CMake to use a system version of Protobuf, it will configure CMake to download and try building that version.
+  - `CUDA_VERSION`: The version of CUDA to target, for example [`12.9.9`].
+  - `CUDNN_VERSION`: The version of cuDNN to target, for example [`8.9`].
+  - `PROTOBUF_VERSION`: The version of Protobuf to use, for example [`3.20.1`]. Note: Changing this will not configure CMake to use a system version of Protobuf, it will configure CMake to download and try building that version.
   - `CMAKE_TOOLCHAIN_FILE`: The path to a toolchain file for cross compilation.
   - `BUILD_PARSERS`: Specify if the parsers should be built, for example [`ON`] | `OFF`. If turned OFF, CMake will try to find precompiled versions of the parser libraries to use in compiling samples. First in `${TRT_LIB_DIR}`, then on the system. If the build type is Debug, then it will prefer debug builds of the libraries before release versions if available.
   - `BUILD_PLUGINS`: Specify if the plugins should be built, for example [`ON`] | `OFF`. If turned OFF, CMake will try to find a precompiled version of the plugin library to use in compiling samples. First in `${TRT_LIB_DIR}`, then on the system. If the build type is Debug, then it will prefer debug builds of the libraries before release versions if available.
   - `BUILD_SAMPLES`: Specify if the samples should be built, for example [`ON`] | `OFF`.
-  - `GPU_ARCHS`: GPU (SM) architectures to target. By default we generate CUDA code for all major SMs. Specific SM versions can be specified here as a quoted space-separated list to reduce compilation time and binary size. Table of compute capabilities of NVIDIA GPUs can be found [here](https://developer.nvidia.com/cuda-gpus). Examples: - NVidia A100: `-DGPU_ARCHS="80"` - Tesla T4, GeForce RTX 2080: `-DGPU_ARCHS="75"` - Titan V, Tesla V100: `-DGPU_ARCHS="70"` - Multiple SMs: `-DGPU_ARCHS="80 75"`
+  - `GPU_ARCHS`: GPU (SM) architectures to target. By default we generate CUDA code for all major SMs. Specific SM versions can be specified here as a quoted space-separated list to reduce compilation time and binary size. Table of compute capabilities of NVIDIA GPUs can be found [here](https://developer.nvidia.com/cuda-gpus). Examples: - NVidia A100: `-DGPU_ARCHS="80"` - RTX 50 series: `-DGPU_ARCHS="120"` - Multiple SMs: `-DGPU_ARCHS="80 120"`
   - `TRT_PLATFORM_ID`: Bare-metal build (unlike containerized cross-compilation). Currently supported options: `x86_64` (default).
 
 # References

VERSION
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-10.13.0.35
+10.13.2.6

cmake/toolchains/cmake_aarch64_cross.toolchain
Lines changed: 5 additions & 2 deletions

@@ -22,8 +22,11 @@ set(TRT_PLATFORM_ID "aarch64")
 
 set(CUDA_PLATFORM_ID "sbsa-linux")
 
-set(CMAKE_C_COMPILER /usr/bin/aarch64-linux-gnu-gcc-8)
-set(CMAKE_CXX_COMPILER /usr/bin/aarch64-linux-gnu-g++-8)
+set(CMAKE_C_COMPILER /usr/bin/aarch64-linux-gnu-gcc)
+set(CMAKE_CXX_COMPILER /usr/bin/aarch64-linux-gnu-g++)
+set(CMAKE_C_COMPILER_ID "GNU")
+set(CMAKE_CXX_COMPILER_ID "GNU")
+set(CMAKE_CXX_COMPILE_FEATURES cxx_std_17)
 
 set(CMAKE_C_FLAGS "" CACHE STRING "" FORCE)
 set(CMAKE_CXX_FLAGS "" CACHE STRING "" FORCE)

demo/Diffusion/README.md
Lines changed: 3 additions & 3 deletions

@@ -7,7 +7,7 @@ This demo application ("demoDiffusion") showcases the acceleration of Stable Dif
 ### Clone the TensorRT OSS repository
 
 ```bash
-git clone [email protected]:NVIDIA/TensorRT.git -b release/10.13 --single-branch
+git clone [email protected]:NVIDIA/TensorRT.git -b release/10.13.2 --single-branch
 cd TensorRT
 ```

@@ -19,7 +19,7 @@ Install nvidia-docker using [these instructions](https://docs.nvidia.com/datacent
 docker run --rm -it --gpus all -v $PWD:/workspace nvcr.io/nvidia/pytorch:25.01-py3 /bin/bash
 ```
 
-NOTE: The demo supports CUDA>=11.8
+NOTE: The demo supports CUDA>=12

@@ -49,7 +49,7 @@ onnx                 1.15.0
 onnx-graphsurgeon    0.5.2
 onnxruntime          1.16.3
 polygraphy           0.49.9
-tensorrt             10.13.0.35
+tensorrt             10.13.2.6
 tokenizers           0.13.3
 torch                2.2.0
 transformers         4.42.2
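After installing the release wheels, a quick sanity check (a sketch, assuming the `tensorrt` wheel from this release is installed) can confirm the environment matches the pip list above:

```python
import tensorrt as trt

# Verify the installed wheel matches the release pinned above.
print(trt.__version__)
assert trt.__version__.startswith("10.13.2"), trt.__version__
```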

demo/Diffusion/demo_controlnet.py
Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@
 
 import controlnet_aux
 import torch
-from cuda import cudart
+from cuda.bindings import runtime as cudart
 from PIL import Image
 
 from demo_diffusion import dd_argparse
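This import change, repeated across the demo scripts below, tracks the cuda-python package restructuring: the runtime bindings moved from the top-level `cuda.cudart` module into the `cuda.bindings` subpackage. A minimal compatibility sketch, assuming only that one of the two layouts is installed:

```python
# Prefer the new cuda-python layout; fall back to the legacy one.
try:
    from cuda.bindings import runtime as cudart  # cuda-python with cuda.bindings
except ImportError:
    from cuda import cudart  # older cuda-python releases
```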

demo/Diffusion/demo_controlnet_sd35.py
Lines changed: 1 addition & 1 deletion

@@ -18,7 +18,7 @@
 import argparse
 
 import torch
-from cuda import cudart
+from cuda.bindings import runtime as cudart
 from PIL import Image
 
 from demo_diffusion import dd_argparse

demo/Diffusion/demo_diffusion/engine.py
Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@
 import numpy as np
 import tensorrt as trt
 import torch
-from cuda import cudart
+from cuda.bindings import runtime as cudart
 from polygraphy.backend.common import bytes_from_path
 from polygraphy.backend.trt import (
     engine_from_bytes,
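For reference, `cuda.bindings.runtime` keeps the same calling convention as the old `cuda.cudart` module: every call returns a status code first. A short usage sketch (illustrative only, not from this commit; requires cuda-python and a CUDA-capable system):

```python
from cuda.bindings import runtime as cudart

# Each cudart call returns (cudaError_t, *results); check the status first.
err, stream = cudart.cudaStreamCreate()
if err != cudart.cudaError_t.cudaSuccess:
    raise RuntimeError(f"cudaStreamCreate failed: {err}")

(err,) = cudart.cudaStreamDestroy(stream)
assert err == cudart.cudaError_t.cudaSuccess
```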
