Commit a471b2a

TensorRT 10.13.2 OSS Release (#4556)
Signed-off-by: Kevin Chen <[email protected]>
1 parent: b8db91e
File tree: 81 files changed, +1247 -2025 lines changed


.github/workflows/docker-image.yml
Lines changed: 3 additions & 3 deletions

@@ -8,11 +8,11 @@ on:
 
 jobs:
 
-  build-ubuntu2004:
+  build-ubuntu2204:
 
     runs-on: ubuntu-latest
 
     steps:
     - uses: actions/checkout@v3
-    - name: Build TensorRT-OSS ubuntu20.04 container
-      run: docker build . --file docker/ubuntu-20.04.Dockerfile --build-arg uid=1000 --build-arg gid=1000 --tag tensorrt-ubuntu20.04:$(date +%s)
+    - name: Build TensorRT-OSS ubuntu22.04 container
+      run: docker build . --file docker/ubuntu-22.04.Dockerfile --build-arg uid=1000 --build-arg gid=1000 --tag tensorrt-ubuntu22.04:$(date +%s)

CHANGELOG.md
Lines changed: 5 additions & 0 deletions

@@ -1,5 +1,10 @@
 # TensorRT OSS Release Changelog
 
+## 10.13.2 GA - 2025-8-18
+- Added support for CUDA 13.0, dropped support for CUDA 11.X
+- Dropped support for Ubuntu 20.04
+- Dropped support for Python versions < 3.10 for samples and demos
+
 ## 10.13.0 GA - 2025-7-24
 - Plugin changes
   - Fixed a division-by-zero error in geluPlugin that occurred when the bias is omitted.
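Since this release drops Python versions below 3.10 for samples and demos, a minimal guard along these lines (a hypothetical sketch, not part of this commit) could fail fast in a sample's entry point:

```python
import sys

# Hypothetical guard reflecting the 10.13.2 requirement above:
# samples and demos now need Python >= 3.10.
if sys.version_info < (3, 10):
    sys.exit("TensorRT 10.13.2 samples and demos require Python >= 3.10")
```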

CMakeLists.txt
Lines changed: 18 additions & 17 deletions

@@ -66,6 +66,24 @@ endif()
 
 set(CMAKE_SKIP_BUILD_RPATH True)
 
+# Set CUDA architectures before enabling CUDA language to avoid detection issues in containers
+if (DEFINED GPU_ARCHS AND NOT GPU_ARCHS STREQUAL "")
+    message(STATUS "GPU_ARCHS defined as ${GPU_ARCHS}. Setting CUDA architectures for SM ${GPU_ARCHS}")
+    separate_arguments(GPU_ARCHS)
+    foreach(SM IN LISTS GPU_ARCHS)
+        list(APPEND CMAKE_CUDA_ARCHITECTURES "${SM}")
+    endforeach()
+else()
+    # Set default architectures for container builds where auto-detection fails
+    set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90)
+
+    if(CUDA_VERSION VERSION_GREATER_EQUAL 12.8)
+        list(APPEND CMAKE_CUDA_ARCHITECTURES 100 120)
+    endif()
+
+    message(STATUS "Setting default CUDA architectures for container build: ${CMAKE_CUDA_ARCHITECTURES}")
+endif()
+
 project(TensorRT
     LANGUAGES CXX CUDA
     VERSION ${TRT_VERSION}

@@ -177,23 +195,6 @@ endif()
 set(CUDA_LIBRARIES ${CUDART_LIB})
 
 ############################################################################################
-# CUDA targets
-
-if (DEFINED GPU_ARCHS)
-    message(STATUS "GPU_ARCHS defined as ${GPU_ARCHS}. Generating CUDA code for SM ${GPU_ARCHS}")
-    separate_arguments(GPU_ARCHS)
-    foreach(SM IN LISTS GPU_ARCHS)
-        list(APPEND CMAKE_CUDA_ARCHITECTURES "${SM}")
-    endforeach()
-else()
-    list(APPEND CMAKE_CUDA_ARCHITECTURES 72 75 80 86 87 89 90)
-
-    if(CUDA_VERSION VERSION_GREATER_EQUAL 12.8)
-        list(APPEND CMAKE_CUDA_ARCHITECTURES 100 120)
-    endif()
-
-    message(STATUS "GPU_ARCHS is not defined. Generating CUDA code for default SMs: ${CMAKE_CUDA_ARCHITECTURES}")
-endif()
 set(BERT_GENCODES)
 # Generate SASS for each architecture
 foreach(arch ${CMAKE_CUDA_ARCHITECTURES})

README.md
Lines changed: 22 additions & 22 deletions

@@ -32,20 +32,20 @@ To build the TensorRT-OSS components, you will first need the following software
 
 **TensorRT GA build**
 
-- TensorRT v10.13.0.35
+- TensorRT v10.13.2.6
   - Available from direct download links listed below
 
 **System Packages**
 
 - [CUDA](https://developer.nvidia.com/cuda-toolkit)
   - Recommended versions:
+  - cuda-13.0.0
   - cuda-12.9.0
-  - cuda-11.8.0
 - [CUDNN (optional)](https://developer.nvidia.com/cudnn)
   - cuDNN 8.9
 - [GNU make](https://ftp.gnu.org/gnu/make/) >= v4.1
-- [cmake](https://github.com/Kitware/CMake/releases) >= v3.13
-- [python](https://www.python.org/downloads/) >= v3.8, <= v3.10.x
+- [cmake](https://github.com/Kitware/CMake/releases) >= v3.31
+- [python](https://www.python.org/downloads/) >= v3.10, <= v3.13.x
 - [pip](https://pypi.org/project/pip/#history) >= v19.0
 - Essential utilities
   - [git](https://git-scm.com/downloads), [pkg-config](https://www.freedesktop.org/wiki/Software/pkg-config/), [wget](https://www.gnu.org/software/wget/faq.html#download)

@@ -86,24 +86,24 @@ To build the TensorRT-OSS components, you will first need the following software
 
 Else download and extract the TensorRT GA build from [NVIDIA Developer Zone](https://developer.nvidia.com) with the direct links below:
 
-- [TensorRT 10.13.0.35 for CUDA 11.8, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.0/tars/TensorRT-10.13.0.35.Linux.x86_64-gnu.cuda-11.8.tar.gz)
-- [TensorRT 10.13.0.35 for CUDA 12.9, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.0/tars/TensorRT-10.13.0.35.Linux.x86_64-gnu.cuda-12.9.tar.gz)
-- [TensorRT 10.13.0.35 for CUDA 11.8, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.0/zip/TensorRT-10.13.0.35.Windows.win10.cuda-11.8.zip)
-- [TensorRT 10.13.0.35 for CUDA 12.9, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.0/zip/TensorRT-10.13.0.35.Windows.win10.cuda-12.9.zip)
+- [TensorRT 10.13.2.6 for CUDA 13.0, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-10.13.2.6.Linux.x86_64-gnu.cuda-13.0.tar.gz)
+- [TensorRT 10.13.2.6 for CUDA 12.9, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-10.13.2.6.Linux.x86_64-gnu.cuda-12.9.tar.gz)
+- [TensorRT 10.13.2.6 for CUDA 13.0, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/zip/TensorRT-10.13.2.6.Windows.win10.cuda-13.0.zip)
+- [TensorRT 10.13.2.6 for CUDA 12.9, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/zip/TensorRT-10.13.2.6.Windows.win10.cuda-12.9.zip)
 
-**Example: Ubuntu 20.04 on x86-64 with cuda-12.9**
+**Example: Ubuntu 22.04 on x86-64 with cuda-13.0**
 
 ```bash
 cd ~/Downloads
-tar -xvzf TensorRT-10.13.0.35.Linux.x86_64-gnu.cuda-12.9.tar.gz
-export TRT_LIBPATH=`pwd`/TensorRT-10.13.0.35
+tar -xvzf TensorRT-10.13.2.6.Linux.x86_64-gnu.cuda-13.0.tar.gz
+export TRT_LIBPATH=`pwd`/TensorRT-10.13.2.6
 ```
 
 **Example: Windows on x86-64 with cuda-12.9**
 
 ```powershell
-Expand-Archive -Path TensorRT-10.13.0.35.Windows.win10.cuda-12.9.zip
-$env:TRT_LIBPATH="$pwd\TensorRT-10.13.0.35\lib"
+Expand-Archive -Path TensorRT-10.13.2.6.Windows.win10.cuda-12.9.zip
+$env:TRT_LIBPATH="$pwd\TensorRT-10.13.2.6\lib"
 ```
 
 ## Setting Up The Build Environment

@@ -112,10 +112,10 @@ For Linux platforms, we recommend that you generate a docker container for build
 
 1. #### Generate the TensorRT-OSS build container.
 
-   **Example: Ubuntu 20.04 on x86-64 with cuda-12.9 (default)**
+   **Example: Ubuntu 22.04 on x86-64 with cuda-13.0 (default)**
 
    ```bash
-   ./docker/build.sh --file docker/ubuntu-20.04.Dockerfile --tag tensorrt-ubuntu20.04-cuda12.9
+   ./docker/build.sh --file docker/ubuntu-22.04.Dockerfile --tag tensorrt-ubuntu22.04-cuda13.0
   ```
 
   **Example: Rockylinux8 on x86-64 with cuda-12.9**

@@ -137,9 +137,9 @@ For Linux platforms, we recommend that you generate a docker container for build
   ```
 
2. #### Launch the TensorRT-OSS build container.
-   **Example: Ubuntu 20.04 build container**
+   **Example: Ubuntu 22.04 build container**
   ```bash
-   ./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda12.9 --gpus all
+   ./docker/launch.sh --tag tensorrt-ubuntu22.04-cuda13.0 --gpus all
   ```
   > NOTE:
   > <br> 1. Use the `--tag` corresponding to build container generated in Step 1.

@@ -199,21 +199,21 @@ For Linux platforms, we recommend that you generate a docker container for build
   msbuild TensorRT.sln /property:Configuration=Release -m:$env:NUMBER_OF_PROCESSORS
   ```
 
-> NOTE: The default CUDA version used by CMake is 12.9.0. To override this, for example to 11.8, append `-DCUDA_VERSION=11.8` to the cmake command.
+> NOTE: The default CUDA version used by CMake is 13.0. To override this, for example to 12.9, append `-DCUDA_VERSION=12.9` to the cmake command.
 
 - Required CMake build arguments are:
   - `TRT_LIB_DIR`: Path to the TensorRT installation directory containing libraries.
   - `TRT_OUT_DIR`: Output directory where generated build artifacts will be copied.
 - Optional CMake build arguments:
   - `CMAKE_BUILD_TYPE`: Specify if binaries generated are for release or debug (contain debug symbols). Values consist of [`Release`] | `Debug`
-  - `CUDA_VERSION`: The version of CUDA to target, for example [`11.7.1`].
-  - `CUDNN_VERSION`: The version of cuDNN to target, for example [`8.6`].
-  - `PROTOBUF_VERSION`: The version of Protobuf to use, for example [`3.0.0`]. Note: Changing this will not configure CMake to use a system version of Protobuf, it will configure CMake to download and try building that version.
+  - `CUDA_VERSION`: The version of CUDA to target, for example [`12.9.9`].
+  - `CUDNN_VERSION`: The version of cuDNN to target, for example [`8.9`].
+  - `PROTOBUF_VERSION`: The version of Protobuf to use, for example [`3.20.1`]. Note: Changing this will not configure CMake to use a system version of Protobuf, it will configure CMake to download and try building that version.
   - `CMAKE_TOOLCHAIN_FILE`: The path to a toolchain file for cross compilation.
   - `BUILD_PARSERS`: Specify if the parsers should be built, for example [`ON`] | `OFF`. If turned OFF, CMake will try to find precompiled versions of the parser libraries to use in compiling samples. First in `${TRT_LIB_DIR}`, then on the system. If the build type is Debug, then it will prefer debug builds of the libraries before release versions if available.
   - `BUILD_PLUGINS`: Specify if the plugins should be built, for example [`ON`] | `OFF`. If turned OFF, CMake will try to find a precompiled version of the plugin library to use in compiling samples. First in `${TRT_LIB_DIR}`, then on the system. If the build type is Debug, then it will prefer debug builds of the libraries before release versions if available.
   - `BUILD_SAMPLES`: Specify if the samples should be built, for example [`ON`] | `OFF`.
-  - `GPU_ARCHS`: GPU (SM) architectures to target. By default we generate CUDA code for all major SMs. Specific SM versions can be specified here as a quoted space-separated list to reduce compilation time and binary size. Table of compute capabilities of NVIDIA GPUs can be found [here](https://developer.nvidia.com/cuda-gpus). Examples: - NVidia A100: `-DGPU_ARCHS="80"` - Tesla T4, GeForce RTX 2080: `-DGPU_ARCHS="75"` - Titan V, Tesla V100: `-DGPU_ARCHS="70"` - Multiple SMs: `-DGPU_ARCHS="80 75"`
+  - `GPU_ARCHS`: GPU (SM) architectures to target. By default we generate CUDA code for all major SMs. Specific SM versions can be specified here as a quoted space-separated list to reduce compilation time and binary size. Table of compute capabilities of NVIDIA GPUs can be found [here](https://developer.nvidia.com/cuda-gpus). Examples: - NVidia A100: `-DGPU_ARCHS="80"` - RTX 50 series: `-DGPU_ARCHS="120"` - Multiple SMs: `-DGPU_ARCHS="80 120"`
   - `TRT_PLATFORM_ID`: Bare-metal build (unlike containerized cross-compilation). Currently supported options: `x86_64` (default).
 
 # References

VERSION
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-10.13.0.35
+10.13.2.6

cmake/toolchains/cmake_aarch64_cross.toolchain
Lines changed: 5 additions & 2 deletions

@@ -22,8 +22,11 @@ set(TRT_PLATFORM_ID "aarch64")
 
 set(CUDA_PLATFORM_ID "sbsa-linux")
 
-set(CMAKE_C_COMPILER /usr/bin/aarch64-linux-gnu-gcc-8)
-set(CMAKE_CXX_COMPILER /usr/bin/aarch64-linux-gnu-g++-8)
+set(CMAKE_C_COMPILER /usr/bin/aarch64-linux-gnu-gcc)
+set(CMAKE_CXX_COMPILER /usr/bin/aarch64-linux-gnu-g++)
+set(CMAKE_C_COMPILER_ID "GNU")
+set(CMAKE_CXX_COMPILER_ID "GNU")
+set(CMAKE_CXX_COMPILE_FEATURES cxx_std_17)
 
 set(CMAKE_C_FLAGS "" CACHE STRING "" FORCE)
 set(CMAKE_CXX_FLAGS "" CACHE STRING "" FORCE)

demo/Diffusion/README.md
Lines changed: 3 additions & 3 deletions

@@ -7,7 +7,7 @@ This demo application ("demoDiffusion") showcases the acceleration of Stable Dif
 ### Clone the TensorRT OSS repository
 
 ```bash
-git clone [email protected]:NVIDIA/TensorRT.git -b release/10.13 --single-branch
+git clone [email protected]:NVIDIA/TensorRT.git -b release/10.13.2 --single-branch
 cd TensorRT
 ```

@@ -19,7 +19,7 @@ Install nvidia-docker using [these instructions](https://docs.nvidia.com/datacent
 docker run --rm -it --gpus all -v $PWD:/workspace nvcr.io/nvidia/pytorch:25.01-py3 /bin/bash
 ```
 
-NOTE: The demo supports CUDA>=11.8
+NOTE: The demo supports CUDA>=12

@@ -49,7 +49,7 @@ onnx                 1.15.0
 onnx-graphsurgeon    0.5.2
 onnxruntime          1.16.3
 polygraphy           0.49.9
-tensorrt             10.13.0.35
+tensorrt             10.13.2.6
 tokenizers           0.13.3
 torch                2.2.0
 transformers         4.42.2
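After installing the release wheels, a quick sanity check (a sketch, assuming the `tensorrt` wheel from this release is installed) can confirm the environment matches the pip list above:

```python
import tensorrt as trt

# Verify the installed wheel matches the release pinned above.
print(trt.__version__)
assert trt.__version__.startswith("10.13.2"), trt.__version__
```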

demo/Diffusion/demo_controlnet.py
Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@
 
 import controlnet_aux
 import torch
-from cuda import cudart
+from cuda.bindings import runtime as cudart
 from PIL import Image
 
 from demo_diffusion import dd_argparse
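This import change, repeated across the demo scripts below, tracks the cuda-python package restructuring: the runtime bindings moved from the top-level `cuda.cudart` module into the `cuda.bindings` subpackage. A minimal compatibility sketch, assuming only that one of the two layouts is installed:

```python
# Prefer the new cuda-python layout; fall back to the legacy one.
try:
    from cuda.bindings import runtime as cudart  # cuda-python with cuda.bindings
except ImportError:
    from cuda import cudart  # older cuda-python releases
```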

demo/Diffusion/demo_controlnet_sd35.py
Lines changed: 1 addition & 1 deletion

@@ -18,7 +18,7 @@
 import argparse
 
 import torch
-from cuda import cudart
+from cuda.bindings import runtime as cudart
 from PIL import Image
 
 from demo_diffusion import dd_argparse

demo/Diffusion/demo_diffusion/engine.py
Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@
 import numpy as np
 import tensorrt as trt
 import torch
-from cuda import cudart
+from cuda.bindings import runtime as cudart
 from polygraphy.backend.common import bytes_from_path
 from polygraphy.backend.trt import (
     engine_from_bytes,
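For reference, `cuda.bindings.runtime` keeps the same calling convention as the old `cuda.cudart` module: every call returns a status code first. A short usage sketch (illustrative only, not from this commit; requires cuda-python and a CUDA-capable system):

```python
from cuda.bindings import runtime as cudart

# Each cudart call returns (cudaError_t, *results); check the status first.
err, stream = cudart.cudaStreamCreate()
if err != cudart.cudaError_t.cudaSuccess:
    raise RuntimeError(f"cudaStreamCreate failed: {err}")

(err,) = cudart.cudaStreamDestroy(stream)
assert err == cudart.cudaError_t.cudaSuccess
```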
