Description
I get the following error:
[TensorRT] IExecutionContext::executeV2: Error Code 3: API Usage Error (Parameter check failed, condition: nullPtrAllowed. Tensor "output" is bound to nullptr, which is allowed only for an empty input tensor, shape tensor, or an output tensor associated with an IOutputAllocator.)
while running a toy example inference, even though the memory seems to be allocated correctly. The full main.cpp is below:
#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <fstream>
#include <vector>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

#define ALIGN_TO(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))
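// Quick compile-time sanity check of the macro above (not needed for the repro): it rounds
// a byte count up to the next multiple of a power-of-two alignment.
static_assert(ALIGN_TO(1000, 256) == 1024, "1000 rounds up to 1024");
static_assert(ALIGN_TO(1024, 256) == 1024, "already-aligned sizes are unchanged");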
// Function to load the engine from a file
std::vector<char> loadEngineFile(const std::string& engineFilePath) {
    std::ifstream file(engineFilePath, std::ios::binary);
    if (!file) throw std::runtime_error("Failed to open engine file.");
    return std::vector<char>((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
}
// Logger class implementing TensorRT's ILogger interface
class Logger : public nvinfer1::ILogger {
public:
    void log(Severity severity, const char* msg) noexcept override {
        // Print messages of severity kINFO or more severe (lower enum values are more severe,
        // so this filters out only kVERBOSE)
        if (severity <= Severity::kINFO) {
            std::cerr << "[TensorRT] " << msg << std::endl;
        }
    }
};
// Function to print elements from a void* blob
void printBlob(const void* blob, size_t count, size_t elementSize, const std::string& type) {
    if (blob == nullptr) {
        std::cerr << "Error: blob is null!" << std::endl;
        return;
    }
    // float* host_data = new float[count];
    // cudaMemcpy(host_data, blob, count * sizeof(float), cudaMemcpyDeviceToHost);
    if (type == "int") {
        const int* data = static_cast<const int*>(blob);
        for (size_t i = 0; i < count; ++i) {
            std::cout << data[i] << " ";
        }
    }
    std::cout << std::endl;
}
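// Since the engine's tensors are all float32, a float view of the same bytes would arguably be
// more useful for debugging; a small sketch of such a helper (the repro below keeps the "int" view):
void printFloatBlob(const float* data, size_t count) {
    if (data == nullptr) {
        std::cerr << "Error: blob is null!" << std::endl;
        return;
    }
    for (size_t i = 0; i < count; ++i) {
        std::cout << data[i] << " ";
    }
    std::cout << std::endl;
}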
// Global logger instance
Logger gLogger;

int getTotalSize(nvinfer1::Dims& dims) {
    int size = 1;
    for (int i = 0; i < dims.nbDims; i++) {
        size *= dims.d[i];
    }
    return size;
}
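// Defensive variant (sketch only): if the engine reported dynamic dimensions, getTensorShape()
// would return -1 for them and the simple product above would be meaningless. The engine used in
// this repro appears to be fully static, so main() keeps using getTotalSize().
int getTotalSizeChecked(nvinfer1::Dims& dims) {
    int size = 1;
    for (int i = 0; i < dims.nbDims; i++) {
        if (dims.d[i] < 0) return -1;  // dynamic dimension; must be resolved on the context first
        size *= dims.d[i];
    }
    return size;
}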
std::vector<nvinfer1::Dims> getTensorShapes(std::vector<std::string> tensorNames, nvinfer1::ICudaEngine* engine) {
    std::vector<nvinfer1::Dims> shapesToReturn;
    for (auto tensorName : tensorNames) {
        nvinfer1::Dims shape = engine->getTensorShape(tensorName.c_str());
        shapesToReturn.push_back(shape);
        std::string dimensions;
        for (int i = 0; i < shape.nbDims; i++) {
            dimensions += std::to_string(shape.d[i]);
            if (i < shape.nbDims - 1) {
                dimensions += ", ";
            }
        }
        gLogger.log(nvinfer1::ILogger::Severity::kINFO, ("\t" + tensorName + ", dim: (" + dimensions + "), total_size = " + std::to_string(getTotalSize(shape))).c_str());
        // Let's make sure the data types are all float32
        nvinfer1::DataType type = engine->getTensorDataType(tensorName.c_str());
        if (type != nvinfer1::DataType::kFLOAT) {
            gLogger.log(nvinfer1::ILogger::Severity::kERROR, "Only float32 inputs are supported.");
            exit(1);
        }
    }
    return shapesToReturn;
}
void** allocateDeviceMemory(std::vector<nvinfer1::Dims> shapes) {
    void** buffers = new void*[shapes.size()];
    size_t freeMem, totalMem;
    for (size_t i = 0; i < shapes.size(); i++) {
        auto shape = shapes[i];
        size_t totalSize = getTotalSize(shape);
        size_t aligned_memory_size = ALIGN_TO(totalSize * sizeof(float), 256);
        std::cout << "Aligned memory size: " << aligned_memory_size << std::endl;
        cudaMemGetInfo(&freeMem, &totalMem);
        std::cout << "Free memory: " << freeMem << " / Total memory: " << totalMem << std::endl;
        // Allocate memory
        cudaError_t err = cudaMalloc(reinterpret_cast<void**>(&buffers[i]), aligned_memory_size);
        if (err != cudaSuccess) {
            gLogger.log(nvinfer1::ILogger::Severity::kERROR, "Failed to allocate device memory.");
            std::cerr << "cudaMalloc failed for buffer " << i << " with error: " << cudaGetErrorString(err) << std::endl;
            exit(1);
        }
        // Fill the buffer with a recognisable pattern. Note that cudaMemset writes the value into
        // every byte, so each 4-byte int read back below prints as 218959117 (0x0D0D0D0D), not 13.
        int value = 13;
        err = cudaMemset(buffers[i], value, aligned_memory_size);
        if (err != cudaSuccess) {
            gLogger.log(nvinfer1::ILogger::Severity::kERROR, "Failed to set device memory.");
            std::cerr << "cudaMemset failed for buffer " << i << " with error: " << cudaGetErrorString(err) << std::endl;
            exit(1);
        }
        // Verify that the buffer is not null
        if (buffers[i] == nullptr) {
            gLogger.log(nvinfer1::ILogger::Severity::kERROR, "Buffer is null after allocation.");
            std::cerr << "Buffer " << i << " is null after allocation." << std::endl;
            exit(1);
        }
        // Get the number of elements in the shape
        int numElements = getTotalSize(shape);
        (void)numElements;  // not used further
        // Print the first few elements of the allocated memory
        std::cout << "Printing blob for buffer " << i << std::endl;
        int* host_data = new int[10];
        cudaMemcpy(host_data, buffers[i], 10 * sizeof(int), cudaMemcpyDeviceToHost);
        printBlob(host_data, 10, sizeof(int), "int");
        delete[] host_data;
        std::cout << "Done printing blob for buffer " << i << std::endl;
    }
    return buffers;
}
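// Hypothetical counterpart to allocateDeviceMemory() (sketch only, not used below; main() releases
// the buffers inline instead): frees each device buffer and then the pointer array itself.
void freeDeviceMemory(void** buffers, size_t count) {
    if (buffers == nullptr) return;
    for (size_t i = 0; i < count; ++i) {
        cudaFree(buffers[i]);  // cudaFree(nullptr) is a harmless no-op
    }
    delete[] buffers;
}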
int main() {
    // Initialize TensorRT runtime
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
    if (!runtime) {
        gLogger.log(nvinfer1::ILogger::Severity::kERROR, "Failed to create TensorRT runtime.");
        return 1;
    }
    // Load engine file
    std::vector<char> engineData = loadEngineFile("/workspace/onnx_to_tensorrt/super_resolution.engine");
    // Deserialize engine
    nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(engineData.data(), engineData.size());
    // List out inputs / outputs for the baseline model
    std::vector<std::string> inputTensorNames = {
        "input",
    };
    std::vector<std::string> outputTensorNames = {
        "output",
    };
    // Make sure inputs are correct
    gLogger.log(nvinfer1::ILogger::Severity::kINFO, "Input tensor names:");
    std::vector<nvinfer1::Dims> inputShapes = getTensorShapes(inputTensorNames, engine);
    // Make sure outputs are correct
    gLogger.log(nvinfer1::ILogger::Severity::kINFO, "Output tensor names:");
    std::vector<nvinfer1::Dims> outputShapes = getTensorShapes(outputTensorNames, engine);
    // Allocate device memory for inputs and outputs
    void* const* inputBuffers = allocateDeviceMemory(inputShapes);
    void* const* outputBuffers = allocateDeviceMemory(outputShapes); // We need this as well
    // Debug: Log allocated memory addresses and sizes
    nvinfer1::IExecutionContext* context = engine->createExecutionContext();
    for (size_t i = 0; i < inputShapes.size(); ++i) {
        size_t inputSize = getTotalSize(inputShapes[i]);
        auto const name = inputTensorNames[i];
        if (!context->setTensorAddress(name.c_str(), inputBuffers[i])) {
            gLogger.log(nvinfer1::ILogger::Severity::kERROR, "Failed to set tensor address.");
            exit(1);
        }
        gLogger.log(nvinfer1::ILogger::Severity::kINFO, ("Successfully bound input buffer \"" + inputTensorNames[i] + "\" address: " + std::to_string(reinterpret_cast<uintptr_t>(inputBuffers[i])) + ", size: " + std::to_string(inputSize)).c_str());
        int* host_data = new int[10];
        cudaMemcpy(host_data, inputBuffers[i], 10 * sizeof(int), cudaMemcpyDeviceToHost);
        printBlob(host_data, 10, sizeof(int), "int");
        delete[] host_data;
    }
    for (size_t i = 0; i < outputShapes.size(); ++i) {
        size_t outputSize = getTotalSize(outputShapes[i]);
        auto const name = outputTensorNames[i];
        if (!context->setTensorAddress(name.c_str(), outputBuffers[i])) {
            gLogger.log(nvinfer1::ILogger::Severity::kERROR, "Failed to set tensor address.");
            exit(1);
        }
        gLogger.log(nvinfer1::ILogger::Severity::kINFO, ("Successfully bound output buffer \"" + name + "\" address: " + std::to_string(reinterpret_cast<uintptr_t>(outputBuffers[i])) + ", size: " + std::to_string(outputSize)).c_str());
        int* host_data = new int[10];
        cudaMemcpy(host_data, outputBuffers[i], 10 * sizeof(int), cudaMemcpyDeviceToHost);
        printBlob(host_data, 10, sizeof(int), "int");
        delete[] host_data;
    }
    // Print out the names and locations of the I/O tensors
    for (int i = 0; i < engine->getNbIOTensors(); i++) {
        nvinfer1::TensorLocation tensor_location = engine->getTensorLocation(engine->getIOTensorName(i));
        std::string device = (tensor_location == nvinfer1::TensorLocation::kDEVICE) ? "GPU" : "CPU";
        gLogger.log(nvinfer1::ILogger::Severity::kINFO, ("Tensor name: \"" + std::string(engine->getIOTensorName(i)) + "\", device: " + device).c_str());
    }
    // Execute inference
    context->setDebugSync(true);
    std::cout << "Debug state: " << context->getDebugSync() << std::endl;
    bool executionSuccessful = context->executeV2(inputBuffers);
    if (!executionSuccessful) {
        gLogger.log(nvinfer1::ILogger::Severity::kERROR, "Inference execution failed.");
    } else {
        gLogger.log(nvinfer1::ILogger::Severity::kINFO, "Inference execution successful!!!!!!!!!!!!!!!!!!!!!!!!!");
    }
    // TODO: Do something with the output data
    // Copy output data to host
    // cudaMemcpy(hostOutputData, buffers[outputIndex], outputSize * sizeof(float), cudaMemcpyDeviceToHost);
    // Release resources
    for (size_t i = 0; i < inputShapes.size(); i++) {
        cudaFree(inputBuffers[i]);
    }
    for (size_t i = 0; i < outputShapes.size(); i++) {
        cudaFree(outputBuffers[i]);
    }
    delete[] inputBuffers;
    delete[] outputBuffers;
    // context->destroy();
    // engine->destroy();
    // runtime->destroy();
    return 0;
}
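For completeness, here is how I currently understand the 10.x binding rules, in case it is relevant: executeV2() seems to take one device pointer per I/O tensor (in getIOTensorName() order, inputs and outputs alike) and to override whatever was previously set via setTensorAddress(), whereas enqueueV3() relies purely on the addresses bound by name. The snippet below is only a sketch of the two call patterns I have been comparing; the helper name and the map of buffers are made up for illustration, and this is not a verified fix:

#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical helper (illustration only): run one inference given device pointers keyed by tensor name.
bool runInferenceSketch(nvinfer1::ICudaEngine* engine,
                        nvinfer1::IExecutionContext* context,
                        const std::unordered_map<std::string, void*>& deviceBuffers,
                        bool useEnqueueV3) {
    if (useEnqueueV3) {
        // Pattern B: bind every I/O tensor by name, then enqueue on a stream and wait.
        for (const auto& kv : deviceBuffers) {
            if (!context->setTensorAddress(kv.first.c_str(), kv.second)) return false;
        }
        cudaStream_t stream;
        if (cudaStreamCreate(&stream) != cudaSuccess) return false;
        bool ok = context->enqueueV3(stream);
        cudaStreamSynchronize(stream);
        cudaStreamDestroy(stream);
        return ok;
    }
    // Pattern A: executeV2() with one pointer per I/O tensor, in the order reported by getIOTensorName().
    std::vector<void*> bindings;
    for (int i = 0; i < engine->getNbIOTensors(); ++i) {
        auto it = deviceBuffers.find(engine->getIOTensorName(i));
        if (it == deviceBuffers.end()) return false;  // every I/O tensor needs a buffer
        bindings.push_back(it->second);
    }
    return context->executeV2(bindings.data());
}

If my reading of the executeV2() bindings order is wrong, that is exactly the kind of clarification I am hoping for here.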
To build the code I use the following CMakeLists.txt (also referenced under Steps To Reproduce below):
cmake_minimum_required(VERSION 3.16)
project(tensorrt_minimalistic)
# Set C++ standard and optimization flags
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Ofast -DNDEBUG -Wno-deprecated-declarations")
# For finding FindTensorRT.cmake
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
# Specify the path to TensorRT root directory (modify as needed)
if (NOT TensorRT_DIR)
    set(TensorRT_DIR /workspace/TensorRT/)
endif()
# Set CUDA root directory (modify as needed)
set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda)
# Include TensorRT and CUDA
find_package(TensorRT REQUIRED)
find_package(CUDA REQUIRED)
# Add include directories
include_directories(${CUDA_INCLUDE_DIRS} ${TensorRT_INCLUDE_DIRS} include)
# Add the main executable
add_executable(main main.cpp)
# Link TensorRT and CUDA libraries (target name matches the executable above)
target_link_libraries(main PUBLIC ${CUDA_LIBRARIES} ${TensorRT_LIBRARIES})
Environment
I'm running the code in a container provided in this repo (the 10.7 release, see details below), launching it with:
./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda12.6 --gpus all
TensorRT Version: 10.7, I'm at the following commit hash:
NVIDIA GPU: T1200
NVIDIA Driver Version: 535.183.01
CUDA Version: 12.6
CUDNN Version: 8.9.6.50 (I got it from the ubuntu-20.04.Dockerfile: https://github.com/NVIDIA/TensorRT/blob/release/10.7/docker/ubuntu-22.04.Dockerfile)
Operating System: Ubuntu 20.04 (not just the container, my host OS is the same)
Python Version (if applicable): Not applicable.
Tensorflow Version (if applicable): Not applicable.
PyTorch Version (if applicable): Not applicable.
Baremetal or Container (if so, version): https://github.com/NVIDIA/TensorRT/blob/release/10.7/docker/ubuntu-22.04.Dockerfile
Relevant Files
In the code I load a super_resolution.engine file, which I have supplied below.
Model link: https://drive.google.com/file/d/1c02RKBQDTJ-mo9WYh93xZy5nokEwJx_r/view?usp=sharing
Steps To Reproduce
To build the code I use the CMakeLists.txt shown above, build it with the standard CMake workflow, and then run the resulting binary with ./main, which gives me the error quoted under Description.
Commands or scripts: ./main (see details about building above)
Have you tried the latest release?: No, but I just noticed the 10.8 release. Should I give it a try?
Can this model run on other frameworks? For example run ONNX model with ONNXRuntime (polygraphy run <model.onnx> --onnxrt): No idea.