-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit dfd5bf9
Showing
33 changed files
with
5,529 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
cmake_minimum_required(VERSION 3.10)

project(yolov8)

# Defines API_EXPORTS for every target declared in this file (directory scope, as before).
add_definitions(-DAPI_EXPORTS)

# Request C++11 through CMake's language-standard variables rather than a raw
# "-std=c++11" definition, so host and CUDA compilers each get the flag in the
# form they understand.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Debug stays the default, but an explicit -DCMAKE_BUILD_TYPE=... is now honoured
# and multi-config generators are left alone.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Debug)
endif()

# Fall back to the conventional nvcc location only when the user has not chosen
# a CUDA compiler via -DCMAKE_CUDA_COMPILER=... or the CUDACXX environment variable.
if(NOT CMAKE_CUDA_COMPILER AND "$ENV{CUDACXX}" STREQUAL "")
  set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
endif()
enable_language(CUDA)

include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/plugin)

# TensorRT install prefix used on non-aarch64 hosts. The default preserves the
# previously hard-coded location; override with -DYOLOV8_TENSORRT_ROOT=/path/to/TensorRT.
set(YOLOV8_TENSORRT_ROOT "/home/lindsay/TensorRT-8.4.1.5" CACHE PATH
    "TensorRT installation prefix (must contain include/ and lib/)")

# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
  message("embed_platform on")
  include_directories(/usr/local/cuda/targets/aarch64-linux/include)
  link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
else()
  message("embed_platform off")
  # cuda
  include_directories(/usr/local/cuda/include)
  link_directories(/usr/local/cuda/lib64)

  # tensorrt
  include_directories(${YOLOV8_TENSORRT_ROOT}/include)
  link_directories(${YOLOV8_TENSORRT_ROOT}/lib)
endif()

# Plugin library built from the CUDA source in plugin/.
# PUBLIC keeps the transitive linking that the keyword-less signature provided.
add_library(myplugins SHARED ${PROJECT_SOURCE_DIR}/plugin/yololayer.cu)
target_link_libraries(myplugins PUBLIC nvinfer cudart)

# Fail at configure time when OpenCV is missing instead of at compile/link time.
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

# Shared sources compiled into every executable below.
# Note: the glob is evaluated at configure time; re-run cmake after adding files to src/.
file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src/*.cu)

# One executable per task: yolov8_det, yolov8_seg, yolov8_cls.
foreach(task det seg cls)
  add_executable(yolov8_${task} ${PROJECT_SOURCE_DIR}/yolov8_${task}.cpp ${SRCS})
  target_link_libraries(yolov8_${task} PRIVATE nvinfer cudart myplugins ${OpenCV_LIBS})
endforeach()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
# yolov8 | ||
|
||
The Pytorch implementation is [ultralytics/yolov8](https://github.com/ultralytics/ultralytics/tree/main/ultralytics). | ||
|
||
The tensorrt code is derived from [xiaocao-tian/yolov8_tensorrt](https://github.com/xiaocao-tian/yolov8_tensorrt) | ||
|
||
## Contributors | ||
|
||
<a href="https://github.com/xiaocao-tian"><img src="https://avatars.githubusercontent.com/u/65889782?v=4?s=48" width="40px;" alt=""/></a> | ||
<a href="https://github.com/lindsayshuo"><img src="https://avatars.githubusercontent.com/u/45239466?v=4?s=48" width="40px;" alt=""/></a> | ||
<a href="https://github.com/xinsuinizhuan"><img src="https://avatars.githubusercontent.com/u/40679769?v=4?s=48" width="40px;" alt=""/></a> | ||
<a href="https://github.com/Rex-LK"><img src="https://avatars.githubusercontent.com/u/74702576?s=48&v=4" width="40px;" alt=""/></a> | ||
<a href="https://github.com/emptysoal"><img src="https://avatars.githubusercontent.com/u/57931586?s=48&v=4" width="40px;" alt=""/></a> | ||
|
||
## Requirements | ||
|
||
- TensorRT 8.0+ | ||
- OpenCV 3.4.0+ | ||
|
||
## Different versions of yolov8 | ||
|
||
Currently, we support yolov8 | ||
|
||
- For yolov8, download the .pt file from [https://github.com/ultralytics/assets/releases](https://github.com/ultralytics/assets/releases), then follow the how-to-run steps on this page. | ||
|
||
## Config | ||
|
||
- Choose the model n/s/m/l/x from command line arguments. | ||
- Check more configs in [include/config.h](./include/config.h) | ||
|
||
## How to Run, yolov8n as example | ||
|
||
1. generate .wts from pytorch with .pt, or download .wts from model zoo | ||
|
||
``` | ||
// download https://github.com/ultralytics/assets/releases/yolov8n.pt | ||
cp {tensorrtx}/yolov8/gen_wts.py {ultralytics}/ultralytics | ||
cd {ultralytics}/ultralytics | ||
python gen_wts.py -w yolov8n.pt -o yolov8n.wts -t detect | ||
// a file 'yolov8n.wts' will be generated. | ||
``` | ||
|
||
2. build tensorrtx/yolov8 and run | ||
|
||
### Detection | ||
``` | ||
cd {tensorrtx}/yolov8/ | ||
// update kNumClass in config.h if your model is trained on custom dataset | ||
mkdir build | ||
cd build | ||
cp {ultralytics}/ultralytics/yolov8n.wts {tensorrtx}/yolov8/build | ||
cmake .. | ||
make | ||
sudo ./yolov8_det -s [.wts] [.engine] [n/s/m/l/x] // serialize model to plan file | ||
sudo ./yolov8_det -d [.engine] [image folder] [c/g] // deserialize and run inference, the images in [image folder] will be processed. | ||
// For example yolov8 | ||
sudo ./yolov8_det -s yolov8n.wts yolov8n.engine n | ||
sudo ./yolov8_det -d yolov8n.engine ../images c //cpu postprocess | ||
sudo ./yolov8_det -d yolov8n.engine ../images g //gpu postprocess | ||
``` | ||
|
||
### Instance Segmentation | ||
``` | ||
# Build and serialize TensorRT engine | ||
./yolov8_seg -s yolov8s-seg.wts yolov8s-seg.engine s | ||
# Download the labels file | ||
wget -O coco.txt https://raw.githubusercontent.com/amikelive/coco-labels/master/coco-labels-2014_2017.txt | ||
# Run inference with labels file | ||
./yolov8_seg -d yolov8s-seg.engine ../images c coco.txt | ||
``` | ||
|
||
### Classification | ||
``` | ||
cd {tensorrtx}/yolov8/ | ||
// Download inference images | ||
wget https://github.com/lindsayshuo/infer_pic/raw/main/1709970363.6990473rescls.jpg | ||
mkdir samples | ||
cp -r 1709970363.6990473rescls.jpg samples | ||
// Download ImageNet labels | ||
wget https://github.com/joannzhang00/ImageNet-dataset-classes-labels/raw/main/imagenet_classes.txt | ||
// update kClsNumClass in config.h if your model is trained on custom dataset | ||
mkdir build | ||
cd build | ||
cp {ultralytics}/ultralytics/yolov8n-cls.wts {tensorrtx}/yolov8/build | ||
cmake .. | ||
make | ||
sudo ./yolov8_cls -s [.wts] [.engine] [n/s/m/l/x] // serialize model to plan file | ||
sudo ./yolov8_cls -d [.engine] [image folder] // deserialize and run inference, the images in [image folder] will be processed. | ||
// For example yolov8n | ||
sudo ./yolov8_cls -s yolov8n-cls.wts yolov8n-cls.engine n | ||
sudo ./yolov8_cls -d yolov8n-cls.engine ../samples | ||
``` | ||
|
||
3. optional, load and run the tensorrt model in python | ||
|
||
``` | ||
// install python-tensorrt, pycuda, etc. | ||
// ensure the yolov8n.engine and libmyplugins.so have been built | ||
python yolov8_det.py # Detection | ||
python yolov8_seg.py # Segmentation | ||
python yolov8_cls.py # Classification | ||
``` | ||
|
||
# INT8 Quantization | ||
|
||
1. Prepare calibration images; you can randomly select about 1000 images from your training set. For coco, you can also download my calibration images `coco_calib` from [GoogleDrive](https://drive.google.com/drive/folders/1s7jE9DtOngZMzJC1uL307J2MiaGwdRSI?usp=sharing) or [BaiduPan](https://pan.baidu.com/s/1GOm_-JobpyLMAqZWCDUhKg) pwd: a9wh | ||
|
||
2. unzip it in yolov8/build | ||
|
||
3. set the macro `USE_INT8` in config.h and make | ||
|
||
4. serialize the model and test | ||
|
||
<p align="center"> | ||
<img src="https://user-images.githubusercontent.com/15235574/78247927-4d9fac00-751e-11ea-8b1b-704a0aeb3fcf.jpg" height="360px;"> | ||
</p> | ||
|
||
## More Information | ||
|
||
See the readme in [home page.](https://github.com/wang-xinyu/tensorrtx) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import sys | ||
import argparse | ||
import os | ||
import struct | ||
import torch | ||
|
||
|
||
def parse_args():
    """Parse the command line of the .pt -> .wts converter.

    Options:
        -w/--weights  path of the input .pt file (required, must exist).
        -o/--output   path of the output .wts file, or an existing directory
                      in which ``<weights basename>.wts`` is placed. When
                      omitted, the output sits next to the input with the
                      extension replaced by ``.wts``.
        -t/--type     one of 'detect', 'cls', 'seg' (default 'detect').

    Returns:
        tuple: ``(weights_path, output_path, model_type)``.

    Raises:
        SystemExit: if the weights path is not an existing file; argparse
            itself also exits on malformed arguments.
    """
    parser = argparse.ArgumentParser(description='Convert .pt file to .wts')
    parser.add_argument('-w', '--weights', required=True,
                        help='Input weights (.pt) file path (required)')
    parser.add_argument(
        '-o', '--output', help='Output (.wts) file path (optional)')
    parser.add_argument(
        '-t', '--type', type=str, default='detect', choices=['detect', 'cls', 'seg'],
        help='determines the model is detection/classification/segmentation')
    args = parser.parse_args()

    if not os.path.isfile(args.weights):
        raise SystemExit('Invalid input file')

    if not args.output:
        # No output given: write next to the input file.
        args.output = os.path.splitext(args.weights)[0] + '.wts'
    elif os.path.isdir(args.output):
        # Output is a directory: derive the file name from the input.
        args.output = os.path.join(
            args.output,
            os.path.splitext(os.path.basename(args.weights))[0] + '.wts')
    return args.weights, args.output, args.type
|
||
|
||
# Script body: load a checkpoint and dump every state_dict tensor to a text
# .wts file. Output format: first line is the tensor count; then one line per
# tensor holding "<name> <num_values> " followed by " <hex>" for each value,
# where <hex> is the big-endian float32 encoding of that value.
pt_file, wts_file, m_type = parse_args()

print(f'Generating .wts for {m_type} model')

# Load model
print(f'Loading {pt_file}')

# Initialize
device = 'cpu'

# Load model
# NOTE(review): torch.load unpickles the checkpoint, which can execute arbitrary
# code -- only convert checkpoints from a trusted source. Newer PyTorch releases
# may also need an explicit weights_only=False for full-model checkpoints; confirm
# against the installed version.
model = torch.load(pt_file, map_location=device)['model'].float()  # load to FP32

if m_type in ['detect', 'seg']:
    # Drop the 'anchors' attribute of the last module before export.
    # NOTE(review): indentation was reconstructed; this assumes only the
    # detect/seg last module carries 'anchors' -- confirm against the repository.
    delattr(model.model[-1], 'anchors')

model.to(device).eval()

# Build the state dict once and reuse it for both the count and the dump.
state = model.state_dict()
with open(wts_file, 'w') as f:
    f.write('{}\n'.format(len(state)))
    for k, v in state.items():
        vr = v.reshape(-1).cpu().numpy()
        f.write('{} {} '.format(k, len(vr)))
        # One write per tensor instead of two per value; bytes written are unchanged.
        f.write(''.join(' ' + struct.pack('>f', float(vv)).hex() for vv in vr))
        f.write('\n')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#pragma once | ||
#include <map> | ||
#include <vector> | ||
#include <string> | ||
#include "NvInfer.h" | ||
|
||
// Prototypes of the weight loader and the TensorRT network-building helpers.
// The definitions are not part of this header; comments below describe only
// what the signatures show.

// Returns a map from tensor name to nvinfer1::Weights read from `file`.
// NOTE(review): presumably `file` is the .wts file written by gen_wts.py -- confirm in the definition.
std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file); | ||
|
||
// Adds layers to `network` operating on `input` and returns an element-wise layer.
// NOTE(review): ch/k/s/p look like out-channels/kernel/stride/padding and `lname`
// like a weight-name prefix -- confirm in the definition.
// NOTE(review): `weightMap` is passed by value here (a copy per call), unlike C2 below.
nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap, | ||
nvinfer1::ITensor& input, int ch, int k, int s, int p, std::string lname); | ||
|
||
// Adds a "C2F" block to `network` on `input`; returns an element-wise layer.
// NOTE(review): meaning of c1/c2/n/shortcut/e is not visible here -- confirm in the definition.
nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap, | ||
nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname); | ||
|
||
// Adds a "C2" block to `network` on `input`; returns an element-wise layer.
// Same parameter list as C2F, except `weightMap` is taken by non-const reference.
nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights>& weightMap, | ||
nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname); | ||
|
||
// Adds an "SPPF" block to `network` on `input`; returns an element-wise layer.
// NOTE(review): `k` is presumably a pooling kernel size -- confirm in the definition.
nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap, | ||
nvinfer1::ITensor& input, int c1, int c2, int k, std::string lname); | ||
|
||
// Adds a "DFL" block to `network` on `input`; returns a shuffle layer.
// NOTE(review): semantics of ch/grid/k/s/p are not visible here -- confirm in the definition.
nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap, | ||
nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname); | ||
|
||
// Adds a plugin layer to `network` that consumes the concatenation layers in `dets`.
// `is_segmentation` defaults to false.
nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition *network, std::vector<nvinfer1::IConcatenationLayer*> dets, bool is_segmentation = false); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#ifndef ENTROPY_CALIBRATOR_H | ||
#define ENTROPY_CALIBRATOR_H | ||
|
||
#include <NvInfer.h> | ||
#include <string> | ||
#include <vector> | ||
#include "macros.h" | ||
|
||
//! \class Int8EntropyCalibrator2 | ||
//! | ||
//! \brief Implements Entropy calibrator 2. | ||
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2. | ||
//! | ||
// Declaration only: the member functions are defined elsewhere, so the notes
// on members below are hedged where the header alone cannot confirm them.
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 | ||
{ | ||
public: | ||
// Takes batch size, input width/height, an image directory, a calibration-table
// name, the input blob name, and a read_cache flag (defaults to true).
Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache = true); | ||
virtual ~Int8EntropyCalibrator2(); | ||
// Overrides of the nvinfer1::IInt8EntropyCalibrator2 interface.
int getBatchSize() const TRT_NOEXCEPT override; | ||
bool getBatch(void* bindings[], const char* names[], int nbBindings) TRT_NOEXCEPT override; | ||
const void* readCalibrationCache(size_t& length) TRT_NOEXCEPT override; | ||
void writeCalibrationCache(const void* cache, size_t length) TRT_NOEXCEPT override; | ||
|
||
private: | ||
// Presumably copies of the matching constructor arguments -- confirm in the definition.
int batchsize_; | ||
int input_w_; | ||
int input_h_; | ||
// NOTE(review): looks like the index of the next image in img_files_ -- confirm.
int img_idx_; | ||
std::string img_dir_; | ||
std::vector<std::string> img_files_; | ||
// NOTE(review): presumably the element count of one input batch -- confirm.
size_t input_count_; | ||
std::string calib_table_name_; | ||
// NOTE(review): raw pointer, not an owned copy (unlike the std::string members);
// if it is dereferenced later, the caller's string must outlive this object -- confirm.
const char* input_blob_name_; | ||
bool read_cache_; | ||
// NOTE(review): presumably a device buffer for the batch; ownership/free not visible here.
void* device_input_; | ||
std::vector<char> calib_cache_; | ||
}; | ||
|
||
#endif // ENTROPY_CALIBRATOR_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
// Precision selection macros; USE_FP32 is the one currently defined.
// NOTE(review): presumably exactly one of these should be defined at a time -- confirm where they are consumed.
//#define USE_FP16 | ||
#define USE_FP32 | ||
//#define USE_INT8 | ||
|
||
// Compile-time settings for the detection model.
// NOTE(review): the tensor names presumably must match the names used when the network is built -- confirm.
const static char *kInputTensorName = "images"; | ||
const static char *kOutputTensorName = "output"; | ||
// Number of detection classes.
const static int kNumClass = 10; | ||
const static int kBatchSize = 1; | ||
const static int kGpuId = 0; | ||
// Detection input height and width.
const static int kInputH = 640; | ||
const static int kInputW = 640; | ||
// NMS and confidence thresholds.
const static float kNmsThresh = 0.45f; | ||
const static float kConfThresh = 0.5f; | ||
// NOTE(review): looks like an upper bound on input image pixels (3000 x 3000) used to size buffers -- confirm.
const static int kMaxInputImageSize = 3000 * 3000; | ||
const static int kMaxNumOutputBbox = 1000; | ||
|
||
|
||
// Classification model's number of classes | ||
constexpr static int kClsNumClass = 1000; | ||
// Classification model's input shape | ||
constexpr static int kClsInputH = 224; | ||
constexpr static int kClsInputW = 224; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#ifndef TRTX_CUDA_UTILS_H_ | ||
#define TRTX_CUDA_UTILS_H_ | ||
|
||
#include <cassert>
#include <iostream>

#include <cuda_runtime_api.h>
|
||
#ifndef CUDA_CHECK
// Evaluates the CUDA runtime call `callstr` exactly once. On any result other
// than cudaSuccess it prints the numeric code, the cudaGetErrorString() text and
// the file:line of the call site to std::cerr, then hits assert(0).
// Note: assert is compiled out when NDEBUG is defined, so in such builds a
// failure is only logged and execution continues.
// Expands to a braced block, so it is used as a statement: CUDA_CHECK(cudaFree(p));
#define CUDA_CHECK(callstr)\
    {\
        cudaError_t error_code = (callstr);\
        if (error_code != cudaSuccess) {\
            std::cerr << "CUDA error " << error_code << " (" << cudaGetErrorString(error_code) << ") at " << __FILE__ << ":" << __LINE__ << std::endl;\
            assert(0);\
        }\
    }
#endif  // CUDA_CHECK
|
||
#endif // TRTX_CUDA_UTILS_H_ | ||
|
Oops, something went wrong.