update to main branch from tensorrtx
lindsayshuo committed Jun 28, 2024
1 parent 6ec8460 commit 1e004ac
Showing 19 changed files with 2,187 additions and 564 deletions.
10 changes: 7 additions & 3 deletions CMakeLists.txt
@@ -27,8 +27,8 @@ else()
# tensorrt
include_directories(/home/lindsay/TensorRT-8.4.1.5/include)
link_directories(/home/lindsay/TensorRT-8.4.1.5/lib)
# include_directories(/home/lindsay/TensorRT-7.2.3.4/include)
# link_directories(/home/lindsay/TensorRT-7.2.3.4/lib)


endif()
@@ -51,5 +51,9 @@ target_link_libraries(yolov8_det ${OpenCV_LIBS})
add_executable(yolov8_seg ${PROJECT_SOURCE_DIR}/yolov8_seg.cpp ${SRCS})
target_link_libraries(yolov8_seg nvinfer cudart myplugins ${OpenCV_LIBS})


add_executable(yolov8_pose ${PROJECT_SOURCE_DIR}/yolov8_pose.cpp ${SRCS})
target_link_libraries(yolov8_pose nvinfer cudart myplugins ${OpenCV_LIBS})

add_executable(yolov8_cls ${PROJECT_SOURCE_DIR}/yolov8_cls.cpp ${SRCS})
target_link_libraries(yolov8_cls nvinfer cudart myplugins ${OpenCV_LIBS})
46 changes: 37 additions & 9 deletions README.md
@@ -1,4 +1,4 @@
# yolov8
# YOLOv8

The PyTorch implementation is [ultralytics/yolov8](https://github.com/ultralytics/ultralytics/tree/main/ultralytics).

@@ -11,6 +11,7 @@ The tensorrt code is derived from [xiaocao-tian/yolov8_tensorrt](https://github.
<a href="https://github.com/xinsuinizhuan"><img src="https://avatars.githubusercontent.com/u/40679769?v=4?s=48" width="40px;" alt=""/></a>
<a href="https://github.com/Rex-LK"><img src="https://avatars.githubusercontent.com/u/74702576?s=48&v=4" width="40px;" alt=""/></a>
<a href="https://github.com/emptysoal"><img src="https://avatars.githubusercontent.com/u/57931586?s=48&v=4" width="40px;" alt=""/></a>
<a href="https://github.com/ChangjunDAI"><img src="https://avatars.githubusercontent.com/u/65420228?s=48&v=4" width="40px;" alt=""/></a>

## Requirements

@@ -33,11 +34,15 @@ Currently, we support yolov8

```
// download https://github.com/ultralytics/assets/releases/yolov8n.pt
// download https://github.com/lindsayshuo/yolov8-p2/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt (only for 10 cls p2 model)
cp {tensorrtx}/yolov8/gen_wts.py {ultralytics}/ultralytics
cd {ultralytics}/ultralytics
python gen_wts.py -w yolov8n.pt -o yolov8n.wts -t detect
// a file 'yolov8n.wts' will be generated.
// For p2 model
// download https://github.com/lindsayshuo/yolov8_p2_tensorrtx/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt (only for 10 cls p2 model)
python gen_wts.py -w VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt -o VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts -t detect (only for 10 cls p2 model)
// a file 'VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts' will be generated.
```
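For reference, the conversion above amounts to dumping every tensor of the checkpoint into the plain-text `.wts` format. A minimal Python sketch, assuming the usual tensorrtx layout (first line is the tensor count, then one `name count hex hex ...` line per tensor):

```
import struct
import torch

# Sketch of the .wts dump (assumes the standard tensorrtx text format:
# line 1 = number of tensors, then "name count hex hex ..." per tensor).
model = torch.load('yolov8n.pt', map_location='cpu')['model'].float()
with open('yolov8n.wts', 'w') as f:
    f.write('{}\n'.format(len(model.state_dict())))
    for name, tensor in model.state_dict().items():
        values = tensor.reshape(-1).cpu().numpy()
        f.write('{} {}'.format(name, len(values)))
        for v in values:
            f.write(' ' + struct.pack('>f', float(v)).hex())
        f.write('\n')
```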
@@ -55,13 +60,15 @@ cmake ..
make
sudo ./yolov8_det -s [.wts] [.engine] [n/s/m/l/x/n2/s2/m2/l2/x2/n6/s6/m6/l6/x6] // serialize model to plan file
sudo ./yolov8_det -d [.engine] [image folder] [c/g] // deserialize and run inference; the images in [image folder] will be processed.
// For example yolov8
// For example yolov8n
sudo ./yolov8_det -s yolov8n.wts yolov8.engine n
sudo ./yolov8_det -d yolov8n.engine ../images c //cpu postprocess
sudo ./yolov8_det -d yolov8n.engine ../images g //gpu postprocess
for p2 model:
change the "const static int kNumClass" in config.h to 10;
// For p2 model:
// change the "const static int kNumClass" in config.h to 10;
sudo ./yolov8_det -s VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.engine x2
wget https://github.com/lindsayshuo/yolov8-p2/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last/0000008_01999_d_0000040.jpg
cp -r 0000008_01999_d_0000040.jpg ../images
@@ -105,14 +112,35 @@ sudo ./yolov8_cls -s yolov8n-cls.wts yolov8-cls.engine n
sudo ./yolov8_cls -d yolov8n-cls.engine ../samples
```


### Pose Estimation
```
cd {tensorrtx}/yolov8/
// update "kNumClass = 1" in config.h
mkdir build
cd build
cp {ultralytics}/ultralytics/yolov8-pose.wts {tensorrtx}/yolov8/build
cmake ..
make
sudo ./yolov8_pose -s [.wts] [.engine] [n/s/m/l/x/n2/s2/m2/l2/x2/n6/s6/m6/l6/x6] // serialize model to plan file
sudo ./yolov8_pose -d [.engine] [image folder] [c/g] // deserialize and run inference; the images in [image folder] will be processed.
// For example yolov8-pose
sudo ./yolov8_pose -s yolov8n-pose.wts yolov8n-pose.engine n
sudo ./yolov8_pose -d yolov8n-pose.engine ../images c //cpu postprocess
sudo ./yolov8_pose -d yolov8n-pose.engine ../images g //gpu postprocess
```
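The pose engine emits 17 keypoints per detection as (x, y, conf) triplets (see `keypoints[51]` in include/types.h). A small sketch of drawing them, assuming that layout and the 0.5 `kConfThreshKeypoints` default from config.h; `draw_keypoints` is a hypothetical helper, not part of the repo:

```
import cv2
import numpy as np

# Hypothetical helper: draw one detection's keypoints.
# Assumes 51 floats = 17 keypoints x (x, y, conf), in image coordinates.
def draw_keypoints(img, keypoints, conf_thresh=0.5):
    for x, y, conf in np.asarray(keypoints, dtype=np.float32).reshape(17, 3):
        if conf > conf_thresh:
            cv2.circle(img, (int(x), int(y)), 3, (0, 255, 0), -1)
    return img
```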


4. Optional: load and run the TensorRT model in Python

```
// install python-tensorrt, pycuda, etc.
// ensure the yolov8n.engine and libmyplugins.so have been built
python yolov8_det.py # Detection
python yolov8_seg.py # Segmentation
python yolov8_cls.py # Classification
python yolov8_det_trt.py # Detection
python yolov8_seg_trt.py # Segmentation
python yolov8_cls_trt.py # Classification
python yolov8_pose_trt.py # Pose Estimation
```
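A minimal sketch of the deserialization step these scripts perform (TensorRT 8.x Python API; the plugin library must be loaded before the engine so the custom YOLO layer can be resolved — file paths here are assumptions):

```
import ctypes
import tensorrt as trt

ctypes.CDLL('./libmyplugins.so')  # load the plugin first (path assumed: yolov8/build)
logger = trt.Logger(trt.Logger.INFO)
with open('yolov8n.engine', 'rb') as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
context = engine.create_execution_context()
# ...allocate host/device buffers (e.g. with pycuda) and run context.execute_v2(bindings)
```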

# INT8 Quantization
@@ -121,7 +149,7 @@ python yolov8_cls.py # Classification

2. unzip it in yolov8/build

3. set the macro `USE_INT8` in config.h and make
3. set the macro `USE_INT8` in config.h, change `kInputQuantizationFolder` into your image folder path and make (see the config.h sketch after this list)

4. serialize the model and test

6 changes: 3 additions & 3 deletions gen_wts.py
@@ -1,4 +1,4 @@
import sys
import sys # noqa: F401
import argparse
import os
import struct
@@ -12,7 +12,7 @@ def parse_args():
parser.add_argument(
'-o', '--output', help='Output (.wts) file path (optional)')
parser.add_argument(
'-t', '--type', type=str, default='detect', choices=['detect', 'cls', 'seg'],
'-t', '--type', type=str, default='detect', choices=['detect', 'cls', 'seg', 'pose'],
help='determines the model type: detect/cls/seg/pose')
args = parser.parse_args()
if not os.path.isfile(args.weights):
@@ -39,7 +39,7 @@ def parse_args():
# Load model
model = torch.load(pt_file, map_location=device)['model'].float() # load to FP32

if m_type in ['detect', 'seg']:
if m_type in ['detect', 'seg', 'pose']:
anchor_grid = model.model[-1].anchors * model.model[-1].stride[..., None, None]

delattr(model.model[-1], 'anchors')
31 changes: 18 additions & 13 deletions include/block.h
@@ -1,25 +1,30 @@
#pragma once
#include <map>
#include <vector>
#include <string>
#include <vector>
#include "NvInfer.h"

std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file);

nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
nvinfer1::ITensor& input, int ch, int k, int s, int p, std::string lname);

nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname);
nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network,
std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
int ch, int k, int s, int p, std::string lname);

nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights>& weightMap,
nvinfer1::ITensor& input, int c1, int c2, int n, bool shortcut, float e, std::string lname);
nvinfer1::IElementWiseLayer* C2F(nvinfer1::INetworkDefinition* network,
std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1,
int c2, int n, bool shortcut, float e, std::string lname);

nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
nvinfer1::ITensor& input, int c1, int c2, int k, std::string lname);
nvinfer1::IElementWiseLayer* C2(nvinfer1::INetworkDefinition* network,
std::map<std::string, nvinfer1::Weights>& weightMap, nvinfer1::ITensor& input, int c1,
int c2, int n, bool shortcut, float e, std::string lname);

nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname);
nvinfer1::IElementWiseLayer* SPPF(nvinfer1::INetworkDefinition* network,
std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1,
int c2, int k, std::string lname);

nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition* network, std::vector<nvinfer1::IConcatenationLayer*> dets, const int* px_arry, int px_arry_num, bool is_segmentation);
nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname);

nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition* network,
std::vector<nvinfer1::IConcatenationLayer*> dets, const int* px_arry,
int px_arry_num, bool is_segmentation, bool is_pose);
11 changes: 7 additions & 4 deletions include/config.h
@@ -2,18 +2,21 @@
//#define USE_FP32
//#define USE_INT8

const static char *kInputTensorName = "images";
const static char *kOutputTensorName = "output";
const static int kNumClass = 10;
const static char* kInputTensorName = "images";
const static char* kOutputTensorName = "output";
const static int kNumClass = 80;
const static int kNumberOfPoints = 17; // number of keypoints total
const static int kBatchSize = 1;
const static int kGpuId = 0;
const static int kInputH = 640;
const static int kInputW = 640;
const static float kNmsThresh = 0.45f;
const static float kConfThresh = 0.5f;
const static float kConfThreshKeypoints = 0.5f; // keypoints confidence
const static int kMaxInputImageSize = 3000 * 3000;
const static int kMaxNumOutputBbox = 1000;

//Quantization input image folder path
const static char* kInputQuantizationFolder = "./coco_calib";

// Classfication model's number of classes
constexpr static int kClsNumClass = 1000;
13 changes: 11 additions & 2 deletions include/model.h
@@ -11,12 +11,21 @@ nvinfer1::IHostMemory* buildEngineYolov8DetP6(nvinfer1::IBuilder* builder, nvinf
nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw,
int& max_channels);

nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder,
nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw, int& max_channels);
nvinfer1::IHostMemory* buildEngineYolov8DetP2(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw,
int& max_channels);

nvinfer1::IHostMemory* buildEngineYolov8Cls(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw);

nvinfer1::IHostMemory* buildEngineYolov8Seg(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw,
int& max_channels);

nvinfer1::IHostMemory* buildEngineYolov8Pose(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw,
int& max_channels);

nvinfer1::IHostMemory* buildEngineYolov8PoseP6(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
nvinfer1::DataType dt, const std::string& wts_path, float& gd, float& gw,
int& max_channels);
25 changes: 16 additions & 9 deletions include/postprocess.h
@@ -1,23 +1,30 @@
#pragma once

#include "types.h"
#include "NvInfer.h"
#include <opencv2/opencv.hpp>
#include "NvInfer.h"
#include "types.h"

cv::Rect get_rect(cv::Mat& img, float bbox[4]);

void nms(std::vector<Detection>& res, float *output, float conf_thresh, float nms_thresh = 0.5);
void nms(std::vector<Detection>& res, float* output, float conf_thresh, float nms_thresh = 0.5);

void batch_nms(std::vector<std::vector<Detection>>& batch_res, float* output, int batch_size, int output_size,
float conf_thresh, float nms_thresh = 0.5);

void batch_nms(std::vector<std::vector<Detection>>& batch_res, float *output, int batch_size, int output_size, float conf_thresh, float nms_thresh = 0.5);
void draw_bbox(std::vector<cv::Mat>& img_batch, std::vector<std::vector<Detection>>& res_batch);

void draw_bbox(std::vector<cv::Mat> &img_batch, std::vector<std::vector<Detection>> &res_batch);
void draw_bbox_keypoints_line(std::vector<cv::Mat>& img_batch, std::vector<std::vector<Detection>>& res_batch);

void batch_process(std::vector<std::vector<Detection>> &res_batch, const float* decode_ptr_host, int batch_size, int bbox_element, const std::vector<cv::Mat>& img_batch);
void batch_process(std::vector<std::vector<Detection>>& res_batch, const float* decode_ptr_host, int batch_size,
int bbox_element, const std::vector<cv::Mat>& img_batch);

void process_decode_ptr_host(std::vector<Detection> &res, const float* decode_ptr_host, int bbox_element, cv::Mat& img, int count);
void process_decode_ptr_host(std::vector<Detection>& res, const float* decode_ptr_host, int bbox_element, cv::Mat& img,
int count);

void cuda_decode(float* predict, int num_bboxes, float confidence_threshold,float* parray,int max_objects, cudaStream_t stream);
void cuda_decode(float* predict, int num_bboxes, float confidence_threshold, float* parray, int max_objects,
cudaStream_t stream);

void cuda_nms(float* parray, float nms_threshold, int max_objects, cudaStream_t stream);

void draw_mask_bbox(cv::Mat& img, std::vector<Detection>& dets, std::vector<cv::Mat>& masks, std::unordered_map<int, std::string>& labels_map);
void draw_mask_bbox(cv::Mat& img, std::vector<Detection>& dets, std::vector<cv::Mat>& masks,
std::unordered_map<int, std::string>& labels_map);
14 changes: 8 additions & 6 deletions include/types.h
@@ -2,15 +2,17 @@
#include "config.h"

struct alignas(float) Detection {
//center_x center_y w h
float bbox[4];
float conf; // bbox_conf * cls_conf
float class_id;
float mask[32];
//center_x center_y w h
float bbox[4];
float conf; // bbox_conf * cls_conf
float class_id;
float mask[32];
float keypoints[51]; // 17*3 keypoints
};

struct AffineMatrix {
float value[6];
};

const int bbox_element = sizeof(AffineMatrix) / sizeof(float)+1; // left, top, right, bottom, confidence, class, keepflag
const int bbox_element =
sizeof(AffineMatrix) / sizeof(float) + 1; // left, top, right, bottom, confidence, class, keepflag
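For orientation, a sketch of consuming the host-side decode buffer under the layout the `bbox_element` comment implies (one leading count float, then `bbox_element` floats per box; this layout is an assumption, not code from the repo):

```
#include <algorithm>
#include <vector>

struct Box { float left, top, right, bottom, conf, cls, keep; };

// Unpack decode_ptr_host: [count, box0..., box1..., ...], bbox_element floats per box.
std::vector<Box> unpack(const float* decode_ptr_host, int bbox_element, int max_boxes) {
    std::vector<Box> boxes;
    int count = std::min(static_cast<int>(decode_ptr_host[0]), max_boxes);
    for (int i = 0; i < count; ++i) {
        const float* p = decode_ptr_host + 1 + i * bbox_element;
        if (p[6] < 0.5f) continue;  // keepflag: boxes suppressed by NMS are skipped
        boxes.push_back({p[0], p[1], p[2], p[3], p[4], p[5], p[6]});
    }
    return boxes;
}
```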
