diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..6e53d16
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/yolov8-p2.iml b/.idea/yolov8-p2.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/.idea/yolov8-p2.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..d5332a5
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,55 @@
+cmake_minimum_required(VERSION 3.10)
+
+project(yolov8)
+
+add_definitions(-std=c++11)
+add_definitions(-DAPI_EXPORTS)
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_BUILD_TYPE Debug)
+
+set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)
+enable_language(CUDA)
+
+include_directories(${PROJECT_SOURCE_DIR}/include)
+include_directories(${PROJECT_SOURCE_DIR}/plugin)
+
+# include and link dirs of CUDA and TensorRT; you need to adapt them if yours differ
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+ message("embed_platform on")
+ include_directories(/usr/local/cuda/targets/aarch64-linux/include)
+ link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
+else()
+ message("embed_platform off")
+ # cuda
+ include_directories(/usr/local/cuda/include)
+ link_directories(/usr/local/cuda/lib64)
+
+ # tensorrt
+ include_directories(/home/lindsay/TensorRT-8.4.1.5/include)
+ link_directories(/home/lindsay/TensorRT-8.4.1.5/lib)
+# include_directories(/home/lindsay/TensorRT-7.2.3.4/include)
+# link_directories(/home/lindsay/TensorRT-7.2.3.4/lib)
+
+
+endif()
+
+add_library(myplugins SHARED ${PROJECT_SOURCE_DIR}/plugin/yololayer.cu)
+target_link_libraries(myplugins nvinfer cudart)
+
+find_package(OpenCV)
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+
+file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src/*.cu)
+add_executable(yolov8_det ${PROJECT_SOURCE_DIR}/yolov8_det.cpp ${SRCS})
+
+target_link_libraries(yolov8_det nvinfer)
+target_link_libraries(yolov8_det cudart)
+target_link_libraries(yolov8_det myplugins)
+target_link_libraries(yolov8_det ${OpenCV_LIBS})
+
+add_executable(yolov8_seg ${PROJECT_SOURCE_DIR}/yolov8_seg.cpp ${SRCS})
+target_link_libraries(yolov8_seg nvinfer cudart myplugins ${OpenCV_LIBS})
+
+add_executable(yolov8_cls ${PROJECT_SOURCE_DIR}/yolov8_cls.cpp ${SRCS})
+target_link_libraries(yolov8_cls nvinfer cudart myplugins ${OpenCV_LIBS})
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e1d95b3
--- /dev/null
+++ b/README.md
@@ -0,0 +1,125 @@
+# yolov8
+
+The PyTorch implementation is [ultralytics/yolov8](https://github.com/ultralytics/ultralytics/tree/main/ultralytics).
+
+The TensorRT code is derived from [xiaocao-tian/yolov8_tensorrt](https://github.com/xiaocao-tian/yolov8_tensorrt).
+
+## Contributors
+
+
+
+
+
+
+
+## Requirements
+
+- TensorRT 8.0+
+- OpenCV 3.4.0+
+
+## Different versions of yolov8
+
+Currently, we support yolov8
+
+- For yolov8, download the .pt file from [https://github.com/ultralytics/assets/releases](https://github.com/ultralytics/assets/releases), then follow the how-to-run steps on this page.
+
+## Config
+
+- Choose the model n/s/m/l/x from command line arguments.
+- Check more configs in [include/config.h](./include/config.h)
+
+## How to Run, yolov8n as example
+
+1. Generate .wts from PyTorch with .pt, or download .wts from the model zoo
+
+```
+// download https://github.com/ultralytics/assets/releases/yolov8n.pt
+cp {tensorrtx}/yolov8/gen_wts.py {ultralytics}/ultralytics
+cd {ultralytics}/ultralytics
+python gen_wts.py -w yolov8n.pt -o yolov8n.wts -t detect
+// a file 'yolov8n.wts' will be generated.
+```
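+
+The `.wts` file written by `gen_wts.py` (included in this repo) is a plain-text dump of the model's `state_dict`: the first line is the number of tensors, and each following line holds a tensor name, its element count, and the values as hex-encoded big-endian float32. A minimal Python sketch for reading it back, assuming the example file name from above:
+
+```python
+# Sketch only: parse a .wts file produced by gen_wts.py and print each tensor name/size.
+import struct
+
+with open('yolov8n.wts') as f:                 # file name is the example from step 1
+    count = int(f.readline())                  # first line: number of tensors
+    for _ in range(count):
+        tokens = f.readline().split()          # "name length hex hex hex ..."
+        name, length = tokens[0], int(tokens[1])
+        # each value is a big-endian float32 encoded as 8 hex characters
+        values = [struct.unpack('>f', bytes.fromhex(t))[0] for t in tokens[2:]]
+        assert len(values) == length
+        print(name, length)
+```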
+
+2. Build tensorrtx/yolov8 and run
+
+### Detection
+```
+cd {tensorrtx}/yolov8/
+// update kNumClass in config.h if your model is trained on a custom dataset
+mkdir build
+cd build
+cp {ultralytics}/ultralytics/yolov8n.wts {tensorrtx}/yolov8/build
+cmake ..
+make
+sudo ./yolov8_det -s [.wts] [.engine] [n/s/m/l/x] // serialize model to plan file
+sudo ./yolov8_det -d [.engine] [image folder] [c/g]  // deserialize and run inference; the images in [image folder] will be processed.
+// For example yolov8n
+sudo ./yolov8_det -s yolov8n.wts yolov8n.engine n
+sudo ./yolov8_det -d yolov8n.engine ../images c  // cpu postprocess
+sudo ./yolov8_det -d yolov8n.engine ../images g  // gpu postprocess
+```
+
+### Instance Segmentation
+```
+# Build and serialize TensorRT engine
+./yolov8_seg -s yolov8s-seg.wts yolov8s-seg.engine s
+
+# Download the labels file
+wget -O coco.txt https://raw.githubusercontent.com/amikelive/coco-labels/master/coco-labels-2014_2017.txt
+
+# Run inference with labels file
+./yolov8_seg -d yolov8s-seg.engine ../images c coco.txt
+```
+
+### Classification
+```
+cd {tensorrtx}/yolov8/
+// Download inference images
+wget https://raw.githubusercontent.com/lindsayshuo/infer_pic/main/1709970363.6990473rescls.jpg
+mkdir samples
+cp -r 1709970363.6990473rescls.jpg samples
+// Download ImageNet labels
+wget https://raw.githubusercontent.com/joannzhang00/ImageNet-dataset-classes-labels/main/imagenet_classes.txt
+
+// update kClsNumClass in config.h if your model is trained on a custom dataset
+mkdir build
+cd build
+cp {ultralytics}/ultralytics/yolov8n-cls.wts {tensorrtx}/yolov8/build
+cmake ..
+make
+sudo ./yolov8_cls -s [.wts] [.engine] [n/s/m/l/x] // serialize model to plan file
+sudo ./yolov8_cls -d [.engine] [image folder]  // deserialize and run inference; the images in [image folder] will be processed.
+
+// For example yolov8n
+sudo ./yolov8_cls -s yolov8n-cls.wts yolov8n-cls.engine n
+sudo ./yolov8_cls -d yolov8n-cls.engine ../samples
+```
+
+3. Optional: load and run the TensorRT model in Python
+
+```
+// install python-tensorrt, pycuda, etc.
+// ensure the yolov8n.engine and libmyplugins.so have been built
+python yolov8_det.py # Detection
+python yolov8_seg.py # Segmentation
+python yolov8_cls.py # Classification
+```
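+
+For reference, the core of the Python path is loading the plugin library before deserializing the engine; the provided scripts also handle preprocessing and postprocessing. A minimal sketch, assuming the engine name and build paths used above:
+
+```python
+# Sketch only: register the custom YoloLayer plugin, then deserialize the engine.
+import ctypes
+import tensorrt as trt
+
+ctypes.CDLL('./build/libmyplugins.so')      # plugin library built by CMake above
+logger = trt.Logger(trt.Logger.INFO)
+trt.init_libnvinfer_plugins(logger, '')
+
+with open('./build/yolov8n.engine', 'rb') as f, trt.Runtime(logger) as runtime:
+    engine = runtime.deserialize_cuda_engine(f.read())
+context = engine.create_execution_context()
+# from here, allocate I/O buffers (e.g. with pycuda) and call context.execute_v2(...)
+```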
+
+## INT8 Quantization
+
+1. Prepare calibration images; you can randomly select roughly 1000 images from your training set. For COCO, you can also download my calibration images `coco_calib` from [GoogleDrive](https://drive.google.com/drive/folders/1s7jE9DtOngZMzJC1uL307J2MiaGwdRSI?usp=sharing) or [BaiduPan](https://pan.baidu.com/s/1GOm_-JobpyLMAqZWCDUhKg) (password: a9wh)
+
+2. Unzip it in yolov8/build
+
+3. Set the macro `USE_INT8` in config.h and rebuild
+
+4. Serialize the model and test as in the detection steps above
+
+
+
+
+
+## More Information
+
+See the README on the [home page](https://github.com/wang-xinyu/tensorrtx).
+
diff --git a/gen_wts.py b/gen_wts.py
new file mode 100644
index 0000000..bad2c28
--- /dev/null
+++ b/gen_wts.py
@@ -0,0 +1,57 @@
+import sys
+import argparse
+import os
+import struct
+import torch
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Convert .pt file to .wts')
+ parser.add_argument('-w', '--weights', required=True,
+ help='Input weights (.pt) file path (required)')
+ parser.add_argument(
+ '-o', '--output', help='Output (.wts) file path (optional)')
+ parser.add_argument(
+ '-t', '--type', type=str, default='detect', choices=['detect', 'cls', 'seg'],
+        help='Model type: detect, cls or seg')
+ args = parser.parse_args()
+ if not os.path.isfile(args.weights):
+ raise SystemExit('Invalid input file')
+ if not args.output:
+ args.output = os.path.splitext(args.weights)[0] + '.wts'
+ elif os.path.isdir(args.output):
+ args.output = os.path.join(
+ args.output,
+ os.path.splitext(os.path.basename(args.weights))[0] + '.wts')
+ return args.weights, args.output, args.type
+
+
+pt_file, wts_file, m_type = parse_args()
+
+print(f'Generating .wts for {m_type} model')
+
+# Load model
+print(f'Loading {pt_file}')
+
+# Initialize
+device = 'cpu'
+
+# Load model
+model = torch.load(pt_file, map_location=device)['model'].float() # load to FP32
+
+if m_type in ['detect', 'seg']:
+ anchor_grid = model.model[-1].anchors * model.model[-1].stride[..., None, None]
+
+ delattr(model.model[-1], 'anchors')
+
+model.to(device).eval()
+
+with open(wts_file, 'w') as f:
+ f.write('{}\n'.format(len(model.state_dict().keys())))
+ for k, v in model.state_dict().items():
+ vr = v.reshape(-1).cpu().numpy()
+ f.write('{} {} '.format(k, len(vr)))
+ for vv in vr:
+ f.write(' ')
+ f.write(struct.pack('>f', float(vv)).hex())
+ f.write('\n')
diff --git a/include/block.h b/include/block.h
new file mode 100644
index 0000000..6ba5934
--- /dev/null
+++ b/include/block.h
@@ -0,0 +1,24 @@
+#pragma once
+#include