Skip to content

Commit 657cd4e

Browse files
committed
release yolov8face
1 parent fd393d0 commit 657cd4e

18 files changed

+1020
-47
lines changed

FaceAlgorithm/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
add_subdirectory ("face_detect")
33
add_subdirectory ("face_detect_yolov5face")
44
add_subdirectory ("face_detect_yolov7face")
5+
add_subdirectory ("face_detect_yolov8face")
56
add_subdirectory ("face_recognition")
67
add_subdirectory ("mask_recognition")
78
add_subdirectory ("gender_age_recognition")

FaceAlgorithm/face_detect_yolov7face/detector_yolov7face.h

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -35,25 +35,25 @@
3535

3636

3737
using namespace nvinfer1;
38-
struct affineMatrix //letter_box 仿射变换矩阵
39-
{
40-
float i2d[6]; //仿射变换正变换
41-
float d2i[6]; //仿射变换逆变换
42-
};
43-
struct bbox
44-
{
45-
float x1,x2,y1,y2;
46-
float landmarks[10]; //5个关键点
47-
float score;
48-
};
49-
const float color_list[5][3] =
50-
{
51-
{255, 0, 0},
52-
{0, 255, 0},
53-
{0, 0, 255},
54-
{0, 255, 255},
55-
{255,255,0},
56-
};
38+
// struct affineMatrix //letter_box 仿射变换矩阵
39+
// {
40+
// float i2d[6]; //仿射变换正变换
41+
// float d2i[6]; //仿射变换逆变换
42+
// };
43+
// struct bbox
44+
// {
45+
// float x1,x2,y1,y2;
46+
// float landmarks[10]; //5个关键点
47+
// float score;
48+
// };
49+
// const float color_list[5][3] =
50+
// {
51+
// {255, 0, 0},
52+
// {0, 255, 0},
53+
// {0, 0, 255},
54+
// {0, 255, 255},
55+
// {255,255,0},
56+
// };
5757
class Detector_Yolov7Face
5858
{
5959
public:

FaceAlgorithm/face_detect_yolov7face/yolov7face_postprocess.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ static __global__ void yolov7face_decode_kernel(float* predict,int NUM_BOX_ELEME
7878
*pout_item++ = left; //1
7979
*pout_item++ = top; //2
8080
*pout_item++ = right; //3
81-
*pout_item++ = bottom; //4
81+
*pout_item++= bottom; //4
8282
*pout_item++ = confidence; //5
8383
*pout_item++ = label; //6
8484
*pout_item++ = 1; //7 1 = keep, 0 = ignore
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Build script for the YOLOv8-face detector shared library (CUDA + TensorRT + OpenCV).
cmake_minimum_required(VERSION 2.6)

project(Yolov8FaceDetect)

# C++11, all warnings, no optimisation, full debug information.
set(CMAKE_CXX_FLAGS "-std=c++11 -Wall -O0 -g -rdynamic -g2 -ggdb")

# The resulting library is written to <top-level source dir>/lib.
set(LIBRARY_OUTPUT_PATH ${CMAKE_SOURCE_DIR}/lib)

find_package(OpenCV REQUIRED)
find_package(CUDA REQUIRED)

# Header search paths: project headers, shared helpers, OpenCV, CUDA, TensorRT.
# TensorRT_INCLUDE / TensorRT_LIB are not set in this file and must be provided by the caller.
include_directories(
    ${CMAKE_SOURCE_DIR}/include
    ${CMAKE_SOURCE_DIR}/common
    ${OpenCV_INCLUDE_DIRS}
    /usr/local/cuda/include
    ${TensorRT_INCLUDE}
)
link_directories(${TensorRT_LIB})

# Collect the headers and sources of this directory and of the shared "common" directory.
file(GLOB lib_header    "${CMAKE_CURRENT_SOURCE_DIR}/*.h")
file(GLOB common_header "${CMAKE_SOURCE_DIR}/common/*.h")
file(GLOB lib_src       "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
file(GLOB common_src    "${CMAKE_SOURCE_DIR}/common/*.cpp")

# Shared library built from the C++ sources plus the two CUDA translation units.
cuda_add_library(${PROJECT_NAME} SHARED
    ${lib_src}
    ${common_src}
    yolov8face_preprocess.cu
    yolov8face_postprocess.cu
)

# The CUDA runtime lives in a different directory on aarch64 targets.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    target_link_libraries(${PROJECT_NAME} "/usr/local/cuda/targets/aarch64-linux/lib/libcudart.so")
else()
    target_link_libraries(${PROJECT_NAME} "/usr/local/cuda/lib64/libcudart.so")
endif()

# TensorRT, cuDNN and OpenCV, in the same order as before.
target_link_libraries(${PROJECT_NAME}
    "${TensorRT_LIB}/libnvinfer.so"
    "${TensorRT_LIB}/libnvonnxparser.so"
    "${TensorRT_LIB}/libnvinfer_plugin.so"
    "${TensorRT_LIB}/libcudnn.so"
    ${OpenCV_LIBS}
)
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
#include "detector_yolov8face.h"
2+
using namespace std;
3+
4+
/// Constructs the detector without acquiring any resource.
/// This body assigns no member; InitDetector_Yolov8Face() performs the setup.
Detector_Yolov8Face::Detector_Yolov8Face() {}
7+
/// Empty destructor: nothing is freed here.
/// Buffers and TensorRT objects are released by ReleaseDetector_Yolov8Face().
Detector_Yolov8Face::~Detector_Yolov8Face() {}
10+
/// Prepares the detector: builds the TensorRT engine from the ONNX model when no
/// serialized engine exists yet, deserializes the engine, and allocates every
/// host/device buffer used by Detect_Yolov8Face().
///
/// @param config  Supplies the confidence/NMS thresholds, batch size, GPU id and
///                the path of the ONNX model.
/// @return HZ_SUCCESS when the detector is ready. HZ_WITHOUTMODEL when the ONNX
///         file or the engine file is missing, cannot be read or deserialized,
///         or does not expose the expected bindings/shapes.
HZFLAG Detector_Yolov8Face::InitDetector_Yolov8Face(Config& config)
{
    this->conf_thresh = config.yolov8face_confidence_thresh;
    this->nms_thresh  = config.yolov8face_nms_thresh;
    this->batch_size  = config.yolov8face_detect_bs;

    this->NUM_CLASSES     = 1;
    this->CKPT_NUM        = 5;
    // Each decoded record has 7 leading fields followed by (x, y) per landmark;
    // Detect_Yolov8Face() reads the landmarks starting at offset 7.
    this->NUM_BOX_ELEMENT = 7 + CKPT_NUM * 2;

    this->INPUT_BLOB_NAME  = "images";
    this->OUTPUT_BLOB_NAME = "output0";
    cudaSetDevice(config.gpu_id);

    // Engine file name: "<model path without .onnx>_batch=<bs>.engine".
    // When the path has no ".onnx" suffix the whole path is used as the stem,
    // instead of collapsing to a bare "_batch=<bs>.engine" in the working directory.
    const auto& onnx_path = config.Yolov8FactDetectModelPath;
    const size_t ext_pos = onnx_path.rfind(".onnx");
    const std::string stem = (std::string::npos != ext_pos) ? onnx_path.substr(0, ext_pos) : onnx_path;
    const std::string out_engine = stem + "_batch=" + std::to_string(config.yolov8face_detect_bs) + ".engine";

    if (!model_exists(out_engine))
    {
        std::cout << "Building engine, please wait for a while..." << std::endl;
        if (!model_exists(config.Yolov8FactDetectModelPath))
        {
            std::cout<<"yolov8s-face.onnx is not Exist!!!Please Check!"<<std::endl;
            return HZ_WITHOUTMODEL;
        }
        Onnx2Ttr onnx2trt;
        onnx2trt.onnxToTRTModel(gLogger,config.Yolov8FactDetectModelPath.c_str(),config.yolov8face_detect_bs,out_engine.c_str());
    }

    // Start from a known state so that the failure path below can tell which
    // TensorRT objects already exist.
    this->runtime = nullptr;
    this->engine  = nullptr;
    this->context = nullptr;

    // Logs the reason, destroys the TensorRT objects created so far and yields
    // the error code. Used instead of assert(), which is compiled out when
    // NDEBUG is defined and would let a null pointer be dereferenced.
    const auto fail = [this](const char* reason) -> HZFLAG
    {
        std::cout << reason << std::endl;
        if (context != nullptr) { context->destroy(); context = nullptr; }
        if (engine  != nullptr) { engine->destroy();  engine  = nullptr; }
        if (runtime != nullptr) { runtime->destroy(); runtime = nullptr; }
        return HZ_WITHOUTMODEL;
    };

    // Read the serialized engine into memory.
    std::ifstream file(out_engine, std::ios::binary);
    if (!file.good())
    {
        std::cout<<"yolov8s-face.engine model file not exist!"<<std::endl;
        return HZ_WITHOUTMODEL;
    }
    file.seekg(0, file.end);
    const std::streamoff engine_len = file.tellg();
    file.seekg(0, file.beg);
    if (engine_len <= 0)
    {
        return fail("yolov8s-face.engine model file is empty or unreadable!");
    }
    const size_t size = static_cast<size_t>(engine_len);
    std::vector<char> engine_bytes(size);
    file.read(engine_bytes.data(), static_cast<std::streamsize>(engine_len));
    if (!file)
    {
        return fail("yolov8s-face.engine model file could not be read completely!");
    }
    file.close();
    // The bytes live in a local buffer; do not leave the member pointing at freed memory.
    trtModelStream = nullptr;

    this->runtime = createInferRuntime(gLogger);
    if (this->runtime == nullptr)
    {
        return fail("createInferRuntime failed!");
    }
    this->engine = runtime->deserializeCudaEngine(engine_bytes.data(), size);
    if (this->engine == nullptr)
    {
        return fail("deserializeCudaEngine failed for yolov8s-face.engine!");
    }
    this->context = engine->createExecutionContext();
    if (this->context == nullptr)
    {
        return fail("createExecutionContext failed!");
    }

    // The rest of the class assumes exactly one input (binding 0) and one output (binding 1).
    if (engine->getNbBindings() != 2)
    {
        return fail("yolov8s-face.engine must have exactly 2 bindings!");
    }
    this->inputIndex  = engine->getBindingIndex(INPUT_BLOB_NAME);
    this->outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
    if (inputIndex != 0 || outputIndex != 1)
    {
        return fail("yolov8s-face.engine has unexpected input/output binding names!");
    }

    // Input is indexed as N,C,H,W; output as N,elements,candidates (e.g. 1*20*8400).
    const auto input_dims  = engine->getBindingDimensions(inputIndex);
    const auto output_dims = engine->getBindingDimensions(outputIndex);
    if (input_dims.nbDims < 4 || output_dims.nbDims < 3)
    {
        return fail("yolov8s-face.engine has unexpected tensor ranks!");
    }
    this->INPUT_W = input_dims.d[3];
    this->INPUT_H = input_dims.d[2];
    this->OUTPUT_ELEMENT    = output_dims.d[1];
    this->OUTPUT_CANDIDATES = output_dims.d[2];
    // Reject non-positive (e.g. dynamic, reported as -1) extents before they are
    // used to size allocations.
    if (INPUT_W <= 0 || INPUT_H <= 0 || OUTPUT_ELEMENT <= 0 || OUTPUT_CANDIDATES <= 0)
    {
        return fail("yolov8s-face.engine has non-positive tensor dimensions!");
    }
    this->OUTPUT_SIZE = this->OUTPUT_ELEMENT * this->OUTPUT_CANDIDATES;

    // Network input/output buffers on the device, sized for the whole batch.
    CHECK(cudaMalloc(&this->buffers[inputIndex], config.yolov8face_detect_bs * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CHECK(cudaMalloc(&this->buffers[outputIndex], config.yolov8face_detect_bs * OUTPUT_SIZE * sizeof(float)));
    // Stream on which preprocessing, inference and postprocessing are queued.
    CHECK(cudaStreamCreate(&stream));
    // Staging buffer for raw image bytes in pinned host memory ...
    CHECK(cudaMallocHost((void**)&img_host, config.yolov8face_detect_bs*MAX_IMAGE_INPUT_SIZE_THRESH * 3*sizeof(uint8_t)));
    // ... and its device-side counterpart.
    CHECK(cudaMalloc((void**)&img_device, config.yolov8face_detect_bs*MAX_IMAGE_INPUT_SIZE_THRESH * 3*sizeof(uint8_t)));

    // Decoded detections on the device: one counter followed by MAX_OBJECTS records.
    CHECK(cudaMalloc(&decode_ptr_device,sizeof(float)*(1+MAX_OBJECTS*NUM_BOX_ELEMENT)));

    // Scratch copy of one image's raw network output and its transposed form.
    CHECK(cudaMalloc((void**)&pre_predict, OUTPUT_SIZE * sizeof(float)));
    CHECK(cudaMalloc(&transpose_device, OUTPUT_SIZE * sizeof(float)));

    // Host staging area for one 2x3 inverse affine matrix.
    CHECK(cudaMallocHost(&affine_matrix_d2i_host,sizeof(float)*6));

    // Per-image buffers: inverse affine matrix (device) and decoded results (host).
    this->affine_matrix_d2i_device = new float*[batch_size];
    this->decode_ptr_host          = new float*[batch_size];
    for (size_t i = 0; i < batch_size; i++)
    {
        this->decode_ptr_host[i] = new float[(1+MAX_OBJECTS*NUM_BOX_ELEMENT)];
        CHECK(cudaMalloc(&this->affine_matrix_d2i_device[i],sizeof(float)*6));
    }
    return HZ_SUCCESS;
}
117+
118+
/// Runs face detection on a batch of images.
///
/// Exactly one result vector is appended to `dets` per entry of `ImgVec`, in
/// the same order. An image that is empty, is not CV_8UC3, does not fit the
/// staging buffer, or lies beyond the configured batch size yields an empty
/// result vector.
///
/// @param ImgVec  Input images; at most `batch_size` of them are processed.
/// @param dets    Output; detections kept after NMS are appended per image.
/// @return HZ_SUCCESS.
HZFLAG Detector_Yolov8Face::Detect_Yolov8Face(std::vector<cv::Mat>&ImgVec,std::vector<std::vector<Det>>& dets)
{
    const size_t requested = ImgVec.size();
    const size_t capacity  = static_cast<size_t>(batch_size);

    // All per-image buffers were allocated for `batch_size` images; processing
    // more would write past them.
    size_t detector_batchsize = requested;
    if (requested > capacity)
    {
        std::cout << "Detect_Yolov8Face: " << requested << " images exceed the configured batch size "
                  << capacity << "; the extra images are not processed." << std::endl;
        detector_batchsize = capacity;
    }

    // slot_valid[b] != 0 once image b has been preprocessed into its batch slot.
    std::vector<char> slot_valid(detector_batchsize, 0);

    // Number of floats one image occupies in the network input (3 * H * W).
    const size_t slot_elems    = static_cast<size_t>(INPUT_H) * INPUT_W * 3;
    // Size in bytes of img_host / img_device as allocated by InitDetector_Yolov8Face().
    const size_t staging_bytes = capacity * MAX_IMAGE_INPUT_SIZE_THRESH * 3 * sizeof(uint8_t);
    float* const input_base    = (float*)this->buffers[inputIndex];

    // prepare input data ---------------------------
    for (size_t b = 0; b < detector_batchsize; ++b)
    {
        const cv::Mat& img = ImgVec[b];
        if (img.empty() || img.data == NULL)
        {
            continue;
        }
        // The copies below assume packed 8-bit, 3-channel pixels.
        if (img.type() != CV_8UC3)
        {
            std::cout << "Detect_Yolov8Face: image " << b << " is not CV_8UC3, skipped." << std::endl;
            continue;
        }
        const size_t size_image = static_cast<size_t>(img.cols) * img.rows * 3 * sizeof(uint8_t);
        if (size_image > staging_bytes)
        {
            std::cout << "Detect_Yolov8Face: image " << b << " is larger than the staging buffer, skipped." << std::endl;
            continue;
        }

        // preprocess
        affineMatrix afmt;
        getd2i(afmt,cv::Size(INPUT_W,INPUT_H),cv::Size(img.cols,img.rows));
        memcpy(affine_matrix_d2i_host,afmt.d2i,sizeof(afmt.d2i));

        // memcpy needs the rows to be contiguous; clone() yields a packed copy otherwise.
        const cv::Mat packed = img.isContinuous() ? img : img.clone();
        memcpy(img_host, packed.data, size_image);

        CHECK(cudaMemcpy(img_device, img_host, size_image, cudaMemcpyHostToDevice));
        CHECK(cudaMemcpy(affine_matrix_d2i_device[b],affine_matrix_d2i_host,sizeof(afmt.d2i),cudaMemcpyHostToDevice));

        // The slot is derived from b, so a skipped image never shifts the
        // images that follow it into the wrong batch position.
        float* const slot = input_base + b * slot_elems;
        yolov8face_preprocess_kernel_img(img_device, img.cols, img.rows, slot, INPUT_W, INPUT_H,affine_matrix_d2i_device[b], stream);
        slot_valid[b] = 1;
    }

    // inference
    // NOTE(review): the return value of enqueueV2 is ignored, as before; no
    // error code suitable for an inference failure is visible from this file.
    (*context).enqueueV2((void**)this->buffers, stream, nullptr);

    // postprocess
    float* const output_base = (float *)this->buffers[outputIndex];
    for (size_t i = 0; i < detector_batchsize; i++)
    {
        if (!slot_valid[i])
        {
            continue;   // nothing meaningful was fed into this slot
        }
        float* const predict = output_base + i * OUTPUT_SIZE;

        // Reset the detection counter stored at the head of the decode buffer.
        CHECK(cudaMemsetAsync(decode_ptr_device,0,sizeof(int),stream));

        CHECK(cudaMemcpyAsync(pre_predict,predict,OUTPUT_SIZE * sizeof(float),cudaMemcpyDeviceToDevice, stream));
        // transpose [1 20 8400] to [1 8400 20]
        yolov8_transpose(pre_predict, this->OUTPUT_CANDIDATES,this->OUTPUT_ELEMENT,transpose_device, stream);

        yolov8face_decode_kernel_invoker(transpose_device,NUM_BOX_ELEMENT,OUTPUT_CANDIDATES,NUM_CLASSES,CKPT_NUM,
            this->conf_thresh,affine_matrix_d2i_device[i],decode_ptr_device,MAX_OBJECTS,stream);  //cuda decode
        yolov8face_nms_kernel_invoker(decode_ptr_device,this->nms_thresh, MAX_OBJECTS, stream,NUM_BOX_ELEMENT);  //cuda nms
        CHECK(cudaMemcpyAsync(decode_ptr_host[i],decode_ptr_device,sizeof(float)*(1+MAX_OBJECTS*NUM_BOX_ELEMENT),cudaMemcpyDeviceToHost,stream));
    }
    CHECK(cudaStreamSynchronize(stream));

    // Convert the decoded records into Det objects, one vector per input image.
    for (size_t k = 0; k < requested; k++)
    {
        std::vector<Det> det;
        if (k < detector_batchsize && slot_valid[k])
        {
            const float* const records = decode_ptr_host[k];
            // records[0] holds the number of decoded boxes.
            const int count = std::min((int)records[0],MAX_OBJECTS);
            for (int i = 0; i < count; i++)
            {
                const float* const item = records + 1 + i*NUM_BOX_ELEMENT;
                const int keep_flag = item[6];
                if (keep_flag != 1)
                {
                    continue;   // flagged as not kept
                }
                Det det_temp;
                det_temp.bbox.xmin = item[0];
                det_temp.bbox.ymin = item[1];
                det_temp.bbox.xmax = item[2];
                det_temp.bbox.ymax = item[3];
                det_temp.confidence= item[4];
                // Landmarks follow the 7 leading fields as (x, y) pairs.
                for (int id = 0; id < CKPT_NUM; id += 1)
                {
                    det_temp.key_points.push_back(item[7+2*id]);
                    det_temp.key_points.push_back(item[7+2*id+1]);
                }
                det.push_back(det_temp);
            }
        }
        dets.push_back(det);
    }
    return HZ_SUCCESS;
}
191+
/// Destroys the TensorRT context, engine and runtime and frees every host and
/// device buffer allocated by InitDetector_Yolov8Face().
///
/// @return HZ_SUCCESS.
HZFLAG Detector_Yolov8Face::ReleaseDetector_Yolov8Face()
{
    context->destroy();
    engine->destroy();
    runtime->destroy();

    // Per-image buffers.
    for (size_t i = 0; i < batch_size; i++)
    {
        CHECK(cudaFree(affine_matrix_d2i_device[i]));
        // Allocated with new float[...], so it must be released with delete[];
        // plain delete on an array is undefined behavior.
        delete[] decode_ptr_host[i];
    }
    delete [] decode_ptr_host;
    delete [] affine_matrix_d2i_device;

    // Shared staging and scratch buffers.
    CHECK(cudaFreeHost(affine_matrix_d2i_host));
    CHECK(cudaFree(img_device));
    CHECK(cudaFreeHost(img_host));
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex]));
    CHECK(cudaFree(decode_ptr_device));
    CHECK(cudaFree(pre_predict));
    CHECK(cudaFree(transpose_device));
    return HZ_SUCCESS;
}
213+
214+
/// Applies the 2x3 affine matrix `d2i` (six floats, row by row) to the point
/// (x, y). With the inverse letter-box matrix this maps network-input
/// coordinates back to original-image coordinates.
///
/// @param d2i  Six matrix coefficients: row 0 produces *ox, row 1 produces *oy.
/// @param x    Input x coordinate.
/// @param y    Input y coordinate.
/// @param ox   Receives the transformed x coordinate.
/// @param oy   Receives the transformed y coordinate.
void Detector_Yolov8Face::affine_project(float *d2i,float x,float y,float *ox,float *oy)
{
    const float* const row_x = d2i;       // coefficients 0..2
    const float* const row_y = d2i + 3;   // coefficients 3..5

    *ox = row_x[0] * x + row_x[1] * y + row_x[2];
    *oy = row_y[0] * x + row_y[1] * y + row_y[2];
}
219+
220+
/// Computes the letter-box affine transform from an image of size `from` to a
/// canvas of size `to` (uniform scale, centred), together with its inverse.
///
/// @param afmt  Receives the forward matrix in `i2d` and the inverse in `d2i`
///              (each a 2x3 matrix stored as six floats, row by row).
/// @param to    Destination size (the network input size at the call site).
/// @param from  Source image size; both extents are used as divisors, so they
///              must be non-zero.
void Detector_Yolov8Face::getd2i(affineMatrix &afmt,cv::Size to,cv::Size from)
{
    // Largest uniform scale at which the whole source still fits the destination.
    float scale = std::min(1.0*to.width/from.width, 1.0*to.height/from.height);

    // Forward transform: scale, then shift so the scaled image is centred.
    afmt.i2d[0]=scale;
    afmt.i2d[1]=0;
    afmt.i2d[2]=-scale*from.width*0.5+to.width*0.5;
    afmt.i2d[3]=0;
    afmt.i2d[4]=scale;
    afmt.i2d[5]=-scale*from.height*0.5+to.height*0.5;

    cv::Mat i2d_mat(2,3,CV_32F,afmt.i2d);
    // Let OpenCV allocate the result. Wrapping afmt.d2i as the output made the
    // copy below a memcpy of a buffer onto itself (overlapping memcpy).
    cv::Mat d2i_mat;
    cv::invertAffineTransform(i2d_mat,d2i_mat);
    memcpy(afmt.d2i, d2i_mat.ptr<float>(0), sizeof(afmt.d2i));
}

0 commit comments

Comments
 (0)