Commit 821a8df

C++ Demo - Facial Expression Recognition (#233)

* cpp demo for facial expression recognition
* minor pr fix
* add empty line
* specified cxx version in the cmake list

1 parent fd2da74 commit 821a8df

File tree

3 files changed: +352 −0 lines changed

Diff for: models/facial_expression_recognition/CMakeLists.txt

+30
@@ -0,0 +1,30 @@
cmake_minimum_required(VERSION 3.24)
set(CMAKE_CXX_STANDARD 11)
set(project_name "opencv_zoo_face_expression_recognition")

PROJECT(${project_name})

set(OPENCV_VERSION "4.9.0")
set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")
find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})
# Find OpenCV; you may need to set the OpenCV_DIR variable
# to the absolute path of the directory containing the OpenCVConfig.cmake file,
# via the command line or GUI.

file(GLOB SourceFile
    "demo.cpp")
# If the package has been found, several variables will
# be set; you can find the full list with descriptions
# in the OpenCVConfig.cmake file.
# Print some messages showing some of them
message(STATUS "OpenCV library status:")
message(STATUS "  config: ${OpenCV_DIR}")
message(STATUS "  version: ${OpenCV_VERSION}")
message(STATUS "  libraries: ${OpenCV_LIBS}")
message(STATUS "  include path: ${OpenCV_INCLUDE_DIRS}")

# Declare the executable target built from your sources
add_executable(${project_name} ${SourceFile})

# Link your application with OpenCV libraries
target_link_libraries(${project_name} PRIVATE ${OpenCV_LIBS})
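
As the comments in this CMakeLists note, an alternative to passing OPENCV_INSTALLATION_PATH is pointing OpenCV_DIR straight at the directory containing OpenCVConfig.cmake. A minimal sketch; the path below is a typical Linux install location and is an assumption, not part of this commit:

```shell
# Point CMake directly at OpenCVConfig.cmake
# (the path below is an assumption; adjust to your install)
cmake -B build -D OpenCV_DIR=/usr/local/lib/cmake/opencv4 .
cmake --build build
```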

Diff for: models/facial_expression_recognition/README.md

+18
@@ -19,12 +19,30 @@ Results of accuracy evaluation on [RAF-DB](http://whdeng.cn/RAF/model1.html).
***NOTE***: This demo uses [../face_detection_yunet](../face_detection_yunet) as face detector, which supports 5-landmark detection for now (2021sep).

### Python

Run the following command to try the demo:

```shell
# recognize the facial expression on images
python demo.py --input /path/to/image -v
```

### C++

Install the latest OpenCV and CMake >= 3.24.0 to get started:

```shell
# A typical and default installation path of OpenCV is /usr/local
cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
cmake --build build

# detect on camera input
./build/opencv_zoo_face_expression_recognition
# detect on an image
./build/opencv_zoo_face_expression_recognition -i=/path/to/image
# get help messages
./build/opencv_zoo_face_expression_recognition -h
```

### Example outputs

Note: Zoom in to see the recognized facial expression in the top-left corner of each face box.
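
Before the C++ source below, it may help to note how the demo consumes YuNet output: each detection is a 15-column CV_32F row, and demo.cpp slices out the five landmarks for face alignment. A minimal sketch of that layout; the helper name `landmarksOf` is hypothetical, while the column indices are taken from demo.cpp:

```cpp
#include <opencv2/core.hpp>

// Row layout of a YuNet detection (15 x CV_32F):
//   [0..3]  face box x, y, w, h
//   [4..13] five (x, y) landmarks: right eye, left eye, nose tip,
//           right mouth corner, left mouth corner
//   [14]    detection confidence
// Hypothetical helper mirroring the slicing done in FER::infer below:
cv::Mat landmarksOf(const cv::Mat& faceRow)
{
    // take columns 4..13 and reshape to a 5-point, 2-channel Mat
    return faceRow(cv::Rect(4, 0, 10, 1)).reshape(2, 5);
}
```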

Diff for: models/facial_expression_recognition/demo.cpp

+304
@@ -0,0 +1,304 @@
#include "opencv2/opencv.hpp"

#include <map>
#include <vector>
#include <string>
#include <iostream>

using namespace std;
using namespace cv;
using namespace dnn;

// Backend-target pairs selectable via the -t/--backend_target option
std::vector<std::pair<int, int>> backend_target_pairs = {
    {DNN_BACKEND_OPENCV, DNN_TARGET_CPU},
    {DNN_BACKEND_CUDA, DNN_TARGET_CUDA},
    {DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16},
    {DNN_BACKEND_TIMVX, DNN_TARGET_NPU},
    {DNN_BACKEND_CANN, DNN_TARGET_NPU}
};

class FER
{
private:
    Net model;
    string modelPath;
    // Reference 5-point landmark positions for a 112x112 aligned face patch
    float std[5][2] = {
        {38.2946, 51.6963},
        {73.5318, 51.5014},
        {56.0252, 71.7366},
        {41.5493, 92.3655},
        {70.7299, 92.2041}
    };
    vector<String> expressionEnum = {
        "angry", "disgust", "fearful",
        "happy", "neutral", "sad", "surprised"
    };
    Mat stdPoints = Mat(5, 2, CV_32F, this->std);
    Size patchSize = Size(112, 112);
    Scalar imageMean = Scalar(0.5, 0.5, 0.5);
    Scalar imageStd = Scalar(0.5, 0.5, 0.5);

    const String inputNames = "data";
    const String outputNames = "label";

    int backend_id;
    int target_id;

public:
    FER(const string& modelPath,
        int backend_id = 0,
        int target_id = 0)
        : modelPath(modelPath), backend_id(backend_id), target_id(target_id)
    {
        this->model = readNet(modelPath);
        this->model.setPreferableBackend(backend_id);
        this->model.setPreferableTarget(target_id);
    }

    Mat preprocess(const Mat image, const Mat points)
    {
        // image alignment: warp the face so its landmarks match the reference points
        Mat transformation = estimateAffine2D(points, this->stdPoints);
        Mat aligned = Mat::zeros(this->patchSize.height, this->patchSize.width, image.type());
        warpAffine(image, aligned, transformation, this->patchSize);

        // image normalization to [-1, 1]
        aligned.convertTo(aligned, CV_32F, 1.0 / 255.0);
        aligned -= imageMean;
        aligned /= imageStd;

        return blobFromImage(aligned);
    }

    String infer(const Mat image, const Mat facePoints)
    {
        // columns 4..13 of a YuNet detection row hold the five (x, y) landmark pairs
        Mat points = facePoints(Rect(4, 0, facePoints.cols - 5, facePoints.rows)).reshape(2, 5);
        Mat inputBlob = preprocess(image, points);

        this->model.setInput(inputBlob, this->inputNames);
        Mat outputBlob = this->model.forward(this->outputNames);

        Point maxLoc;
        minMaxLoc(outputBlob, nullptr, nullptr, nullptr, &maxLoc);

        return getDesc(maxLoc.x);
    }

    String getDesc(int ind)
    {
        if (ind >= 0 && ind < static_cast<int>(this->expressionEnum.size()))
        {
            return this->expressionEnum[ind];
        }
        else
        {
            cerr << "Error: Index out of bounds." << endl;
            return "";
        }
    }
};

class YuNet
{
public:
    YuNet(const string& model_path,
          const Size& input_size = Size(320, 320),
          float conf_threshold = 0.6f,
          float nms_threshold = 0.3f,
          int top_k = 5000,
          int backend_id = 0,
          int target_id = 0)
        : model_path_(model_path), input_size_(input_size),
          conf_threshold_(conf_threshold), nms_threshold_(nms_threshold),
          top_k_(top_k), backend_id_(backend_id), target_id_(target_id)
    {
        model = FaceDetectorYN::create(model_path_, "", input_size_, conf_threshold_, nms_threshold_, top_k_, backend_id_, target_id_);
    }

    void setBackendAndTarget(int backend_id, int target_id)
    {
        backend_id_ = backend_id;
        target_id_ = target_id;
        model = FaceDetectorYN::create(model_path_, "", input_size_, conf_threshold_, nms_threshold_, top_k_, backend_id_, target_id_);
    }

    /* Overwrite the input size when creating the model. Size format: [Width, Height]. */
    void setInputSize(const Size& input_size)
    {
        input_size_ = input_size;
        model->setInputSize(input_size_);
    }

    Mat infer(const Mat image)
    {
        Mat res;
        model->detect(image, res);
        return res;
    }

private:
    Ptr<FaceDetectorYN> model;

    string model_path_;
    Size input_size_;
    float conf_threshold_;
    float nms_threshold_;
    int top_k_;
    int backend_id_;
    int target_id_;
};

cv::Mat visualize(const cv::Mat& image, const cv::Mat& faces, const vector<String>& expressions, float fps = -1.f)
{
    static cv::Scalar box_color{0, 255, 0};
    static std::vector<cv::Scalar> landmark_color{
        cv::Scalar(255,   0,   0), // right eye
        cv::Scalar(  0,   0, 255), // left eye
        cv::Scalar(  0, 255,   0), // nose tip
        cv::Scalar(255,   0, 255), // right mouth corner
        cv::Scalar(  0, 255, 255)  // left mouth corner
    };
    static cv::Scalar text_color{0, 255, 0};

    auto output_image = image.clone();

    if (fps >= 0)
    {
        cv::putText(output_image, cv::format("FPS: %.2f", fps), cv::Point(0, 15), cv::FONT_HERSHEY_SIMPLEX, 0.5, text_color, 2);
    }

    for (int i = 0; i < faces.rows; ++i)
    {
        // Draw bounding box
        int x1 = static_cast<int>(faces.at<float>(i, 0));
        int y1 = static_cast<int>(faces.at<float>(i, 1));
        int w = static_cast<int>(faces.at<float>(i, 2));
        int h = static_cast<int>(faces.at<float>(i, 3));
        cv::rectangle(output_image, cv::Rect(x1, y1, w, h), box_color, 2);

        // Expression as text
        String exp = expressions[i];
        cv::putText(output_image, exp, cv::Point(x1, y1 + 12), cv::FONT_HERSHEY_DUPLEX, 0.5, text_color);

        // Draw landmarks
        for (int j = 0; j < static_cast<int>(landmark_color.size()); ++j)
        {
            int x = static_cast<int>(faces.at<float>(i, 2 * j + 4)), y = static_cast<int>(faces.at<float>(i, 2 * j + 5));
            cv::circle(output_image, cv::Point(x, y), 2, landmark_color[j], 2);
        }
    }
    return output_image;
}

string keys =
    "{ help h           |                                                            | Print help message. }"
    "{ model m          | facial_expression_recognition_mobilefacenet_2022july.onnx | Usage: Path to the model, defaults to facial_expression_recognition_mobilefacenet_2022july.onnx }"
    "{ yunet_model ym   | ../face_detection_yunet/face_detection_yunet_2023mar.onnx | Usage: Path to the face detection yunet model, defaults to face_detection_yunet_2023mar.onnx }"
    "{ input i          |                                                            | Path to input image or video file. Skip this argument to capture frames from a camera. }"
    "{ backend_target t | 0                                                          | Choose one of the backend-target pairs to run this demo:\n"
        "0: (default) OpenCV implementation + CPU,\n"
        "1: CUDA + GPU (CUDA),\n"
        "2: CUDA + GPU (CUDA FP16),\n"
        "3: TIM-VX + NPU,\n"
        "4: CANN + NPU}"
    "{ save s           | false                                                      | Specify to save results. }"
    "{ vis v            | true                                                       | Specify to open a window for result visualization. }"
;

int main(int argc, char** argv)
{
    CommandLineParser parser(argc, argv, keys);

    parser.about("Facial Expression Recognition");
    if (parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }

    string modelPath = parser.get<string>("model");
    string yunetModelPath = parser.get<string>("yunet_model");
    string inputPath = parser.get<string>("input");
    uint8_t backendTarget = parser.get<uint8_t>("backend_target");
    bool saveFlag = parser.get<bool>("save");
    bool visFlag = parser.get<bool>("vis");

    if (modelPath.empty())
        CV_Error(Error::StsError, "Model file " + modelPath + " not found");

    if (yunetModelPath.empty())
        CV_Error(Error::StsError, "Face Detection Model file " + yunetModelPath + " not found");

    YuNet faceDetectionModel(yunetModelPath);
    FER expressionRecognitionModel(modelPath, backend_target_pairs[backendTarget].first, backend_target_pairs[backendTarget].second);

    VideoCapture cap;
    if (!inputPath.empty())
        cap.open(samples::findFile(inputPath));
    else
        cap.open(0);

    if (!cap.isOpened())
        CV_Error(Error::StsError, "Cannot open video or file");

    Mat frame;
    static const std::string kWinName = "Facial Expression Demo";

    while (waitKey(1) < 0)
    {
        cap >> frame;

        if (frame.empty())
        {
            if (inputPath.empty())
                cout << "Frame is empty" << endl;
            break;
        }

        faceDetectionModel.setInputSize(frame.size());

        Mat faces = faceDetectionModel.infer(frame);
        vector<String> expressions;

        for (int i = 0; i < faces.rows; ++i)
        {
            Mat face = faces.row(i);
            String exp = expressionRecognitionModel.infer(frame, face);
            expressions.push_back(exp);

            int x1 = static_cast<int>(faces.at<float>(i, 0));
            int y1 = static_cast<int>(faces.at<float>(i, 1));
            int w = static_cast<int>(faces.at<float>(i, 2));
            int h = static_cast<int>(faces.at<float>(i, 3));
            float conf = faces.at<float>(i, 14);

            std::cout << cv::format("%d: x1=%d, y1=%d, w=%d, h=%d, conf=%.4f expression=%s\n", i, x1, y1, w, h, conf, exp.c_str());
        }

        Mat res_frame = visualize(frame, faces, expressions);

        if (visFlag || inputPath.empty())
        {
            imshow(kWinName, res_frame);
            if (!inputPath.empty())
                waitKey(0);
        }
        if (saveFlag)
        {
            cout << "Results are saved to result.jpg" << endl;
            cv::imwrite("result.jpg", res_frame);
        }
    }

    return 0;
}
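
Once built, the backend-target pairs defined at the top of demo.cpp can be selected with the -t option declared in the keys string. A usage sketch, assuming an OpenCV build with the corresponding backend (here CUDA) enabled:

```shell
# run on camera input with the CUDA backend + GPU target (pair index 1)
./build/opencv_zoo_face_expression_recognition -t=1
# recognize on an image and save the visualization to result.jpg
./build/opencv_zoo_face_expression_recognition -i=/path/to/image -s=true
```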
