Skip to content

Commit 3f545dc

Browse files
author
Roman Donchenko
authored
Merge pull request openvinotoolkit#1415 from Wovchena/gaze_estimation-add-images_capture
gaze_estimation: add images_capture
2 parents b2e6625 + 5018c49 commit 3f545dc

File tree

9 files changed: +116 additions, -103 deletions

demos/common/include/samples/args_helper.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include <string>
1515
#include <vector>
1616

17+
#include <opencv2/core/types.hpp>
18+
1719
/**
1820
* @brief This function checks input args and existence of specified files in a given folder
1921
* @param arg path to a file to be checked for existence
@@ -34,3 +36,5 @@ std::vector<std::string> parseDevices(const std::string& device_string);
3436

3537
std::map<std::string, uint32_t> parseValuePerDevice(const std::set<std::string>& devices,
3638
const std::string& values_string);
39+
40+
cv::Size stringToSize(const std::string& str);

demos/common/include/samples/images_capture.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,5 @@ class ImagesCapture {
2929
// }
3030
std::unique_ptr<ImagesCapture> openImagesCapture(const std::string &input,
3131
bool loop, size_t initialImageId=0, // Non camera options
32-
size_t readLengthLimit=std::numeric_limits<size_t>::max() // General option
33-
);
32+
size_t readLengthLimit=std::numeric_limits<size_t>::max(), // General option
33+
cv::Size cameraResolution={1280, 720});

demos/common/monitors/src/presenter.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,11 @@ void Presenter::drawGraphs(cv::Mat& frame) {
141141

142142
if (cpuMonitor.getHistorySize() > 1 && possibleHistorySize > 1 && --numberOfEnabledMonitors >= 0) {
143143
std::deque<std::vector<double>> lastHistory = cpuMonitor.getLastHistory();
144-
cv::Mat graph = frame(cv::Rect{cv::Point{graphPos, yPos}, graphSize} & cv::Rect(0, 0, frame.cols, frame.rows));
144+
cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows};
145+
if (!intersection.area()) {
146+
return;
147+
}
148+
cv::Mat graph = frame(intersection);
145149
graph = graph / 2 + cv::Scalar{127, 127, 127};
146150

147151
int lineXPos = graph.cols - 1;
@@ -182,7 +186,11 @@ void Presenter::drawGraphs(cv::Mat& frame) {
182186

183187
if (distributionCpuEnabled && --numberOfEnabledMonitors >= 0) {
184188
std::deque<std::vector<double>> lastHistory = cpuMonitor.getLastHistory();
185-
cv::Mat graph = frame(cv::Rect{cv::Point{graphPos, yPos}, graphSize} & cv::Rect(0, 0, frame.cols, frame.rows));
189+
cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows};
190+
if (!intersection.area()) {
191+
return;
192+
}
193+
cv::Mat graph = frame(intersection);
186194
graph = graph / 2 + cv::Scalar{127, 127, 127};
187195

188196
if (!lastHistory.empty()) {
@@ -227,7 +235,11 @@ void Presenter::drawGraphs(cv::Mat& frame) {
227235

228236
if (memoryMonitor.getHistorySize() > 1 && possibleHistorySize > 1 && --numberOfEnabledMonitors >= 0) {
229237
std::deque<std::pair<double, double>> lastHistory = memoryMonitor.getLastHistory();
230-
cv::Mat graph = frame(cv::Rect{cv::Point{graphPos, yPos}, graphSize} & cv::Rect(0, 0, frame.cols, frame.rows));
238+
cv::Rect intersection = cv::Rect{cv::Point(graphPos, yPos), graphSize} & cv::Rect{0, 0, frame.cols, frame.rows};
239+
if (!intersection.area()) {
240+
return;
241+
}
242+
cv::Mat graph = frame(intersection);
231243
graph = graph / 2 + cv::Scalar{127, 127, 127};
232244
int histxPos = graph.cols - 1;
233245
double range = std::min(memoryMonitor.getMaxMemTotal() + memoryMonitor.getMaxSwap(),

demos/common/src/args_helper.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,11 @@ std::map<std::string, uint32_t> parseValuePerDevice(const std::set<std::string>&
128128
}
129129
return result;
130130
}
131+
132+
cv::Size stringToSize(const std::string& str) {
133+
std::vector<std::string> strings = split(str, 'x');
134+
if (strings.size() != 2) {
135+
throw std::invalid_argument("Can't convert std::string to cv::Size. The string must contain exactly one x");
136+
}
137+
return {std::stoi(strings[0]), std::stoi(strings[1])};
138+
}

demos/common/src/images_capture.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -112,14 +112,15 @@ class VideoCapWrapper : public ImagesCapture {
112112
size_t readLengthLimit;
113113

114114
public:
115-
VideoCapWrapper(const std::string &input, bool loop, size_t initialImageId, size_t readLengthLimit)
115+
VideoCapWrapper(const std::string &input, bool loop, size_t initialImageId, size_t readLengthLimit,
116+
cv::Size cameraResolution)
116117
: ImagesCapture{loop}, nextImgId{0}, initialImageId{static_cast<double>(initialImageId)} {
117118
try {
118119
cap.open(std::stoi(input));
119120
this->readLengthLimit = loop ? std::numeric_limits<size_t>::max() : readLengthLimit;
120121
cap.set(cv::CAP_PROP_BUFFERSIZE, 1);
121-
cap.set(cv::CAP_PROP_FRAME_WIDTH, 1280);
122-
cap.set(cv::CAP_PROP_FRAME_HEIGHT, 720);
122+
cap.set(cv::CAP_PROP_FRAME_WIDTH, cameraResolution.width);
123+
cap.set(cv::CAP_PROP_FRAME_HEIGHT, cameraResolution.height);
123124
cap.set(cv::CAP_PROP_AUTOFOCUS, true);
124125
cap.set(cv::CAP_PROP_FOURCC, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'));
125126
} catch (const std::invalid_argument&) {
@@ -162,7 +163,7 @@ class VideoCapWrapper : public ImagesCapture {
162163
};
163164

164165
std::unique_ptr<ImagesCapture> openImagesCapture(const std::string &input, bool loop, size_t initialImageId,
165-
size_t readLengthLimit) {
166+
size_t readLengthLimit, cv::Size cameraResolution) {
166167
if (readLengthLimit == 0) throw std::runtime_error{"Read length limit must be positive"};
167168
try {
168169
return std::unique_ptr<ImagesCapture>(new ImreadWrapper{input, loop});
@@ -171,7 +172,8 @@ std::unique_ptr<ImagesCapture> openImagesCapture(const std::string &input, bool
171172
return std::unique_ptr<ImagesCapture>(new DirReader{input, loop, initialImageId, readLengthLimit});
172173
} catch (const InvalidInput &) {}
173174
try {
174-
return std::unique_ptr<ImagesCapture>(new VideoCapWrapper{input, loop, initialImageId, readLengthLimit});
175+
return std::unique_ptr<ImagesCapture>(new VideoCapWrapper{input, loop, initialImageId, readLengthLimit,
176+
cameraResolution});
175177
} catch (const InvalidInput &) {}
176178
throw std::runtime_error{"Can't read " + input};
177179
}

demos/gaze_estimation_demo/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ Other demo objectives are:
2929

3030
Running the application with the `-h` option yields the following usage message:
3131
```
32-
./gaze_estimation_demo -h
3332
InferenceEngine:
3433
API version ............ <version>
3534
Build .................. <number>
@@ -38,7 +37,9 @@ gaze_estimation_demo [OPTION]
3837
Options:
3938
4039
-h Print a usage message.
41-
-i "<path>" Optional. Path to a video file. Default value is "cam" to work with camera.
40+
-i Required. An input to process. The input must be a single image, a folder of images or anything that cv::VideoCapture can process.
41+
-loop Optional. Enable reading the input in a loop.
42+
-res "<WxH>" Optional. Set camera resolution in format WxH.
4243
-m "<path>" Required. Path to an .xml file with a trained Gaze Estimation model.
4344
-m_fd "<path>" Required. Path to an .xml file with a trained Face Detection model.
4445
-m_hp "<path>" Required. Path to an .xml file with a trained Head Pose Estimation model.
@@ -48,8 +49,7 @@ Options:
4849
-d_fd "<device>" Optional. Target device for Face Detection network (the list of available devices is shown below). Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin. The demo will look for a suitable plugin for a specified device. Default value is "CPU".
4950
-d_hp "<device>" Optional. Target device for Head Pose Estimation network (the list of available devices is shown below). Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin. The demo will look for a suitable plugin for a specified device. Default value is "CPU".
5051
-d_lm "<device>" Optional. Target device for Facial Landmarks Estimation network (the list of available devices is shown below). Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin. The demo will look for a suitable plugin for a specified device. Default value is "CPU".
51-
-d_es "<device>" Optional. Target device for Open/Closed Eye Estimation network (the list of available devices is shown below). Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin. The demo will look for a suitable plugin for a specified device. Default value is "CPU".
52-
-res "<WxH>" Optional. Set camera resolution in format WxH.
52+
-d_es "<device>" Optional. Target device for Open/Closed Eye network (the list of available devices is shown below). Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin. The demo will look for a suitable plugin for a specified device. Default value is "CPU".
5353
-fd_reshape Optional. Reshape Face Detector network so that its input resolution has the same aspect ratio as the input frame.
5454
-no_show Optional. Do not show processed video.
5555
-pc Optional. Enable per-layer performance report.

demos/gaze_estimation_demo/gaze_estimation_demo.hpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,12 @@
1010
#include <gflags/gflags.h>
1111
#include <iostream>
1212

13+
#include <samples/default_flags.hpp>
14+
15+
DEFINE_INPUT_FLAGS
16+
1317
static const char help_message[] = "Print a usage message.";
14-
static const char video_message[] = "Optional. Path to a video file. Default value is \"cam\" to work with camera.";
18+
static const char camera_resolution_message[] = "Optional. Set camera resolution in format WxH.";
1519
static const char gaze_estimation_model_message[] = "Required. Path to an .xml file with a trained Gaze Estimation model.";
1620
static const char face_detection_model_message[] = "Required. Path to an .xml file with a trained Face Detection model.";
1721
static const char head_pose_model_message[] = "Required. Path to an .xml file with a trained Head Pose Estimation model.";
@@ -30,11 +34,10 @@ static const char target_device_message_hp[] = "Optional. Target device for Head
3034
"The demo will look for a suitable plugin for a specified device. Default value is \"CPU\".";
3135
static const char target_device_message_lm[] = "Optional. Target device for Facial Landmarks Estimation network "
3236
"(the list of available devices is shown below). Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. "
33-
"The demo will look for a suitable plugin for device specified. Default value is \"CPU\".";
37+
"The demo will look for a suitable plugin for a specified device. Default value is \"CPU\".";
3438
static const char target_device_message_es[] = "Optional. Target device for Open/Closed Eye network "
3539
"(the list of available devices is shown below). Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. "
36-
"The demo will look for a suitable plugin for device specified. Default value is \"CPU\".";
37-
static const char camera_resolution_message[] = "Optional. Set camera resolution in format WxH.";
40+
"The demo will look for a suitable plugin for a specified device. Default value is \"CPU\".";
3841
static const char performance_counter_message[] = "Optional. Enable per-layer performance report.";
3942
static const char thresh_output_message[] = "Optional. Probability threshold for Face Detector. The default value is 0.5.";
4043
static const char raw_output_message[] = "Optional. Output inference results as raw values.";
@@ -43,7 +46,7 @@ static const char no_show_processed_video[] = "Optional. Do not show processed v
4346
static const char utilization_monitors_message[] = "Optional. List of monitors to show initially.";
4447

4548
DEFINE_bool(h, false, help_message);
46-
DEFINE_string(i, "cam", video_message);
49+
DEFINE_string(res, "1280x720", camera_resolution_message);
4750
DEFINE_string(m, "", gaze_estimation_model_message);
4851
DEFINE_string(m_fd, "", face_detection_model_message);
4952
DEFINE_string(m_hp, "", head_pose_model_message);
@@ -54,7 +57,6 @@ DEFINE_string(d_fd, "CPU", target_device_message_fd);
5457
DEFINE_string(d_hp, "CPU", target_device_message_hp);
5558
DEFINE_string(d_lm, "CPU", target_device_message_lm);
5659
DEFINE_string(d_es, "CPU", target_device_message_es);
57-
DEFINE_string(res, "", camera_resolution_message);
5860
DEFINE_bool(fd_reshape, false, fd_reshape_message);
5961
DEFINE_bool(pc, false, performance_counter_message);
6062
DEFINE_bool(r, false, raw_output_message);
@@ -72,7 +74,9 @@ static void showUsage() {
7274
std::cout << "Options:" << std::endl;
7375
std::cout << std::endl;
7476
std::cout << " -h " << help_message << std::endl;
75-
std::cout << " -i \"<path>\" " << video_message << std::endl;
77+
std::cout << " -i " << input_message << std::endl;
78+
std::cout << " -loop " << loop_message << std::endl;
79+
std::cout << " -res \"<WxH>\" " << camera_resolution_message << std::endl;
7680
std::cout << " -m \"<path>\" " << gaze_estimation_model_message << std::endl;
7781
std::cout << " -m_fd \"<path>\" " << face_detection_model_message << std::endl;
7882
std::cout << " -m_hp \"<path>\" " << head_pose_model_message << std::endl;
@@ -83,7 +87,6 @@ static void showUsage() {
8387
std::cout << " -d_hp \"<device>\" " << target_device_message_hp << std::endl;
8488
std::cout << " -d_lm \"<device>\" " << target_device_message_lm << std::endl;
8589
std::cout << " -d_es \"<device>\" " << target_device_message_es << std::endl;
86-
std::cout << " -res \"<WxH>\" " << camera_resolution_message << std::endl;
8790
std::cout << " -fd_reshape " << fd_reshape_message << std::endl;
8891
std::cout << " -no_show " << no_show_processed_video << std::endl;
8992
std::cout << " -pc " << performance_counter_message << std::endl;

demos/gaze_estimation_demo/main.cpp

Lines changed: 32 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
#include <inference_engine.hpp>
2929

3030
#include <monitors/presenter.h>
31+
#include <samples/args_helper.hpp>
32+
#include <samples/images_capture.h>
3133
#include <samples/ocv_common.hpp>
3234
#include <samples/slog.hpp>
3335

@@ -87,36 +89,6 @@ int main(int argc, char *argv[]) {
8789
return 0;
8890
}
8991

90-
slog::info << "Reading input" << slog::endl;
91-
cv::VideoCapture cap;
92-
93-
if (!(FLAGS_i == "cam" ? cap.open(0) : cap.open(FLAGS_i))) {
94-
throw std::logic_error("Cannot open input file or camera: " + FLAGS_i);
95-
}
96-
97-
// Parse camera resolution parameter and set camera resolution
98-
if (FLAGS_i == "cam" && FLAGS_res != "") {
99-
auto xPos = FLAGS_res.find("x");
100-
if (xPos == std::string::npos)
101-
throw std::runtime_error("Incorrect -res parameter format, please use 'x' to separate width and height");
102-
int frameWidth, frameHeight;
103-
std::stringstream widthStream(FLAGS_res.substr(0, xPos));
104-
widthStream >> frameWidth;
105-
std::stringstream heightStream(FLAGS_res.substr(xPos + 1));
106-
heightStream >> frameHeight;
107-
cap.set(cv::CAP_PROP_FRAME_WIDTH, frameWidth);
108-
cap.set(cv::CAP_PROP_FRAME_HEIGHT, frameHeight);
109-
}
110-
111-
// read input (video) frame
112-
cv::Mat frame;
113-
if (!cap.read(frame)) {
114-
throw std::logic_error("Failed to get frame from cv::VideoCapture");
115-
}
116-
117-
bool flipImage = false;
118-
ResultsMarker resultsMarker(false, false, false, true, true);
119-
12092
// Loading Inference Engine
12193
std::vector<std::pair<std::string, std::string>> cmdOptions = {
12294
{FLAGS_d, FLAGS_m}, {FLAGS_d_fd, FLAGS_m_fd},
@@ -150,10 +122,21 @@ int main(int argc, char *argv[]) {
150122
ExponentialAverager overallTimeAverager(smoothingFactor, 30.);
151123
ExponentialAverager inferenceTimeAverager(smoothingFactor, 30.);
152124

125+
bool flipImage = false;
126+
ResultsMarker resultsMarker(false, false, false, true, true);
153127
int delay = 1;
154128
std::string windowName = "Gaze estimation demo";
155-
cv::Size graphSize{static_cast<int>(cap.get(cv::CAP_PROP_FRAME_WIDTH) / 4), 60};
156-
Presenter presenter(FLAGS_u, static_cast<int>(cap.get(cv::CAP_PROP_FRAME_HEIGHT)) - graphSize.height - 10, graphSize);
129+
130+
std::unique_ptr<ImagesCapture> cap = openImagesCapture(FLAGS_i, FLAGS_loop, 0,
131+
std::numeric_limits<size_t>::max(), stringToSize(FLAGS_res));
132+
cv::Mat frame = cap->read();
133+
if (!frame.data) {
134+
throw std::runtime_error("Can't read an image from the input");
135+
}
136+
137+
cv::Size graphSize{frame.cols / 4, 60};
138+
Presenter presenter(FLAGS_u, frame.rows - graphSize.height - 10, graphSize);
139+
157140
auto tIterationBegins = cv::getTickCount();
158141
do {
159142
if (flipImage) {
@@ -192,10 +175,6 @@ int main(int argc, char *argv[]) {
192175
}
193176
}
194177

195-
if (FLAGS_no_show) {
196-
continue;
197-
}
198-
199178
presenter.drawGraphs(frame);
200179

201180
// Display the results
@@ -204,20 +183,23 @@ int main(int argc, char *argv[]) {
204183
}
205184
putTimingInfoOnFrame(frame, overallTimeAverager.getAveragedValue(),
206185
inferenceTimeAverager.getAveragedValue());
207-
cv::imshow(windowName, frame);
208-
209-
// Controls the information being displayed while demo runs
210-
int key = cv::waitKey(delay);
211-
resultsMarker.toggle(key);
212-
213-
// Press 'Esc' to quit, 'f' to flip the video horizontally
214-
if (key == 27)
215-
break;
216-
else if (key == 'f')
217-
flipImage = !flipImage;
218-
else
219-
presenter.handleKey(key);
220-
} while (cap.read(frame));
186+
if (!FLAGS_no_show) {
187+
cv::imshow(windowName, frame);
188+
189+
// Controls the information being displayed while demo runs
190+
int key = cv::waitKey(delay);
191+
resultsMarker.toggle(key);
192+
193+
// Press 'Esc' to quit, 'f' to flip the video horizontally
194+
if (key == 27)
195+
break;
196+
if (key == 'f')
197+
flipImage = !flipImage;
198+
else
199+
presenter.handleKey(key);
200+
}
201+
frame = cap->read();
202+
} while (frame.data);
221203
std::cout << presenter.reportMeans() << '\n';
222204
}
223205
catch (const std::exception& error) {

0 commit comments

Comments (0)