2525#include < inference_engine.hpp>
2626
2727#include < monitors/presenter.h>
28+ #include < samples/images_capture.h>
2829#include < samples/ocv_common.hpp>
2930#include < samples/slog.hpp>
3031
@@ -61,10 +62,6 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
6162 if (FLAGS_n_hp < 1 ) {
6263 throw std::logic_error (" Parameter -n_hp cannot be 0" );
6364 }
64-
65- // no need to wait for a key press from a user if an output image/video file is not shown.
66- FLAGS_no_wait |= FLAGS_no_show;
67-
6865 return true ;
6966}
7067
@@ -77,27 +74,6 @@ int main(int argc, char *argv[]) {
7774 return 0 ;
7875 }
7976
80- slog::info << " Reading input" << slog::endl;
81- cv::VideoCapture cap;
82- if (!(FLAGS_i == " cam" ? cap.open (0 ) : cap.open (FLAGS_i))) {
83- throw std::logic_error (" Cannot open input file or camera: " + FLAGS_i);
84- }
85-
86- Timer timer;
87- // read input (video) frame
88- cv::Mat frame;
89- if (!cap.read (frame)) {
90- throw std::logic_error (" Failed to get frame from cv::VideoCapture" );
91- }
92-
93- const size_t width = static_cast <size_t >(frame.cols );
94- const size_t height = static_cast <size_t >(frame.rows );
95-
96- cv::VideoWriter videoWriter;
97- if (!FLAGS_o.empty ()) {
98- videoWriter.open (FLAGS_o, cv::VideoWriter::fourcc (' I' , ' Y' , ' U' , ' V' ), 25 , cv::Size (width, height));
99- }
100- // ---------------------------------------------------------------------------------------------------
10177 // --------------------------- 1. Loading Inference Engine -----------------------------
10278
10379 Core ie;
@@ -163,73 +139,75 @@ int main(int argc, char *argv[]) {
163139 Load (facialLandmarksDetector).into (ie, FLAGS_d_lm, FLAGS_dyn_lm);
164140 // ----------------------------------------------------------------------------------------------------
165141
166- // --------------------------- 3. Doing inference -----------------------------------------------------
167- // Starting inference & calculating performance
168- slog::info << " Start inference " << slog::endl;
169-
170142 bool isFaceAnalyticsEnabled = ageGenderDetector.enabled () || headPoseDetector.enabled () ||
171143 emotionsDetector.enabled () || facialLandmarksDetector.enabled ();
172144
145+ Timer timer;
173146 std::ostringstream out;
174147 size_t framesCounter = 0 ;
175- int delay = 1 ;
176- double msrate = -1 ;
177- cv::Mat prev_frame, next_frame;
148+ double msrate = 1000.0 / FLAGS_fps;
178149 std::list<Face::Ptr> faces;
179150 size_t id = 0 ;
180151
181- if (FLAGS_fps > 0 ) {
182- msrate = 1000 .f / FLAGS_fps;
152+ std::unique_ptr<ImagesCapture> cap = openImagesCapture (FLAGS_i, FLAGS_loop);
153+ cv::Mat frame = cap->read ();
154+ if (!frame.data ) {
155+ throw std::runtime_error (" Can't read an image from the input" );
183156 }
184157
185- Visualizer::Ptr visualizer;
186- if (!FLAGS_no_show || !FLAGS_o.empty ()) {
187- visualizer = std::make_shared<Visualizer>(cv::Size (width, height));
188- if (!FLAGS_no_show_emotion_bar && emotionsDetector.enabled ()) {
189- visualizer->enableEmotionBar (emotionsDetector.emotionsVec );
158+ const cv::Point THROUGHPUT_METRIC_POSITION{10 , 45 };
159+ Presenter presenter (FLAGS_u, THROUGHPUT_METRIC_POSITION.y + 15 , {frame.cols / 4 , 60 });
160+
161+ Visualizer visualizer{frame.size ()};
162+ if (!FLAGS_no_show_emotion_bar && emotionsDetector.enabled ()) {
163+ visualizer.enableEmotionBar (emotionsDetector.emotionsVec );
164+ }
165+
166+ cv::VideoWriter videoWriter;
167+ if (!FLAGS_o.empty ()) {
168+ videoWriter.open (FLAGS_o, cv::VideoWriter::fourcc (' I' , ' Y' , ' U' , ' V' ),
169+ !FLAGS_no_show && FLAGS_fps > 0.0 ? FLAGS_fps : cap->fps (), frame.size ());
170+ if (!videoWriter.isOpened ()) {
171+ throw std::runtime_error (" Can't open video writer" );
190172 }
191173 }
192174
193175 // Detecting all faces on the first frame and reading the next one
194176 faceDetector.enqueue (frame);
195177 faceDetector.submitRequest ();
196178
197- prev_frame = frame.clone ();
198-
199- // Reading the next frame
200- bool frameReadStatus = cap.read (frame);
179+ cv::Mat next_frame = cap->read ();
201180
202181 std::cout << " To close the application, press 'CTRL+C' here" ;
203182 if (!FLAGS_no_show) {
204183 std::cout << " or switch to the output window and press Q or Esc" ;
205184 }
206185 std::cout << std::endl;
207186
208- const cv::Point THROUGHPUT_METRIC_POSITION{10 , 45 };
209-
210- cv::Size graphSize{static_cast <int >(cap.get (cv::CAP_PROP_FRAME_WIDTH) / 4 ), 60 };
211- Presenter presenter (FLAGS_u, THROUGHPUT_METRIC_POSITION.y + 15 , graphSize);
212-
213- while (true ) {
187+ while (frame.data ) {
214188 timer.start (" total" );
189+ cv::Mat prev_frame = std::move (frame);
190+ frame = std::move (next_frame);
215191 framesCounter++;
216- bool isLastFrame = !frameReadStatus;
217192
218193 // Retrieving face detection results for the previous frame
219194 faceDetector.wait ();
220195 faceDetector.fetchResults ();
221196 auto prev_detection_results = faceDetector.results ;
222197
223198 // No valid frame to infer if previous frame is the last
224- if (!isLastFrame) {
199+ if (frame.data ) {
200+ if (frame.size () != prev_frame.size ()) {
201+ throw std::runtime_error (" Images of different size are not supported" );
202+ }
225203 faceDetector.enqueue (frame);
226204 faceDetector.submitRequest ();
227205 }
228206
229207 // Filling inputs of face analytics networks
230208 for (auto &&face : prev_detection_results) {
231209 if (isFaceAnalyticsEnabled) {
232- auto clippedRect = face.location & cv::Rect (0 , 0 , width, height );
210+ cv::Rect clippedRect = face.location & cv::Rect ({ 0 , 0 }, prev_frame. size () );
233211 cv::Mat face = prev_frame (clippedRect);
234212 ageGenderDetector.enqueue (face);
235213 headPoseDetector.enqueue (face);
@@ -246,16 +224,8 @@ int main(int argc, char *argv[]) {
246224 facialLandmarksDetector.submitRequest ();
247225 }
248226
249- // Reading the next frame if the current one is not the last
250- if (!isLastFrame) {
251- frameReadStatus = cap.read (next_frame);
252- if (FLAGS_loop_video && !frameReadStatus) {
253- if (!(FLAGS_i == " cam" ? cap.open (0 ) : cap.open (FLAGS_i))) {
254- throw std::logic_error (" Cannot open input file or camera: " + FLAGS_i);
255- }
256- frameReadStatus = cap.read (next_frame);
257- }
258- }
227+ // Read the next frame while waiting for inference results
228+ next_frame = cap->read ();
259229
260230 if (isFaceAnalyticsEnabled) {
261231 ageGenderDetector.wait ();
@@ -276,7 +246,7 @@ int main(int argc, char *argv[]) {
276246 // For every detected face
277247 for (size_t i = 0 ; i < prev_detection_results.size (); i++) {
278248 auto & result = prev_detection_results[i];
279- cv::Rect rect = result.location & cv::Rect (0 , 0 , width, height );
249+ cv::Rect rect = result.location & cv::Rect ({ 0 , 0 }, prev_frame. size () );
280250
281251 Face::Ptr face;
282252 if (!FLAGS_no_smooth) {
@@ -327,44 +297,23 @@ int main(int argc, char *argv[]) {
327297
328298 presenter.drawGraphs (prev_frame);
329299
330- // Visualizing results
331- if (!FLAGS_no_show || !FLAGS_o.empty ()) {
332- out.str (" " );
333- out << " Total image throughput: " << std::fixed << std::setprecision (2 )
334- << 1000 .f / (timer[" total" ].getSmoothedDuration ()) << " fps" ;
335- cv::putText (prev_frame, out.str (), THROUGHPUT_METRIC_POSITION, cv::FONT_HERSHEY_TRIPLEX, 1 ,
336- cv::Scalar (255 , 0 , 0 ), 2 );
337-
338- // drawing faces
339- visualizer->draw (prev_frame, faces);
340-
341- if (!FLAGS_no_show) {
342- cv::imshow (" Detection results" , prev_frame);
343- }
344- }
345-
346- if (!FLAGS_o.empty ()) {
347- videoWriter.write (prev_frame);
348- }
349-
350- prev_frame = frame;
351- frame = next_frame;
352- next_frame = cv::Mat ();
300+ // drawing faces
301+ visualizer.draw (prev_frame, faces);
353302
354303 timer.finish (" total" );
304+ out.str (" " );
305+ out << " Total image throughput: " << std::fixed << std::setprecision (1 )
306+ << 1000.0 / (timer[" total" ].getSmoothedDuration ()) << " fps" ;
307+ cv::putText (prev_frame, out.str (), THROUGHPUT_METRIC_POSITION, cv::FONT_HERSHEY_TRIPLEX, 1 ,
308+ cv::Scalar (255 , 0 , 0 ), 2 );
355309
356- if (FLAGS_fps > 0 ) {
357- delay = std::max ( 1 , static_cast < int >(msrate - timer[ " total " ]. getLastCallDuration ()) );
310+ if (videoWriter. isOpened () ) {
311+ videoWriter. write (prev_frame );
358312 }
359313
360- // End of file (or a single frame file like an image). The last frame is displayed to let you check what is shown
361- if (isLastFrame) {
362- if (!FLAGS_no_wait) {
363- std::cout << " No more frames to process!" << std::endl;
364- cv::waitKey (0 );
365- }
366- break ;
367- } else if (!FLAGS_no_show) {
314+ int delay = std::max (1 , static_cast <int >(msrate - timer[" total" ].getLastCallDuration ()));
315+ if (!FLAGS_no_show) {
316+ cv::imshow (" Detection results" , prev_frame);
368317 int key = cv::waitKey (delay);
369318 if (27 == key || ' Q' == key || ' q' == key) {
370319 break ;
@@ -374,7 +323,7 @@ int main(int argc, char *argv[]) {
374323 }
375324
376325 slog::info << " Number of processed frames: " << framesCounter << slog::endl;
377- slog::info << " Total image throughput: " << framesCounter * (1000 .f / timer[" total" ].getTotalDuration ()) << " fps" << slog::endl;
326+ slog::info << " Total image throughput: " << framesCounter * (1000.0 / timer[" total" ].getTotalDuration ()) << " fps" << slog::endl;
378327
379328 // Showing performance results
380329 if (FLAGS_pc) {
@@ -386,17 +335,6 @@ int main(int argc, char *argv[]) {
386335 }
387336
388337 std::cout << presenter.reportMeans () << ' \n ' ;
389- // ---------------------------------------------------------------------------------------------------
390-
391- if (!FLAGS_o.empty ()) {
392- videoWriter.release ();
393- }
394-
395- // release input video stream
396- cap.release ();
397-
398- // close windows
399- cv::destroyAllWindows ();
400338 }
401339 catch (const std::exception& error) {
402340 slog::err << error.what () << slog::endl;
0 commit comments