support ssd

zhangdanfeng · zhangdanfeng · commit a4b9c0215495 · 2023-01-04T16:53:18.000+08:00
Signed-off-by: zhangdanfeng <craft.zhang@space-t.cn>
diff --git a/Makefile-rv b/Makefile-rv
@@ -112,9 +112,9 @@ segmentation/tflite_segmentation: segmentation.cc $(COMMON_SRC)
 	mkdir -p segmentation
 	$(CXX) segmentation.cc $(COMMON_SRC) -o segmentation/tflite_segmentation $(LDFLAGS) $(LIBS) $(CXXFLAGS) $(CCFLAGS) $(INCLUDES)
 
-detection/tflite_detection: detection.cc yolov5.cc yolov3.cc $(COMMON_SRC)
+detection/tflite_detection: detection.cc yolov5.cc yolov3.cc mobilenetssd.cc $(COMMON_SRC)
 	mkdir -p detection
-	$(CXX) detection.cc yolov5.cc yolov3.cc $(COMMON_SRC) -o detection/tflite_detection $(LDFLAGS) $(LIBS) $(CXXFLAGS) $(CCFLAGS) $(INCLUDES)
+	$(CXX) detection.cc yolov5.cc yolov3.cc mobilenetssd.cc $(COMMON_SRC) -o detection/tflite_detection $(LDFLAGS) $(LIBS) $(CXXFLAGS) $(CCFLAGS) $(INCLUDES)
 
 clean:
 	rm -rf classification/tflite_classification segmentation/tflite_segmentation detection/tflite_detection
diff --git a/detection.cc b/detection.cc
@@ -31,6 +31,7 @@
 #include <utility>
 #include <vector>
 
+#include "mobilenetssd.h"
 #include "model_utils.h"
 #include "opencv2/core.hpp"
 #include "opencv2/highgui.hpp"
@@ -51,8 +52,12 @@ detection/yolov5s_ultralytics_640_quantized.tflite -i detection/bus.jpg -l \
 detection/labels.txt -c 1 -b 0 -s 255 -t 1 -v 5
 
 qemu-riscv64 detection/tflite_detection -m \
-detection/yolov3_keras_416_quantized.tflite -i detection/bus.jpg -l \
+detection/yolov3_keras_416_quantized.tflite -i detection/grace_hopper.bmp -l \
 detection/labels.txt -c 1 -b 0 -s 255 -t 1 -v 3
+
+qemu-riscv64 detection/tflite_detection -m \
+detection/ssd_mobilenet_v2_2.tflite -i detection/zidane.jpg \
+-l detection/coco.txt -c 1 -b 0 -s 255 -t 1 -v ssd
 */
 
 /*
@@ -71,7 +76,7 @@ void display_usage() {
        << "--input_std, -s: input standard deviation\n"
        << "--profiling, -p: [0|1], profiling or not\n"
        << "--threads, -t: number of threads\n"
-       << "--model-version, -v: yolo version\n"
+       << "--model-version, -v: yolo version or ssd\n"
        << "\n";
 }
 
@@ -158,9 +163,12 @@ int main(int argc, char **argv) {
   YOLOV5 *model = NULL;
   if (yolo_version == 3) {
     model = new YOLOV3;
-  } else {
+  } else if (yolo_version == 5) {
     model = new YOLOV5;
+  } else {
+    model = new MobileNetSSD;
   }
+
   if (!model) {
     exit(-1);
   }
@@ -205,7 +213,7 @@ int main(int argc, char **argv) {
     auto box = boxes[i];
     auto score = scores[i];
     auto label = labels[i];
-    cv::rectangle(show_image, box, cv::Scalar(255, 0, 0), 2);
+    cv::rectangle(show_image, box, cv::Scalar(0, 255, 0), 2);
     cv::putText(show_image, labelNames[label], cv::Point(box.x, box.y),
                 cv::FONT_HERSHEY_COMPLEX, 1.0, cv::Scalar(255, 255, 255), 1,
                 cv::LINE_AA);
diff --git a/detection/coco.txt b/detection/coco.txt
@@ -0,0 +1,91 @@
+???
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+???
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+???
+backpack
+umbrella
+???
+???
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+???
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+???
+dining table
+???
+???
+toilet
+???
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+???
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
diff --git a/detection/image.jpg b/detection/image.jpg
diff --git a/detection/ssd_mobilenet_v2_2.tflite b/detection/ssd_mobilenet_v2_2.tflite
diff --git a/mobilenetssd.cc b/mobilenetssd.cc
@@ -0,0 +1,82 @@
+#include "mobilenetssd.h"
+
+// void MobileNetSSD::preprocess(cv::Mat &image) {
+//   if (image.data) {
+//     // std::cout << "Cvt Color Space to RGB\n";
+//     // cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
+//     std::cout << "Resize to fit input Shape\n";
+//     cv::resize(image, image, cv::Size(_in_height, _in_width));
+//     // cv::imwrite("image.jpg", image);
+//     // image.convertTo(image, CV_32FC3);
+//   } else {
+//     std::cout << "input image is empty!\n";
+//     std::cout << __FILE__ << ": " << __LINE__ << std::endl;
+//     exit(-1);
+//   }
+// }
+
+// Runs one SSD inference on `frame` and appends every detection whose score
+// exceeds _conf_threshold to `out_pred` (scores, labels, pixel-space boxes).
+// `frame` is handed to preprocess() by reference; failures exit the process.
+void MobileNetSSD::run(cv::Mat &frame, Prediction &out_pred) {
+  if (!frame.data) {
+    std::cout << "input image is empty!\n";
+    std::cout << __FILE__ << ": " << __LINE__ << std::endl;
+    exit(-1);
+  }
+
+  _img_height = frame.rows;
+  _img_width = frame.cols;
+
+  preprocess(frame);
+  if (_in_type == kTfLiteFloat32) {
+    fill(_input_f32, frame);
+  } else if (_in_type == kTfLiteUInt8) {
+    fill(_input_u8, frame);
+  }
+
+  // Inference
+  std::cout << "Run inference!!\n";
+  if (_interpreter->Invoke() != kTfLiteOk) {
+    std::cout << "\nFailed to run inference!!\n";
+    std::cout << __FILE__ << ": " << __LINE__ << std::endl;
+    exit(-1);
+  }
+
+  const auto &outputs = _interpreter->outputs();
+  for (size_t i = 0; i < outputs.size(); i++) {
+    const TfLiteTensor *tensor = _interpreter->tensor(outputs[i]);
+    // Print only the dims present: not every output is guaranteed rank 4.
+    std::cout << "SSD Model Output Shape:";
+    for (int d = 0; d < tensor->dims->size; d++) {
+      std::cout << "[" << tensor->dims->data[d] << "]";
+    }
+    std::cout << "  type: [" << tensor->type << "]\n";
+  }
+
+  // NOTE(review): slots 1/2/4/5 and float32 outputs are assumed -- confirm.
+  if (outputs.size() < 6) {
+    std::cout << "\nUnexpected number of SSD outputs!!\n";
+    std::cout << __FILE__ << ": " << __LINE__ << std::endl;
+    exit(-1);
+  }
+  const float *detection_boxes = _interpreter->tensor(outputs[1])->data.f;
+  const float *detection_classes = _interpreter->tensor(outputs[2])->data.f;
+  const float *detection_scores = _interpreter->tensor(outputs[4])->data.f;
+  // A negative count must not wrap into a huge unsigned loop bound.
+  const float count = _interpreter->tensor(outputs[5])->data.f[0];
+  const size_t N = count > 0 ? static_cast<size_t>(count) : 0;
+  for (size_t i = 0; i < N; i++) {
+    if (detection_scores[i] > _conf_threshold) {
+      const float *b = detection_boxes + 4 * i; // used as y0, x0, y1, x1
+      out_pred.scores.push_back(detection_scores[i]);
+      out_pred.labels.push_back(floor(detection_classes[i] + 0.5f));
+      cv::Rect bbox;
+      bbox.y = b[0] * _img_height;
+      bbox.x = b[1] * _img_width;
+      bbox.height = (b[2] - b[0]) * _img_height;
+      bbox.width = (b[3] - b[1]) * _img_width;
+      out_pred.boxes.push_back(bbox);
+    }
+  }
+}
diff --git a/mobilenetssd.h b/mobilenetssd.h
@@ -0,0 +1,29 @@
+/*
+ * zhangdanfeng 2022
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "yolov5.h"
+
+// SSD detector built on the YOLOV5 wrapper; only run() is overridden.
+class MobileNetSSD : public YOLOV5 {
+public:
+  // Default normalization constants; fill() applies (pixel - _mean) / _std.
+  MobileNetSSD() { _mean = 0; _std = 1; }
+  // The preprocess() override is currently disabled:
+  // virtual void preprocess(cv::Mat &image) override;
+  void run(cv::Mat &image, Prediction &out_pred) override;
+};
diff --git a/yolov5.cc b/yolov5.cc
@@ -61,7 +61,17 @@ void YOLOV5::loadModel(const std::string path) {
   _in_width = dims->data[2];
   _in_channels = dims->data[3];
   _in_type = _interpreter->tensor(_input)->type;
-  _input_f32 = _interpreter->typed_tensor<float_t>(_input);
+
+  std::cout << "YOLO Model Input type: " << _in_type << "\n";
+  if (_in_type == kTfLiteFloat32) {
+    _input_f32 = _interpreter->typed_tensor<float_t>(_input);
+  } else if (_in_type == kTfLiteUInt8) {
+    _input_u8 = _interpreter->typed_tensor<uint8_t>(_input);
+  } else {
+    std::cout << "YOLO Model Input type donot support yet\n";
+    exit(0);
+  }
+
   std::cout << "YOLO Model Input Shape:[1][" << _in_height << "][" << _in_width
             << "][" << _in_channels << "]\n";
   _interpreter->SetNumThreads(_n_threads);
@@ -87,7 +97,7 @@ template <typename T> void YOLOV5::fill(T *in, cv::Mat &src) {
     uchar *ptr = src.data;
     for (size_t i = 0; i < src.rows; i++) {
       for (size_t j = 0; j < src.cols * 3; j++) {
-        in[i * src.cols * 3 + j] = ((float_t)(ptr[j]) - _mean) / _std;
+        in[i * src.cols * 3 + j] = ((T)(ptr[j]) - _mean) / _std;
       }
       ptr += src.step;
     }
@@ -202,7 +212,11 @@ void YOLOV5::run(cv::Mat &frame, Prediction &out_pred) {
     _img_width = frame.cols;
 
     preprocess(frame);
-    fill(_input_f32, frame);
+    if (_in_type == kTfLiteFloat32) {
+      fill(_input_f32, frame);
+    } else if (_in_type == kTfLiteUInt8) {
+      fill(_input_u8, frame);
+    }
 
     // Inference
     std::cout << "Run inference!!\n";
@@ -216,12 +230,15 @@ void YOLOV5::run(cv::Mat &frame, Prediction &out_pred) {
     for (size_t i = 0; i < _interpreter->outputs().size(); i++) {
       TfLiteIntArray *out_dims =
           _interpreter->tensor(_interpreter->outputs()[i])->dims;
+      TfLiteType out_type =
+          _interpreter->tensor(_interpreter->outputs()[i])->type;
       int out_batch = out_dims->data[0];
       int out_row = out_dims->data[1];
       int out_colum = out_dims->data[2];
       int out_channel = out_dims->data[3];
       std::cout << "YOLO Model Output Shape:[" << out_batch << "][" << out_row
-                << "][" << out_colum << "][" << out_channel << "]\n";
+                << "][" << out_colum << "][" << out_channel << "] "
+                << " type: [" << out_type << "]\n";
     }
 
     std::vector<std::vector<float>> predV = tensorToVector2D();
diff --git a/yolov5.h b/yolov5.h
@@ -41,9 +41,9 @@ struct Prediction {
 class YOLOV5 {
 public:
   // Take a model path as string
-  void loadModel(const std::string path);
+  virtual void loadModel(const std::string path);
   // Take an image and return a prediction
-  void run(cv::Mat &image, Prediction &out_pred);
+  virtual void run(cv::Mat &image, Prediction &out_pred);
 
   void getLabelsName(std::string path, std::vector<std::string> &labelNames);
 
@@ -79,9 +79,10 @@ class YOLOV5 {
 
   // Input of the interpreter
   float_t *_input_f32;
+  uint8_t *_input_u8;
 
   template <typename T> void fill(T *in, cv::Mat &src);
-  void preprocess(cv::Mat &image);
+  virtual void preprocess(cv::Mat &image);
   virtual std::vector<std::vector<float>> tensorToVector2D();
   virtual void nonMaximumSupprition(std::vector<std::vector<float>> &predV,
                                     std::vector<cv::Rect> &boxes,