Skip to content

Commit a4b9c02

Browse files
author
zhangdanfeng
committed
support ssd
Signed-off-by: zhangdanfeng <[email protected]>
1 parent 6ea8ae2 commit a4b9c02

9 files changed

+241
-13
lines changed

Makefile-rv

+2-2
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,9 @@ segmentation/tflite_segmentation: segmentation.cc $(COMMON_SRC)
112112
mkdir -p segmentation
113113
$(CXX) segmentation.cc $(COMMON_SRC) -o segmentation/tflite_segmentation $(LDFLAGS) $(LIBS) $(CXXFLAGS) $(CCFLAGS) $(INCLUDES)
114114

115-
detection/tflite_detection: detection.cc yolov5.cc yolov3.cc $(COMMON_SRC)
115+
detection/tflite_detection: detection.cc yolov5.cc yolov3.cc mobilenetssd.cc $(COMMON_SRC)
116116
mkdir -p detection
117-
$(CXX) detection.cc yolov5.cc yolov3.cc $(COMMON_SRC) -o detection/tflite_detection $(LDFLAGS) $(LIBS) $(CXXFLAGS) $(CCFLAGS) $(INCLUDES)
117+
$(CXX) detection.cc yolov5.cc yolov3.cc mobilenetssd.cc $(COMMON_SRC) -o detection/tflite_detection $(LDFLAGS) $(LIBS) $(CXXFLAGS) $(CCFLAGS) $(INCLUDES)
118118

119119
clean:
120120
rm -rf classification/tflite_classification segmentation/tflite_segmentation detection/tflite_detection

detection.cc

+12-4
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <utility>
3232
#include <vector>
3333

34+
#include "mobilenetssd.h"
3435
#include "model_utils.h"
3536
#include "opencv2/core.hpp"
3637
#include "opencv2/highgui.hpp"
@@ -51,8 +52,12 @@ detection/yolov5s_ultralytics_640_quantized.tflite -i detection/bus.jpg -l \
5152
detection/labels.txt -c 1 -b 0 -s 255 -t 1 -v 5
5253
5354
qemu-riscv64 detection/tflite_detection -m \
54-
detection/yolov3_keras_416_quantized.tflite -i detection/bus.jpg -l \
55+
detection/yolov3_keras_416_quantized.tflite -i detection/grace_hopper.bmp -l \
5556
detection/labels.txt -c 1 -b 0 -s 255 -t 1 -v 3
57+
58+
qemu-riscv64 detection/tflite_detection -m \
59+
detection/ssd_mobilenet_v2_2.tflite -i detection/zidane.jpg \
60+
-l detection/coco.txt -c 1 -b 0 -s 255 -t 1 -v ssd
5661
*/
5762

5863
/*
@@ -71,7 +76,7 @@ void display_usage() {
7176
<< "--input_std, -s: input standard deviation\n"
7277
<< "--profiling, -p: [0|1], profiling or not\n"
7378
<< "--threads, -t: number of threads\n"
74-
<< "--model-version, -v: yolo version\n"
79+
<< "--model-version, -v: yolo version or ssd\n"
7580
<< "\n";
7681
}
7782

@@ -158,9 +163,12 @@ int main(int argc, char **argv) {
158163
YOLOV5 *model = NULL;
159164
if (yolo_version == 3) {
160165
model = new YOLOV3;
161-
} else {
166+
} else if (yolo_version == 5) {
162167
model = new YOLOV5;
168+
} else {
169+
model = new MobileNetSSD;
163170
}
171+
164172
if (!model) {
165173
exit(-1);
166174
}
@@ -205,7 +213,7 @@ int main(int argc, char **argv) {
205213
auto box = boxes[i];
206214
auto score = scores[i];
207215
auto label = labels[i];
208-
cv::rectangle(show_image, box, cv::Scalar(255, 0, 0), 2);
216+
cv::rectangle(show_image, box, cv::Scalar(0, 255, 0), 2);
209217
cv::putText(show_image, labelNames[label], cv::Point(box.x, box.y),
210218
cv::FONT_HERSHEY_COMPLEX, 1.0, cv::Scalar(255, 255, 255), 1,
211219
cv::LINE_AA);

detection/coco.txt

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
???
2+
person
3+
bicycle
4+
car
5+
motorcycle
6+
airplane
7+
bus
8+
train
9+
truck
10+
boat
11+
traffic light
12+
fire hydrant
13+
???
14+
stop sign
15+
parking meter
16+
bench
17+
bird
18+
cat
19+
dog
20+
horse
21+
sheep
22+
cow
23+
elephant
24+
bear
25+
zebra
26+
giraffe
27+
???
28+
backpack
29+
umbrella
30+
???
31+
???
32+
handbag
33+
tie
34+
suitcase
35+
frisbee
36+
skis
37+
snowboard
38+
sports ball
39+
kite
40+
baseball bat
41+
baseball glove
42+
skateboard
43+
surfboard
44+
tennis racket
45+
bottle
46+
???
47+
wine glass
48+
cup
49+
fork
50+
knife
51+
spoon
52+
bowl
53+
banana
54+
apple
55+
sandwich
56+
orange
57+
broccoli
58+
carrot
59+
hot dog
60+
pizza
61+
donut
62+
cake
63+
chair
64+
couch
65+
potted plant
66+
bed
67+
???
68+
dining table
69+
???
70+
???
71+
toilet
72+
???
73+
tv
74+
laptop
75+
mouse
76+
remote
77+
keyboard
78+
cell phone
79+
microwave
80+
oven
81+
toaster
82+
sink
83+
refrigerator
84+
???
85+
book
86+
clock
87+
vase
88+
scissors
89+
teddy bear
90+
hair drier
91+
toothbrush

detection/image.jpg

-220 KB
Binary file not shown.

detection/ssd_mobilenet_v2_2.tflite

7.4 MB
Binary file not shown.

mobilenetssd.cc

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#include "mobilenetssd.h"
2+
3+
// void MobileNetSSD::preprocess(cv::Mat &image) {
4+
// if (image.data) {
5+
// // std::cout << "Cvt Color Space to RGB\n";
6+
// // cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
7+
// std::cout << "Resize to fit input Shape\n";
8+
// cv::resize(image, image, cv::Size(_in_height, _in_width));
9+
// // cv::imwrite("image.jpg", image);
10+
// // image.convertTo(image, CV_32FC3);
11+
// } else {
12+
// std::cout << "input image is empty!\n";
13+
// std::cout << __FILE__ << ": " << __LINE__ << std::endl;
14+
// exit(-1);
15+
// }
16+
// }
17+
18+
// Runs one SSD inference pass on `frame` and appends every detection whose
// score is above _conf_threshold to `out_pred` (scores, labels and boxes).
//
// frame:    input image. It is handed to preprocess() and then copied into
//           the interpreter's input tensor; the process exits with -1 when it
//           carries no pixel data.
// out_pred: receives the accepted detections. Existing entries are kept; new
//           ones are appended.
//
// The model's output tensors are addressed by fixed position: 1 = boxes,
// 2 = classes, 4 = scores, 5 = detection count. Boxes are treated as
// [ymin, xmin, ymax, xmax] fractions of the image and are scaled to the size
// the frame had before preprocess() ran.
// NOTE(review): this positional layout is model specific - confirm it against
// the .tflite file actually deployed.
void MobileNetSSD::run(cv::Mat &frame, Prediction &out_pred) {
  if (!frame.data) {
    std::cout << "input image is empty!\n";
    std::cout << __FILE__ << ": " << __LINE__ << std::endl;
    exit(-1);
  }

  // Remember the source size before preprocess() touches the frame; the
  // boxes are scaled back to these dimensions below.
  _img_height = frame.rows;
  _img_width = frame.cols;

  preprocess(frame);
  if (_in_type == kTfLiteFloat32) {
    fill(_input_f32, frame);
  } else if (_in_type == kTfLiteUInt8) {
    fill(_input_u8, frame);
  }

  // Inference
  std::cout << "Run inference!!\n";
  const TfLiteStatus status = _interpreter->Invoke();
  if (status != kTfLiteOk) {
    std::cout << "\nFailed to run inference!!\n";
    std::cout << __FILE__ << ": " << __LINE__ << std::endl;
    exit(-1);
  }

  // Log the shape of every output. Only dims->size entries are valid, so the
  // rank is honoured instead of assuming four dimensions per tensor.
  const auto &outputs = _interpreter->outputs();
  for (size_t i = 0; i < outputs.size(); i++) {
    const TfLiteTensor *tensor = _interpreter->tensor(outputs[i]);
    std::cout << "SSD Model Output Shape:";
    for (int d = 0; d < tensor->dims->size; d++) {
      std::cout << "[" << tensor->dims->data[d] << "]";
    }
    std::cout << "  type: [" << tensor->type << "]\n";
  }

  // Positions of the tensors consumed below within outputs().
  const size_t kBoxesOutput = 1;
  const size_t kClassesOutput = 2;
  const size_t kScoresOutput = 4;
  const size_t kCountOutput = 5;
  if (outputs.size() <= kCountOutput) {
    std::cout << "\nUnexpected number of output tensors: " << outputs.size()
              << "\n";
    std::cout << __FILE__ << ": " << __LINE__ << std::endl;
    exit(-1);
  }

  const TfLiteTensor *boxes_tensor =
      _interpreter->tensor(outputs[kBoxesOutput]);
  const TfLiteTensor *classes_tensor =
      _interpreter->tensor(outputs[kClassesOutput]);
  const TfLiteTensor *scores_tensor =
      _interpreter->tensor(outputs[kScoresOutput]);
  const TfLiteTensor *count_tensor =
      _interpreter->tensor(outputs[kCountOutput]);

  // The tensors are read through data.f, which is only meaningful for float32
  // storage that holds at least the one count value.
  if (boxes_tensor->type != kTfLiteFloat32 ||
      classes_tensor->type != kTfLiteFloat32 ||
      scores_tensor->type != kTfLiteFloat32 ||
      count_tensor->type != kTfLiteFloat32 ||
      count_tensor->bytes < sizeof(float)) {
    std::cout << "\nSSD output tensors must be float32!!\n";
    std::cout << __FILE__ << ": " << __LINE__ << std::endl;
    exit(-1);
  }

  // The count arrives as a float. Reject negative/NaN values before the
  // conversion and never trust it beyond what the tensors can actually hold.
  const float raw_count = count_tensor->data.f[0];
  size_t N = raw_count > 0.0f ? static_cast<size_t>(raw_count) : 0;
  const size_t score_capacity = scores_tensor->bytes / sizeof(float);
  const size_t class_capacity = classes_tensor->bytes / sizeof(float);
  const size_t box_capacity = boxes_tensor->bytes / (4 * sizeof(float));
  if (N > score_capacity) {
    N = score_capacity;
  }
  if (N > class_capacity) {
    N = class_capacity;
  }
  if (N > box_capacity) {
    N = box_capacity;
  }

  const float *detection_boxes = boxes_tensor->data.f;
  const float *detection_classes = classes_tensor->data.f;
  const float *detection_scores = scores_tensor->data.f;

  for (size_t i = 0; i < N; i++) {
    if (detection_scores[i] > _conf_threshold) {
      out_pred.scores.push_back(detection_scores[i]);
      // Class ids are stored as floats; round to the nearest integer id.
      out_pred.labels.push_back(floor(detection_classes[i] + 0.5f));

      const float *box = detection_boxes + 4 * i;
      cv::Rect bbox;
      bbox.y = box[0] * _img_height;
      bbox.x = box[1] * _img_width;
      bbox.height = (box[2] - box[0]) * _img_height;
      bbox.width = (box[3] - box[1]) * _img_width;
      out_pred.boxes.push_back(bbox);
    }
  }
}

mobilenetssd.h

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* zhangdanfeng 2022
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include "yolov5.h"
20+
21+
class MobileNetSSD : public YOLOV5 {
22+
public:
23+
MobileNetSSD() {
24+
_mean = 0;
25+
_std = 1;
26+
};
27+
// virtual void preprocess(cv::Mat &image) override;
28+
virtual void run(cv::Mat &image, Prediction &out_pred) override;
29+
};

yolov5.cc

+21-4
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,17 @@ void YOLOV5::loadModel(const std::string path) {
6161
_in_width = dims->data[2];
6262
_in_channels = dims->data[3];
6363
_in_type = _interpreter->tensor(_input)->type;
64-
_input_f32 = _interpreter->typed_tensor<float_t>(_input);
64+
65+
std::cout << "YOLO Model Input type: " << _in_type << "\n";
66+
if (_in_type == kTfLiteFloat32) {
67+
_input_f32 = _interpreter->typed_tensor<float_t>(_input);
68+
} else if (_in_type == kTfLiteUInt8) {
69+
_input_u8 = _interpreter->typed_tensor<uint8_t>(_input);
70+
} else {
71+
std::cout << "YOLO Model Input type donot support yet\n";
72+
exit(0);
73+
}
74+
6575
std::cout << "YOLO Model Input Shape:[1][" << _in_height << "][" << _in_width
6676
<< "][" << _in_channels << "]\n";
6777
_interpreter->SetNumThreads(_n_threads);
@@ -87,7 +97,7 @@ template <typename T> void YOLOV5::fill(T *in, cv::Mat &src) {
8797
uchar *ptr = src.data;
8898
for (size_t i = 0; i < src.rows; i++) {
8999
for (size_t j = 0; j < src.cols * 3; j++) {
90-
in[i * src.cols * 3 + j] = ((float_t)(ptr[j]) - _mean) / _std;
100+
in[i * src.cols * 3 + j] = ((T)(ptr[j]) - _mean) / _std;
91101
}
92102
ptr += src.step;
93103
}
@@ -202,7 +212,11 @@ void YOLOV5::run(cv::Mat &frame, Prediction &out_pred) {
202212
_img_width = frame.cols;
203213

204214
preprocess(frame);
205-
fill(_input_f32, frame);
215+
if (_in_type == kTfLiteFloat32) {
216+
fill(_input_f32, frame);
217+
} else if (_in_type == kTfLiteUInt8) {
218+
fill(_input_u8, frame);
219+
}
206220

207221
// Inference
208222
std::cout << "Run inference!!\n";
@@ -216,12 +230,15 @@ void YOLOV5::run(cv::Mat &frame, Prediction &out_pred) {
216230
for (size_t i = 0; i < _interpreter->outputs().size(); i++) {
217231
TfLiteIntArray *out_dims =
218232
_interpreter->tensor(_interpreter->outputs()[i])->dims;
233+
TfLiteType out_type =
234+
_interpreter->tensor(_interpreter->outputs()[i])->type;
219235
int out_batch = out_dims->data[0];
220236
int out_row = out_dims->data[1];
221237
int out_colum = out_dims->data[2];
222238
int out_channel = out_dims->data[3];
223239
std::cout << "YOLO Model Output Shape:[" << out_batch << "][" << out_row
224-
<< "][" << out_colum << "][" << out_channel << "]\n";
240+
<< "][" << out_colum << "][" << out_channel << "] "
241+
<< " type: [" << out_type << "]\n";
225242
}
226243

227244
std::vector<std::vector<float>> predV = tensorToVector2D();

yolov5.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@ struct Prediction {
4141
class YOLOV5 {
4242
public:
4343
// Take a model path as string
44-
void loadModel(const std::string path);
44+
virtual void loadModel(const std::string path);
4545
// Take an image and return a prediction
46-
void run(cv::Mat &image, Prediction &out_pred);
46+
virtual void run(cv::Mat &image, Prediction &out_pred);
4747

4848
void getLabelsName(std::string path, std::vector<std::string> &labelNames);
4949

@@ -79,9 +79,10 @@ class YOLOV5 {
7979

8080
// Input of the interpreter
8181
float_t *_input_f32;
82+
uint8_t *_input_u8;
8283

8384
template <typename T> void fill(T *in, cv::Mat &src);
84-
void preprocess(cv::Mat &image);
85+
virtual void preprocess(cv::Mat &image);
8586
virtual std::vector<std::vector<float>> tensorToVector2D();
8687
virtual void nonMaximumSupprition(std::vector<std::vector<float>> &predV,
8788
std::vector<cv::Rect> &boxes,

0 commit comments

Comments
 (0)