
Commit 23376e6

Merge pull request #5603 from dreamdragon/master
Open sourcing mobile video object detection framework
2 parents b2522f9 + 77b2556 commit 23376e6

30 files changed: +4047 −1 lines

CODEOWNERS

Lines changed: 2 additions & 1 deletion
@@ -30,6 +30,7 @@
 /research/lfads/ @jazcollins @susillo
 /research/lm_1b/ @oriolvinyals @panyx0718
 /research/lm_commonsense/ @thtrieu
+/research/lstm_object_detection/ @dreamdragon
 /research/marco/ @vincentvanhoucke
 /research/maskgan/ @a-dai
 /research/morph_net/ @gariel-google
@@ -63,4 +64,4 @@
 /tutorials/embedding/ @zffchen78 @a-dai
 /tutorials/image/ @sherrym @shlens
 /tutorials/image/cifar10_estimator/ @tfboyd @protoget
-/tutorials/rnn/ @lukaszkaiser @ebrevdo
+/tutorials/rnn/ @lukaszkaiser @ebrevdo

research/lstm_object_detection/README

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
TensorFlow implementation of the mobile video object detection framework proposed in the following paper:
Mobile Video Object Detection with Temporally-Aware Feature Maps (CVPR 2018).

http://openaccess.thecvf.com/content_cvpr_2018/papers/Liu_Mobile_Video_Object_CVPR_2018_paper.pdf

@article{liu2017mobile,
  title={Mobile Video Object Detection with Temporally-Aware Feature Maps},
  author={Liu, Mason and Zhu, Menglong},
  journal={CVPR},
  year={2018}
}

If you have any questions regarding this codebase, please contact [email protected]
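For orientation, the paper's central mechanism is a convolutional LSTM inserted between the MobileNet feature extractor and the SSD detection heads, so that per-frame feature maps carry temporal state across an unrolled video clip. Below is a minimal illustrative sketch of that pattern; it is not code from this commit, and it uses tf.keras.layers.ConvLSTM2D as a stand-in for the codebase's own LSTM cell (which this excerpt does not show):

# Illustrative sketch only -- not code from this commit. It shows the
# paper's core pattern: a convolutional LSTM carries temporal state
# across video frames before the SSD detection heads see the features.
import tensorflow as tf  # assumes the TF 1.x-era API used in this commit

def lstm_refined_features(video, base_extractor, lstm_filters=256):
  """video: [batch, unroll_length, height, width, 3] float tensor."""
  # Run the (weight-shared) CNN feature extractor on every frame.
  frames = tf.unstack(video, axis=1)
  feature_maps = tf.stack([base_extractor(f) for f in frames], axis=1)
  # The ConvLSTM propagates state across the unrolled frames, so each
  # frame's feature map is conditioned on the frames that preceded it.
  conv_lstm = tf.keras.layers.ConvLSTM2D(
      filters=lstm_filters, kernel_size=3, padding='same',
      return_sequences=True)
  return conv_lstm(feature_maps)  # [batch, unroll_length, h, w, filters]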

research/lstm_object_detection/__init__.py

Whitespace-only changes.
research/lstm_object_detection/configs/lstm_ssd_mobilenet_v1_imagenet.config

Lines changed: 232 additions & 0 deletions
@@ -0,0 +1,232 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# For training on Imagenet Video with LSTM Mobilenet V1

[object_detection.protos.lstm_model] {
  train_unroll_length: 4
  eval_unroll_length: 4
}

model {
  ssd {
    num_classes: 30
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 5
        min_scale: 0.2
        max_scale: 0.95
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.3333
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 256
        width: 256
      }
    }
    box_predictor {
      convolutional_box_predictor {
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 3
        use_dropout: false
        dropout_keep_probability: 0.8
        kernel_size: 3
        box_code_size: 4
        apply_sigmoid_to_scores: false
        use_depthwise: true
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.03
              mean: 0.0
            }
          }
          batch_norm {
            train: true,
            scale: true,
            center: true,
            decay: 0.9997,
            epsilon: 0.001,
          }
        }
      }
    }
    feature_extractor {
      type: 'lstm_mobilenet_v1'
      min_depth: 16
      depth_multiplier: 1.0
      use_depthwise: true
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          train: true,
          scale: true,
          center: true,
          decay: 0.9997,
          epsilon: 0.001,
        }
      }
    }
    loss {
      classification_loss {
        weighted_sigmoid {
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.99
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 0
      }
      classification_weight: 1.0
      localization_weight: 4.0
    }
    normalize_loss_by_num_matches: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: -20.0
        iou_threshold: 0.5
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  batch_size: 8
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
  optimizer {
    use_moving_average: false
    rms_prop_optimizer: {
      learning_rate: {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.002
          decay_steps: 200000
          decay_factor: 0.95
        }
      }
      momentum_optimizer_value: 0.9
      decay: 0.9
      epsilon: 1.0
    }
  }

  from_detection_checkpoint: true
  gradient_clipping_by_norm: 10.0
  batch_queue_capacity: 12
  prefetch_queue_capacity: 4
  fine_tune_checkpoint: "/path/to/checkpoint/"
  fine_tune_checkpoint_type: "detection"
}


train_input_reader: {
  shuffle_buffer_size: 32
  queue_capacity: 12
  prefetch_size: 12
  min_after_dequeue: 4
  label_map_path: "path/to/label_map"
  external_input_reader {
    [lstm_object_detection.input_readers.GoogleInputReader.google_input_reader] {
      tf_record_video_input_reader: {
        input_path: "your/cns/path"
        data_type: TF_SEQUENCE_EXAMPLE
        video_length: 4
      }
    }
  }
}

eval_config: {
  metrics_set: "coco_evaluation_last_frame"
  use_moving_averages: true
  min_score_threshold: 0.5
  max_num_boxes_to_visualize: 300
  visualize_groundtruth_boxes: true
  groundtruth_box_visualization_color: "red"
}

eval_input_reader: {
  label_map_path: "path/to/label_map"
  external_input_reader {
    [lstm_object_detection.input_readers.GoogleInputReader.google_input_reader] {
      tf_record_video_input_reader: {
        input_path: "your/cns/path"
        data_type: TF_SEQUENCE_EXAMPLE
        video_length: 4
      }
    }
  }
  shuffle: true
  num_readers: 1
}
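Both input readers above consume TFRecord files of tf.train.SequenceExample protos (data_type: TF_SEQUENCE_EXAMPLE), with video_length consecutive frames per record. A rough sketch of serializing such a clip is below; the feature keys are illustrative guesses, since the decoder that defines the authoritative schema is elsewhere in the codebase and not part of this excerpt:

# Hedged sketch: serializing a short video clip as a tf.train.SequenceExample.
# The feature keys ('image/encoded', 'image/height', ...) are assumptions for
# illustration; consult the codebase's input reader for the real schema.
import tensorflow as tf

def make_video_sequence_example(encoded_frames, height, width):
  """encoded_frames: list of JPEG-encoded bytes, one entry per frame."""
  # Per-clip metadata lives in the context...
  context = tf.train.Features(feature={
      'image/height': tf.train.Feature(
          int64_list=tf.train.Int64List(value=[height])),
      'image/width': tf.train.Feature(
          int64_list=tf.train.Int64List(value=[width])),
  })
  # ...while per-frame data goes into a feature list, one Feature per frame.
  frame_features = [
      tf.train.Feature(bytes_list=tf.train.BytesList(value=[frame]))
      for frame in encoded_frames
  ]
  feature_lists = tf.train.FeatureLists(feature_list={
      'image/encoded': tf.train.FeatureList(feature=frame_features),
  })
  return tf.train.SequenceExample(
      context=context, feature_lists=feature_lists)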
research/lstm_object_detection/eval.py

Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

r"""Evaluation executable for detection models.

This executable is used to evaluate DetectionModels. Example usage:
  ./eval \
    --logtostderr \
    --checkpoint_dir=path/to/checkpoint_dir \
    --eval_dir=path/to/eval_dir \
    --pipeline_config_path=pipeline_config.pbtxt
"""

import functools
import os
import tensorflow as tf
from google.protobuf import text_format
from google3.pyglib import app
from google3.pyglib import flags
from lstm_object_detection import evaluator
from lstm_object_detection import model_builder
from lstm_object_detection import seq_dataset_builder
from lstm_object_detection.utils import config_util
from google3.third_party.tensorflow_models.object_detection.utils import label_map_util

tf.logging.set_verbosity(tf.logging.INFO)
flags = tf.app.flags
flags.DEFINE_boolean('eval_training_data', False,
                     'If training data should be evaluated for this job.')
flags.DEFINE_string('checkpoint_dir', '',
                    'Directory containing checkpoints to evaluate, typically '
                    'set to `train_dir` used in the training job.')
flags.DEFINE_string('eval_dir', '', 'Directory to write eval summaries to.')
flags.DEFINE_string('pipeline_config_path', '',
                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
                    'file. If provided, other configs are ignored')
flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
                     'evaluation. Overrides the `max_evals` parameter in the '
                     'provided config.')
FLAGS = flags.FLAGS


def main(unused_argv):
  assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'
  if FLAGS.pipeline_config_path:
    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
  else:
    configs = config_util.get_configs_from_multiple_files(
        model_config_path=FLAGS.model_config_path,
        eval_config_path=FLAGS.eval_config_path,
        eval_input_config_path=FLAGS.input_config_path)

  pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)
  config_text = text_format.MessageToString(pipeline_proto)
  tf.gfile.MakeDirs(FLAGS.eval_dir)
  with tf.gfile.Open(os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                     'wb') as f:
    f.write(config_text)

  model_config = configs['model']
  lstm_config = configs['lstm_model']
  eval_config = configs['eval_config']
  input_config = configs['eval_input_config']

  if FLAGS.eval_training_data:
    input_config.external_input_reader.CopyFrom(
        configs['train_input_config'].external_input_reader)
    lstm_config.eval_unroll_length = lstm_config.train_unroll_length

  model_fn = functools.partial(
      model_builder.build,
      model_config=model_config,
      lstm_config=lstm_config,
      is_training=False)

  def get_next(config, model_config, lstm_config, unroll_length):
    return seq_dataset_builder.build(config, model_config, lstm_config,
                                     unroll_length)

  create_input_dict_fn = functools.partial(get_next, input_config, model_config,
                                           lstm_config,
                                           lstm_config.eval_unroll_length)

  label_map = label_map_util.load_labelmap(input_config.label_map_path)
  max_num_classes = max([item.id for item in label_map.item])
  categories = label_map_util.convert_label_map_to_categories(
      label_map, max_num_classes)

  if FLAGS.run_once:
    eval_config.max_evals = 1

  evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
                     FLAGS.checkpoint_dir, FLAGS.eval_dir)

if __name__ == '__main__':
  app.run()
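A caveat for anyone running this script outside Google: app and flags are imported from google3.pyglib, and label_map_util from a google3.third_party path, none of which resolve in the open-source tree. A minimal sketch of the external-equivalent scaffolding, assuming the TF 1.x tf.app API and the public object_detection package (a substitution for illustration, not part of this commit):

# Illustrative substitution, not code from this commit: the Google-internal
# google3.pyglib app/flags modules swapped for their open-source TF 1.x
# equivalents. The body of main() above would run unchanged.
import tensorflow as tf
from object_detection.utils import label_map_util  # assumed external path

flags = tf.app.flags  # replaces `from google3.pyglib import flags`
FLAGS = flags.FLAGS

if __name__ == '__main__':
  tf.app.run()  # parses command-line flags, then calls main(argv)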
