
Commit 23376e6

Merge pull request #5603 from dreamdragon/master
Open sourcing mobile video object detection framework
2 parents b2522f9 + 77b2556 commit 23376e6

30 files changed: +4047 −1 lines

CODEOWNERS

Lines changed: 2 additions & 1 deletion
@@ -30,6 +30,7 @@
 /research/lfads/ @jazcollins @susillo
 /research/lm_1b/ @oriolvinyals @panyx0718
 /research/lm_commonsense/ @thtrieu
+/research/lstm_object_detection/ @dreamdragon
 /research/marco/ @vincentvanhoucke
 /research/maskgan/ @a-dai
 /research/morph_net/ @gariel-google
@@ -63,4 +64,4 @@
 /tutorials/embedding/ @zffchen78 @a-dai
 /tutorials/image/ @sherrym @shlens
 /tutorials/image/cifar10_estimator/ @tfboyd @protoget
-/tutorials/rnn/ @lukaszkaiser @ebrevdo
+/tutorials/rnn/ @lukaszkaiser @ebrevdo

research/lstm_object_detection/README

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
TensorFlow implementation of the mobile video object detection framework proposed in the following paper:
Mobile Video Object Detection with Temporally-Aware Feature Maps (CVPR 2018).

http://openaccess.thecvf.com/content_cvpr_2018/papers/Liu_Mobile_Video_Object_CVPR_2018_paper.pdf

@article{liu2017mobile,
  title={Mobile Video Object Detection with Temporally-Aware Feature Maps},
  author={Liu, Mason and Zhu, Menglong},
  journal={CVPR},
  year={2018}
}

If you have any questions regarding this codebase, please contact [email protected]
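For orientation, the paper's central mechanism is a convolutional LSTM inserted between the MobileNet feature extractor and the SSD detection heads, so that per-frame feature maps carry temporal state across an unrolled video clip. Below is a minimal illustrative sketch of that pattern; it is not code from this commit, and it uses tf.keras.layers.ConvLSTM2D as a stand-in for the codebase's own LSTM cell (which this excerpt does not show):

# Illustrative sketch only -- not code from this commit. It shows the
# paper's core pattern: a convolutional LSTM carries temporal state
# across video frames before the SSD detection heads see the features.
import tensorflow as tf  # assumes the TF 1.x-era API used in this commit

def lstm_refined_features(video, base_extractor, lstm_filters=256):
  """video: [batch, unroll_length, height, width, 3] float tensor."""
  # Run the (weight-shared) CNN feature extractor on every frame.
  frames = tf.unstack(video, axis=1)
  feature_maps = tf.stack([base_extractor(f) for f in frames], axis=1)
  # The ConvLSTM propagates state across the unrolled frames, so each
  # frame's feature map is conditioned on the frames that preceded it.
  conv_lstm = tf.keras.layers.ConvLSTM2D(
      filters=lstm_filters, kernel_size=3, padding='same',
      return_sequences=True)
  return conv_lstm(feature_maps)  # [batch, unroll_length, h, w, filters]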

research/lstm_object_detection/__init__.py

Whitespace-only changes.
research/lstm_object_detection/configs/lstm_ssd_mobilenet_v1_imagenet.config

Lines changed: 232 additions & 0 deletions
@@ -0,0 +1,232 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# For training on Imagenet Video with LSTM Mobilenet V1

[object_detection.protos.lstm_model] {
  train_unroll_length: 4
  eval_unroll_length: 4
}

model {
  ssd {
    num_classes: 30
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 5
        min_scale: 0.2
        max_scale: 0.95
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.3333
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 256
        width: 256
      }
    }
    box_predictor {
      convolutional_box_predictor {
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 3
        use_dropout: false
        dropout_keep_probability: 0.8
        kernel_size: 3
        box_code_size: 4
        apply_sigmoid_to_scores: false
        use_depthwise: true
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.03
              mean: 0.0
            }
          }
          batch_norm {
            train: true,
            scale: true,
            center: true,
            decay: 0.9997,
            epsilon: 0.001,
          }
        }
      }
    }
    feature_extractor {
      type: 'lstm_mobilenet_v1'
      min_depth: 16
      depth_multiplier: 1.0
      use_depthwise: true
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          train: true,
          scale: true,
          center: true,
          decay: 0.9997,
          epsilon: 0.001,
        }
      }
    }
    loss {
      classification_loss {
        weighted_sigmoid {
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.99
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 0
      }
      classification_weight: 1.0
      localization_weight: 4.0
    }
    normalize_loss_by_num_matches: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: -20.0
        iou_threshold: 0.5
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  batch_size: 8
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
  optimizer {
    use_moving_average: false
    rms_prop_optimizer: {
      learning_rate: {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.002
          decay_steps: 200000
          decay_factor: 0.95
        }
      }
      momentum_optimizer_value: 0.9
      decay: 0.9
      epsilon: 1.0
    }
  }

  from_detection_checkpoint: true
  gradient_clipping_by_norm: 10.0
  batch_queue_capacity: 12
  prefetch_queue_capacity: 4
  fine_tune_checkpoint: "/path/to/checkpoint/"
  fine_tune_checkpoint_type: "detection"
}


train_input_reader: {
  shuffle_buffer_size: 32
  queue_capacity: 12
  prefetch_size: 12
  min_after_dequeue: 4
  label_map_path: "path/to/label_map"
  external_input_reader {
    [lstm_object_detection.input_readers.GoogleInputReader.google_input_reader] {
      tf_record_video_input_reader: {
        input_path: "your/cns/path"
        data_type: TF_SEQUENCE_EXAMPLE
        video_length: 4
      }
    }
  }
}

eval_config: {
  metrics_set: "coco_evaluation_last_frame"
  use_moving_averages: true
  min_score_threshold: 0.5
  max_num_boxes_to_visualize: 300
  visualize_groundtruth_boxes: true
  groundtruth_box_visualization_color: "red"
}

eval_input_reader: {
  label_map_path: "path/to/label_map"
  external_input_reader {
    [lstm_object_detection.input_readers.GoogleInputReader.google_input_reader] {
      tf_record_video_input_reader: {
        input_path: "your/cns/path"
        data_type: TF_SEQUENCE_EXAMPLE
        video_length: 4
      }
    }
  }
  shuffle: true
  num_readers: 1
}
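Both input readers above consume TFRecord files of tf.train.SequenceExample protos (data_type: TF_SEQUENCE_EXAMPLE), with video_length consecutive frames per record. A rough sketch of serializing such a clip is below; the feature keys are illustrative guesses, since the decoder that defines the authoritative schema is elsewhere in the codebase and not part of this excerpt:

# Hedged sketch: serializing a short video clip as a tf.train.SequenceExample.
# The feature keys ('image/encoded', 'image/height', ...) are assumptions for
# illustration; consult the codebase's input reader for the real schema.
import tensorflow as tf

def make_video_sequence_example(encoded_frames, height, width):
  """encoded_frames: list of JPEG-encoded bytes, one entry per frame."""
  # Per-clip metadata lives in the context...
  context = tf.train.Features(feature={
      'image/height': tf.train.Feature(
          int64_list=tf.train.Int64List(value=[height])),
      'image/width': tf.train.Feature(
          int64_list=tf.train.Int64List(value=[width])),
  })
  # ...while per-frame data goes into a feature list, one Feature per frame.
  frame_features = [
      tf.train.Feature(bytes_list=tf.train.BytesList(value=[frame]))
      for frame in encoded_frames
  ]
  feature_lists = tf.train.FeatureLists(feature_list={
      'image/encoded': tf.train.FeatureList(feature=frame_features),
  })
  return tf.train.SequenceExample(
      context=context, feature_lists=feature_lists)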
research/lstm_object_detection/eval.py

Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

r"""Evaluation executable for detection models.

This executable is used to evaluate DetectionModels. Example usage:
  ./eval \
    --logtostderr \
    --checkpoint_dir=path/to/checkpoint_dir \
    --eval_dir=path/to/eval_dir \
    --pipeline_config_path=pipeline_config.pbtxt
"""

import functools
import os
import tensorflow as tf
from google.protobuf import text_format
from google3.pyglib import app
from google3.pyglib import flags
from lstm_object_detection import evaluator
from lstm_object_detection import model_builder
from lstm_object_detection import seq_dataset_builder
from lstm_object_detection.utils import config_util
from google3.third_party.tensorflow_models.object_detection.utils import label_map_util

tf.logging.set_verbosity(tf.logging.INFO)
flags = tf.app.flags
flags.DEFINE_boolean('eval_training_data', False,
                     'If training data should be evaluated for this job.')
flags.DEFINE_string('checkpoint_dir', '',
                    'Directory containing checkpoints to evaluate, typically '
                    'set to `train_dir` used in the training job.')
flags.DEFINE_string('eval_dir', '', 'Directory to write eval summaries to.')
flags.DEFINE_string('pipeline_config_path', '',
                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
                    'file. If provided, other configs are ignored')
flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
                     'evaluation. Overrides the `max_evals` parameter in the '
                     'provided config.')
FLAGS = flags.FLAGS


def main(unused_argv):
  assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'
  if FLAGS.pipeline_config_path:
    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
  else:
    configs = config_util.get_configs_from_multiple_files(
        model_config_path=FLAGS.model_config_path,
        eval_config_path=FLAGS.eval_config_path,
        eval_input_config_path=FLAGS.input_config_path)

  pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)
  config_text = text_format.MessageToString(pipeline_proto)
  tf.gfile.MakeDirs(FLAGS.eval_dir)
  with tf.gfile.Open(os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                     'wb') as f:
    f.write(config_text)

  model_config = configs['model']
  lstm_config = configs['lstm_model']
  eval_config = configs['eval_config']
  input_config = configs['eval_input_config']

  if FLAGS.eval_training_data:
    input_config.external_input_reader.CopyFrom(
        configs['train_input_config'].external_input_reader)
    lstm_config.eval_unroll_length = lstm_config.train_unroll_length

  model_fn = functools.partial(
      model_builder.build,
      model_config=model_config,
      lstm_config=lstm_config,
      is_training=False)

  def get_next(config, model_config, lstm_config, unroll_length):
    return seq_dataset_builder.build(config, model_config, lstm_config,
                                     unroll_length)

  create_input_dict_fn = functools.partial(get_next, input_config, model_config,
                                           lstm_config,
                                           lstm_config.eval_unroll_length)

  label_map = label_map_util.load_labelmap(input_config.label_map_path)
  max_num_classes = max([item.id for item in label_map.item])
  categories = label_map_util.convert_label_map_to_categories(
      label_map, max_num_classes)

  if FLAGS.run_once:
    eval_config.max_evals = 1

  evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
                     FLAGS.checkpoint_dir, FLAGS.eval_dir)

if __name__ == '__main__':
  app.run()
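A caveat for anyone running this script outside Google: app and flags are imported from google3.pyglib, and label_map_util from a google3.third_party path, none of which resolve in the open-source tree. A minimal sketch of the external-equivalent scaffolding, assuming the TF 1.x tf.app API and the public object_detection package (a substitution for illustration, not part of this commit):

# Illustrative substitution, not code from this commit: the Google-internal
# google3.pyglib app/flags modules swapped for their open-source TF 1.x
# equivalents. The body of main() above would run unchanged.
import tensorflow as tf
from object_detection.utils import label_map_util  # assumed external path

flags = tf.app.flags  # replaces `from google3.pyglib import flags`
FLAGS = flags.FLAGS

if __name__ == '__main__':
  tf.app.run()  # parses command-line flags, then calls main(argv)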
