FIX: Fine tuning of ImageNet models, adding checkpoint scope parameter.

balancap · balancap · commit 6eb6e7425a33 · 2017-03-09T10:57:25.000Z
diff --git a/COMMANDS.md b/COMMANDS.md
@@ -83,16 +83,27 @@ python eval_ssd_network.py \
     --batch_size=1 \
     --max_num_batches=10
 
-
-
-python eval_image_classifier.py \
-    --alsologtostderr \
-    --checkpoint_path=${CHECKPOINT_PATH} \
+# =========================================================================== #
+# Fine tune VGG-based SSD network
+# =========================================================================== #
+DATASET_DIR=/media/paul/DataExt4/PascalVOC/dataset
+TRAIN_DIR=./logs/ssd_300_vgg_tmp
+CHECKPOINT_PATH=./checkpoints/vgg_16.ckpt
+python train_ssd_network.py \
+    --train_dir=${TRAIN_DIR} \
     --dataset_dir=${DATASET_DIR} \
-    --dataset_name=imagenet \
-    --dataset_split_name=validation \
-    --model_name=inception_v3
-
+    --dataset_name=pascalvoc_2007 \
+    --dataset_split_name=train \
+    --model_name=ssd_300_vgg \
+    --checkpoint_path=${CHECKPOINT_PATH} \
+    --checkpoint_model_scope=vgg_16 \
+    --checkpoint_exclude_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \
+    --save_summaries_secs=60 \
+    --save_interval_secs=600 \
+    --weight_decay=0.00001 \
+    --optimizer=rmsprop \
+    --learning_rate=0.0001 \
+    --batch_size=32
 
 
 python train_ssd_network.py     --train_dir=${TRAIN_DIR}     --dataset_dir=${DATASET_DIR}     --checkpoint_path=${CHECKPOINT_PATH}     --checkpoint_exclude_scopes=ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box     --dataset_name=kitti     --dataset_split_name=train     --model_name=ssd_300_vgg     --save_summaries_secs=60     --save_interval_secs=60     --weight_decay=0.0005     --optimizer=adam     --learning_rate=0.0001     --batch_size=8
diff --git a/README.md b/README.md
@@ -133,10 +133,11 @@ python train_ssd_network.py \
     --dataset_split_name=train \
     --model_name=ssd_300_vgg \
     --checkpoint_path=${CHECKPOINT_PATH} \
-    --checkpoint_exclude_scopes=ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \
+    --checkpoint_model_scope=vgg_16 \
+    --checkpoint_exclude_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box \
     --save_summaries_secs=60 \
     --save_interval_secs=600 \
-    --weight_decay=0.00001 \
+    --weight_decay=0.0005 \
     --optimizer=rmsprop \
     --learning_rate=0.0001 \
     --batch_size=32
diff --git a/inspect_checkpoint.py b/inspect_checkpoint.py
@@ -0,0 +1,131 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""A simple script for inspecting checkpoint files."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import sys
+
+import numpy as np
+
+from tensorflow.python import pywrap_tensorflow
+from tensorflow.python.platform import app
+from tensorflow.python.platform import flags
+
+FLAGS = None
+
+
+def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors):
+    """Dumps the content of a checkpoint file to the standard output.
+
+    Prints every tensor (name and value) when `all_tensors` is true; else the
+    reader's debug string when `tensor_name` is empty; else the name and the
+    value of `tensor_name`. Errors are printed instead of being raised.
+
+    Args:
+        file_name: Name of the checkpoint file.
+        tensor_name: Name of the tensor in the checkpoint file to print.
+        all_tensors: Boolean indicating whether to print all tensors.
+    """
+    try:
+        ckpt = pywrap_tensorflow.NewCheckpointReader(file_name)
+        if all_tensors:
+            names = ckpt.get_variable_to_shape_map()
+        elif tensor_name:
+            names = [tensor_name]
+        else:
+            names = []
+            print(ckpt.debug_string().decode("utf-8"))
+        for name in names:
+            print("tensor_name: ", name)
+            print(ckpt.get_tensor(name))
+    except Exception as err:  # pylint: disable=broad-except
+        message = str(err)
+        print(message)
+        if "corrupted compressed block contents" in message:
+            print("It's likely that your checkpoint file has been compressed "
+                  "with SNAPPY.")
+
+
+def parse_numpy_printoption(kv_str):
+    """Sets a single numpy printoption from a string of the form 'x=y'.
+
+    See documentation on numpy.set_printoptions() for details about what values
+    x and y can take. x can be any option listed there other than 'formatter'.
+
+    Args:
+        kv_str: A string of the form 'x=y', such as 'threshold=100000'.
+
+    Raises:
+        argparse.ArgumentTypeError: If the string couldn't be used to set any
+            numpy printoption.
+    """
+    k_v_str = kv_str.split("=", 1)
+    if len(k_v_str) != 2 or not k_v_str[0]:
+        raise argparse.ArgumentTypeError("'%s' is not in the form k=v." % kv_str)
+    k, v_str = k_v_str
+    printoptions = np.get_printoptions()
+    if k not in printoptions:
+        raise argparse.ArgumentTypeError("'%s' is not a valid printoption." % k)
+    v_type = type(printoptions[k])
+    if v_type is type(None):
+        raise argparse.ArgumentTypeError(
+            "Setting '%s' from the command line is not supported." % k)
+    try:
+        v = (v_type(v_str) if v_type is not bool
+             else flags.BooleanParser().Parse(v_str))
+    except ValueError as e:  # Python 3 exceptions have no `.message`.
+        raise argparse.ArgumentTypeError(str(e))
+    np.set_printoptions(**{k: v})
+
+
+def main(unused_argv):
+    """Dumps FLAGS.file_name, or prints the usage and exits with code 1."""
+    if not FLAGS.file_name:
+        print("Usage: inspect_checkpoint --file_name=checkpoint_file_name "
+              "[--tensor_name=tensor_to_print]")
+        sys.exit(1)
+    print_tensors_in_checkpoint_file(
+        FLAGS.file_name, FLAGS.tensor_name, FLAGS.all_tensors)
+
+
+if __name__ == "__main__":
+    # Script entry point: parse the flags, then run main() through app.run().
+    parser = argparse.ArgumentParser()
+    # Custom "bool" type: only the string "true" (any letter case) is True.
+    parser.register("type", "bool", lambda v: v.lower() == "true")
+    parser.add_argument(
+        "--file_name",
+        type=str,
+        default="",
+        help="Checkpoint filename. "
+             "Note, if using Checkpoint V2 format, file_name is the "
+             "shared prefix between all files in the checkpoint.")
+    parser.add_argument(
+        "--tensor_name", type=str, default="",
+        help="Name of the tensor to inspect")
+    # A bare `--all_tensors` (no value given) takes the `const` value, True.
+    parser.add_argument(
+        "--all_tensors", nargs="?", const=True, type="bool", default=False,
+        help="If True, print the values of all the tensors.")
+    # Every 'k=v' value is applied to numpy while the arguments are parsed.
+    parser.add_argument(
+        "--printoptions", nargs="*", type=parse_numpy_printoption,
+        help="Argument for numpy.set_printoptions(), in the form 'k=v'.")
+    # Arguments unknown to this parser are handed over to app.run().
+    FLAGS, unparsed = parser.parse_known_args()
+    app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tf_utils.py b/tf_utils.py
@@ -179,6 +179,10 @@ def add_variables_summaries(learning_rate):
     return summaries
 
 
+def update_model_scope(var, ckpt_scope, new_scope):
+    """Returns `var`'s op name with `new_scope` replaced by `ckpt_scope`."""
+    return var.op.name.replace(new_scope, ckpt_scope)
+
 def get_init_fn(flags):
     """Returns a function run by the chief worker to warm-start the training.
     Note that the init_fn is only run when initializing the model during the very
@@ -211,6 +215,13 @@ def get_init_fn(flags):
                 break
         if not excluded:
             variables_to_restore.append(var)
+    # Change model scope if necessary.
+    if flags.checkpoint_model_scope is not None:
+        variables_to_restore = \
+            {var.op.name.replace(flags.model_name,
+                                 flags.checkpoint_model_scope): var
+             for var in variables_to_restore}
+
 
     if tf.gfile.IsDirectory(flags.checkpoint_path):
         checkpoint_path = tf.train.latest_checkpoint(flags.checkpoint_path)
diff --git a/train_ssd_network.py b/train_ssd_network.py
@@ -141,7 +141,7 @@
     'evaluate the VGG and ResNet architectures which do not use a background '
     'class for the ImageNet dataset.')
 tf.app.flags.DEFINE_string(
-    'model_name', 'inception_v3', 'The name of the architecture to train.')
+    'model_name', 'ssd_300_vgg', 'The name of the architecture to train.')
 tf.app.flags.DEFINE_string(
     'preprocessing_name', None, 'The name of the preprocessing to use. If left '
     'as `None`, then the model_name flag is used.')
@@ -158,6 +158,9 @@
 tf.app.flags.DEFINE_string(
     'checkpoint_path', None,
     'The path to a checkpoint from which to fine-tune.')
+tf.app.flags.DEFINE_string(
+    'checkpoint_model_scope', None,
+    'Model scope in the checkpoint. None if the same as the trained model.')
 tf.app.flags.DEFINE_string(
     'checkpoint_exclude_scopes', None,
     'Comma-separated list of scopes of variables to exclude when restoring '