DELF updates (#9095)

dan-anghel · andrefaraujo · web-flow · commit a003b7c18c87 · 2020-08-13T20:48:33.000-07:00
* Merged commit includes the following changes:
326369548  by Andre Araujo:

    Fix import issues.

--
326159826  by Andre Araujo:

    Changed the implementation of the cosine weights from Keras layer to tf.Variable to manually control for L2 normalization.

--
326139082  by Andre Araujo:

    Support local feature matching using ratio test.

    To allow for easily choosing which matching type to use, we rename a flag/argument and modify all related files to avoid breakages.

    Also include a small change when computing nearest neighbors for geometric matching, to parallelize computation, which saves a little bit of time during execution (argument "n_jobs=-1").

--
326119848  by Andre Araujo:

    Option to measure DELG latency taking binarization into account.

--
324316608  by Andre Araujo:

    DELG global features training.

--
323693131  by Andre Araujo:

    PY3 conversion for delf public lib.

--
321046157  by Andre Araujo:

    Purely Google refactor

--

PiperOrigin-RevId: 326369548

* Added export of delg_model module.

Co-authored-by: Andre Araujo <andrearaujo@google.com>
diff --git a/research/delf/delf/python/delg/extract_features.py b/research/delf/delf/python/delg/extract_features.py
@@ -1,3 +1,4 @@
+# Lint as: python3
 # Copyright 2020 The TensorFlow Authors All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/research/delf/delf/python/delg/measure_latency.py b/research/delf/delf/python/delg/measure_latency.py
@@ -42,6 +42,11 @@
                     'Path to list of images whose features will be extracted.')
 flags.DEFINE_integer('repeat_per_image', 10,
                      'Number of times to repeat extraction per image.')
+flags.DEFINE_boolean(
+    'binary_local_features', False,
+    'Whether to binarize local features after extraction, and take this extra '
+    'latency into account. This should only be used if use_local_features is '
+    'set in the input DelfConfig from `delf_config_path`.')
 
 # Pace to report extraction log.
 _STATUS_CHECK_ITERATIONS = 100
@@ -103,6 +108,12 @@ def main(argv):
     # Extract and save features.
     extracted_features = extractor_fn(im)
 
+    # Binarize local features, if desired (and if there are local features).
+    if (config.use_local_features and FLAGS.binary_local_features and
+        extracted_features['local_features']['attention'].size):
+      packed_descriptors = np.packbits(
+          extracted_features['local_features']['descriptors'] > 0, axis=1)
+
 
 if __name__ == '__main__':
   app.run(main)
diff --git a/research/delf/delf/python/delg/perform_retrieval.py b/research/delf/delf/python/delg/perform_retrieval.py
@@ -1,3 +1,4 @@
+# Lint as: python3
 # Copyright 2020 The TensorFlow Authors All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -44,15 +45,19 @@
     'If True, performs re-ranking using local feature-based geometric '
     'verification.')
 flags.DEFINE_float(
-    'local_feature_distance_threshold', 1.0,
+    'local_descriptor_matching_threshold', 1.0,
     'Optional, only used if `use_geometric_verification` is True. '
-    'Distance threshold below which a pair of local descriptors is considered '
+    'Threshold below which a pair of local descriptors is considered '
     'a potential match, and will be fed into RANSAC.')
 flags.DEFINE_float(
     'ransac_residual_threshold', 20.0,
     'Optional, only used if `use_geometric_verification` is True. '
     'Residual error threshold for considering matches as inliers, used in '
     'RANSAC algorithm.')
+flags.DEFINE_boolean(
+    'use_ratio_test', False,
+    'Optional, only used if `use_geometric_verification` is True. '
+    'Whether to use ratio test for local feature matching.')
 flags.DEFINE_string(
     'output_dir', '/tmp/retrieval',
     'Directory where retrieval output will be written to. A file containing '
@@ -152,8 +157,10 @@ def main(argv):
           junk_ids=set(medium_ground_truth[i]['junk']),
           local_feature_extension=_DELG_LOCAL_EXTENSION,
           ransac_seed=0,
-          feature_distance_threshold=FLAGS.local_feature_distance_threshold,
-          ransac_residual_threshold=FLAGS.ransac_residual_threshold)
+          descriptor_matching_threshold=FLAGS
+          .local_descriptor_matching_threshold,
+          ransac_residual_threshold=FLAGS.ransac_residual_threshold,
+          use_ratio_test=FLAGS.use_ratio_test)
       hard_ranks_after_gv[i] = image_reranking.RerankByGeometricVerification(
           input_ranks=ranks_before_gv[i],
           initial_scores=similarities,
@@ -164,8 +171,10 @@ def main(argv):
           junk_ids=set(hard_ground_truth[i]['junk']),
           local_feature_extension=_DELG_LOCAL_EXTENSION,
           ransac_seed=0,
-          feature_distance_threshold=FLAGS.local_feature_distance_threshold,
-          ransac_residual_threshold=FLAGS.ransac_residual_threshold)
+          descriptor_matching_threshold=FLAGS
+          .local_descriptor_matching_threshold,
+          ransac_residual_threshold=FLAGS.ransac_residual_threshold,
+          use_ratio_test=FLAGS.use_ratio_test)
 
     elapsed = (time.time() - start)
     print('done! Retrieval for query %d took %f seconds' % (i, elapsed))
diff --git a/research/delf/delf/python/detect_to_retrieve/cluster_delf_features.py b/research/delf/delf/python/detect_to_retrieve/cluster_delf_features.py
@@ -1,3 +1,4 @@
+# Lint as: python3
 # Copyright 2019 The TensorFlow Authors All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/research/delf/delf/python/detect_to_retrieve/extract_query_features.py b/research/delf/delf/python/detect_to_retrieve/extract_query_features.py
@@ -1,3 +1,4 @@
+# Lint as: python3
 # Copyright 2017 The TensorFlow Authors All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/research/delf/delf/python/detect_to_retrieve/image_reranking.py b/research/delf/delf/python/detect_to_retrieve/image_reranking.py
@@ -47,12 +47,13 @@ def MatchFeatures(query_locations,
                   index_image_locations,
                   index_image_descriptors,
                   ransac_seed=None,
-                  feature_distance_threshold=0.9,
+                  descriptor_matching_threshold=0.9,
                   ransac_residual_threshold=10.0,
                   query_im_array=None,
                   index_im_array=None,
                   query_im_scale_factors=None,
-                  index_im_scale_factors=None):
+                  index_im_scale_factors=None,
+                  use_ratio_test=False):
   """Matches local features using geometric verification.
 
   First, finds putative local feature matches by matching `query_descriptors`
@@ -70,8 +71,10 @@ def MatchFeatures(query_locations,
     index_image_descriptors: Descriptors of local features for index image.
       NumPy array of shape [#index_image_features, depth].
     ransac_seed: Seed used by RANSAC. If None (default), no seed is provided.
-    feature_distance_threshold: Distance threshold below which a pair of
-      features is considered a potential match, and will be fed into RANSAC.
+    descriptor_matching_threshold: Threshold below which a pair of local
+      descriptors is considered a potential match, and will be fed into RANSAC.
+      If use_ratio_test==False, this is a simple distance threshold. If
+      use_ratio_test==True, this is Lowe's ratio test threshold.
     ransac_residual_threshold: Residual error threshold for considering matches
       as inliers, used in RANSAC algorithm.
     query_im_array: Optional. If not None, contains a NumPy array with the query
@@ -83,6 +86,8 @@ def MatchFeatures(query_locations,
       (ie, feature locations are not scaled).
     index_im_scale_factors: Optional. Same as `query_im_scale_factors`, but for
       index image.
+    use_ratio_test: If True, descriptor matching is performed via ratio test,
+      instead of distance-based threshold.
 
   Returns:
     score: Number of inliers of match. If no match is found, returns 0.
@@ -105,22 +110,38 @@ def MatchFeatures(query_locations,
         'Local feature dimensionality is not consistent for query and index '
         'images.')
 
-  # Find nearest-neighbor matches using a KD tree.
+  # Construct KD-tree used to find nearest neighbors.
   index_image_tree = spatial.cKDTree(index_image_descriptors)
-  _, indices = index_image_tree.query(
-      query_descriptors, distance_upper_bound=feature_distance_threshold)
-
-  # Select feature locations for putative matches.
-  query_locations_to_use = np.array([
-      query_locations[i,]
-      for i in range(num_features_query)
-      if indices[i] != num_features_index_image
-  ])
-  index_image_locations_to_use = np.array([
-      index_image_locations[indices[i],]
-      for i in range(num_features_query)
-      if indices[i] != num_features_index_image
-  ])
+  if use_ratio_test:
+    distances, indices = index_image_tree.query(
+        query_descriptors, k=2, n_jobs=-1)
+    query_locations_to_use = np.array([
+        query_locations[i,]
+        for i in range(num_features_query)
+        if distances[i][0] < descriptor_matching_threshold * distances[i][1]
+    ])
+    index_image_locations_to_use = np.array([
+        index_image_locations[indices[i][0],]
+        for i in range(num_features_query)
+        if distances[i][0] < descriptor_matching_threshold * distances[i][1]
+    ])
+  else:
+    _, indices = index_image_tree.query(
+        query_descriptors,
+        distance_upper_bound=descriptor_matching_threshold,
+        n_jobs=-1)
+
+    # Select feature locations for putative matches.
+    query_locations_to_use = np.array([
+        query_locations[i,]
+        for i in range(num_features_query)
+        if indices[i] != num_features_index_image
+    ])
+    index_image_locations_to_use = np.array([
+        index_image_locations[indices[i],]
+        for i in range(num_features_query)
+        if indices[i] != num_features_index_image
+    ])
 
   # If there are not enough putative matches, early return 0.
   if query_locations_to_use.shape[0] <= _MIN_RANSAC_SAMPLES:
@@ -175,8 +196,9 @@ def RerankByGeometricVerification(input_ranks,
                                   junk_ids,
                                   local_feature_extension=_DELF_EXTENSION,
                                   ransac_seed=None,
-                                  feature_distance_threshold=0.9,
-                                  ransac_residual_threshold=10.0):
+                                  descriptor_matching_threshold=0.9,
+                                  ransac_residual_threshold=10.0,
+                                  use_ratio_test=False):
   """Re-ranks retrieval results using geometric verification.
 
   Args:
@@ -195,10 +217,11 @@ def RerankByGeometricVerification(input_ranks,
     local_feature_extension: String, extension to use for loading local feature
       files.
     ransac_seed: Seed used by RANSAC. If None (default), no seed is provided.
-    feature_distance_threshold: Distance threshold below which a pair of local
-      features is considered a potential match, and will be fed into RANSAC.
+    descriptor_matching_threshold: Threshold used for local descriptor matching.
     ransac_residual_threshold: Residual error threshold for considering matches
       as inliers, used in RANSAC algorithm.
+    use_ratio_test: If True, descriptor matching is performed via ratio test,
+      instead of distance-based threshold.
 
   Returns:
     output_ranks: 1D NumPy array with index image indices, sorted from the most
@@ -258,8 +281,9 @@ def RerankByGeometricVerification(input_ranks,
         index_image_locations,
         index_image_descriptors,
         ransac_seed=ransac_seed,
-        feature_distance_threshold=feature_distance_threshold,
-        ransac_residual_threshold=ransac_residual_threshold)
+        descriptor_matching_threshold=descriptor_matching_threshold,
+        ransac_residual_threshold=ransac_residual_threshold,
+        use_ratio_test=use_ratio_test)
 
   # Sort based on (inliers_score, initial_score).
   def _InliersInitialScoresSorting(k):
diff --git a/research/delf/delf/python/detect_to_retrieve/perform_retrieval.py b/research/delf/delf/python/detect_to_retrieve/perform_retrieval.py
@@ -1,3 +1,4 @@
+# Lint as: python3
 # Copyright 2019 The TensorFlow Authors All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/research/delf/delf/python/examples/match_images.py b/research/delf/delf/python/examples/match_images.py
@@ -1,3 +1,4 @@
+# Lint as: python3
 # Copyright 2017 The TensorFlow Authors All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/research/delf/delf/python/training/model/__init__.py b/research/delf/delf/python/training/model/__init__.py
@@ -19,6 +19,7 @@
 
 # pylint: disable=unused-import
 from delf.python.training.model import delf_model
+from delf.python.training.model import delg_model
 from delf.python.training.model import export_model_utils
 from delf.python.training.model import resnet50
 # pylint: enable=unused-import
diff --git a/research/delf/delf/python/training/model/delf_model.py b/research/delf/delf/python/training/model/delf_model.py
@@ -89,12 +89,20 @@ class Delf(tf.keras.Model):
   from conv_4 are used to compute an attention map of the same resolution.
   """
 
-  def __init__(self, block3_strides=True, name='DELF'):
+  def __init__(self, block3_strides=True, name='DELF', pooling='avg',
+               gem_power=3.0, embedding_layer=False, embedding_layer_dim=2048):
     """Initialization of DELF model.
 
     Args:
       block3_strides: bool, whether to add strides to the output of block3.
       name: str, name to identify model.
+      pooling: str, pooling mode for global feature extraction; possible values
+        are 'None', 'avg', 'max', 'gem'.
+      gem_power: float, GeM power for GeM pooling. Only used if
+        pooling == 'gem'.
+      embedding_layer: bool, whether to create an embedding layer (FC whitening
+        layer).
+      embedding_layer_dim: int, size of the embedding layer.
     """
     super(Delf, self).__init__(name=name)
 
@@ -103,31 +111,38 @@ def __init__(self, block3_strides=True, name='DELF'):
         'channels_last',
         name='backbone',
         include_top=False,
-        pooling='avg',
+        pooling=pooling,
         block3_strides=block3_strides,
-        average_pooling=False)
+        average_pooling=False,
+        gem_power=gem_power,
+        embedding_layer=embedding_layer,
+        embedding_layer_dim=embedding_layer_dim)
 
     # Attention model.
     self.attention = AttentionModel(name='attention')
 
-  # Define classifiers for training backbone and attention models.
-  def init_classifiers(self, num_classes):
+  def init_classifiers(self, num_classes, desc_classification=None):
+    """Define classifiers for training backbone and attention models."""
     self.num_classes = num_classes
-    self.desc_classification = layers.Dense(
-        num_classes, activation=None, kernel_regularizer=None, name='desc_fc')
-
+    if desc_classification is None:
+      self.desc_classification = layers.Dense(num_classes,
+                                              activation=None,
+                                              kernel_regularizer=None,
+                                              name='desc_fc')
+    else:
+      self.desc_classification = desc_classification
     self.attn_classification = layers.Dense(
         num_classes, activation=None, kernel_regularizer=None, name='att_fc')
 
-  # Weights to optimize for descriptor fine tuning.
   @property
   def desc_trainable_weights(self):
+    """Weights to optimize for descriptor fine tuning."""
     return (self.backbone.trainable_weights +
             self.desc_classification.trainable_weights)
 
-  # Weights to optimize for attention model training.
   @property
   def attn_trainable_weights(self):
+    """Weights to optimize for attention model training."""
     return (self.attention.trainable_weights +
             self.attn_classification.trainable_weights)
 
diff --git a/research/delf/delf/python/training/model/delg_model.py b/research/delf/delf/python/training/model/delg_model.py
diff --git a/research/delf/delf/python/training/model/resnet50.py b/research/delf/delf/python/training/model/resnet50.py
diff --git a/research/delf/delf/python/training/model/resnet50_test.py b/research/delf/delf/python/training/model/resnet50_test.py
diff --git a/research/delf/delf/python/training/train.py b/research/delf/delf/python/training/train.py

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+# Lint as: python3`
`1`	`2`	`# Copyright 2020 The TensorFlow Authors All Rights Reserved.`
`2`	`3`	`#`
`3`	`4`	`# Licensed under the Apache License, Version 2.0 (the "License");`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+# Lint as: python3`
`1`	`2`	`# Copyright 2019 The TensorFlow Authors All Rights Reserved.`
`2`	`3`	`#`
`3`	`4`	`# Licensed under the Apache License, Version 2.0 (the "License");`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+# Lint as: python3`
`1`	`2`	`# Copyright 2017 The TensorFlow Authors All Rights Reserved.`
`2`	`3`	`#`
`3`	`4`	`# Licensed under the Apache License, Version 2.0 (the "License");`