LPIPS loss

tristan-deep · tristan-deep · commit 22cf7d70df44 · 2025-02-02T20:54:16.000+01:00
diff --git a/keras/src/applications/lpips.py b/keras/src/applications/lpips.py
@@ -0,0 +1,188 @@
+from keras.src import backend
+from keras.src import layers
+from keras.src import ops
+from keras.src.api_export import keras_export
+from keras.src.applications import imagenet_utils
+from keras.src.applications import vgg16
+from keras.src.models import Functional
+from keras.src.utils import file_utils
+
+WEIGHTS_PATH = (  # downloaded by `LPIPS` when `weights="imagenet"`
+    "https://storage.googleapis.com/tensorflow/keras-applications/"
+    "lpips/lpips_vgg16_weights.h5"
+)
+
+
+def vgg_backbone(layer_names):
+    """Build the VGG16 feature extractor used by LPIPS.
+
+    Args:
+        layer_names: names of the VGG16 layers whose outputs are exposed.
+
+    Returns:
+        A `Functional` model from the VGG16 input to the outputs of the
+        selected layers, ordered as those layers appear in the VGG16 model.
+    """
+    base = vgg16.VGG16(include_top=False, weights=None)
+    selected = filter(lambda lyr: lyr.name in layer_names, base.layers)
+    taps = [lyr.output for lyr in selected]
+    return Functional(base.input, taps)
+
+
+def linear_model(channels):
+    """Build the linear head of LPIPS.
+
+    Each feature-difference input gets its own branch: dropout followed by
+    a bias-free 1x1 convolution that outputs a single channel.
+
+    Args:
+        channels: channel count of each feature-difference input, in order.
+
+    Returns:
+        A `Functional` model with one single-channel output per input.
+    """
+    branch_inputs = []
+    branch_outputs = []
+    for idx, n_channels in enumerate(channels):
+        diff_in = layers.Input(shape=(None, None, n_channels))
+        dropped = layers.Dropout(rate=0.5)(diff_in)
+        project = layers.Conv2D(
+            filters=1, kernel_size=1, use_bias=False, name=f"linear_{idx}"
+        )
+        branch_inputs.append(diff_in)
+        branch_outputs.append(project(dropped))
+    return Functional(
+        inputs=branch_inputs, outputs=branch_outputs, name="linear_model"
+    )
+
+
+@keras_export(["keras.applications.lpips.LPIPS", "keras.applications.LPIPS"])
+def LPIPS(
+    weights="imagenet",
+    input_tensor=None,
+    input_shape=None,
+    network_type="vgg",
+    name="lpips",
+):
+    """Instantiates the LPIPS model.
+
+    Reference:
+    - [The Unreasonable Effectiveness of Deep Features as a Perceptual Metric](
+    https://arxiv.org/abs/1801.03924)
+
+    Args:
+        weights: one of `None` (random initialization),
+            `"imagenet"` (pre-training on ImageNet),
+            or the path to the weights file to be loaded.
+        input_tensor: optional tensor, or pair of tensors (one per image),
+            to use as model input. A single tensor feeds both inputs.
+        input_shape: optional shape tuple, defaults to (None, None, 3)
+        network_type: backbone network type (currently only 'vgg' supported)
+        name: model name string
+
+    Returns:
+        A `Model` instance mapping two images to one distance per sample.
+
+    Raises:
+        ValueError: on invalid `network_type`, `weights` or `input_tensor`.
+    """
+    if network_type != "vgg":
+        raise ValueError(
+            "Currently only VGG backbone is supported. "
+            f"Got network_type={network_type}"
+        )
+
+    if not (weights in {"imagenet", None} or file_utils.exists(weights)):
+        raise ValueError(
+            "The `weights` argument should be either "
+            "`None` (random initialization), 'imagenet' "
+            "(pre-training on ImageNet), "
+            "or the path to the weights file to be loaded."
+        )
+
+    def as_input(tensor, input_name):
+        if tensor is None:
+            shape = input_shape or (None, None, 3)
+            return layers.Input(shape=shape, name=input_name)
+        if backend.is_keras_tensor(tensor):
+            return tensor
+        return layers.Input(tensor=tensor, shape=input_shape)
+
+    # A pair gives one tensor per image; a lone tensor feeds both inputs.
+    if isinstance(input_tensor, (list, tuple)):
+        if len(input_tensor) != 2:
+            raise ValueError(
+                "`input_tensor` should be a single tensor or a pair of "
+                f"tensors. Got {len(input_tensor)} tensors."
+            )
+        tensor1, tensor2 = input_tensor
+    else:
+        tensor1 = tensor2 = input_tensor
+    img_input1 = as_input(tensor1, "input1")
+    img_input2 = as_input(tensor2, "input2")
+
+    # VGG feature extraction
+    vgg_layers = [
+        "block1_conv2",
+        "block2_conv2",
+        "block3_conv3",
+        "block4_conv3",
+        "block5_conv3",
+    ]
+    vgg_net = vgg_backbone(vgg_layers)
+    feat1 = vgg_net(img_input1)
+    feat2 = vgg_net(img_input2)
+
+    def normalize(x):
+        # Unit-normalize over the last axis. The epsilon keeps all-zero
+        # vectors finite; a bare rsqrt would yield 0 * inf = NaN there.
+        norm = ops.sqrt(ops.sum(ops.square(x), axis=-1, keepdims=True))
+        return x / (norm + 1e-10)
+
+    norm1 = [layers.Lambda(normalize)(f) for f in feat1]
+    norm2 = [layers.Lambda(normalize)(f) for f in feat2]
+
+    # Squared difference of the normalized features, per feature level
+    diffs = [
+        layers.Lambda(lambda x: ops.square(x[0] - x[1]))([n1, n2])
+        for n1, n2 in zip(norm1, norm2)
+    ]
+
+    # Per-level 1x1 conv, mean over axes 1 and 2, then sum over levels
+    linear_net = linear_model([f.shape[-1] for f in feat1])
+    lin_out = linear_net(diffs)
+    spatial_average = [
+        layers.Lambda(lambda x: ops.mean(x, axis=[1, 2]))(t) for t in lin_out
+    ]
+
+    output = layers.Lambda(
+        lambda x: ops.squeeze(ops.sum(ops.stack(x, axis=0), axis=0), axis=-1)
+    )(spatial_average)
+
+    model = Functional([img_input1, img_input2], output, name=name)
+    # Load weights
+    if weights == "imagenet":
+        weights_path = file_utils.get_file(
+            "lpips_vgg16_weights.h5",
+            WEIGHTS_PATH,
+            cache_subdir="models",
+            file_hash=None,  # TODO: add hash
+        )
+        model.load_weights(weights_path)
+    elif weights is not None:
+        model.load_weights(weights)
+    return model
+
+
+@keras_export("keras.applications.lpips.preprocess_input")
+def preprocess_input(x, data_format=None):
+    # The docstring is attached below from the shared imagenet_utils template.
+    preprocess = imagenet_utils.preprocess_input
+    return preprocess(x, data_format=data_format, mode="torch")
+
+
+preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
+    mode="",
+    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TORCH,  # matches mode="torch"
+    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC,
+)
diff --git a/keras/src/losses/__init__.py b/keras/src/losses/__init__.py
@@ -20,6 +20,7 @@
 from keras.src.losses.losses import MeanAbsolutePercentageError
 from keras.src.losses.losses import MeanSquaredError
 from keras.src.losses.losses import MeanSquaredLogarithmicError
+from keras.src.losses.losses import PerceptualSimilarity
 from keras.src.losses.losses import Poisson
 from keras.src.losses.losses import SparseCategoricalCrossentropy
 from keras.src.losses.losses import SquaredHinge
@@ -76,6 +77,8 @@
     Tversky,
     # Similarity
     Circle,
+    # Feature Extraction
+    PerceptualSimilarity,
     # Sequence
     CTC,
     # Probabilistic
diff --git a/keras/src/losses/losses.py b/keras/src/losses/losses.py
@@ -1504,6 +1504,75 @@ def get_config(self):
         return config
 
 
+class PerceptualSimilarity(LossFunctionWrapper):
+    """Computes the Learned Perceptual Image Patch Similarity (LPIPS) loss.
+
+    Reference:
+    - [The Unreasonable Effectiveness of Deep Features as a Perceptual Metric](
+    https://arxiv.org/abs/1801.03924)
+
+    LPIPS measures perceptual similarity between images by comparing deep
+    features, a more perceptually-aligned metric than pixel-wise losses.
+
+    Args:
+        weights: one of `None` (random initialization),
+            `"imagenet"` (pre-training on ImageNet),
+            or the path to the weights file to be loaded.
+        network_type: backbone network type (currently only 'vgg' supported)
+        preprocess_inputs: Whether to preprocess inputs using the same
+            preprocessing function as the original LPIPS implementation.
+            If `True` (the default), the inputs are expected to be in the
+            range [0, 255] and are standardized to ImageNet mean and std.
+            If `False`, the inputs are expected to be normalized to the
+            range [-1, 1] and in RGB format.
+        reduction: Type of reduction to apply to the loss. In almost all cases
+            this should be `"sum_over_batch_size"`. Supported options are
+            `"sum"`, `"sum_over_batch_size"`, `"mean"`,
+            `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss,
+            `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the
+            sample size, and `"mean_with_sample_weight"` sums the loss and
+            divides by the sum of the sample weights. `"none"` and `None`
+            perform no aggregation. Defaults to `"sum_over_batch_size"`.
+        name: Optional name for the loss instance.
+        dtype: The dtype of the loss's computations. Defaults to `None`, which
+            means using `keras.backend.floatx()` (`"float32"` unless changed
+            via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is
+            provided, then the `compute_dtype` will be utilized.
+    """
+
+    def __init__(
+        self,
+        weights="imagenet",
+        network_type="vgg",
+        preprocess_inputs=True,
+        reduction="sum_over_batch_size",
+        name="lpips",
+        dtype=None,
+    ):
+        from keras.src.applications import lpips  # lazy import
+        lpips_model = lpips.LPIPS(weights=weights, network_type=network_type)
+
+        def _lpips_wrapper(y_true, y_pred):
+            if preprocess_inputs:
+                y_true = lpips.preprocess_input(y_true)
+                y_pred = lpips.preprocess_input(y_pred)
+            return lpips_model([y_true, y_pred])
+
+        super().__init__(
+            _lpips_wrapper, name=name, reduction=reduction, dtype=dtype
+        )
+        self.weights = weights
+        self.network_type = network_type
+        self.preprocess_inputs = preprocess_inputs
+
+    def get_config(self):
+        # Expose the constructor arguments alongside the base loss config.
+        config = Loss.get_config(self)
+        for key in ("weights", "network_type", "preprocess_inputs"):
+            config[key] = getattr(self, key)
+        return config
+
+
 def convert_binary_labels_to_hinge(y_true):
     """Converts binary labels into -1/1 for hinge loss/metric calculation."""
     are_zeros = ops.equal(y_true, 0)