pytorch
diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
Lines changed: 3 additions & 0 deletions
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
Lines changed: 40 additions & 30 deletions
diff --git a/torchvision/transforms/v2/__init__.py b/torchvision/transforms/v2/__init__.py
Lines changed: 2 additions & 1 deletion
diff --git a/torchvision/transforms/v2/_auto_augment.py b/torchvision/transforms/v2/_auto_augment.py
Lines changed: 35 additions & 20 deletions
@@ -342,6 +342,9 @@ Functionals
     v2.functional.perspective
     v2.functional.elastic
 
+.. autoclass:: torchvision.transforms.v2.InterpolationMode
+    :members:
+
 Color
 ^^^^^
 
 
@@ -495,6 +495,7 @@ def adapt_fill(value, *, dtype):
     transforms.InterpolationMode.BICUBIC,
     transforms.InterpolationMode.LANCZOS,
 ]
+INTERPOLATION_MODES_STR = ["nearest", "nearest-exact", "bilinear", "bicubic", "lanczos"]
 
 
 def reference_affine_bounding_boxes_helper(bounding_boxes, *, affine_matrix, new_canvas_size=None, clamp=True):
@@ -885,7 +886,7 @@ def _check_output_size(self, input, output, *, size, max_size):
     @pytest.mark.parametrize("size", OUTPUT_SIZES)
     # `InterpolationMode.NEAREST` is modeled after the buggy `INTER_NEAREST` interpolation of CV2.
     # The PIL equivalent of `InterpolationMode.NEAREST` is `InterpolationMode.NEAREST_EXACT`
-    @pytest.mark.parametrize("interpolation", set(INTERPOLATION_MODES) - {transforms.InterpolationMode.NEAREST})
+    @pytest.mark.parametrize("interpolation", [mode for mode in INTERPOLATION_MODES_STR if mode != "nearest"])
     @pytest.mark.parametrize("use_max_size", [True, False])
     @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)])
     def test_image_correctness(self, size, interpolation, use_max_size, fn):
@@ -898,7 +899,7 @@ def test_image_correctness(self, size, interpolation, use_max_size, fn):
         expected = F.to_image(F.resize(F.to_pil_image(image), size=size, interpolation=interpolation, **max_size_kwarg))
 
         self._check_output_size(image, actual, size=size, **max_size_kwarg)
-        atol = 2 if interpolation is transforms.InterpolationMode.LANCZOS else 1
+        atol = 2 if interpolation == "lanczos" else 1
         torch.testing.assert_close(actual, expected, atol=atol, rtol=0)
 
     def _reference_resize_bounding_boxes(self, bounding_boxes, format, *, size, max_size=None):
@@ -1096,6 +1097,26 @@ def test_interpolation_int(self, interpolation, make_input):
 
         assert_equal(actual, expected)
 
+    @pytest.mark.parametrize(
+        "interpolation_str, interpolation_enum",
+        [
+            ("nearest", transforms.InterpolationMode.NEAREST),
+            ("nearest-exact", transforms.InterpolationMode.NEAREST_EXACT),
+            ("bilinear", transforms.InterpolationMode.BILINEAR),
+            ("bicubic", transforms.InterpolationMode.BICUBIC),
+            ("lanczos", transforms.InterpolationMode.LANCZOS),
+        ],
+    )
+    @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)])
+    @pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_video])
+    def test_interpolation_str(self, interpolation_str, interpolation_enum, fn, make_input):
+        input = make_input(self.INPUT_SIZE)
+        # The string spelling must produce exactly the same output as its enum counterpart.
+        expected = fn(input, size=self.OUTPUT_SIZES[0], interpolation=interpolation_enum, antialias=True)
+        actual = fn(input, size=self.OUTPUT_SIZES[0], interpolation=interpolation_str, antialias=True)
+
+        assert_equal(actual, expected)
+
     def test_transform_unknown_size_error(self):
         with pytest.raises(ValueError, match="size can be an integer, a sequence of one or two integers, or None"):
             transforms.Resize(size=object())
@@ -1554,9 +1575,7 @@ def test_transform(self, make_input, device):
     @pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"])
     @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"])
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
-    @pytest.mark.parametrize(
-        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
-    )
+    @pytest.mark.parametrize("interpolation", ["nearest", "bilinear"])
     @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
     def test_functional_image_correctness(self, angle, translate, scale, shear, center, interpolation, fill):
         image = make_image(dtype=torch.uint8, device="cpu")
@@ -1587,12 +1606,10 @@ def test_functional_image_correctness(self, angle, translate, scale, shear, cent
         )
 
         mae = (actual.float() - expected.float()).abs().mean()
-        assert mae < 2 if interpolation is transforms.InterpolationMode.NEAREST else 8
+        assert mae < (2 if interpolation == "nearest" else 8)  # conditional selects the bound, not the assert
 
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
-    @pytest.mark.parametrize(
-        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
-    )
+    @pytest.mark.parametrize("interpolation", ["nearest", "bilinear"])
     @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
     @pytest.mark.parametrize("seed", list(range(5)))
     def test_transform_image_correctness(self, center, interpolation, fill, seed):
@@ -1611,7 +1628,7 @@ def test_transform_image_correctness(self, center, interpolation, fill, seed):
         expected = F.to_image(transform(F.to_pil_image(image)))
 
         mae = (actual.float() - expected.float()).abs().mean()
-        assert mae < 2 if interpolation is transforms.InterpolationMode.NEAREST else 8
+        assert mae < (2 if interpolation == "nearest" else 8)  # conditional selects the bound, not the assert
 
     def _compute_affine_matrix(self, *, angle, translate, scale, shear, center):
         rot = math.radians(angle)
@@ -2142,9 +2159,7 @@ def test_transform(self, make_input, device):
 
     @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
-    @pytest.mark.parametrize(
-        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
-    )
+    @pytest.mark.parametrize("interpolation", ["nearest", "bilinear"])
     @pytest.mark.parametrize("expand", [False, True])
     @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
     def test_functional_image_correctness(self, angle, center, interpolation, expand, fill):
@@ -2160,12 +2175,10 @@ def test_functional_image_correctness(self, angle, center, interpolation, expand
         )
 
         mae = (actual.float() - expected.float()).abs().mean()
-        assert mae < 1 if interpolation is transforms.InterpolationMode.NEAREST else 6
+        assert mae < (1 if interpolation == "nearest" else 6)  # conditional selects the bound, not the assert
 
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
-    @pytest.mark.parametrize(
-        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
-    )
+    @pytest.mark.parametrize("interpolation", ["nearest", "bilinear"])
     @pytest.mark.parametrize("expand", [False, True])
     @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
     @pytest.mark.parametrize("seed", list(range(5)))
@@ -2189,7 +2202,7 @@ def test_transform_image_correctness(self, center, interpolation, expand, fill,
         expected = F.to_image(transform(F.to_pil_image(image)))
 
         mae = (actual.float() - expected.float()).abs().mean()
-        assert mae < 1 if interpolation is transforms.InterpolationMode.NEAREST else 6
+        assert mae < (1 if interpolation == "nearest" else 6)  # conditional selects the bound, not the assert
 
     def _compute_output_canvas_size(self, *, expand, canvas_size, affine_matrix):
         if not expand:
@@ -4150,6 +4163,9 @@ class TestAutoAugmentTransforms:
     # rotate, are tested in their respective classes. The rest of the tests here are mostly smoke tests.
 
     def _reference_shear_translate(self, image, *, transform_id, magnitude, interpolation, fill):
+        if isinstance(interpolation, str):
+            interpolation = transforms.InterpolationMode(interpolation)
+
         if isinstance(image, PIL.Image.Image):
             input = image
         else:
@@ -4173,9 +4189,7 @@ def _reference_shear_translate(self, image, *, transform_id, magnitude, interpol
 
     @pytest.mark.parametrize("transform_id", ["ShearX", "ShearY", "TranslateX", "TranslateY"])
     @pytest.mark.parametrize("magnitude", [0.3, -0.2, 0.0])
-    @pytest.mark.parametrize(
-        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
-    )
+    @pytest.mark.parametrize("interpolation", ["nearest", "bilinear"])
     @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
     @pytest.mark.parametrize("input_type", ["Tensor", "PIL"])
     def test_correctness_shear_translate(self, transform_id, magnitude, interpolation, fill, input_type):
@@ -4208,7 +4222,7 @@ def test_correctness_shear_translate(self, transform_id, magnitude, interpolatio
 
         if "Shear" in transform_id and input_type == "Tensor":
             mae = (actual.float() - expected.float()).abs().mean()
-            assert mae < (12 if interpolation is transforms.InterpolationMode.NEAREST else 5)
+            assert mae < (12 if interpolation == "nearest" else 5)
         else:
             assert_close(actual, expected, rtol=0, atol=1)
 
@@ -4537,7 +4551,7 @@ def test_transform(self, param, value, make_input):
 
     # `InterpolationMode.NEAREST` is modeled after the buggy `INTER_NEAREST` interpolation of CV2.
     # The PIL equivalent of `InterpolationMode.NEAREST` is `InterpolationMode.NEAREST_EXACT`
-    @pytest.mark.parametrize("interpolation", set(INTERPOLATION_MODES) - {transforms.InterpolationMode.NEAREST})
+    @pytest.mark.parametrize("interpolation", [mode for mode in INTERPOLATION_MODES_STR if mode != "nearest"])
     def test_functional_image_correctness(self, interpolation):
         image = make_image(self.INPUT_SIZE, dtype=torch.uint8)
 
@@ -4550,9 +4564,7 @@ def test_functional_image_correctness(self, interpolation):
             )
         )
 
-        torch.testing.assert_close(
-            actual, expected, atol=2 if interpolation is transforms.InterpolationMode.LANCZOS else 1, rtol=0
-        )
+        torch.testing.assert_close(actual, expected, atol=2 if interpolation == "lanczos" else 1, rtol=0)
 
     def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, width, size):
         new_height, new_width = size
@@ -5257,9 +5269,7 @@ def test_transform_error(self, distortion_scale):
             transforms.RandomPerspective(distortion_scale=distortion_scale)
 
     @pytest.mark.parametrize("coefficients", COEFFICIENTS)
-    @pytest.mark.parametrize(
-        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
-    )
+    @pytest.mark.parametrize("interpolation", ["nearest", "bilinear"])
     @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
     def test_image_functional_correctness(self, coefficients, interpolation, fill):
         image = make_image(dtype=torch.uint8, device="cpu")
@@ -5278,7 +5288,7 @@ def test_image_functional_correctness(self, coefficients, interpolation, fill):
             )
         )
 
-        if interpolation is transforms.InterpolationMode.BILINEAR:
+        if interpolation == "bilinear":
             abs_diff = (actual.float() - expected.float()).abs()
             assert (abs_diff > 1).float().mean() < 7e-2
             mae = abs_diff.mean()
 
@@ -1,4 +1,5 @@
-from torchvision.transforms import AutoAugmentPolicy, InterpolationMode  # usort: skip
+from torchvision.transforms import AutoAugmentPolicy  # usort: skip
+from torchvision.transforms.functional import InterpolationMode  # usort: skip
 
 from . import functional  # usort: skip
 
 
@@ -8,7 +8,6 @@
 from torchvision import transforms as _transforms, tv_tensors
 from torchvision.transforms import _functional_tensor as _FT
 from torchvision.transforms.v2 import AutoAugmentPolicy, functional as F, InterpolationMode, Transform
-from torchvision.transforms.v2.functional._geometry import _check_interpolation
 from torchvision.transforms.v2.functional._meta import get_size
 from torchvision.transforms.v2.functional._utils import _FillType, _FillTypeJIT
 
@@ -22,11 +21,11 @@ class _AutoAugmentBase(Transform):
     def __init__(
         self,
         *,
-        interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST,
+        interpolation: Union[str, InterpolationMode, int] = "nearest",
         fill: Union[_FillType, dict[Union[type, str], _FillType]] = None,
     ) -> None:
         super().__init__()
-        self.interpolation = _check_interpolation(interpolation)
+        self.interpolation = interpolation
         self.fill = fill
         self._fill = _setup_fill_arg(fill)
 
@@ -91,7 +90,7 @@ def _apply_image_or_video_transform(
         image: ImageOrVideo,
         transform_id: str,
         magnitude: float,
-        interpolation: Union[InterpolationMode, int],
+        interpolation: Union[str, InterpolationMode, int],
         fill: dict[Union[type, str], _FillTypeJIT],
     ) -> ImageOrVideo:
         # Note: this cast is wrong and is only here to make mypy happy (it disagrees with torchscript)
@@ -188,9 +187,13 @@ class AutoAugment(_AutoAugmentBase):
     Args:
         policy (AutoAugmentPolicy, optional): Desired policy enum defined by
             :class:`torchvision.transforms.autoaugment.AutoAugmentPolicy`. Default is ``AutoAugmentPolicy.IMAGENET``.
-        interpolation (InterpolationMode, optional): Desired interpolation enum defined by
-            :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
-            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
+        interpolation (str or InterpolationMode, optional): Desired interpolation mode, given as a string
+            or as an enum defined by :class:`torchvision.transforms.v2.InterpolationMode`.
+            Default is ``"nearest"``. Accepted string values are ``"nearest"``, ``"nearest-exact"``,
+            ``"bilinear"``, ``"bicubic"``, ``"box"``, ``"hamming"``, and ``"lanczos"``.
+            ``"box"``, ``"hamming"``, and ``"lanczos"`` are only supported for PIL images.
+            The corresponding ``InterpolationMode`` enum values and Pillow integer
+            constants, e.g. ``PIL.Image.BILINEAR``, are accepted as well.
         fill (sequence or number, optional): Pixel fill value for the area outside the transformed
             image. If given a number, the value is used for all bands respectively.
     """
@@ -226,7 +229,7 @@ class AutoAugment(_AutoAugmentBase):
     def __init__(
         self,
         policy: AutoAugmentPolicy = AutoAugmentPolicy.IMAGENET,
-        interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST,
+        interpolation: Union[str, InterpolationMode, int] = "nearest",
         fill: Union[_FillType, dict[Union[type, str], _FillType]] = None,
     ) -> None:
         super().__init__(interpolation=interpolation, fill=fill)
@@ -366,9 +369,13 @@ class RandAugment(_AutoAugmentBase):
             must be non-negative integer. Default: 2.
         magnitude (int, optional): Magnitude for all the transformations.
         num_magnitude_bins (int, optional): The number of different magnitude values.
-        interpolation (InterpolationMode, optional): Desired interpolation enum defined by
-            :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
-            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
+        interpolation (str or InterpolationMode, optional): Desired interpolation mode, given as a string
+            or as an enum defined by :class:`torchvision.transforms.v2.InterpolationMode`.
+            Default is ``"nearest"``. Accepted string values are ``"nearest"``, ``"nearest-exact"``,
+            ``"bilinear"``, ``"bicubic"``, ``"box"``, ``"hamming"``, and ``"lanczos"``.
+            ``"box"``, ``"hamming"``, and ``"lanczos"`` are only supported for PIL images.
+            The corresponding ``InterpolationMode`` enum values and Pillow integer
+            constants, e.g. ``PIL.Image.BILINEAR``, are accepted as well.
         fill (sequence or number, optional): Pixel fill value for the area outside the transformed
             image. If given a number, the value is used for all bands respectively.
     """
@@ -405,7 +412,7 @@ def __init__(
         num_ops: int = 2,
         magnitude: int = 9,
         num_magnitude_bins: int = 31,
-        interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST,
+        interpolation: Union[str, InterpolationMode, int] = "nearest",
         fill: Union[_FillType, dict[Union[type, str], _FillType]] = None,
     ) -> None:
         super().__init__(interpolation=interpolation, fill=fill)
@@ -447,9 +454,13 @@ class TrivialAugmentWide(_AutoAugmentBase):
 
     Args:
         num_magnitude_bins (int, optional): The number of different magnitude values.
-        interpolation (InterpolationMode, optional): Desired interpolation enum defined by
-            :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
-            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
+        interpolation (str or InterpolationMode, optional): Desired interpolation mode, given as a string
+            or as an enum defined by :class:`torchvision.transforms.v2.InterpolationMode`.
+            Default is ``"nearest"``. Accepted string values are ``"nearest"``, ``"nearest-exact"``,
+            ``"bilinear"``, ``"bicubic"``, ``"box"``, ``"hamming"``, and ``"lanczos"``.
+            ``"box"``, ``"hamming"``, and ``"lanczos"`` are only supported for PIL images.
+            The corresponding ``InterpolationMode`` enum values and Pillow integer
+            constants, e.g. ``PIL.Image.BILINEAR``, are accepted as well.
         fill (sequence or number, optional): Pixel fill value for the area outside the transformed
             image. If given a number, the value is used for all bands respectively.
     """
@@ -478,7 +489,7 @@ class TrivialAugmentWide(_AutoAugmentBase):
     def __init__(
         self,
         num_magnitude_bins: int = 31,
-        interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST,
+        interpolation: Union[str, InterpolationMode, int] = "nearest",
         fill: Union[_FillType, dict[Union[type, str], _FillType]] = None,
     ):
         super().__init__(interpolation=interpolation, fill=fill)
@@ -521,9 +532,13 @@ class AugMix(_AutoAugmentBase):
             Default is ``-1``.
         alpha (float, optional): The hyperparameter for the probability distributions. Default is ``1.0``.
         all_ops (bool, optional): Use all operations (including brightness, contrast, color and sharpness). Default is ``True``.
-        interpolation (InterpolationMode, optional): Desired interpolation enum defined by
-            :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
-            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
+        interpolation (str or InterpolationMode, optional): Desired interpolation mode, given as a string
+            or as an enum defined by :class:`torchvision.transforms.v2.InterpolationMode`.
+            Default is ``"bilinear"``. Accepted string values are ``"nearest"``, ``"nearest-exact"``,
+            ``"bilinear"``, ``"bicubic"``, ``"box"``, ``"hamming"``, and ``"lanczos"``.
+            ``"box"``, ``"hamming"``, and ``"lanczos"`` are only supported for PIL images.
+            The corresponding ``InterpolationMode`` enum values and Pillow integer
+            constants, e.g. ``PIL.Image.BILINEAR``, are accepted as well.
         fill (sequence or number, optional): Pixel fill value for the area outside the transformed
             image. If given a number, the value is used for all bands respectively.
     """
@@ -559,7 +574,7 @@ def __init__(
         chain_depth: int = -1,
         alpha: float = 1.0,
         all_ops: bool = True,
-        interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
+        interpolation: Union[str, InterpolationMode, int] = "bilinear",
         fill: Union[_FillType, dict[Union[type, str], _FillType]] = None,
     ) -> None:
         super().__init__(interpolation=interpolation, fill=fill)