 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Model specific ONNX configurations."""
+
 import math
 import random
 import warnings
@@ -1337,45 +1338,51 @@ class VaeEncoderOnnxConfig(VisionOnnxConfig):
     DEFAULT_ONNX_OPSET = 14

     NORMALIZED_CONFIG_CLASS = NormalizedConfig.with_args(
-        num_channels="in_channels",
-        image_size="sample_size",
-        allow_new=True,
+        num_channels="in_channels", image_size="sample_size", allow_new=True
     )

     @property
     def inputs(self) -> Dict[str, Dict[int, str]]:
         return {
-            "sample": {0: "batch_size", 2: "height", 3: "width"},
+            "sample": {0: "batch_size", 2: "sample_height", 3: "sample_width"},
         }

     @property
     def outputs(self) -> Dict[str, Dict[int, str]]:
+        down_sampling_factor = 2 ** (len(self._normalized_config.down_block_types) - 1)
         return {
-            "latent_parameters": {0: "batch_size", 2: "height_latent", 3: "width_latent"},
+            "latent_parameters": {
+                0: "batch_size",
+                2: f"sample_height / {down_sampling_factor}",
+                3: f"sample_width / {down_sampling_factor}",
+            },
         }


 class VaeDecoderOnnxConfig(VisionOnnxConfig):
-    ATOL_FOR_VALIDATION = 1e-4
+    ATOL_FOR_VALIDATION = 3e-4
     # The ONNX export of a CLIPText architecture, an other Stable Diffusion component, needs the Trilu
     # operator support, available since opset 14
     DEFAULT_ONNX_OPSET = 14

-    NORMALIZED_CONFIG_CLASS = NormalizedConfig.with_args(
-        num_channels="latent_channels",
-        allow_new=True,
-    )
+    NORMALIZED_CONFIG_CLASS = NormalizedConfig.with_args(num_channels="latent_channels", allow_new=True)

     @property
     def inputs(self) -> Dict[str, Dict[int, str]]:
         return {
-            "latent_sample": {0: "batch_size", 2: "height_latent", 3: "width_latent"},
+            "latent_sample": {0: "batch_size", 2: "latent_height", 3: "latent_width"},
         }

     @property
     def outputs(self) -> Dict[str, Dict[int, str]]:
+        upsampling_factor = 2 ** (len(self._normalized_config.up_block_types) - 1)
+
         return {
-            "sample": {0: "batch_size", 2: "height", 3: "width"},
+            "sample": {
+                0: "batch_size",
+                2: f"latent_height * {upsampling_factor}",
+                3: f"latent_width * {upsampling_factor}",
+            },
         }

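Editor's note: the new axis names are only symbolic labels for ONNX dynamic axes; the arithmetic they encode is the factor computed from the number of down/up blocks. A minimal sketch of that arithmetic, assuming a typical Stable Diffusion AutoencoderKL configuration (four down/up block types, 512x512 sample size); none of these values or helper names come from the diff above:

```python
# Illustrative sketch (not part of the exporter): how the scaling factors above
# relate pixel-space and latent-space dynamic axes, assuming a typical Stable
# Diffusion VAE config with 4 down/up blocks and a 512x512 sample size.
down_block_types = ["DownEncoderBlock2D"] * 4  # hypothetical config values
up_block_types = ["UpDecoderBlock2D"] * 4

down_sampling_factor = 2 ** (len(down_block_types) - 1)  # 2**3 = 8
upsampling_factor = 2 ** (len(up_block_types) - 1)       # 2**3 = 8

sample_height, sample_width = 512, 512
latent_height = sample_height // down_sampling_factor    # 512 / 8 = 64
latent_width = sample_width // down_sampling_factor      # 512 / 8 = 64

# The decoder reverses the mapping: 64 * 8 = 512.
assert latent_height * upsampling_factor == sample_height
print(down_sampling_factor, (latent_height, latent_width))  # 8 (64, 64)
```

ONNX Runtime treats these axis strings as opaque names, so the expressions serve as documentation of the expected shape relationship rather than as enforced constraints.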
@@ -1815,9 +1822,17 @@ class MusicgenOnnxConfig(OnnxSeq2SeqConfigWithPast):
     DEFAULT_ONNX_OPSET = 14

     VARIANTS = {
-        "text-conditional-with-past": "Exports Musicgen to ONNX to generate audio samples conditioned on a text prompt (Reference: https://huggingface.co/docs/transformers/model_doc/musicgen#text-conditional-generation). This uses the decoder KV cache. The following subcomponents are exported:\n\t\t* text_encoder.onnx: corresponds to the text encoder part in https://github.com/huggingface/transformers/blob/v4.39.1/src/transformers/models/musicgen/modeling_musicgen.py#L1457.\n\t\t* encodec_decode.onnx: corresponds to the Encodec audio encoder part in https://github.com/huggingface/transformers/blob/v4.39.1/src/transformers/models/musicgen/modeling_musicgen.py#L2472-L2480.\n\t\t* decoder_model.onnx: The Musicgen decoder, without past key values input, and computing cross attention. Not required at inference (use decoder_model_merged.onnx instead).\n\t\t* decoder_with_past_model.onnx: The Musicgen decoder, with past_key_values input (KV cache filled), not computing cross attention. Not required at inference (use decoder_model_merged.onnx instead).\n\t\t* decoder_model_merged.onnx: The two previous models fused in one, to avoid duplicating weights. A boolean input `use_cache_branch` allows to select the branch to use. In the first forward pass where the KV cache is empty, dummy past key values inputs need to be passed and are ignored with use_cache_branch=False.\n\t\t* build_delay_pattern_mask.onnx: A model taking as input `input_ids`, `pad_token_id`, `max_length`, and building a delayed pattern mask to the input_ids. Implements https://github.com/huggingface/transformers/blob/v4.39.3/src/transformers/models/musicgen/modeling_musicgen.py#L1054.",
+        "text-conditional-with-past": """Exports Musicgen to ONNX to generate audio samples conditioned on a text prompt (Reference: https://huggingface.co/docs/transformers/model_doc/musicgen#text-conditional-generation).
+    This uses the decoder KV cache. The following subcomponents are exported:
+        * text_encoder.onnx: corresponds to the text encoder part in https://github.com/huggingface/transformers/blob/v4.39.1/src/transformers/models/musicgen/modeling_musicgen.py#L1457.
+        * encodec_decode.onnx: corresponds to the Encodec audio encoder part in https://github.com/huggingface/transformers/blob/v4.39.1/src/transformers/models/musicgen/modeling_musicgen.py#L2472-L2480.
+        * decoder_model.onnx: The Musicgen decoder, without past key values input, and computing cross attention. Not required at inference (use decoder_model_merged.onnx instead).
+        * decoder_with_past_model.onnx: The Musicgen decoder, with past_key_values input (KV cache filled), not computing cross attention. Not required at inference (use decoder_model_merged.onnx instead).
+        * decoder_model_merged.onnx: The two previous models fused in one, to avoid duplicating weights. A boolean input `use_cache_branch` allows to select the branch to use. In the first forward pass where the KV cache is empty, dummy past key values inputs need to be passed and are ignored with use_cache_branch=False.
+        * build_delay_pattern_mask.onnx: A model taking as input `input_ids`, `pad_token_id`, `max_length`, and building a delayed pattern mask to the input_ids. Implements https://github.com/huggingface/transformers/blob/v4.39.3/src/transformers/models/musicgen/modeling_musicgen.py#L1054.""",
     }
-    # TODO: support audio-prompted generation (- audio_encoder_encode.onnx: corresponds to the audio encoder part in https://github.com/huggingface/transformers/blob/f01e1609bf4dba146d1347c1368c8c49df8636f6/src/transformers/models/musicgen/modeling_musicgen.py#L2087.\n\t)
+    # TODO: support audio-prompted generation (audio_encoder_encode.onnx: corresponds to the audio encoder part
+    # in https://github.com/huggingface/transformers/blob/f01e1609bf4dba146d1347c1368c8c49df8636f6/src/transformers/models/musicgen/modeling_musicgen.py#L2087.)
     # With that, we have full Encodec support.
     DEFAULT_VARIANT = "text-conditional-with-past"

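Editor's note: the variant docstring above describes the subcomponents produced by the export. For context, a minimal sketch of driving that export from Python; the `main_export` entry point, its parameter names, the `text-to-audio` task string, and the `facebook/musicgen-small` checkpoint are assumptions to verify against the installed optimum version, not something this diff defines:

```python
# Hedged sketch: exporting Musicgen with the "text-conditional-with-past" variant.
# Function and parameter names are assumed from the optimum exporter API; check
# them against the optimum documentation before relying on this.
from optimum.exporters.onnx import main_export

main_export(
    model_name_or_path="facebook/musicgen-small",  # assumed example checkpoint
    output="musicgen_onnx",                        # output directory
    task="text-to-audio",                          # assumed task name for Musicgen
)
```

Under those assumptions, the output directory would contain the subcomponents listed in the variant docstring (text_encoder.onnx, encodec_decode.onnx, decoder_model_merged.onnx, build_delay_pattern_mask.onnx, and the unmerged decoder models).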