
Commit 3ae32b4

update
1 parent 19901e7 commit 3ae32b4

File tree

3 files changed (+47, -23 lines)

docs/source/en/optimization/memory.md

Lines changed: 9 additions & 3 deletions
@@ -166,12 +166,18 @@ Typically, inference on most models is done with `torch.float16` or `torch.bfloat16`
 
 ```python
 import torch
-from diffusers import CogVideoXPipeline
+from diffusers import CogVideoXPipeline, CogVideoXTransformer3DModel
 from diffusers.utils import export_to_video
 
-pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)
+model_id = "THUDM/CogVideoX-5b"
+
+# Load the model in bfloat16 and enable layerwise upcasting
+transformer = CogVideoXTransformer3DModel.from_pretrained(model_id, subfolder="transformer", torch_dtype=torch.bfloat16)
+transformer.enable_layerwise_upcasting(storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16)
+
+# Load the pipeline
+pipe = CogVideoXPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch.bfloat16)
 pipe.to("cuda")
-pipe.transformer.enable_layerwise_upcasting(storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16)
 
 prompt = (
     "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. "

src/diffusers/hooks/layerwise_upcasting.py

Lines changed: 18 additions & 13 deletions
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import re
-from typing import Optional, Tuple, Type
+from typing import Optional, Tuple, Type, Union
 
 import torch
 
@@ -25,13 +25,13 @@
 
 
 # fmt: off
-_SUPPORTED_PYTORCH_LAYERS = (
+SUPPORTED_PYTORCH_LAYERS = (
     torch.nn.Conv1d, torch.nn.Conv2d, torch.nn.Conv3d,
     torch.nn.ConvTranspose1d, torch.nn.ConvTranspose2d, torch.nn.ConvTranspose3d,
     torch.nn.Linear,
 )
 
-_DEFAULT_SKIP_MODULES_PATTERN = ("pos_embed", "patch_embed", "norm")
+DEFAULT_SKIP_MODULES_PATTERN = ("pos_embed", "patch_embed", "norm", "^proj_in$", "^proj_out$")
 # fmt: on
 
 
@@ -74,8 +74,8 @@ def apply_layerwise_upcasting(
     module: torch.nn.Module,
     storage_dtype: torch.dtype,
     compute_dtype: torch.dtype,
-    skip_modules_pattern: Optional[Tuple[str]] = _DEFAULT_SKIP_MODULES_PATTERN,
-    skip_modules_classes: Optional[Tuple[Type[torch.nn.Module]]] = None,
+    skip_modules_pattern: Union[str, Tuple[str, ...]] = "default",
+    skip_modules_classes: Optional[Tuple[Type[torch.nn.Module], ...]] = None,
     non_blocking: bool = False,
     _prefix: str = "",
 ) -> None:
@@ -87,13 +87,14 @@ def apply_layerwise_upcasting(
 
     ```python
     >>> import torch
-    >>> from diffusers import CogVideoXPipeline, apply_layerwise_upcasting
+    >>> from diffusers import CogVideoXTransformer3DModel
 
-    >>> pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)
-    >>> pipe.to("cuda")
+    >>> transformer = CogVideoXTransformer3DModel.from_pretrained(
+    ...     model_id, subfolder="transformer", torch_dtype=torch.bfloat16
+    ... )
 
     >>> apply_layerwise_upcasting(
-    ...     pipe.transformer,
+    ...     transformer,
     ...     storage_dtype=torch.float8_e4m3fn,
     ...     compute_dtype=torch.bfloat16,
     ...     skip_modules_pattern=["patch_embed", "norm"],
@@ -109,13 +110,17 @@ def apply_layerwise_upcasting(
             The dtype to cast the module to before/after the forward pass for storage.
         compute_dtype (`torch.dtype`):
            The dtype to cast the module to during the forward pass for computation.
-        skip_modules_pattern (`Tuple[str]`, defaults to `["pos_embed", "patch_embed", "norm"]`):
-            A list of patterns to match the names of the modules to skip during the layerwise upcasting process.
-        skip_modules_classes (`Tuple[Type[torch.nn.Module]]`, defaults to `None`):
+        skip_modules_pattern (`Tuple[str, ...]`, defaults to `"default"`):
+            A list of patterns to match the names of the modules to skip during the layerwise upcasting process. If set
+            to `"default"`, the default patterns are used.
+        skip_modules_classes (`Tuple[Type[torch.nn.Module], ...]`, defaults to `None`):
             A list of module classes to skip during the layerwise upcasting process.
         non_blocking (`bool`, defaults to `False`):
             If `True`, the weight casting operations are non-blocking.
     """
+    if skip_modules_pattern == "default":
+        skip_modules_pattern = DEFAULT_SKIP_MODULES_PATTERN
+
     if skip_modules_classes is None and skip_modules_pattern is None:
         apply_layerwise_upcasting_hook(module, storage_dtype, compute_dtype, non_blocking)
         return
@@ -127,7 +132,7 @@ def apply_layerwise_upcasting(
         logger.debug(f'Skipping layerwise upcasting for layer "{_prefix}"')
         return
 
-    if isinstance(module, _SUPPORTED_PYTORCH_LAYERS):
+    if isinstance(module, SUPPORTED_PYTORCH_LAYERS):
         logger.debug(f'Applying layerwise upcasting to layer "{_prefix}"')
         apply_layerwise_upcasting_hook(module, storage_dtype, compute_dtype, non_blocking)
         return
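
As a side note, a small self-contained sketch of how a skip-pattern tuple like the updated `DEFAULT_SKIP_MODULES_PATTERN` is typically matched against module names with `re.search`. The `should_skip` helper is hypothetical, written only to illustrate why the new anchored entries `"^proj_in$"` / `"^proj_out$"` match only a top-level module of that exact name:

```python
import re
from typing import Tuple

DEFAULT_SKIP_MODULES_PATTERN: Tuple[str, ...] = ("pos_embed", "patch_embed", "norm", "^proj_in$", "^proj_out$")


def should_skip(module_name: str, patterns: Tuple[str, ...] = DEFAULT_SKIP_MODULES_PATTERN) -> bool:
    # A module is skipped when any pattern matches somewhere in its qualified name.
    return any(re.search(pattern, module_name) is not None for pattern in patterns)


# Substring patterns match nested modules by name fragment...
assert should_skip("transformer_blocks.0.norm1")
# ...while the anchored patterns only match a module named exactly "proj_in" / "proj_out" at the top level.
assert should_skip("proj_in")
assert not should_skip("transformer_blocks.0.attn.proj_in")
```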

src/diffusers/models/modeling_utils.py

Lines changed: 20 additions & 7 deletions
@@ -104,6 +104,17 @@ def get_parameter_dtype(parameter: torch.nn.Module) -> torch.dtype:
     """
     Returns the first found floating dtype in parameters if there is one, otherwise returns the last dtype it found.
     """
+    # 1. Check if we have attached any dtype modifying hooks (eg. layerwise upcasting)
+    if isinstance(parameter, nn.Module):
+        for name, submodule in parameter.named_modules():
+            if not hasattr(submodule, "_diffusers_hook"):
+                continue
+            registry = submodule._diffusers_hook
+            hook = registry.get_hook("layerwise_upcasting")
+            if hook is not None:
+                return hook.compute_dtype
+
+    # 2. If no dtype modifying hooks are attached, return the dtype of the first floating point parameter/buffer
     last_dtype = None
     for param in parameter.parameters():
         last_dtype = param.dtype
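
To make the intent of this check concrete, a hedged usage sketch (assumed behavior based on this diff, not output from a test run): once layerwise upcasting is enabled, most weights are stored in `float8_e4m3fn`, but the model-level dtype should keep reporting the compute dtype because the attached hook is consulted first:

```python
import torch
from diffusers import CogVideoXTransformer3DModel

transformer = CogVideoXTransformer3DModel.from_pretrained(
    "THUDM/CogVideoX-5b", subfolder="transformer", torch_dtype=torch.bfloat16
)
transformer.enable_layerwise_upcasting(storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16)

# Parameter storage is a mix of float8 (upcasted layers) and bfloat16 (skipped layers)...
print({p.dtype for p in transformer.parameters()})
# ...but the model-level dtype resolves to the hook's compute dtype rather than a storage dtype.
print(transformer.dtype)  # expected: torch.bfloat16
```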
@@ -321,8 +332,8 @@ def enable_layerwise_upcasting(
         self,
         storage_dtype: torch.dtype = torch.float8_e4m3fn,
         compute_dtype: Optional[torch.dtype] = None,
-        skip_modules_pattern: Optional[List[str]] = None,
-        skip_modules_classes: Optional[List[Type[torch.nn.Module]]] = None,
+        skip_modules_pattern: Optional[Tuple[str, ...]] = None,
+        skip_modules_classes: Optional[Tuple[Type[torch.nn.Module], ...]] = None,
         non_blocking: bool = False,
     ) -> None:
         r"""
@@ -362,22 +373,24 @@ def enable_layerwise_upcasting(
                 The dtype to which the model should be cast for storage.
             compute_dtype (`torch.dtype`):
                 The dtype to which the model weights should be cast during the forward pass.
-            skip_modules_pattern (`List[str]`, *optional*):
+            skip_modules_pattern (`Tuple[str, ...]`, *optional*):
                 A list of patterns to match the names of the modules to skip during the layerwise upcasting process.
-            skip_modules_classes (`List[Type[torch.nn.Module]]`, *optional*):
+            skip_modules_classes (`Tuple[Type[torch.nn.Module], ...]`, *optional*):
                 A list of module classes to skip during the layerwise upcasting process.
             non_blocking (`bool`, *optional*, defaults to `False`):
                 If `True`, the weight casting operations are non-blocking.
         """
 
         user_provided_patterns = True
         if skip_modules_pattern is None:
-            skip_modules_pattern = []
+            from ..hooks.layerwise_upcasting import DEFAULT_SKIP_MODULES_PATTERN
+
+            skip_modules_pattern = DEFAULT_SKIP_MODULES_PATTERN
             user_provided_patterns = False
         if self._keep_in_fp32_modules is not None:
-            skip_modules_pattern.extend(self._keep_in_fp32_modules)
+            skip_modules_pattern += tuple(self._keep_in_fp32_modules)
         if self._always_upcast_modules is not None:
-            skip_modules_pattern.extend(self._always_upcast_modules)
+            skip_modules_pattern += tuple(self._always_upcast_modules)
         skip_modules_pattern = tuple(set(skip_modules_pattern))
 
         if is_peft_available() and not user_provided_patterns:
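
A minimal sketch of the pattern-merging behavior this hunk moves to (tuple concatenation followed by `set`-based deduplication). The `_keep_in_fp32_modules` / `_always_upcast_modules` values below are hypothetical, chosen only to show the duplicates being removed:

```python
# Default patterns are now a tuple rather than a mutable list.
skip_modules_pattern = ("pos_embed", "patch_embed", "norm", "^proj_in$", "^proj_out$")

# Hypothetical class-level overrides, merged the same way as the `+= tuple(...)` lines above.
_keep_in_fp32_modules = ["norm"]
_always_upcast_modules = ["pos_embed"]
skip_modules_pattern += tuple(_keep_in_fp32_modules)
skip_modules_pattern += tuple(_always_upcast_modules)

# Deduplicate; `set` discards the duplicates introduced by the merge (order is not preserved).
skip_modules_pattern = tuple(set(skip_modules_pattern))
print(sorted(skip_modules_pattern))
# ['^proj_in$', '^proj_out$', 'norm', 'patch_embed', 'pos_embed']
```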
