Handle meta tensors in FX quantization (#2622)

kausv · facebook-github-bot · commit efca1d6699af · 2024-12-21T01:58:50.000-08:00
Summary: Pull Request resolved: #2622 X-link: pytorch/pytorch#142262 If the module being quantized contains some meta tensors and some tensors on an actual device, we should not fail quantization. Quantization should also not fail if the new quantized module is created on a meta device. If the devices include meta, copying from meta to meta is not necessary, and copying from another device to meta can be skipped. Reviewed By: emlin Differential Revision: D66895899 fbshipit-source-id: bba8de9ddc5f86292521985dc588f9dbe14b4b4c
diff --git a/torchrec/quant/embedding_modules.py b/torchrec/quant/embedding_modules.py
@@ -10,7 +10,18 @@
 import copy
 import itertools
 from collections import defaultdict
-from typing import Callable, cast, Dict, List, Optional, Sequence, Tuple, Type, Union
+from typing import (
+    Any,
+    Callable,
+    cast,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    Union,
+)
 
 import torch
 import torch.nn as nn
@@ -971,6 +982,27 @@ def __init__(
         ) in self._managed_collision_collection._managed_collision_modules.values():
             managed_collision_module.reset_inference_mode()
 
+    def to(
+        self, *args: List[Any], **kwargs: Dict[str, Any]
+    ) -> "QuantManagedCollisionEmbeddingCollection":
+        """Move non-meta tensors to the target device, then forward dtype to the parent ``to``."""
+        device, dtype, non_blocking, _ = torch._C._nn._parse_to(
+            *args,  # pyre-ignore
+            **kwargs,  # pyre-ignore
+        )
+        # No device requested, or a meta target (which would drop real data): skip the move.
+        if device is not None and device.type != "meta":
+            for tensor in itertools.chain(self.parameters(), self.buffers()):
+                if tensor.device.type != "meta":
+                    # Tensor.to is out-of-place; rebind .data so the move takes effect.
+                    tensor.data = tensor.data.to(device, non_blocking=non_blocking)
+        # Skip device movement and continue with other args
+        super().to(
+            dtype=dtype,
+            non_blocking=non_blocking,
+        )
+        return self
+
     def forward(
         self,
         features: KeyedJaggedTensor,