Skip to content

Commit

Permalink
Track only Stateful objects and not classes (#827)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #827

`torchtnt.framework.callbacks.meta.model_store_checkpointer.ModelStoreCheckpointer` fails when a checkpointed unit contains an attribute which is a class (not an object) implementing the `Stateful` interface. This is a typical case when a user specifies the type of an optimizer in an `AutoUnit`, which is instantiated later in `AutoUnit.configure_optimizers_and_lr_scheduler`. The specific reason why the checkpointer fails is that this attribute then gets tracked because `isinstance(torch.optim.Optimizer, Stateful)` returns `True`. `MultiStateful` then tries to call `state_dict` on that attribute, which fails because the attribute is a class rather than an instance of one.

Reviewed By: JKSenthil

Differential Revision: D57159095

fbshipit-source-id: 9224193f63803fa139c26553ff6090cd6ac9886d
  • Loading branch information
zedsdead01 authored and facebook-github-bot committed May 10, 2024
1 parent d76f4f0 commit 735bfbc
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
11 changes: 11 additions & 0 deletions tests/framework/test_app_state_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from torchtnt.utils.env import init_from_env
from torchtnt.utils.lr_scheduler import TLRScheduler
from torchtnt.utils.prepare_module import FSDPOptimizerWrapper
from torchtnt.utils.stateful import MultiStateful


class Dummy(AppStateMixin):
Expand All @@ -32,6 +33,7 @@ def __init__(self) -> None:
self.optimizer_c, step_size=30, gamma=0.1
)
self.grad_scaler_e = torch.cuda.amp.GradScaler()
self.optimizer_class_f = torch.optim.SGD


class AppStateMixinTest(unittest.TestCase):
Expand Down Expand Up @@ -103,6 +105,15 @@ def test_miscellaneous_stateful(self) -> None:
# assert that the grad scaler is stored in the app_state
self.assertEqual(my_unit.app_state()["grad_scaler_e"], my_unit.grad_scaler_e)

# assert that only stateful class objects are being tracked
self.assertFalse("optimizer_class_f" in my_unit.tracked_misc_statefuls())

multi_stateful = MultiStateful(my_unit.tracked_misc_statefuls())
try:
_ = multi_stateful.state_dict()
except TypeError:
self.fail("Not able to get the state dict from my_unit.")

# delete the attribute
# pyre-fixme[8]: Attribute has type `GradScaler`; used as `None`.
my_unit.grad_scaler_e = None
Expand Down
3 changes: 2 additions & 1 deletion torchtnt/framework/unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# pyre-strict


import inspect
import logging
from abc import ABC, abstractmethod
from typing import Any, cast, Dict, Generic, Iterator, TypeVar, Union
Expand Down Expand Up @@ -148,7 +149,7 @@ def __setattr__(self, name: str, value: object) -> None:
value,
self.__dict__.get("_progress"),
)
elif isinstance(value, Stateful):
elif isinstance(value, Stateful) and not inspect.isclass(value):
self._update_attr(
name,
value,
Expand Down

0 comments on commit 735bfbc

Please sign in to comment.