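Update: defer torchrl.objectives imports to function scope, switch package __init__ files to absolute imports, and make omegaconf optional in trainers/helpers/envs.py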

Vincent Moens · Vincent Moens · commit 688d12b28613 · 2025-03-03T15:02:59.000Z
[ghstack-poisoned]
diff --git a/examples/rlhf/models/actor_critic.py b/examples/rlhf/models/actor_critic.py
@@ -2,6 +2,8 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
 from torchrl.modules.tensordict_module.actors import LMHeadActorValueOperator
 from torchrl.modules.tensordict_module.common import VmapModule
 
diff --git a/torchrl/envs/transforms/gym_transforms.py b/torchrl/envs/transforms/gym_transforms.py
@@ -10,7 +10,6 @@
 import warnings
 
 import torch
-import torchrl.objectives.common
 from tensordict import TensorDictBase
 from tensordict.utils import expand_as_right, NestedKey
 from torchrl.data.tensor_specs import Unbounded
@@ -189,7 +188,9 @@ def transform_observation_spec(self, observation_spec):
         )
         return observation_spec
 
-    def register_keys(self, loss_or_advantage: torchrl.objectives.common.LossModule):
+    def register_keys(
+        self, loss_or_advantage: torchrl.objectives.common.LossModule  # noqa
+    ):
         """Registers the end-of-life key at appropriate places within the loss.
 
         Args:
diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py
@@ -87,7 +87,6 @@
     make_composite_from_td,
     step_mdp,
 )
-from torchrl.objectives.value.functional import reward2go
 
 _has_tv = importlib.util.find_spec("torchvision", None) is not None
 
@@ -8539,6 +8538,8 @@ def _call(self, next_tensordict: TensorDictBase) -> TensorDictBase:
     def _inv_apply_transform(
         self, reward: torch.Tensor, done: torch.Tensor
     ) -> torch.Tensor:
+        from torchrl.objectives.value.functional import reward2go
+
         return reward2go(reward, done, self.gamma)
 
     def set_container(self, container):
diff --git a/torchrl/modules/tensordict_module/__init__.py b/torchrl/modules/tensordict_module/__init__.py
@@ -3,7 +3,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
-from .actors import (
+from torchrl.modules.tensordict_module.actors import (
     Actor,
     ActorCriticOperator,
     ActorCriticWrapper,
@@ -21,20 +21,20 @@
     TanhModule,
     ValueOperator,
 )
-from .common import SafeModule, VmapModule
-from .exploration import (
+from torchrl.modules.tensordict_module.common import SafeModule, VmapModule
+from torchrl.modules.tensordict_module.exploration import (
     AdditiveGaussianModule,
     AdditiveGaussianWrapper,
     EGreedyModule,
     EGreedyWrapper,
     OrnsteinUhlenbeckProcessModule,
     OrnsteinUhlenbeckProcessWrapper,
 )
-from .probabilistic import (
+from torchrl.modules.tensordict_module.probabilistic import (
     SafeProbabilisticModule,
     SafeProbabilisticTensorDictSequential,
 )
-from .rnn import (
+from torchrl.modules.tensordict_module.rnn import (
     GRU,
     GRUCell,
     GRUModule,
@@ -44,8 +44,8 @@
     recurrent_mode,
     set_recurrent_mode,
 )
-from .sequence import SafeSequential
-from .world_models import WorldModelWrapper
+from torchrl.modules.tensordict_module.sequence import SafeSequential
+from torchrl.modules.tensordict_module.world_models import WorldModelWrapper
 
 __all__ = [
     "Actor",
diff --git a/torchrl/modules/tensordict_module/rnn.py b/torchrl/modules/tensordict_module/rnn.py
@@ -19,11 +19,6 @@
 
 from torchrl._utils import _ContextManager, _DecoratorContextManager
 from torchrl.data.tensor_specs import Unbounded
-from torchrl.objectives.value.functional import (
-    _inv_pad_sequence,
-    _split_and_pad_sequence,
-)
-from torchrl.objectives.value.utils import _get_num_per_traj_init
 
 
 class LSTMCell(RNNCellBase):
@@ -718,6 +713,11 @@ def set_recurrent_mode(self, mode: bool = True):
 
     @dispatch
     def forward(self, tensordict: TensorDictBase):
+        from torchrl.objectives.value.functional import (
+            _inv_pad_sequence,
+            _split_and_pad_sequence,
+        )
+
         # we want to get an error if the value input is missing, but not the hidden states
         defaults = [NO_DEFAULT, None, None]
         shape = tensordict.shape
@@ -742,6 +742,8 @@ def forward(self, tensordict: TensorDictBase):
         is_init = tensordict_shaped["is_init"].squeeze(-1)
         splits = None
         if self.recurrent_mode and is_init[..., 1:].any():
+            from torchrl.objectives.value.utils import _get_num_per_traj_init
+
             # if we have consecutive trajectories, things get a little more complicated
             # we have a tensordict of shape [B, T]
             # we will split / pad things such that we get a tensordict of shape
@@ -1533,6 +1535,11 @@ def set_recurrent_mode(self, mode: bool = True):
     @dispatch
     @set_lazy_legacy(False)
     def forward(self, tensordict: TensorDictBase):
+        from torchrl.objectives.value.functional import (
+            _inv_pad_sequence,
+            _split_and_pad_sequence,
+        )
+
         # we want to get an error if the value input is missing, but not the hidden states
         defaults = [NO_DEFAULT, None]
         shape = tensordict.shape
@@ -1557,6 +1564,8 @@ def forward(self, tensordict: TensorDictBase):
         is_init = tensordict_shaped["is_init"].squeeze(-1)
         splits = None
         if self.recurrent_mode and is_init[..., 1:].any():
+            from torchrl.objectives.value.utils import _get_num_per_traj_init
+
             # if we have consecutive trajectories, things get a little more complicated
             # we have a tensordict of shape [B, T]
             # we will split / pad things such that we get a tensordict of shape
diff --git a/torchrl/objectives/__init__.py b/torchrl/objectives/__init__.py
@@ -3,24 +3,28 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
-from .a2c import A2CLoss
-from .common import LossModule
-from .cql import CQLLoss, DiscreteCQLLoss
-from .crossq import CrossQLoss
-from .ddpg import DDPGLoss
-from .decision_transformer import DTLoss, OnlineDTLoss
-from .dqn import DistributionalDQNLoss, DQNLoss
-from .dreamer import DreamerActorLoss, DreamerModelLoss, DreamerValueLoss
-from .gail import GAILLoss
-from .iql import DiscreteIQLLoss, IQLLoss
-from .multiagent import QMixerLoss
-from .ppo import ClipPPOLoss, KLPENPPOLoss, PPOLoss
-from .redq import REDQLoss
-from .reinforce import ReinforceLoss
-from .sac import DiscreteSACLoss, SACLoss
-from .td3 import TD3Loss
-from .td3_bc import TD3BCLoss
-from .utils import (
+from torchrl.objectives.a2c import A2CLoss
+from torchrl.objectives.common import LossModule
+from torchrl.objectives.cql import CQLLoss, DiscreteCQLLoss
+from torchrl.objectives.crossq import CrossQLoss
+from torchrl.objectives.ddpg import DDPGLoss
+from torchrl.objectives.decision_transformer import DTLoss, OnlineDTLoss
+from torchrl.objectives.dqn import DistributionalDQNLoss, DQNLoss
+from torchrl.objectives.dreamer import (
+    DreamerActorLoss,
+    DreamerModelLoss,
+    DreamerValueLoss,
+)
+from torchrl.objectives.gail import GAILLoss
+from torchrl.objectives.iql import DiscreteIQLLoss, IQLLoss
+from torchrl.objectives.multiagent import QMixerLoss
+from torchrl.objectives.ppo import ClipPPOLoss, KLPENPPOLoss, PPOLoss
+from torchrl.objectives.redq import REDQLoss
+from torchrl.objectives.reinforce import ReinforceLoss
+from torchrl.objectives.sac import DiscreteSACLoss, SACLoss
+from torchrl.objectives.td3 import TD3Loss
+from torchrl.objectives.td3_bc import TD3BCLoss
+from torchrl.objectives.utils import (
     default_value_kwargs,
     distance_loss,
     group_optimizers,
diff --git a/torchrl/objectives/cql.py b/torchrl/objectives/cql.py
@@ -20,7 +20,7 @@
 from torchrl.data.tensor_specs import Composite
 from torchrl.data.utils import _find_action_space
 from torchrl.envs.utils import ExplorationType, set_exploration_type
-from torchrl.modules import ProbabilisticActor, QValueActor
+from torchrl.modules.tensordict_module.actors import ProbabilisticActor, QValueActor
 from torchrl.modules.tensordict_module.common import ensure_tensordict_compatible
 from torchrl.objectives.common import LossModule
 from torchrl.objectives.utils import (
diff --git a/torchrl/trainers/helpers/envs.py b/torchrl/trainers/helpers/envs.py
@@ -7,13 +7,12 @@
 # Therefore we need Optional and Union
 # from __future__ import annotations
 
+import importlib.util
 from copy import copy
 from dataclasses import dataclass, field as dataclass_field
 from typing import Any, Callable, Optional, Sequence, Union
 
 import torch
-from omegaconf import DictConfig
-
 from torchrl._utils import logger as torchrl_logger, VERBOSE
 from torchrl.envs import ParallelEnv
 from torchrl.envs.common import EnvBase
@@ -49,6 +48,14 @@
     "dm_control": DMControlEnv,
 }
 
+_has_omegaconf = importlib.util.find_spec("omegaconf") is not None
+if _has_omegaconf:
+    from omegaconf import DictConfig
+else:
+
+    class DictConfig:  # noqa
+        ...
+
 
 def correct_for_frame_skip(cfg: DictConfig) -> DictConfig:  # noqa: F821
     """Correct the arguments for the input frame_skip, by dividing all the arguments that reflect a count of frames by the frame_skip.

Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,8 @@`
`2`	`2`	`#`
`3`	`3`	`# This source code is licensed under the MIT license found in the`
`4`	`4`	`# LICENSE file in the root directory of this source tree.`
	`5`	`+from __future__ import annotations`
	`6`	`+`
`5`	`7`	`from torchrl.modules.tensordict_module.actors import LMHeadActorValueOperator`
`6`	`8`	`from torchrl.modules.tensordict_module.common import VmapModule`
`7`	`9`