pytorch
diff --git a/‎docs/source/reference/envs.rst
Lines changed: 1 addition & 0 deletions b/‎docs/source/reference/envs.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎test/mocking_classes.py
Lines changed: 20 additions & 8 deletions b/‎test/mocking_classes.py
Lines changed: 20 additions & 8 deletions
diff --git a/‎test/test_env.py
Lines changed: 1 addition & 1 deletion b/‎test/test_env.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎test/test_specs.py
Lines changed: 4 additions & 2 deletions b/‎test/test_specs.py
Lines changed: 4 additions & 2 deletions
@@ -1083,6 +1083,7 @@ to be able to create this other composition:
     VIPTransform
     VecGymEnvTransform
     VecNorm
+    VecNormV2
     gSDENoise
 
 Environments with masked actions
 
@@ -1097,10 +1097,7 @@ def __init__(self, max_steps: int = 5, start_val: int = 0, **kwargs):
         self.done_spec = Categorical(
             2,
             dtype=torch.bool,
-            shape=(
-                *self.batch_size,
-                1,
-            ),
+            shape=(*self.batch_size, 1),
             device=self.device,
         )
         self.action_spec = Binary(n=1, shape=[*self.batch_size, 1], device=self.device)
@@ -1146,7 +1143,9 @@ def _step(
                 "observation": self.count.clone(),
                 "done": self.count > self.max_steps,
                 "terminated": self.count > self.max_steps,
-                "reward": torch.zeros_like(self.count, dtype=torch.float),
+                "reward": torch.zeros_like(
+                    self.count, dtype=self.full_reward_spec[self.reward_keys[0]].dtype
+                ),
             },
             batch_size=self.batch_size,
             device=self.device,
@@ -1300,7 +1299,11 @@ def _reset(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
                 source[group_name][agent_name] = TensorDict(
                     source={
                         "observation": torch.rand(
-                            (*self.batch_size, 3, 4), device=self.device
+                            (*self.batch_size, 3, 4),
+                            device=self.device,
+                            dtype=self.full_observation_spec[
+                                group_name, agent_name, "observation"
+                            ].dtype,
                         ),
                         "done": self.count > self.max_steps,
                         "terminated": self.count > self.max_steps,
@@ -1324,11 +1327,20 @@ def _step(
                 source[group_name][agent_name] = TensorDict(
                     source={
                         "observation": torch.rand(
-                            (*self.batch_size, 3, 4), device=self.device
+                            (*self.batch_size, 3, 4),
+                            device=self.device,
+                            dtype=self.full_observation_spec[
+                                group_name, agent_name, "observation"
+                            ].dtype,
                         ),
                         "done": self.count > self.max_steps,
                         "terminated": self.count > self.max_steps,
-                        "reward": torch.zeros_like(self.count, dtype=torch.float),
+                        "reward": torch.zeros_like(
+                            self.count,
+                            dtype=self.full_reward_spec[
+                                group_name, agent_name, "reward"
+                            ].dtype,
+                        ),
                     },
                     batch_size=self.batch_size,
                     device=self.device,
 
@@ -4257,7 +4257,7 @@ def test_all_actions(self, include_fen, include_pgn, stateful, mask_actions):
             if stateful:
                 all_actions = env.all_actions()
             else:
-                # Reset the the initial state first, just to make sure
+                # Reset the initial state first, just to make sure
                 # `all_actions` knows how to get the board state from the input.
                 env.reset()
                 all_actions = env.all_actions(td.clone())
 
@@ -525,8 +525,10 @@ def test_repr(self, shape, is_complete, device, dtype):
 
     def test_device_cast_with_dtype_fails(self, shape, is_complete, device, dtype):
         ts = self._composite_spec(shape, is_complete, device, dtype)
-        with pytest.raises(ValueError, match="Only device casting is allowed"):
-            ts.to(torch.float16)
+        ts = ts.to(torch.float16)
+        for spec in ts.values(True, True):
+            if spec is not None:
+                assert spec.dtype == torch.float16
 
     @pytest.mark.parametrize("dest", get_available_devices())
     def test_device_cast(self, shape, is_complete, device, dtype, dest):