[Doc] Add tuto and time dim info in docs (#1130)

vmoens · web-flow · commit c671e8561003 · 2023-05-05T15:40:38.000+01:00
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -49,6 +49,7 @@ Intermediate
    tutorials/torchrl_envs
    tutorials/pretrained_models
    tutorials/dqn_with_rnn
+   tutorials/rb_tutorial
 
 Advanced
 --------
diff --git a/docs/source/reference/envs.rst b/docs/source/reference/envs.rst
@@ -63,6 +63,9 @@ With these, the following methods are implemented:
   a maximum number of steps (``max_steps=N``) and using a policy (``policy=model``).
   The policy should be coded using a :class:`tensordict.nn.TensorDictModule`
   (or any other :class:`tensordict.TensorDict`-compatible module).
+  The resulting :class:`tensordict.TensorDict` instance will be marked with
+  a trailing ``"time"`` named dimension, which other modules can use
+  to identify this batched dimension and handle it appropriately.
 
 The following figure summarizes how a rollout is executed in torchrl.
 
diff --git a/tutorials/sphinx-tutorials/rb_tutorial.py b/tutorials/sphinx-tutorials/rb_tutorial.py
@@ -491,7 +491,7 @@ class MyData:
 # sampled items:
 #
 sample = rb.sample()
-sample["td_error"] = (data.numel() - sample["index"]).exp()
+sample["td_error"] = data.numel() - sample["index"]
 rb.update_tensordict_priority(sample)
 
 ######################################################################

Original file line number	Diff line number	Diff line change
`@@ -491,7 +491,7 @@ class MyData:`
`491`	`491`	`# sampled items:`
`492`	`492`	`#`
`493`	`493`	`sample = rb.sample()`
`494`		`-sample["td_error"] = (data.numel() - sample["index"]).exp()`
	`494`	`+sample["td_error"] = data.numel() - sample["index"]`
`495`	`495`	`rb.update_tensordict_priority(sample)`
`496`	`496`
`497`	`497`	`######################################################################`