
Commit e61bf0e

Merge pull request #13 from HumanCompatibleAI/clean-interpret
Minor fixes related to interpret
2 parents c93182f + 6ebfecb commit e61bf0e

File tree: 4 files changed (+56, -12 lines)


src/reward_preprocessing/common/utils.py

Lines changed: 22 additions & 5 deletions
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Tuple
+from typing import List, Optional, Tuple, Union
 
 import PIL
 from imitation.data import rollout, types
@@ -26,14 +26,14 @@ def make_transition_to_tensor(num_acts):
     def transition_to_tensor(transition):
         obs = transition["obs"]
         if np.issubdtype(obs.dtype, np.integer):
-            obs = obs.float() / 255.0
+            obs = obs / 255.0
         # For floats we don't divide by 255.0. In that case we assume the
         # observation is already in the range [0, 1].
         act = int(transition["acts"])
         next_obs = transition["next_obs"]
 
         if np.issubdtype(next_obs.dtype, np.integer):
-            next_obs = next_obs.float() / 255.0
+            next_obs = next_obs / 255.0
 
         transp_obs = np.transpose(obs, (2, 0, 1))
         obs_height = transp_obs.shape[1]
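Aside (not part of the commit): a minimal sketch of why this hunk matters. The observations here are numpy arrays, which have no .float() method (that is a torch.Tensor method), so plain division by 255.0 is the way to both normalize and upcast:

import numpy as np

obs = np.zeros((64, 64, 3), dtype=np.uint8)  # e.g. an 8-bit image observation

if np.issubdtype(obs.dtype, np.integer):
    # obs.float() would raise AttributeError on a numpy array; dividing by 255.0
    # normalizes to [0, 1] and upcasts to float in one step.
    obs = obs / 255.0

assert obs.dtype == np.float64 and obs.max() <= 1.0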
@@ -70,27 +70,44 @@ def __len__(self):
         return self.base_dataset.__len__()
 
 
-def rollouts_to_dataloader(rollouts_paths, num_acts, batch_size):
+def rollouts_to_dataloader(
+    rollouts_paths: Union[str, List[str]],
+    num_acts: int,
+    batch_size: int,
+    n_trajectories: Optional[int] = None,
+):
     """Take saved rollouts of a policy, and produce a dataloader of transitions.
 
     Assumes that observations are (h,w,c)-formatted images and that actions are
     discrete.
 
     Args:
-        rollouts_path: Path to rollouts saved via imitation script, or list of
+        rollouts_paths: Path to rollouts saved via imitation script, or list of
             such paths.
         num_acts: Number of actions available to the agent (necessary because
            actions are saved as a number, not as a one-hot vector).
         batch_size: Int, size of batches that the dataloader serves. Note that
            a batch size of 2 will make the GAN algorithm think each batch is
            a (data, label) pair, which will mess up training.
+        n_trajectories: If not None, limit number of trajectories to use.
     """
     if isinstance(rollouts_paths, list):
         rollouts = []
         for path in rollouts_paths:
             rollouts += types.load_with_rewards(path)
     else:
         rollouts = types.load_with_rewards(rollouts_paths)
+
+    # Optionally limit the number of trajectories to use, similar to n_expert_demos in
+    # imitation.scripts.common.demonstrations.
+    if n_trajectories is not None:
+        if len(rollouts) < n_trajectories:
+            raise ValueError(
+                f"Want to use n_trajectories={n_trajectories} trajectories, but only "
+                f"{len(rollouts)} are available via {rollouts_paths}.",
+            )
+        rollouts = rollouts[:n_trajectories]
+
     flat_rollouts = rollout.flatten_trajectories_with_rew(rollouts)
     tensor_rollouts = TransformedDataset(
         flat_rollouts, make_transition_to_tensor(num_acts)
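For context, a hedged usage sketch of the new n_trajectories parameter; the path and numbers below are made up, only the signature comes from the diff above:

from reward_preprocessing.common.utils import rollouts_to_dataloader

# Hypothetical rollout file saved via the imitation scripts.
dataloader = rollouts_to_dataloader(
    rollouts_paths="rollouts/expert_rollouts.npz",
    num_acts=15,
    batch_size=32,
    n_trajectories=10,  # use at most 10 trajectories; raises ValueError if fewer exist
)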

src/reward_preprocessing/interpret.py

Lines changed: 25 additions & 4 deletions
@@ -3,6 +3,7 @@
 
 from PIL import Image
 from imitation.scripts.common import common as common_config
+from imitation.util.logger import HierarchicalLogger
 from lucent.modelzoo.util import get_model_layers
 from lucent.optvis import transform
 import matplotlib
@@ -116,11 +117,17 @@ def interpret(
             rollouts_paths=rollout_path,
             num_acts=15,
             batch_size=limit_num_obs,
+            # This is an upper bound of the number of trajectories we need, since every
+            # trajectory has at least 1 transition.
+            n_trajectories=limit_num_obs,
         )
         # For dim reductions and gettings activations in LayerNMF we want one big batch
         # of limit_num_obs transitions. So, we simply use that as batch_size and sample
         # the first element from the dataloader.
-        inputs = next(iter(transition_tensor_dataloader))
+        inputs: th.Tensor = next(iter(transition_tensor_dataloader))
+        inputs = inputs.to(device)
+        # Ensure loaded data is FloatTensor and not DoubleTensor.
+        inputs = inputs.float()
     else:  # When using GAN.
         # Inputs should be some samples of input vectors? Not sure if this is the best
         # way to do this, there might be better options.
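A small standalone sketch of the dtype issue the added .float() call guards against: tensors built from float64 numpy data come out as DoubleTensors, which mismatch the float32 weights most torch models use; the device variable below is illustrative.

import numpy as np
import torch as th

device = th.device("cuda" if th.cuda.is_available() else "cpu")

batch = th.as_tensor(np.random.rand(4, 3, 64, 64))  # float64 numpy -> DoubleTensor
batch = batch.to(device)  # move to the same device as the model
batch = batch.float()     # cast to float32 to match the model's weights
assert batch.dtype == th.float32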
@@ -151,6 +158,8 @@ def interpret(
     rows, columns = 1, num_features
     if pyplot:
         fig = plt.figure(figsize=(columns * 2, rows * 2))  # width, height in inches
+    else:
+        fig = None
 
     # Visualize
     if vis_type == "traditional":
@@ -213,17 +222,29 @@ def interpret(
 
 
 def plot_img(
-    columns, custom_logger, feature_i, fig, img, pyplot, rows, vis_scale, wandb_logging
+    columns: int,
+    custom_logger: HierarchicalLogger,
+    feature_i: int,
+    fig: Optional[matplotlib.figure.Figure],
+    img: np.ndarray,
+    pyplot: bool,
+    rows: int,
+    vis_scale: int,
+    wandb_logging: bool,
 ):
     """Plot the passed image to pyplot and wandb as appropriate."""
     _wandb_log(custom_logger, feature_i, img, vis_scale, wandb_logging)
-    if pyplot:
+    if fig is not None and pyplot:
         fig.add_subplot(rows, columns, feature_i + 1)
         plt.imshow(img)
 
 
 def _wandb_log(
-    custom_logger, feature_i: int, img: np.ndarray, vis_scale: int, wandb_logging: bool
+    custom_logger: HierarchicalLogger,
+    feature_i: int,
+    img: np.ndarray,
+    vis_scale: int,
+    wandb_logging: bool,
 ):
     """Plot to wandb if wandb logging is enabled."""
     if wandb_logging:
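A minimal sketch (names are illustrative, not from the repo) of why the guard matters: when pyplot output is disabled, fig is now None, and calling add_subplot on it would fail, so the check short-circuits first.

from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.figure import Figure


def plot_one(fig: Optional[Figure], img: np.ndarray, idx: int) -> None:
    # Mirrors the patched plot_img: only touch the figure if one was created.
    if fig is not None:
        fig.add_subplot(1, 4, idx + 1)
        plt.imshow(img)


plot_one(None, np.zeros((8, 8)), 0)  # safe no-op when plotting is disabled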

src/reward_preprocessing/scripts/train_gan.py

Lines changed: 1 addition & 1 deletion
@@ -3,8 +3,8 @@
 For use in reward function feature visualization.
 """
 
-import torch as th
 from sacred.observers import FileStorageObserver
+import torch as th
 
 from reward_preprocessing.common import utils
 from reward_preprocessing.scripts.config.train_gan import train_gan_ex

src/reward_preprocessing/vis/reward_vis.py

Lines changed: 8 additions & 2 deletions
@@ -177,13 +177,17 @@ def __init__(
 
         self.patch_h = self.model_inputs_full.shape[2] / activations.shape[2]
         self.patch_w = self.model_inputs_full.shape[3] / activations.shape[3]
+
+        # From here on activations should be numpy array and not pytorch tensor anymore.
+        activations = activations.detach().cpu().numpy()
+
         if self.reducer is None:  # No dimensionality reduction.
             # Activations are only used for dim reduction and to determine the shape
             # of the features. The former is compatible between torch and numpy (both
             # support .shape), so calling .numpy() is not really necessary. However,
             # for consistency we do it here. Consequently, self.acts_reduced is always
             # a numpy array.
-            self.acts_reduced = activations.numpy()
+            self.acts_reduced = activations
             self.channel_dirs = np.eye(self.acts_reduced.shape[1])
             self.transform = lambda acts: acts.copy()
             self.inverse_transform = lambda acts: acts.copy()
@@ -219,7 +223,9 @@ def __init__(
             )
             # Transform into torch tensor instead of numpy array, because this is expected
             # later on.
-            self.channel_dirs = th.tensor(self.channel_dirs)
+            self.channel_dirs = th.tensor(self.channel_dirs).to(
+                self.model_inputs_full.device
+            )
 
     def vis_traditional(
         self,
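Aside: a standalone sketch of the two conversion pitfalls these changes address (the shapes and the "cuda"/"cpu" choice below are illustrative, not taken from the repo):

import numpy as np
import torch as th

# 1) .numpy() fails on tensors that require grad or live on the GPU;
#    detach().cpu().numpy() handles both, hence converting activations up front.
activations = th.randn(8, 16, 4, 4, requires_grad=True)
activations_np = activations.detach().cpu().numpy()

# 2) Tensors created from numpy default to the CPU; moving channel_dirs onto the
#    same device as the model inputs avoids device-mismatch errors downstream.
device = th.device("cuda" if th.cuda.is_available() else "cpu")
channel_dirs = th.tensor(np.eye(16)).to(device)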
