Merge pull request #27 from HumanCompatibleAI/dataset_viz_fixes

dfilan · web-flow · commit 8781b1b32195 · 2022-12-15T13:02:06.000-08:00
Dataset viz fixes
diff --git a/src/reward_preprocessing/common/utils.py b/src/reward_preprocessing/common/utils.py
@@ -129,30 +129,26 @@ def visualize_samples(samples: np.ndarray, save_dir):
     to turn act into a numpy array, before saving it.
     """
     for i, transition in enumerate(samples):
-        num_acts = transition.shape[0] - 6
-        s = transition[0:3, :, :]
+        s, act, s_ = ndarray_to_transition(transition)
         s = process_image_array(s)
-        act = transition[3 : 3 + num_acts, :, :]
-        s_ = transition[3 + num_acts : transition.shape[0], :, :]
         s_ = process_image_array(s_)
-        act_slim_mean = np.mean(act, axis=(1, 2))
-        act_slim_max = np.max(np.abs(act), axis=(1, 2))
         s_img = PIL.Image.fromarray(s)
         s__img = PIL.Image.fromarray(s_)
         (Path(save_dir) / str(i)).mkdir()
         s_img.save(Path(save_dir) / str(i) / "first_obs.png")
         s__img.save(Path(save_dir) / str(i) / "second_obs.png")
-        np.save(Path(save_dir) / str(i) / "act_vec_mean.npy", act_slim_mean)
-        np.save(Path(save_dir) / str(i) / "act_vec_max.npy", act_slim_max)
+        np.save(Path(save_dir) / str(i) / "act.npy", act)
 
 
 def process_image_array(img: np.ndarray) -> np.ndarray:
-    """Process a numpy array for feeding into PIL.Image.fromarray."""
+    """Process a numpy array for feeding into PIL.Image.fromarray.
+
+    The input array should already be in (h,w,c) format.
+    """
     up_multiplied = img * 255
     clipped = np.clip(up_multiplied, 0, 255)
     cast = clipped.astype(np.uint8)
-    transposed = np.transpose(cast, axes=(1, 2, 0))
-    return transposed
+    return cast
 
 
 def tensor_to_transition(
@@ -178,6 +174,22 @@ def tensor_to_transition(
     return obs_proc, act_proc, next_obs_proc
 
 
+def ndarray_to_transition(
+    np_trans: np.ndarray,
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Split a numpy transition array into (obs, act, next_obs) arrays."""
+    if len(np_trans.shape) != 3:
+        raise ValueError("ndarray_to_transition assumes input has shape of length 3")
+    boosted_np_trans = np_trans[None, :, :, :]
+    th_trans = th.from_numpy(boosted_np_trans)
+    th_obs, th_act, th_next_obs = tensor_to_transition(th_trans)
+    np_obs, np_act, np_next_obs = map(
+        lambda th_result: th_result[0].detach().cpu().numpy(),
+        (th_obs, th_act, th_next_obs),
+    )
+    return np_obs, np_act, np_next_obs
+
+
 def process_image_tensor(obs: th.Tensor) -> th.Tensor:
     """Take a GAN image and processes it for use in a reward net."""
     clipped_obs = th.clamp(obs, 0, 1)
diff --git a/src/reward_preprocessing/interpret.py b/src/reward_preprocessing/interpret.py
@@ -18,6 +18,7 @@
     TensorTransitionWrapper,
     array_to_image,
     log_img_wandb,
+    ndarray_to_transition,
     rollouts_to_dataloader,
     tensor_to_transition,
 )
@@ -361,22 +362,36 @@ def interpret(
         for feature_i in range(num_features):
             custom_logger.log(f"Feature {feature_i}")
 
-            img, indices = nmf.vis_dataset_thumbnail(
+            dataset_thumbnails, indices = nmf.vis_dataset_thumbnail(
                 feature=feature_i, num_mult=4, expand_mult=1
             )
 
+            # remove opacity channel from dataset thumbnails
+            np_trans_tens = dataset_thumbnails[:-1, :, :]
+
+            obs, _, next_obs = ndarray_to_transition(np_trans_tens)
+
             _log_single_transition_wandb(
-                custom_logger, feature_i, img, vis_scale, wandb_logging
+                custom_logger, feature_i, (obs, next_obs), vis_scale, wandb_logging
             )
             _plot_img(
                 columns,
                 feature_i,
                 num_features,
                 fig,
-                img,
+                (obs, next_obs),
                 rows,
             )
 
+            if img_save_path is not None:
+                obs_PIL = array_to_image(obs, vis_scale)
+                obs_PIL.save(img_save_path + f"{feature_i}_obs.png")
+                next_obs_PIL = array_to_image(next_obs, vis_scale)
+                next_obs_PIL.save(img_save_path + f"{feature_i}_next_obs.png")
+                custom_logger.log(
+                    f"Saved feature {feature_i} viz in dir {img_save_path}."
+                )
+
     if pyplot:
         plt.show()
     custom_logger.log("Done with visualization.")
diff --git a/src/reward_preprocessing/vis/reward_vis.py b/src/reward_preprocessing/vis/reward_vis.py
@@ -340,18 +340,21 @@ def pad_obses(self, *, expand_mult=1):
                 % 2
             )  # Checkered pattern.
             self.padded_obses = self.padded_obses * 0.25 + 0.75  # Adjust color.
-            self.padded_obses = self.padded_obses.astype(self.model_inputs_full.dtype)
+            self.padded_obses = self.padded_obses.astype(
+                self.model_inputs_full.detach().cpu().numpy().dtype
+            )
             # Add dims for batch and channel.
             self.padded_obses = self.padded_obses[None, None, ...]
             # Repeat for correct number of images.
             self.padded_obses = self.padded_obses.repeat(
                 self.model_inputs_full.shape[0], axis=0
             )
             # Repeat channel dimension.
-            self.padded_obses = self.padded_obses.repeat(3, axis=1)
+            num_channels = self.model_inputs_full.shape[1]
+            self.padded_obses = self.padded_obses.repeat(num_channels, axis=1)
             self.padded_obses[
                 :, :, self.pad_h : -self.pad_h, self.pad_w : -self.pad_w
-            ] = self.model_inputs_full
+            ] = (self.model_inputs_full.detach().cpu().numpy())
 
     def get_patch(self, obs_index, pos_h, pos_w, *, expand_mult=1):
         left_h = self.pad_h + (pos_h - 0.5 * expand_mult) * self.patch_h
@@ -468,7 +471,7 @@ def vis_dataset_thumbnail(
         acts_single = acts_feature[
             range(acts_feature.shape[0]), pos_indices[0], pos_indices[1]
         ]
-        # Sort the activations in descending order and take the num_mult**2 strongest.
+        # Sort the activations in descending order and take the num_mult**2 strongest
         # activations.
         obs_indices = np.argsort(-acts_single, axis=0)[: num_mult**2]
         # Coordinates of the strongest activation in each observation.