Merge pull request #16 from HumanCompatibleAI/add_generator

PavelCz · web-flow · commit f501e9e50aae · 2022-11-08T09:41:38.000+01:00
Visualize using GAN
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1,5 +1,5 @@
 pytest==6.2.5
 black[jupyter]==22.10
 flake8==3.9.2
-pytype==2021.8.24
+pytype==2022.10.26
 flake8-isort==4.0.0
diff --git a/src/reward_preprocessing/common/utils.py b/src/reward_preprocessing/common/utils.py
@@ -120,9 +120,10 @@ def rollouts_to_dataloader(
 
 
 def visualize_samples(samples: np.ndarray, save_dir):
-    """Visualize samples from a GAN. Saves obs and next obs as png files, and takes
-    mean over height and width dimensions to turn act into a numpy array, before
-    saving it.
+    """Visualize samples from a GAN.
+
+    Saves obs and next obs as png files, and takes mean over height and width dimensions
+    to turn act into a numpy array, before saving it.
     """
     for i, transition in enumerate(samples):
         num_acts = transition.shape[0] - 6
@@ -209,6 +210,25 @@ def forward(self, transition_tensor: th.Tensor) -> th.Tensor:
         return self.rew_net(state=obs, action=act, next_state=next_obs, done=dones)
 
 
+class RewardGeneratorCombo(nn.Module):
+    """Composition of a generative model and a RewardNet.
+
+    Assumes that the RewardNet normalizes observations to [0,1].
+    """
+
+    def __init__(self, reward_net: RewardNet, generator: nn.Module):
+        super().__init__()
+        self.reward_net = reward_net
+        self.generator = generator
+
+    def forward(self, latent_tens: th.Tensor):
+        """Return the reward of the transition generated from a 4D latent tensor."""
+        transition_tensor = self.generator(th.mean(latent_tens, dim=[2, 3]))
+        obs, action_vec, next_obs = tensor_to_transition(transition_tensor)
+        done = th.zeros_like(action_vec)  # Matches action_vec's device and dtype.
+        return self.reward_net.forward(obs, action_vec, next_obs, done)
+
+
 def save_loss_plots(losses, save_dir):
     """Save plots of generator/adversary losses over training."""
     fig, _ = vegans.utils.plot_losses(losses, show=False)
diff --git a/src/reward_preprocessing/generative_modelling/gen_models.py b/src/reward_preprocessing/generative_modelling/gen_models.py
@@ -69,6 +69,8 @@ class DCGanFourTo64Generator(nn.Module):
 
     def __init__(self, latent_shape, data_shape):
         super(DCGanFourTo64Generator, self).__init__()
+        # Identity op so that lucent can regularize L2 norm of input.
+        self.latent_vec = nn.Identity()
         self.project = nn.Linear(latent_shape[0], 1024 * 4 * 4)
         self.conv_body = nn.Sequential(
             nn.BatchNorm2d(1024),
@@ -90,6 +92,7 @@ def __init__(self, latent_shape, data_shape):
 
     def forward(self, x):
         batch_size = x.shape[0]
+        x = self.latent_vec(x)
         x = self.project(x)
         x = th.reshape(x, (batch_size, 1024, 4, 4))
         x = nn.functional.leaky_relu(x, negative_slope=0.1)
diff --git a/src/reward_preprocessing/interpret.py b/src/reward_preprocessing/interpret.py
@@ -1,5 +1,5 @@
 import os.path as osp
-from typing import Optional
+from typing import Optional, Tuple, Union
 
 from PIL import Image
 from imitation.scripts.common import common as common_config
@@ -14,6 +14,7 @@
 import wandb
 
 from reward_preprocessing.common.utils import (
+    RewardGeneratorCombo,
     TensorTransitionWrapper,
     rollouts_to_dataloader,
     tensor_to_transition,
@@ -40,7 +41,9 @@ def interpret(
     vis_type: str,
     layer_name: str,
     num_features: Optional[int],
-    gan_path: Optional[str],
+    gan_path: Optional[str] = None,
+    l2_coeff: Optional[float] = None,
+    img_save_path: Optional[str] = None,
 ):
     """Run visualization for interpretability.
 
@@ -74,6 +77,12 @@ def interpret(
             Path to the GAN model. This is used to regularize the output of the
             visualization. If None simply visualize reward net without the use
             of a GAN in the pipeline.
+        l2_coeff:
+            Strength with which to penalize the L2 norm of generated latent vector
+            "visualizations" of a GAN-reward model combination. If gan_path is not None,
+            this must also not be None.
+        img_save_path:
+            Directory to save images in. Must end in a /. If None, do not save images.
     """
     if limit_num_obs <= 0:
         raise ValueError(
@@ -82,6 +91,15 @@ def interpret(
             f"I don't think we actually ever want to use all so this is currently not "
             f"implemented."
         )
+    if vis_type not in ["dataset", "traditional"]:
+        raise ValueError(f"Unknown vis_type: {vis_type}")
+    if vis_type == "dataset" and gan_path is not None:
+        raise ValueError("GANs cannot be used with dataset visualization.")
+    if gan_path is not None and l2_coeff is None:
+        raise ValueError("When GANs are used, l2_coeff must be set.")
+    if img_save_path is not None and img_save_path[-1] != "/":
+        raise ValueError("img_save_path is not a directory, does not end in /")
+
     # Set up imitation-style logging.
     custom_logger, log_dir = common_config.setup_logging()
     wandb_logging = "wandb" in common["log_format_strs"]
@@ -101,7 +119,8 @@ def interpret(
         rew_net = TensorTransitionWrapper(rew_net)
     else:  # Use GAN
         # Combine rew net with GAN.
-        raise NotImplementedError()
+        gan = th.load(gan_path, map_location=th.device(device))
+        rew_net = RewardGeneratorCombo(reward_net=rew_net, generator=gan.generator)
 
     rew_net.eval()  # Eval for visualization.
 
@@ -129,17 +148,17 @@ def interpret(
         # Ensure loaded data is FloatTensor and not DoubleTensor.
         inputs = inputs.float()
     else:  # When using GAN.
-        # Inputs should be some samples of input vectors? Not sure if this is the best
-        # way to do this, there might be better options.
-        # The important part is that lucent expects 4D tensors as inputs, so increase
-        # dimensionality accordingly.
-        raise NotImplementedError()
+        # Inputs are GAN samples
+        samples = gan.sample(limit_num_obs)
+        inputs = samples[:, :, None, None]
+        inputs = inputs.to(device)
+        inputs = inputs.float()
 
     # The model to analyse should be a torch module that takes a single input, which
     # should be a torch Tensor.
     # In our case this is one of the following:
     # - A reward net that has been wrapped, so it accepts transition tensors.
-    # - A combo of GAN and reward net that accepts latent inputs vectors. (TODO)
+    # - A combo of GAN and reward net that accepts latent input vectors.
     model_to_analyse = rew_net
     nmf = LayerNMF(
         model=model_to_analyse,
@@ -157,43 +176,73 @@ def interpret(
     num_features = nmf.channel_dirs.shape[0]
     rows, columns = 1, num_features
     if pyplot:
-        fig = plt.figure(figsize=(columns * 2, rows * 2))  # width, height in inches
+        col_mult = 4 if vis_type == "traditional" else 2
+        # figsize is width, height in inches
+        fig = plt.figure(figsize=(columns * col_mult, rows * 2))
     else:
         fig = None
 
     # Visualize
     if vis_type == "traditional":
-        # List of transforms
-        transforms = [
-            transform.jitter(2),  # Jitters input by 2 pixel
-        ]
-
-        opt_transitions = nmf.vis_traditional(transforms=transforms)
-        # This gives as an array that optimizes the objectives, in the shape of the
-        # input which is a transition tensor. However, lucent helpfully transposes the
-        # output such that the channel dimension is last. Our functions expect channel
-        # dim before spatial dims, so we need to transpose it back.
-        opt_transitions = opt_transitions.transpose(0, 3, 1, 2)
-        # Split the optimized transitions, one for each feature, into separate
-        # observations and actions. This function only works with torch tensors.
-        obs, acts, next_obs = tensor_to_transition(th.tensor(opt_transitions))
-        # obs and next_obs output have channel dim last.
-        # acts is output as one-hot vector.
+
+        if gan_path is None:
+            # List of transforms
+            transforms = [
+                transform.jitter(2),  # Jitters input by 2 pixel
+            ]
+
+            opt_transitions = nmf.vis_traditional(transforms=transforms)
+            # This gives us an array that optimizes the objectives, in the shape of the
+            # input which is a transition tensor. However, lucent helpfully transposes
+            # the output such that the channel dimension is last. Our functions expect
+            # channel dim before spatial dims, so we need to transpose it back.
+            opt_transitions = opt_transitions.transpose(0, 3, 1, 2)
+            # Split the optimized transitions, one for each feature, into separate
+            # observations and actions. This function only works with torch tensors.
+            obs, acts, next_obs = tensor_to_transition(th.tensor(opt_transitions))
+            # obs and next_obs output have channel dim last.
+            # acts is output as one-hot vector.
+
+        else:
+            # We do not require the latent vectors to be transformed before optimizing.
+            # However, we do regularize the L2 norm of latent vectors, to ensure the
+            # resulting generated images are realistic.
+            opt_latent = nmf.vis_traditional(
+                transforms=[],
+                l2_coeff=l2_coeff,
+                l2_layer_name="generator_network_latent_vec",
+            )
+            # Now, we put the latent vector through the generator to produce transition
+            # tensors that we can get observations, actions, etc. out of.
+            opt_latent = np.mean(opt_latent, axis=(1, 2))
+            opt_latent_th = th.from_numpy(opt_latent).to(th.device(device))
+            opt_transitions = gan.generator(opt_latent_th)
+            obs, acts, next_obs = tensor_to_transition(opt_transitions)
 
         # Set of images, one for each feature, add each to plot
         for feature_i in range(next_obs.shape[0]):
-            sub_img = next_obs[feature_i]
+            sub_img_obs = obs[feature_i].detach().cpu().numpy()
+            sub_img_next_obs = next_obs[feature_i].detach().cpu().numpy()
             plot_img(
                 columns,
                 custom_logger,
                 feature_i,
                 fig,
-                sub_img,
+                (sub_img_obs, sub_img_next_obs),
                 pyplot,
                 rows,
                 vis_scale,
                 wandb_logging,
             )
+            if img_save_path is not None:
+                obs_PIL = array_to_image(sub_img_obs, vis_scale)
+                obs_PIL.save(img_save_path + f"{feature_i}_obs.png")
+                next_obs_PIL = array_to_image(sub_img_next_obs, vis_scale)
+                next_obs_PIL.save(img_save_path + f"{feature_i}_next_obs.png")
+                custom_logger.log(
+                    f"Saved feature {feature_i} viz in dir {img_save_path}."
+                )
+
     elif vis_type == "dataset":
         for feature_i in range(num_features):
             custom_logger.log(f"Feature {feature_i}")
@@ -213,51 +262,93 @@ def interpret(
                 vis_scale,
                 wandb_logging,
             )
-    else:
-        raise ValueError(f"Unknown vis_type: {vis_type}.")
 
     if pyplot:
         plt.show()
     custom_logger.log("Done with dataset visualization.")
 
 
+def array_to_image(arr: np.ndarray, scale: int) -> Image.Image:
+    """Take (H, W, 3) numpy array on [0,1] scale, return upscaled PIL image."""
+    return Image.fromarray(np.uint8(arr * 255), mode="RGB").resize(
+        size=(arr.shape[1] * scale, arr.shape[0] * scale),  # PIL wants (W, H).
+        resample=Image.NEAREST,
+    )
+
+
 def plot_img(
     columns: int,
     custom_logger: HierarchicalLogger,
     feature_i: int,
     fig: Optional[matplotlib.figure.Figure],
-    img: np.ndarray,
+    img: Union[Tuple[np.ndarray, np.ndarray], np.ndarray],
     pyplot: bool,
     rows: int,
     vis_scale: int,
     wandb_logging: bool,
 ):
-    """Plot the passed image to pyplot and wandb as appropriate."""
+    """Plot one image, or an (obs, next_obs) pair, to pyplot and wandb as enabled."""
     _wandb_log(custom_logger, feature_i, img, vis_scale, wandb_logging)
-    if fig is not None and pyplot:
-        fig.add_subplot(rows, columns, feature_i + 1)
-        plt.imshow(img)
+    if fig is not None and pyplot:
+        if isinstance(img, tuple):
+            # A pair takes two panels per feature, so the grid needs twice as
+            # many columns for the subplot indices below to stay in range.
+            fig.add_subplot(rows, 2 * columns, 2 * feature_i + 1)
+            plt.imshow(img[0])
+            fig.add_subplot(rows, 2 * columns, 2 * feature_i + 2)
+            plt.imshow(img[1])
+        else:
+            fig.add_subplot(rows, columns, feature_i + 1)
+            plt.imshow(img)
 
 
 def _wandb_log(
     custom_logger: HierarchicalLogger,
     feature_i: int,
-    img: np.ndarray,
+    img: Union[Tuple[np.ndarray, np.ndarray], np.ndarray],
     vis_scale: int,
     wandb_logging: bool,
 ):
     """Plot to wandb if wandb logging is enabled."""
     if wandb_logging:
-        p_img = Image.fromarray(np.uint8(img * 255), mode="RGB").resize(
-            size=(img.shape[0] * vis_scale, img.shape[1] * vis_scale),
-            resample=Image.NEAREST,
-        )
-        wb_img = wandb.Image(p_img, caption=f"Feature {feature_i}")
-        custom_logger.record(f"feature_{feature_i}", wb_img)
+        if isinstance(img, tuple):
+            img_obs = img[0]
+            img_next_obs = img[1]
+            # TODO(df): check whether a dump is needed between these two records.
+            _wandb_log_(img_obs, vis_scale, feature_i, "obs", custom_logger)
+            _wandb_log_(img_next_obs, vis_scale, feature_i, "next_obs", custom_logger)
+        else:
+            _wandb_log_(img, vis_scale, feature_i, "dataset_vis", custom_logger)
+
         # Can't re-use steps unfortunately, so each feature img gets its own step.
         custom_logger.dump(step=feature_i)
 
 
+def _wandb_log_(
+    arr: np.ndarray,
+    scale: int,
+    feature: int,
+    img_type: str,
+    logger: HierarchicalLogger,
+) -> None:
+    """Log visualized np.ndarray to wandb using given logger.
+
+    Args:
+        - arr: array to turn into image, save.
+        - scale: ratio by which to scale up the image.
+        - feature: which number feature is being visualized.
+        - img_type: "obs", "next_obs" or "dataset_vis"; else ValueError is raised.
+        - logger: logger to use.
+    """
+    if img_type not in ("obs", "next_obs", "dataset_vis"):
+        err_str = f"img_type must be obs, next_obs or dataset_vis, not {img_type}"
+        raise ValueError(err_str)
+
+    pil_img = array_to_image(arr, scale)
+    wb_img = wandb.Image(pil_img, caption=f"Feature {feature}, {img_type}")
+    logger.record(f"feature_{feature}_{img_type}", wb_img)
+
+
 def main():
     observer = FileStorageObserver(osp.join("output", "sacred", "interpret"))
     interpret_ex.observers.append(observer)
diff --git a/src/reward_preprocessing/scripts/visualize_my_samples.py b/src/reward_preprocessing/scripts/visualize_my_samples.py
@@ -0,0 +1,19 @@
+import torch as th
+
+from reward_preprocessing.common import utils
+
+# Together these select which saved GAN checkpoint is loaded below.
+GAN_TIMESTAMP = "20221104_163134"
+MODEL_NUMBER = "13720"
+
+if __name__ == "__main__":
+    # Checkpoint path, assembled from the two constants above.
+    model_dir = f"/nas/ucb/daniel/gan_test_data_{GAN_TIMESTAMP}/models"
+    gan_path = f"{model_dir}/model_{MODEL_NUMBER}.torch"
+    # Load onto the GPU when one is available, otherwise onto the CPU.
+    target = th.device("cuda" if th.cuda.is_available() else "cpu")
+    gan = th.load(gan_path, map_location=target)
+    # Only the samples are used; the second return value is discarded.
+    samples, _ = gan.get_training_results()
+    # gan.folder is passed as the save directory for the rendered samples.
+    utils.visualize_samples(samples.detach().cpu().numpy(), gan.folder)
diff --git a/src/reward_preprocessing/vis/reward_vis.py b/src/reward_preprocessing/vis/reward_vis.py