Commit 161dfa5

Merge pull request #10 from HumanCompatibleAI/add_generator_minimal
Add generator (minimal)
2 parents b354cc3 + c1fca19

File tree

5 files changed: +316 -1 lines changed


requirements.txt

Lines changed: 2 additions & 0 deletions

@@ -18,3 +18,5 @@ git+https://github.com/openai/gym3.git@4c38246
 procgen @ git+https://github.com/JacobPfau/procgenAISC.git@7821f2c00b
 # Revert to this older version because some library won't work otherwise
 protobuf==3.19
+git+https://github.com/dfilan/vegans.git@76a3c45
+Pillow==9.2.0
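The new `vegans` pin supplies the GAN training algorithms (`WassersteinGAN` and `WassersteinGANGP`) that the `train_gan` script added below relies on.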
src/reward_preprocessing/generative_modelling/gen_models.py

Lines changed: 155 additions & 0 deletions

@@ -0,0 +1,155 @@

import torch as th
import torch.nn as nn

# TODO(df): add test on initialization that outputs have the right shape?


class Small21To84Generator(nn.Module):
    """
    Small generative model that takes 21 x 21 noise to an 84 x 84 image.
    """

    def __init__(self, latent_shape, data_shape):
        super(Small21To84Generator, self).__init__()
        self.hidden_part = nn.Sequential(
            nn.Conv2d(latent_shape[0], 32, kernel_size=3, padding=1),
            nn.LeakyReLU(0.1),
            nn.ConvTranspose2d(32, 32, kernel_size=4, padding=1, stride=2),
            nn.LeakyReLU(0.1),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.LeakyReLU(0.1),
            nn.ConvTranspose2d(32, 32, kernel_size=4, padding=1, stride=2),
            nn.ReLU(),
        )
        self.output = nn.Conv2d(32, data_shape[0], kernel_size=3, padding=1)

    def forward(self, x):
        x = self.hidden_part(x)
        x = self.output(x)
        return x


class SmallFourTo64Generator(nn.Module):
    """
    Small generative model that takes 4 x 4 noise to a 64 x 64 image.

    Of use for generative modelling of procgen rollouts.
    """

    def __init__(self, latent_shape, data_shape):
        super(SmallFourTo64Generator, self).__init__()
        self.hidden_part = nn.Sequential(
            nn.ConvTranspose2d(latent_shape[0], 32, kernel_size=4, padding=1, stride=2),
            # now 8x8
            nn.LeakyReLU(0.1),
            nn.ConvTranspose2d(32, 32, kernel_size=4, padding=1, stride=2),
            # now 16x16
            nn.LeakyReLU(0.1),
            nn.ConvTranspose2d(32, 32, kernel_size=4, padding=1, stride=2),
            # now 32x32
            nn.LeakyReLU(0.1),
            nn.ConvTranspose2d(32, 32, kernel_size=4, padding=1, stride=2),
            # now 64x64
            nn.LeakyReLU(0.1),
        )
        self.output = nn.Conv2d(32, data_shape[0], kernel_size=3, padding=1)

    def forward(self, x):
        x = self.hidden_part(x)
        x = self.output(x)
        return x


class DCGanFourTo64Generator(nn.Module):
    """
    DCGAN-based generative model that takes a 1-D latent vector to a 64x64 image.

    Of use for generative modelling of procgen rollouts.
    """

    def __init__(self, latent_shape, data_shape):
        super(DCGanFourTo64Generator, self).__init__()
        self.project = nn.Linear(latent_shape[0], 1024 * 4 * 4)
        self.conv_body = nn.Sequential(
            nn.BatchNorm2d(1024),
            nn.ConvTranspose2d(1024, 512, kernel_size=4, padding=1, stride=2),
            # now 8x8
            nn.LeakyReLU(0.1),
            nn.BatchNorm2d(512),
            nn.ConvTranspose2d(512, 256, kernel_size=4, padding=1, stride=2),
            # now 16x16
            nn.LeakyReLU(0.1),
            nn.BatchNorm2d(256),
            nn.ConvTranspose2d(256, 128, kernel_size=4, padding=1, stride=2),
            # now 32x32
            nn.LeakyReLU(0.1),
            nn.ConvTranspose2d(128, data_shape[0], kernel_size=4, padding=1, stride=2),
            # now 64x64
            nn.LeakyReLU(0.1),
        )

    def forward(self, x):
        batch_size = x.shape[0]
        x = self.project(x)
        x = th.reshape(x, (batch_size, 1024, 4, 4))
        x = nn.functional.leaky_relu(x, negative_slope=0.1)
        x = self.conv_body(x)
        return x


class SmallWassersteinCritic(nn.Module):
    """
    Small critic for use in the Wasserstein GAN algorithm.
    """

    def __init__(self, data_shape):
        super(SmallWassersteinCritic, self).__init__()
        self.hidden_part = nn.Sequential(
            nn.Conv2d(data_shape[0], 32, kernel_size=3, padding=1),
            nn.LeakyReLU(0.1),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.LeakyReLU(0.1),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.LeakyReLU(0.1),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(32, 1),
        )
        self.output = nn.Identity()

    def forward(self, x):
        x = self.hidden_part(x)
        x = self.output(x)
        return x


class DCGanWassersteinCritic(nn.Module):
    """
    Wasserstein-GAN critic based off the DCGAN architecture.
    """

    def __init__(self, data_shape):
        super(DCGanWassersteinCritic, self).__init__()
        self.network = nn.Sequential(
            nn.Conv2d(data_shape[0], 128, kernel_size=4, padding=1, stride=2),
            # now 32 x 32
            nn.LeakyReLU(0.1),
            nn.Conv2d(128, 256, kernel_size=4, padding=1, stride=2),
            # now 16 x 16
            nn.LeakyReLU(0.1),
            nn.LayerNorm([256, 16, 16]),
            nn.Conv2d(256, 512, kernel_size=4, padding=1, stride=2),
            # now 8 x 8
            nn.LeakyReLU(0.1),
            nn.LayerNorm([512, 8, 8]),
            nn.Conv2d(512, 1024, kernel_size=4, padding=1, stride=2),
            # now 4 x 4
            nn.LeakyReLU(0.1),
            nn.LayerNorm([1024, 4, 4]),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(1024, 1),
        )

    def forward(self, x):
        return self.network(x)
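As a quick sanity check along the lines of the TODO above, one can instantiate these networks and confirm their output shapes. This snippet is illustrative only, not part of the commit; the channel counts are arbitrary:

import torch as th

from reward_preprocessing.generative_modelling import gen_models

# 3-channel 21x21 noise -> 84x84 image (6 output channels chosen arbitrarily)
gen = gen_models.Small21To84Generator(latent_shape=[3, 21, 21], data_shape=[6, 84, 84])
z = th.randn(8, 3, 21, 21)
assert gen(z).shape == (8, 6, 84, 84)

# 100-dim latent vector -> 64x64 image via the DCGAN-style generator
dcgen = gen_models.DCGanFourTo64Generator(latent_shape=[100], data_shape=[6, 64, 64])
assert dcgen(th.randn(8, 100)).shape == (8, 6, 64, 64)

# the critic maps a batch of images to one realism score per example
critic = gen_models.SmallWassersteinCritic(data_shape=[6, 84, 84])
assert critic(gen(z)).shape == (8, 1)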

src/reward_preprocessing/interpret.py

Lines changed: 1 addition & 1 deletion

@@ -50,7 +50,7 @@ def interpret(
         sacred ingredient 'common' in imitation.scripts.common.
     reward_path: Path to the learned supervised reward net.
     rollout_path:
-        Rollouts to use vor dataset visualization, dimensionality
+        Rollouts to use for dataset visualization, dimensionality
         reduction, and determining the shape of the features.
     limit_num_obs:
         Limit how many of the transitions from `rollout_path` are used for
src/reward_preprocessing/scripts/config/train_gan.py

Lines changed: 43 additions & 0 deletions

@@ -0,0 +1,43 @@

"""Configuration settings for train_gan, training a generative model of transitions."""

import sacred
import vegans.GAN

from reward_preprocessing.generative_modelling import gen_models

train_gan_ex = sacred.Experiment("train_gan")


@train_gan_ex.config
def train_gan_defaults():
    generator_class = gen_models.Small21To84Generator
    discriminator_class = gen_models.SmallWassersteinCritic
    gan_algorithm = vegans.GAN.WassersteinGAN
    optim_kwargs = {
        "Generator": {"lr": 5e-4},
        "Adversary": {"lr": 1e-4},
    }  # keyword arguments for GAN optimizer
    num_training_epochs = 50
    batch_size = 256  # batch size for transition dataloader
    latent_shape = [3, 21, 21]  # shape of latent vector input to generator
    locals()  # make flake8 happy


@train_gan_ex.named_config
def procgen():
    generator_class = gen_models.DCGanFourTo64Generator
    discriminator_class = gen_models.DCGanWassersteinCritic
    gan_algorithm = vegans.GAN.WassersteinGANGP
    optim_kwargs = {
        "Generator": {"lr": 1e-4, "betas": (0.0, 0.9)},
        "Adversary": {"lr": 1e-4, "betas": (0.0, 0.9), "weight_decay": 1e-3},
    }
    num_training_epochs = 10
    batch_size = 128
    latent_shape = [100]
    print_every = "0.1e"
    save_losses_every = "0.1e"
    save_model_every = "1e"
    num_acts = 15
    device = "cuda"
    locals()
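Since `train_gan_ex` is a sacred experiment, the `procgen` named config is layered on top of `train_gan_defaults` at run time. A minimal sketch of driving it from Python, with hypothetical paths for the two options neither config sets:

from reward_preprocessing.scripts.train_gan import train_gan_ex

train_gan_ex.run(
    named_configs=["procgen"],
    config_updates={
        # hypothetical locations; rollouts_paths and gan_save_path
        # have no default in either config
        "rollouts_paths": "runs/rollouts/procgen.pkl",
        "gan_save_path": "runs/gan",
    },
)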
src/reward_preprocessing/scripts/train_gan.py

Lines changed: 115 additions & 0 deletions

@@ -0,0 +1,115 @@

"""Train a generative model of transitions.

For use in reward function feature visualization.
"""
import torch as th
from sacred.observers import FileStorageObserver

from reward_preprocessing.common import utils
from reward_preprocessing.scripts.config.train_gan import train_gan_ex

# TODO: write script to use this in feature viz.


@train_gan_ex.main
def train_gan(
    generator_class,
    discriminator_class,
    gan_algorithm,
    optim_kwargs,
    rollouts_paths,
    num_acts,
    num_training_epochs,
    batch_size,
    latent_shape,
    gan_save_path,
    device="cpu",
    ngpu=None,
    optimizer=th.optim.Adam,
    adv_steps=5,
    print_every="1e",
    save_losses_every="0.25e",
    save_model_every="1e",
):
    """Train a GAN on a set of transitions.

    Assumes that observations are image-shaped and actions are discrete.

    Args:
        generator_class: Upon initialization, takes a shape for the latent
            space and the shape of the transition tensors. Instantiates a
            network that takes latent vectors and returns transition tensors.
        discriminator_class: Upon initialization, takes a shape for the
            transition tensors. Instantiates a network that takes a
            transition tensor and gives it a realism score.
        gan_algorithm: A GAN training algorithm imported from `vegans`.
        optim_kwargs: A dictionary of keyword arguments for the generator and
            adversary networks.
        rollouts_paths: Path of rollouts saved by `imitation`, or list of paths.
        num_acts: Number of actions in the training environment.
        num_training_epochs: How many epochs to train the GAN for.
        batch_size: Number of transitions per batch to be trained on.
        latent_shape: Shape of the latent tensor to be fed into the generator
            network. Should be in (c,h,w) format.
        gan_save_path: Directory in which to save GAN training details.
        device: "cpu" or "cuda", depending on what you're training on.
        ngpu: Number of GPUs to train on, if training on GPUs.
        optimizer: torch.optim optimizer to train the GAN with.
        adv_steps: Number of steps to train the adversary for each step the
            generator is trained for.
        print_every: String specifying how many epochs should elapse between
            successive printings of training information.
        save_losses_every: String specifying how many epochs should elapse
            between successive savings of loss information.
        save_model_every: String specifying how many epochs should elapse
            between successive savings of the model.
    """
    # create data loader of transitions
    transitions_loader = utils.rollouts_to_dataloader(
        rollouts_paths, num_acts, batch_size
    )
    # define gan
    transitions_batch = next(iter(transitions_loader))
    trans_shape = list(transitions_batch.shape)[1:]
    generator = generator_class(latent_shape, trans_shape)
    discriminator = discriminator_class(trans_shape)
    gan = gan_algorithm(
        generator,
        discriminator,
        z_dim=latent_shape,
        x_dim=trans_shape,
        optim=optimizer,
        optim_kwargs=optim_kwargs,
        folder=gan_save_path,
        device=device,
        ngpu=ngpu,
    )
    # print out summary
    gan.summary()
    # fit gan
    steps = {"Adversary": adv_steps}
    gan.fit(
        transitions_loader,
        batch_size=batch_size,
        print_every=print_every,
        save_losses_every=save_losses_every,
        save_model_every=save_model_every,
        epochs=num_training_epochs,
        steps=steps,
    )
    # save samples, return losses, save plot of losses
    samples, losses = gan.get_training_results()
    utils.save_loss_plots(losses, gan.folder)
    utils.visualize_samples(samples, num_acts, gan.folder)
    return losses


def main_console():
    observer = FileStorageObserver("train_gan")
    train_gan_ex.observers.append(observer)
    train_gan_ex.run_commandline()


if __name__ == "__main__":  # pragma: no cover
    main_console()
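Per the TODO at the top of the script, the trained generator is eventually meant to feed feature visualization. A hedged sketch of that downstream sampling step, assuming the procgen settings and a checkpoint file name that vegans may or may not use:

import torch as th

from reward_preprocessing.generative_modelling import gen_models

# Rebuild the generator with the shapes used in training. The transition
# tensor is assumed here to have 6 channels, and "generator.pt" is a
# placeholder for whatever checkpoint vegans writes under gan_save_path.
generator = gen_models.DCGanFourTo64Generator([100], [6, 64, 64])
generator.load_state_dict(th.load("runs/gan/generator.pt"))
generator.eval()

with th.no_grad():
    z = th.randn(16, 100)            # batch of latent vectors
    fake_transitions = generator(z)  # shape (16, 6, 64, 64)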
