
Commit fd1ee85

Merge pull request #32 from HumanCompatibleAI/print_dataset_vis_rewards
Print out rewards of dataset visualized things
2 parents: ab3d103 + dd664f9

File tree: 4 files changed, +28 −17 lines

src/reward_preprocessing/common/utils.py

+1 −11
@@ -155,7 +155,7 @@ def tensor_to_transition(
     trans_tens: th.Tensor,
 ) -> Tuple[th.Tensor, th.Tensor, th.Tensor]:
     """Turn a generated 'transition tensor' batch into a batch of bona fide
-    transitions. Output observations will have channel dim last, activations will be
+    transitions. Output observations will have channel dim last, actions will be
     output as one-hot vectors.
     Assumes input transition tensor has values between 0 and 1.
     """
@@ -213,16 +213,6 @@ def forward(self, transition_tensor: th.Tensor) -> th.Tensor:
         # tensor_to_transition expects.
         obs, act, next_obs = tensor_to_transition(transition_tensor)

-        # TODO: Remove this once this becomes superfluous.
-        if self.rew_net.normalize_images:
-            # Imitation reward nets have this flag which basically decides whether
-            # observations will be divided by 255 (before being passed to the conv
-            # layers). If this flag is set they expect images to be between 0 and 255.
-            # The interpret and lucent code provides images between 0 and 1, so we
-            # scale up.
-            obs = obs * 255
-            next_obs = next_obs * 255
-
         dones = th.zeros_like(obs[:, 0])
         return self.rew_net(state=obs, action=act, next_state=next_obs, done=dones)
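For context on the deleted branch: imitation-style reward nets expose a normalize_images flag that divides image observations by 255 before the conv layers, so they expect inputs in [0, 255], while the interpret/lucent code produces images in [0, 1]. A minimal sketch of the now-removed workaround, using a hypothetical scale_for_net helper that is not part of the codebase:

import torch as th

def scale_for_net(obs: th.Tensor, normalize_images: bool) -> th.Tensor:
    # Mirrors the removed branch: nets with normalize_images=True divide
    # by 255 internally, so [0, 1] images must be scaled up first.
    return obs * 255 if normalize_images else obs

print(scale_for_net(th.ones(1, 4, 4, 3), True).max())  # tensor(255.)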

src/reward_preprocessing/interpret.py

+18 −4
@@ -169,7 +169,7 @@ def interpret(

     device = "cuda" if th.cuda.is_available() else "cpu"

-    # Load reward not pytorch module
+    # Load reward net pytorch module
     rew_net = th.load(str(reward_path), map_location=th.device(device))

     if gan_path is None:
@@ -228,7 +228,7 @@ def interpret(
             # input samples are used for dim reduction (if features is not
             # None) and for determining the shape of the features.
             model_inputs_preprocess=inputs,
-            activation_fn="sigmoid",
+            activation_fn="relu",
         )

         # If these are equal, then of course there is no actual reduction.
@@ -245,7 +245,6 @@ def interpret(
     if gan_path is None:
         # List of transforms
         transforms = _determine_transforms(reg)
-
         # This does the actual interpretability, i.e. it calculates the
         # visualizations.
         opt_transitions = nmf.vis_traditional(transforms=transforms)
@@ -384,9 +383,24 @@ def param_f():
             custom_logger.log(f"Feature {feature_i}")

             dataset_thumbnails, indices = nmf.vis_dataset_thumbnail(
-                feature=feature_i, num_mult=4, expand_mult=1
+                feature=feature_i,
+                num_mult=4,
+                expand_mult=1,
             )

+            if nmf.reducer is None:
+                # print out rewards
+                flat_indices = []
+                for index_list in indices:
+                    flat_indices += index_list
+                obses, _, next_obses = tensor_to_transition(inputs[flat_indices])
+                feature_i_rep = th.Tensor([feature_i] * len(flat_indices)).long()
+                action_i_tens = th.nn.functional.one_hot(
+                    feature_i_rep, num_classes=num_features
+                ).to(device)
+                rewards = rew_net(obses, action_i_tens, next_obses, done=None)
+                custom_logger.log(f"Rewards for feature {feature_i}: {rewards}")
+
             # remove opacity channel from dataset thumbnails
             np_trans_tens = dataset_thumbnails[:-1, :, :]
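As a usage note on the action encoding added above: th.nn.functional.one_hot takes a long tensor of class indices and appends a class dimension of size num_classes. A self-contained sketch with made-up sizes (batch of 4 transitions, 15 actions):

import torch as th

feature_i, batch, num_features = 2, 4, 15
idx = th.tensor([feature_i] * batch, dtype=th.long)
# Each row is a one-hot vector selecting action index `feature_i`.
actions = th.nn.functional.one_hot(idx, num_classes=num_features)
print(actions.shape)  # torch.Size([4, 15])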

src/reward_preprocessing/procgen.py

+1 −1
@@ -66,7 +66,7 @@ class ProcgenFinalObsWrapper(gym.Wrapper):
     """Returns the final observation of gym3 procgen environment, correcting for the
     fact that Procgen gym environments return the second-to-last observation again
     instead of the final observation.
-
+
     Only works correctly when the 'done' signal coincides with the end of an episode
     (which is not the case when using e.g. the seals AutoResetWrapper).
     Requires the use of the PavelCz/procgenAISC fork, which adds the 'final_obs' value.
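A minimal sketch of the pattern this docstring describes, assuming the procgenAISC fork exposes the true last frame under an info["final_obs"] key (the key name and the classic 4-tuple gym step API are assumptions here, not confirmed by this diff):

import gym

class FinalObsWrapper(gym.Wrapper):
    """Swap the duplicated second-to-last observation for the real final one."""

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        if done and "final_obs" in info:
            # Use the true terminal frame provided by the fork.
            obs = info["final_obs"]
        return obs, reward, done, info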

src/reward_preprocessing/vis/reward_vis.py

+8 −1
@@ -184,6 +184,9 @@ def __init__(
         # Apply activation function if specified.
         if activation_fn == "sigmoid":
             activations = th.sigmoid(activations)
+        elif activation_fn == "relu":
+            relu_func = th.nn.ReLU()
+            activations = relu_func(activations)
         elif activation_fn is not None:
             raise ValueError(f"Unsupported activation_fn: {activation_fn}")
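Instantiating th.nn.ReLU() just to call it once works; the functional form th.relu would do the same without constructing a module object. A quick check that the two agree:

import torch as th

activations = th.tensor([-1.0, 0.5, 2.0])
# Module and functional ReLU are element-wise identical.
assert th.equal(th.nn.ReLU()(activations), th.relu(activations))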

@@ -286,12 +289,14 @@ def vis_traditional(
                 for feature in feature_list
             ]
         )
+
         if l2_coeff != 0.0:
             if l2_layer_name is None:
                 raise ValueError(
                     "l2_layer_name must be specified if l2_coeff is non-zero"
                 )
             obj -= l2_objective(l2_layer_name, l2_coeff)
+
         input_shape = tuple(self.model_inputs_preprocess.shape[1:])

         if param_f is None:
@@ -302,6 +307,7 @@ def param_f():
                     h=input_shape[1],
                     w=input_shape[2],
                     batch=len(feature_list),
+                    sd=1,
                 )

         logging.info(f"Performing vis_traditional with transforms: {transforms}")
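If param_f here wraps lucent's param.image (the repo's comments mention lucent, but this diff does not show the call target), then sd sets the standard deviation of the random image initialization, so sd=1 starts optimization from noisier images than the library default. A sketch under that assumption:

from lucent.optvis import param

# Hypothetical standalone call mirroring the diff's arguments:
# a batch of four 64x64 images initialized from noise with sd=1.
param_f = lambda: param.image(64, h=64, batch=4, sd=1)
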
@@ -472,13 +478,14 @@ def vis_dataset_thumbnail(
         pos_indices = argmax_nd(
             acts_feature, axes=[1, 2], max_rep=max_rep, max_rep_strict=True
         )
-        # The actual maximum values of the activations, accroding to max_rep setting.
+        # The actual maximum values of the activations, according to max_rep setting.
         acts_single = acts_feature[
             range(acts_feature.shape[0]), pos_indices[0], pos_indices[1]
         ]
         # Sort the activations in descending order and take the num_mult**2 strongest
         # activations.
         obs_indices = np.argsort(-acts_single, axis=0)[: num_mult**2]
+
         # Coordinates of the strongest activation in each observation.
         coords = np.array(list(zip(*pos_indices)), dtype=[("h", int), ("w", int)])[
             obs_indices
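The negation inside np.argsort is the usual idiom for a descending sort, since argsort is ascending-only. A tiny sketch:

import numpy as np

acts = np.array([0.2, 0.9, 0.5, 0.1])
# Sorting the negated values yields indices of the largest values first.
top2 = np.argsort(-acts)[:2]
print(top2)  # [1 2]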
