Add helper script

PavelCz · PavelCz · commit 22c2faadf364 · 2022-10-29T17:33:11.000+02:00
diff --git a/README.md b/README.md
@@ -94,7 +94,7 @@ python -m reward_preprocessing.interpret print_config
     - `policies`: RL policies for training experts with train_rl.
     - `preprocessing` Reward preprocessing / reward shaping code.
     - `scripts`: All scripts that are not the main scripts of the projects. Helpers and scripts that produce artifacts that are used by the main script. Everything here should either be an executable file or a config for one.
-       - `helpers`: Helper scripts that are bash executables.
+       - `helpers`: Helper scripts that are bash executables or Python scripts that are not full Sacred experiments.
     - `trainers`: Our additions to the suite of reward learning algorithms available in imitation. Currently this contains the trainer for training reward nets with supervised learning.
     - `vis`: Visualization code for interpreting reward functions.
     - `interpret.py`: The main script that provides the functionality for this project.
diff --git a/src/reward_preprocessing/scripts/helpers/fix_trajectories.py b/src/reward_preprocessing/scripts/helpers/fix_trajectories.py
@@ -0,0 +1,48 @@
+"""Fix saved trajectory format from that one time that I saved them wrong."""
+import numpy as np
+
+path = "/home/pavel/out/interpret/expert-rollouts/procgen-gm/005/fixed-coin_1000.2k.npz"
+data = np.load(path, allow_pickle=True)
+
+# Observations need to be fixed
+observations = data["obs"]
+
+indices = data["indices"]
+traj_list = []
+for i in range(len(indices)):
+    if i == 0:
+        start = 0
+    else:
+        start = indices[i - 1]
+    end = indices[i]
+    # + 1 because we also want to include the last next_obs
+    obs = observations[start : end + 1]
+    traj_list.append(obs)
+# Also add the last trajectory
+traj_list.append(observations[indices[-1] :])
+
+# Concatenate them together, duplicates and all
+new_observations = np.concatenate(traj_list, axis=0)
+
+# Sanity check
+assert (
+    np.cumsum([len(traj) - 1 for traj in traj_list[:-1]]) == np.array(indices)
+).all()
+
+new_dict = {
+    "obs": new_observations,
+    "acts": data["acts"],
+    "infos": data["infos"],
+    "terminal": data["terminal"],
+    "rews": data["rews"],
+    "indices": data["indices"],
+}
+
+# Update path name
+split = path.split(".")
+split[-2] += "_fixed"
+save_path = ".".join(split)
+
+# Save fixed data
+with open(save_path, "wb") as f:
+    np.savez_compressed(f, **new_dict)