Merge pull request #33 from boettiger-lab/env_customizer

abigailkeller · web-flow · commit f68c3a939aaf · 2026-01-13T09:09:58.000-08:00
update 2 action environment
diff --git a/.gitignore b/.gitignore
@@ -140,3 +140,4 @@ venv/
 manuscript/*.pdf
 saved_agents/
 
+notebooks/test.ipynb
diff --git a/src/rl4greencrab/envs/README.md b/src/rl4greencrab/envs/README.md
@@ -0,0 +1,78 @@
+# twoActEnv (Gymnasium) — Green Crab IPM Environment
+
+This directory contains a custom **Gymnasium** environment (`twoActEnv`) for simulating and controlling a green crab population with **two trapping actions**. For RL training, you can use the normalized environment `TwoActNormalized`.
+
+`twoActEnv` env_id = "twoactenv"
+
+`TwoActNormalized` env_id = "twoactenvnorm"
+
+---
+
+## Features
+
+- **Gymnasium-compatible** `Env` with `reset()` and `step()`
+- **Continuous 2D action space**: traps/effort per month for two actions
+- **Size-structured population dynamics** (21 size bins by default)
+- **Seasonal loop**: months advance from March (`curr_month=3`) through November, then recruitment + overwinter effects
+- **Multiple observation modes** via `observation_type`:
+  - `count`: number of crabs caught, continuous
+  - `biomass`: total biomass of the crabs caught, continuous
+  - `size`: size distribution of the crabs caught, continuous
+  - `time`: current month, discrete
+  - `count-biomass-time`
+  - `count-time`
+  - `count-biomass`
+  - `biomass-time`
+  - `size-time`
+- Optional **reproducibility controls** with separate RNG streams:
+  - main environment RNG
+  - migration-only RNG (`seed_migration`)
+- Optional **curriculum learning** behavior that changes initial adult population range over training progress
+- Action smoothness penalty: discourages large within-year variance in actions (applied at month 11)
+
+---
+
+## Use Case: Training an RL Policy for Green Crab Control
+
+This environment can be used to train a reinforcement learning agent that learns
+monthly trapping effort for controlling invasive green crab populations.
+
+Below is an example using **PPO** from Stable-Baselines3 with vectorized
+environments and a normalized wrapper.
+
+### Example: PPO Training with `TwoActNormalized`
+
+```python
+from stable_baselines3 import PPO
+from stable_baselines3.common.env_util import make_vec_env
+from rl4greencrab import TwoActNormalized
+
+# Environment configuration
+config = {
+    "w_mort_scale" : 600,
+    "growth_k": 0.70,
+    'random_start':True,
+    'var_penalty_const': 0,
+    'observation_type': 'size-time'
+}
+
+# Optional: single environment (useful for debugging)
+env = TwoActNormalized(config)
+
+# Vectorized environments for efficient PPO training
+vec_env = make_vec_env(TwoActNormalized, n_envs=12, env_kwargs={"config": config},)
+
+# PPO with MultiInputPolicy for Dict observations
+model = PPO(
+    "MultiInputPolicy",
+    vec_env,
+    verbose=0,
+    tensorboard_log="/home/jovyan/logs",
+)
+
+# Train the agent
+model.learn(
+    total_timesteps=1_000,
+    progress_bar=True,
+)
+```
diff --git a/src/rl4greencrab/envs/twoAction_cutomize.py b/src/rl4greencrab/envs/twoAction_cutomize.py
@@ -237,6 +237,16 @@ def get_observations_space(self):
                 ), 
                 "months": spaces.Discrete(12, start=1)
             })
+        elif self.observation_type == 'count-time':
+            return spaces.Dict({
+               "crabs": spaces.Box(
+                    low=np.array([0, 0]),  # Lower bounds: original obs (0)
+                    high=np.array([self.max_obs]),  # Upper bounds: obs max,
+                    shape=(2,),
+                    dtype=np.float32
+                ), 
+                "months": spaces.Discrete(12, start=1)
+            })
         elif self.observation_type == 'size-time':
             return  spaces.Dict({
                    "crabs": spaces.Box(
@@ -273,26 +283,36 @@ def get_observations_space(self):
                 ),
                 "months": spaces.Discrete(12, start=1)
             })
+            
     def initial_observation(self):
         if self.observation_type == 'count-biomass-time':
             return {"crabs": np.array([0, 0], dtype=np.float32), "months": self.curr_month}
+        if self.observation_type == 'count-time':
+            return {"crabs": np.array([0], dtype=np.float32), "months": self.curr_month}
         if self.observation_type == 'size-time':
             return {"crabs":  np.zeros(self.nsize, dtype=np.float32), "months":  self.curr_month}
+        if self.observation_type == 'biomass-time':
+            return {"crabs": np.array([0], dtype=np.float32), "months": self.curr_month}
         if self.observation_type == 'size':
             return {"crabs":  np.zeros(self.nsize, dtype=np.float32)}
         if self.observation_type == 'count-biomass':
             return {"crabs": np.array([0, 0], dtype=np.float32)}
+
         
     
     def update_observation(self, crab_counts, mean_biomass, removed):
         if self.observation_type == 'count-biomass-time':
             return {"crabs": np.array([crab_counts, mean_biomass], dtype=np.float32), "months": self.curr_month}
+        if self.observation_type == 'count-time':
+            return {"crabs": np.array([crab_counts], dtype=np.float32), "months": self.curr_month}
         if self.observation_type == 'size-time':
             return {"crabs": np.array(removed[:,0], dtype=np.float32), "months": self.curr_month}
         if self.observation_type == 'size':
             return {"crabs": np.array(removed[:,0], dtype=np.float32)}
         if self.observation_type == 'count-biomass':
             return {"crabs": np.array([crab_counts, mean_biomass], dtype=np.float32)}
+        if self.observation_type == 'biomass-time':
+            return {"crabs": np.array([mean_biomass], dtype=np.float32), "months": self.curr_month}
         
     # calculate progress value for curriculum training
     def get_curriculum_progress(self):
diff --git a/src/rl4greencrab/envs/twoAction_norm.py b/src/rl4greencrab/envs/twoAction_norm.py
@@ -56,6 +56,24 @@ def get_observations_space(self):
                 ), 
                 "months": spaces.Discrete(12, start=1)
             })
+        elif self.observation_type == 'count-time':
+            return spaces.Dict({
+                "crabs": spaces.Box(
+                    low=np.array([-1]),  # Lower bounds: original obs (0), month (1)
+                    high=np.array([1]),  # Upper bounds: obs max, month max (12)
+                    dtype=np.float32
+                ), 
+                "months": spaces.Discrete(12, start=1)
+            })
+        elif self.observation_type == 'biomass-time':
+            return spaces.Dict({
+                "crabs": spaces.Box(
+                    low=np.array([-1]),  # Lower bounds: original obs (0), month (1)
+                    high=np.array([1]),  # Upper bounds: obs max, month max (12)
+                    dtype=np.float32
+                ), 
+                "months": spaces.Discrete(12, start=1)
+            })
         elif self.observation_type == 'size-time':
             return  spaces.Dict({
             "crabs": spaces.Box(
@@ -86,6 +104,10 @@ def get_observations_space(self):
     def update_observation_norm(self, normalized_cpue, normal_biomass):
         if self.observation_type == 'count-biomass-time':
             return {"crabs": np.array([normalized_cpue[0], normal_biomass], dtype=np.float32), "months": self.curr_month}
+        if self.observation_type == 'count-time':
+            return {"crabs": np.array([normalized_cpue[0]], dtype=np.float32), "months": self.curr_month}
+        if self.observation_type == 'biomass-time':
+            return {"crabs": np.array([normal_biomass], dtype=np.float32), "months": self.curr_month}
         if self.observation_type == 'size-time':
             return {"crabs": normalized_cpue,  "months": self.curr_month}
         if self.observation_type == 'size':

Original file line number	Diff line number	Diff line change
`@@ -140,3 +140,4 @@ venv/`
`140`	`140`	`manuscript/*.pdf`
`141`	`141`	`saved_agents/`
`142`	`142`
	`143`	`+notebooks/test.ipynb`