Skip to content

Commit f68c3a9

Browse files
Merge pull request #33 from boettiger-lab/env_customizer
update 2 action environment
2 parents 5e37d86 + fa62892 commit f68c3a9

File tree

4 files changed

+121
-0
lines changed

4 files changed

+121
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,4 @@ venv/
140140
manuscript/*.pdf
141141
saved_agents/
142142

143+
notebooks/test.ipynb

src/rl4greencrab/envs/README.md

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# twoActEnv (Gymnasium) — Green Crab IPM Environment
2+
3+
This directory contains a custom **Gymnasium** environment (`twoActEnv`) for simulating and controlling a green crab population with **two trapping actions**. For RL training, you can use the normalized environment `TwoActNormalized`.
4+
5+
`twoActEnv` env_id = "twoactenv"
6+
7+
`TwoActNormalized` env_id = "twoactenvnorm"
8+
9+
---
10+
11+
## Features
12+
13+
- **Gymnasium-compatible** `Env` with `reset()` and `step()`
14+
- **Continuous 2D action space**: traps/effort per month for two actions
15+
- **Size-structured population dynamics** (21 size bins by default)
16+
- **Seasonal loop**: months advance from March (`curr_month=3`) through November, then recruitment + overwinter effects
17+
- **Multiple observation modes** via `observation_type`:
18+
- `count`: number of crabs caught, continuous
19+
- `biomass`: total biomass of the crabs caught, continuous
20+
- `size`: size distribution of the crabs caught, continuous
21+
- `time`: current month, discrete
22+
- `count-biomass-time`
23+
- `count-time`
24+
- `count-biomass`
25+
- `biomass-time`
26+
- `size-time`
27+
- Optional **reproducibility controls** with separate RNG streams:
28+
- main environment RNG
29+
- migration-only RNG (`seed_migration`)
30+
- Optional **curriculum learning** behavior that changes initial adult population range over training progress
31+
- Action smoothness penalty: discourages large within-year variance in actions (applied at month 11)
32+
33+
---
34+
35+
## Use Case: Training an RL Policy for Green Crab Control
36+
37+
This environment can be used to train a reinforcement learning agent that learns
38+
monthly trapping effort for controlling invasive green crab populations.
39+
40+
Below is an example using **PPO** from Stable-Baselines3 with vectorized
41+
environments and a normalized wrapper.
42+
43+
### Example: PPO Training with `TwoActNormalized`
44+
45+
```python
46+
from stable_baselines3 import PPO
47+
from stable_baselines3.common.env_util import make_vec_env
48+
from rl4greencrab import TwoActNormalized
49+
50+
# Environment configuration
51+
config = {
52+
"w_mort_scale" : 600,
53+
"growth_k": 0.70,
54+
'random_start':True,
55+
'var_penalty_const': 0,
56+
'observation_type': 'size-time'
57+
}
58+
59+
# Optional: single environment (useful for debugging)
60+
env = TwoActNormalized(config)
61+
62+
# Vectorized environments for efficient PPO training
63+
vec_env = make_vec_env(TwoActNormalized, n_envs=12, env_kwargs={"config": config},)
64+
65+
# PPO with MultiInputPolicy for Dict observations
66+
model = PPO(
67+
"MultiInputPolicy",
68+
vec_env,
69+
verbose=0,
70+
tensorboard_log="/home/jovyan/logs",
71+
)
72+
73+
# Train the agent
74+
model.learn(
75+
total_timesteps=1_000,
76+
progress_bar=True,
77+
)
78+
```

src/rl4greencrab/envs/twoAction_cutomize.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,16 @@ def get_observations_space(self):
237237
),
238238
"months": spaces.Discrete(12, start=1)
239239
})
240+
elif self.observation_type == 'count-time':
241+
return spaces.Dict({
242+
"crabs": spaces.Box(
243+
low=np.array([0, 0]), # Lower bounds: original obs (0)
244+
high=np.array([self.max_obs]), # Upper bounds: obs max,
245+
shape=(2,),
246+
dtype=np.float32
247+
),
248+
"months": spaces.Discrete(12, start=1)
249+
})
240250
elif self.observation_type == 'size-time':
241251
return spaces.Dict({
242252
"crabs": spaces.Box(
@@ -273,26 +283,36 @@ def get_observations_space(self):
273283
),
274284
"months": spaces.Discrete(12, start=1)
275285
})
286+
276287
def initial_observation(self):
277288
if self.observation_type == 'count-biomass-time':
278289
return {"crabs": np.array([0, 0], dtype=np.float32), "months": self.curr_month}
290+
if self.observation_type == 'count-time':
291+
return {"crabs": np.array([0], dtype=np.float32), "months": self.curr_month}
279292
if self.observation_type == 'size-time':
280293
return {"crabs": np.zeros(self.nsize, dtype=np.float32), "months": self.curr_month}
294+
if self.observation_type == 'biomass-time':
295+
return {"crabs": np.array([0], dtype=np.float32), "months": self.curr_month}
281296
if self.observation_type == 'size':
282297
return {"crabs": np.zeros(self.nsize, dtype=np.float32)}
283298
if self.observation_type == 'count-biomass':
284299
return {"crabs": np.array([0, 0], dtype=np.float32)}
300+
285301

286302

287303
def update_observation(self, crab_counts, mean_biomass, removed):
288304
if self.observation_type == 'count-biomass-time':
289305
return {"crabs": np.array([crab_counts, mean_biomass], dtype=np.float32), "months": self.curr_month}
306+
if self.observation_type == 'count-time':
307+
return {"crabs": np.array([crab_counts], dtype=np.float32), "months": self.curr_month}
290308
if self.observation_type == 'size-time':
291309
return {"crabs": np.array(removed[:,0], dtype=np.float32), "months": self.curr_month}
292310
if self.observation_type == 'size':
293311
return {"crabs": np.array(removed[:,0], dtype=np.float32)}
294312
if self.observation_type == 'count-biomass':
295313
return {"crabs": np.array([crab_counts, mean_biomass], dtype=np.float32)}
314+
if self.observation_type == 'biomass-time':
315+
return {"crabs": np.array([mean_biomass], dtype=np.float32), "months": self.curr_month}
296316

297317
# calculate progress value for curriculum training
298318
def get_curriculum_progress(self):

src/rl4greencrab/envs/twoAction_norm.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,24 @@ def get_observations_space(self):
5656
),
5757
"months": spaces.Discrete(12, start=1)
5858
})
59+
elif self.observation_type == 'count-time':
60+
return spaces.Dict({
61+
"crabs": spaces.Box(
62+
low=np.array([-1]), # Lower bounds: original obs (0), month (1)
63+
high=np.array([1]), # Upper bounds: obs max, month max (12)
64+
dtype=np.float32
65+
),
66+
"months": spaces.Discrete(12, start=1)
67+
})
68+
elif self.observation_type == 'biomass-time':
69+
return spaces.Dict({
70+
"crabs": spaces.Box(
71+
low=np.array([-1]), # Lower bounds: original obs (0), month (1)
72+
high=np.array([1]), # Upper bounds: obs max, month max (12)
73+
dtype=np.float32
74+
),
75+
"months": spaces.Discrete(12, start=1)
76+
})
5977
elif self.observation_type == 'size-time':
6078
return spaces.Dict({
6179
"crabs": spaces.Box(
@@ -86,6 +104,10 @@ def get_observations_space(self):
86104
def update_observation_norm(self, normalized_cpue, normal_biomass):
87105
if self.observation_type == 'count-biomass-time':
88106
return {"crabs": np.array([normalized_cpue[0], normal_biomass], dtype=np.float32), "months": self.curr_month}
107+
if self.observation_type == 'count-time':
108+
return {"crabs": np.array([normalized_cpue[0]], dtype=np.float32), "months": self.curr_month}
109+
if self.observation_type == 'biomass-time':
110+
return {"crabs": np.array([normal_biomass], dtype=np.float32), "months": self.curr_month}
89111
if self.observation_type == 'size-time':
90112
return {"crabs": normalized_cpue, "months": self.curr_month}
91113
if self.observation_type == 'size':

0 commit comments

Comments
 (0)