
Commit ebb451c

param docs
1 parent 7916afc commit ebb451c

File tree

1 file changed: +86 -60 lines


vmas/scenarios/football.py

Lines changed: 86 additions & 60 deletions
@@ -28,80 +28,49 @@
 
 
 class Scenario(BaseScenario):
-    def make_world(self, batch_dim: int, device: torch.device, **kwargs):
-        self.init_params(**kwargs)
-        world = self.init_world(batch_dim, device)
-        self.init_agents(world)
-        self.init_ball(world)
-        self.init_background(world)
-        self.init_walls(world)
-        self.init_goals(world)
-        self.init_traj_pts(world)
-        self.left_goal_pos = torch.tensor(
-            [-self.pitch_length / 2 - self.ball_size / 2, 0],
-            device=device,
-            dtype=torch.float,
-        )
-        self.right_goal_pos = -self.left_goal_pos
-        self._done = torch.zeros(batch_dim, device=device, dtype=torch.bool)
-        self._sparse_reward_blue = torch.zeros(
-            batch_dim, device=device, dtype=torch.float32
-        )
-        self._sparse_reward_red = self._sparse_reward_blue.clone()
-        self._render_field = True
-        self.min_agent_dist_to_ball_blue = None
-        self.min_agent_dist_to_ball_red = None
-
-        self._reset_agent_range = torch.tensor(
-            [self.pitch_length / 2, self.pitch_width],
-            device=device,
-        )
-        self._reset_agent_offset_blue = torch.tensor(
-            [-self.pitch_length / 2 + self.agent_size, -self.pitch_width / 2],
-            device=device,
-        )
-        self._reset_agent_offset_red = torch.tensor(
-            [-self.agent_size, -self.pitch_width / 2], device=device
-        )
-        self._agents_rel_pos_to_ball = None
-        return world
-
-    def reset_world_at(self, env_index: int = None):
-        self.reset_agents(env_index)
-        self.reset_ball(env_index)
-        self.reset_walls(env_index)
-        self.reset_goals(env_index)
-        self.reset_controllers(env_index)
-        if env_index is None:
-            self._done[:] = False
-        else:
-            self._done[env_index] = False
-
     def init_params(self, **kwargs):
         # Scenario config
         self.viewer_size = kwargs.pop("viewer_size", (1200, 800))
 
         # Agents config
         self.n_blue_agents = kwargs.pop("n_blue_agents", 3)
         self.n_red_agents = kwargs.pop("n_red_agents", 3)
+        # Which agents should be learning and which should be controlled by the heuristic (ai)
         self.ai_red_agents = kwargs.pop("ai_red_agents", True)
         self.ai_blue_agents = kwargs.pop("ai_blue_agents", False)
+
+        # When you have 5 blue agents there is the option of introducing physical differences with the following roles:
+        # 1 goalkeeper -> slow and big
+        # 2 defenders -> normal size and speed (agent_size, u_multiplier, max_speed)
+        # 2 attackers -> small and fast
         self.physically_different = kwargs.pop("physically_different", False)
 
         # Agent spawning
         self.spawn_in_formation = kwargs.pop("spawn_in_formation", False)
-        self.only_blue_formation = kwargs.pop("only_blue_formation", True)
+        self.only_blue_formation = kwargs.pop(
+            "only_blue_formation", True
+        )  # Only spawn blue agents in formation
         self.formation_agents_per_column = kwargs.pop("formation_agents_per_column", 2)
         self.randomise_formation_indices = kwargs.pop(
             "randomise_formation_indices", False
-        )
-        self.formation_noise = kwargs.pop("formation_noise", 0.2)
+        )  # If False, each agent will always be in the same formation spot
+        self.formation_noise = kwargs.pop(
+            "formation_noise", 0.2
+        )  # Noise on formation positions
 
         # Ai config
-        self.n_traj_points = kwargs.pop("n_traj_points", 0)
-        self.ai_speed_strength = kwargs.pop("ai_strength", 1.0)
-        self.ai_decision_strength = kwargs.pop("ai_decision_strength", 1.0)
-        self.ai_precision_strength = kwargs.pop("ai_precision_strength", 1.0)
+        self.n_traj_points = kwargs.pop(
+            "n_traj_points", 0
+        )  # Number of spline trajectory points to plot for heuristic (ai) agents
+        self.ai_speed_strength = kwargs.pop(
+            "ai_strength", 1.0
+        )  # The speed of the ai, 0 <= x <= 1
+        self.ai_decision_strength = kwargs.pop(
+            "ai_decision_strength", 1.0
+        )  # The decision strength of the ai, 0 <= x <= 1
+        self.ai_precision_strength = kwargs.pop(
+            "ai_precision_strength", 1.0
+        )  # The precision strength of the ai, 0 <= x <= 1
         self.disable_ai_red = kwargs.pop("disable_ai_red", False)
 
         # Task sizes
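
For context, a minimal sketch of how the agent and formation kwargs documented above might be passed to the scenario, assuming the usual vmas.make_env entry point forwards extra keyword arguments through make_world to init_params; the values shown are illustrative, not part of this commit:

import vmas

env = vmas.make_env(
    scenario="football",
    num_envs=32,
    device="cpu",
    continuous_actions=True,
    # Agents config
    n_blue_agents=5,
    n_red_agents=5,
    ai_blue_agents=False,       # blue team is learning
    ai_red_agents=True,         # red team is driven by the heuristic (ai)
    physically_different=True,  # goalkeeper/defender/attacker roles (5 blue agents)
    # Agent spawning
    spawn_in_formation=True,
    formation_noise=0.2,
)
obs = env.reset()
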
@@ -117,7 +86,9 @@ def init_params(self, **kwargs):
         self.u_multiplier = kwargs.pop("u_multiplier", 0.1)
 
         # Actions shooting
-        self.enable_shooting = kwargs.pop("enable_shooting", False)
+        self.enable_shooting = kwargs.pop(
+            "enable_shooting", False
+        )  # Whether to enable an extra 2 actions (for rotation and shooting). Only available for non-ai agents
         self.u_rot_multiplier = kwargs.pop("u_rot_multiplier", 0.0003)
         self.u_shoot_multiplier = kwargs.pop("u_shoot_multiplier", 0.6)
         self.shooting_radius = kwargs.pop("shooting_radius", 0.08)
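
A minimal sketch of what enable_shooting implies for the action shape, assuming each learning agent's continuous action grows from 2 dims (x/y force) to 4 dims (x/y force, rotation, shooting) and that env.step takes one (num_envs, action_dim) tensor per learning agent; the shapes are an assumption, not confirmed by this diff:

import torch
import vmas

num_envs = 8
env = vmas.make_env(
    scenario="football",
    num_envs=num_envs,
    device="cpu",
    enable_shooting=True,  # adds the rotation and shooting actions for learning agents
)
obs = env.reset()
# One action tensor per learning agent: assumed shape (num_envs, 4) with shooting enabled
actions = [torch.zeros(num_envs, 4) for _ in range(env.n_agents)]
obs, rews, dones, info = env.step(actions)
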
@@ -131,12 +102,16 @@ def init_params(self, **kwargs):
         self.dense_reward = kwargs.pop("dense_reward", True)
         self.pos_shaping_factor_ball_goal = kwargs.pop(
             "pos_shaping_factor_ball_goal", 10.0
-        )
+        )  # Reward for moving the ball towards the opponents' goal. This can be annealed in a curriculum.
         self.pos_shaping_factor_agent_ball = kwargs.pop(
             "pos_shaping_factor_agent_ball", 0.1
-        )
+        )  # Reward for moving the team's closest agent closer to the ball.
+        # This is useful for exploration and can be annealed in a curriculum.
+        # This reward is not given when the agent is closer than distance_to_ball_trigger to the ball or when the ball is moving
         self.distance_to_ball_trigger = kwargs.pop("distance_to_ball_trigger", 0.4)
-        self.scoring_reward = kwargs.pop("scoring_reward", 100.0)
+        self.scoring_reward = kwargs.pop(
+            "scoring_reward", 100.0
+        )  # Discrete reward for scoring
 
         # Observations
         self.observe_teammates = kwargs.pop("observe_teammates", True)
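
Since the shaping factors are plain kwargs, one way to anneal them in a curriculum is to rebuild the environment per stage. A sketch under that assumption; make_stage_env and the halving schedule are hypothetical, not part of VMAS or this commit:

import vmas

def make_stage_env(stage: int, num_envs: int = 32):
    # Shrink the dense shaping terms as training progresses so that the
    # sparse scoring_reward dominates in later stages (illustrative schedule).
    return vmas.make_env(
        scenario="football",
        num_envs=num_envs,
        device="cpu",
        pos_shaping_factor_ball_goal=10.0 * 0.5**stage,   # 10.0 -> 5.0 -> 2.5 -> ...
        pos_shaping_factor_agent_ball=0.1 * 0.5**stage,   # 0.1 -> 0.05 -> 0.025 -> ...
        scoring_reward=100.0,
    )

for stage in range(3):
    env = make_stage_env(stage)
    # ... train on this stage, then move to the next ...
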
@@ -150,6 +125,57 @@ def init_params(self, **kwargs):
         )
         ScenarioUtils.check_kwargs_consumed(kwargs)
 
+    def make_world(self, batch_dim: int, device: torch.device, **kwargs):
+        self.init_params(**kwargs)
+        world = self.init_world(batch_dim, device)
+        self.init_agents(world)
+        self.init_ball(world)
+        self.init_background(world)
+        self.init_walls(world)
+        self.init_goals(world)
+        self.init_traj_pts(world)
+
+        # Cached values
+        self.left_goal_pos = torch.tensor(
+            [-self.pitch_length / 2 - self.ball_size / 2, 0],
+            device=device,
+            dtype=torch.float,
+        )
+        self.right_goal_pos = -self.left_goal_pos
+        self._done = torch.zeros(batch_dim, device=device, dtype=torch.bool)
+        self._sparse_reward_blue = torch.zeros(
+            batch_dim, device=device, dtype=torch.float32
+        )
+        self._sparse_reward_red = self._sparse_reward_blue.clone()
+        self._render_field = True
+        self.min_agent_dist_to_ball_blue = None
+        self.min_agent_dist_to_ball_red = None
+
+        self._reset_agent_range = torch.tensor(
+            [self.pitch_length / 2, self.pitch_width],
+            device=device,
+        )
+        self._reset_agent_offset_blue = torch.tensor(
+            [-self.pitch_length / 2 + self.agent_size, -self.pitch_width / 2],
+            device=device,
+        )
+        self._reset_agent_offset_red = torch.tensor(
+            [-self.agent_size, -self.pitch_width / 2], device=device
+        )
+        self._agents_rel_pos_to_ball = None
+        return world
+
+    def reset_world_at(self, env_index: int = None):
+        self.reset_agents(env_index)
+        self.reset_ball(env_index)
+        self.reset_walls(env_index)
+        self.reset_goals(env_index)
+        self.reset_controllers(env_index)
+        if env_index is None:
+            self._done[:] = False
+        else:
+            self._done[env_index] = False
+
     def init_world(self, batch_dim: int, device: torch.device):
         # Make world
         world = World(
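
reset_world_at(env_index) is what backs per-environment resets in the vectorized batch. A sketch assuming the standard VMAS Environment API, where reset() resets every environment and reset_at(index) resets a single one:

import vmas

env = vmas.make_env(scenario="football", num_envs=4, device="cpu")
obs = env.reset()        # scenario.reset_world_at(env_index=None): reset all envs
obs_0 = env.reset_at(0)  # scenario.reset_world_at(env_index=0): reset only batch entry 0
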
