 class Scenario(BaseScenario):
-    def make_world(self, batch_dim: int, device: torch.device, **kwargs):
-        self.init_params(**kwargs)
-        world = self.init_world(batch_dim, device)
-        self.init_agents(world)
-        self.init_ball(world)
-        self.init_background(world)
-        self.init_walls(world)
-        self.init_goals(world)
-        self.init_traj_pts(world)
-        self.left_goal_pos = torch.tensor(
-            [-self.pitch_length / 2 - self.ball_size / 2, 0],
-            device=device,
-            dtype=torch.float,
-        )
-        self.right_goal_pos = -self.left_goal_pos
-        self._done = torch.zeros(batch_dim, device=device, dtype=torch.bool)
-        self._sparse_reward_blue = torch.zeros(
-            batch_dim, device=device, dtype=torch.float32
-        )
-        self._sparse_reward_red = self._sparse_reward_blue.clone()
-        self._render_field = True
-        self.min_agent_dist_to_ball_blue = None
-        self.min_agent_dist_to_ball_red = None
-
-        self._reset_agent_range = torch.tensor(
-            [self.pitch_length / 2, self.pitch_width],
-            device=device,
-        )
-        self._reset_agent_offset_blue = torch.tensor(
-            [-self.pitch_length / 2 + self.agent_size, -self.pitch_width / 2],
-            device=device,
-        )
-        self._reset_agent_offset_red = torch.tensor(
-            [-self.agent_size, -self.pitch_width / 2], device=device
-        )
-        self._agents_rel_pos_to_ball = None
-        return world
-
-    def reset_world_at(self, env_index: int = None):
-        self.reset_agents(env_index)
-        self.reset_ball(env_index)
-        self.reset_walls(env_index)
-        self.reset_goals(env_index)
-        self.reset_controllers(env_index)
-        if env_index is None:
-            self._done[:] = False
-        else:
-            self._done[env_index] = False
-
     def init_params(self, **kwargs):
         # Scenario config
         self.viewer_size = kwargs.pop("viewer_size", (1200, 800))

         # Agents config
         self.n_blue_agents = kwargs.pop("n_blue_agents", 3)
         self.n_red_agents = kwargs.pop("n_red_agents", 3)
+        # Which agents should be learning and which should be controlled by the heuristic (AI)
         self.ai_red_agents = kwargs.pop("ai_red_agents", True)
         self.ai_blue_agents = kwargs.pop("ai_blue_agents", False)
+
+        # When there are 5 blue agents, there is the option of introducing physical differences with the following roles:
+        # 1 goalkeeper -> slow and big
+        # 2 defenders -> normal size and speed (agent_size, u_multiplier, max_speed)
+        # 2 attackers -> small and fast
         self.physically_different = kwargs.pop("physically_different", False)

         # Agent spawning
         self.spawn_in_formation = kwargs.pop("spawn_in_formation", False)
-        self.only_blue_formation = kwargs.pop("only_blue_formation", True)
+        self.only_blue_formation = kwargs.pop(
+            "only_blue_formation", True
+        )  # Only spawn blue agents in formation
         self.formation_agents_per_column = kwargs.pop("formation_agents_per_column", 2)
         self.randomise_formation_indices = kwargs.pop(
             "randomise_formation_indices", False
-        )
-        self.formation_noise = kwargs.pop("formation_noise", 0.2)
+        )  # If False, each agent will always be in the same formation spot
+        self.formation_noise = kwargs.pop(
+            "formation_noise", 0.2
+        )  # Noise on formation positions

         # Ai config
-        self.n_traj_points = kwargs.pop("n_traj_points", 0)
-        self.ai_speed_strength = kwargs.pop("ai_strength", 1.0)
-        self.ai_decision_strength = kwargs.pop("ai_decision_strength", 1.0)
-        self.ai_precision_strength = kwargs.pop("ai_precision_strength", 1.0)
+        self.n_traj_points = kwargs.pop(
+            "n_traj_points", 0
+        )  # Number of spline trajectory points to plot for heuristic (AI) agents
+        self.ai_speed_strength = kwargs.pop(
+            "ai_strength", 1.0
+        )  # The speed of the AI, 0 <= x <= 1
+        self.ai_decision_strength = kwargs.pop(
+            "ai_decision_strength", 1.0
+        )  # The decision strength of the AI, 0 <= x <= 1
+        self.ai_precision_strength = kwargs.pop(
+            "ai_precision_strength", 1.0
+        )  # The precision strength of the AI, 0 <= x <= 1
         self.disable_ai_red = kwargs.pop("disable_ai_red", False)

         # Task sizes
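
Note on the formation options documented in the hunk above: spawn_in_formation lays a team out in columns of formation_agents_per_column agents, and formation_noise jitters those positions. The sketch below only illustrates what such a column formation could look like; the helper name and the layout math are assumptions, not the scenario's actual reset code.

import torch

def spawn_formation_positions(
    n_agents: int,
    agents_per_column: int,
    half_length: float,     # half pitch length of the spawning side
    pitch_width: float,
    noise: float = 0.2,
    device: torch.device = torch.device("cpu"),
) -> torch.Tensor:
    # Lay agents out column by column on one half of the pitch,
    # then jitter every position with uniform noise of magnitude `noise`.
    n_columns = -(-n_agents // agents_per_column)  # ceil division
    positions = []
    for i in range(n_agents):
        col, row = divmod(i, agents_per_column)
        x = -half_length * (col + 1) / (n_columns + 1)                 # spread columns over one half
        y = pitch_width * ((row + 1) / (agents_per_column + 1) - 0.5)  # spread rows over the width
        positions.append([x, y])
    pos = torch.tensor(positions, dtype=torch.float, device=device)
    return pos + (torch.rand_like(pos) - 0.5) * noise                  # formation_noise jitter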
@@ -117,7 +86,9 @@ def init_params(self, **kwargs):
         self.u_multiplier = kwargs.pop("u_multiplier", 0.1)

         # Actions shooting
-        self.enable_shooting = kwargs.pop("enable_shooting", False)
+        self.enable_shooting = kwargs.pop(
+            "enable_shooting", False
+        )  # Whether to enable 2 extra actions (for rotation and shooting). Only available for non-AI agents
         self.u_rot_multiplier = kwargs.pop("u_rot_multiplier", 0.0003)
         self.u_shoot_multiplier = kwargs.pop("u_shoot_multiplier", 0.6)
         self.shooting_radius = kwargs.pop("shooting_radius", 0.08)
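
With enable_shooting on, non-AI agents get two extra action dimensions (rotation and shooting) on top of the 2D force. One way to picture the three multipliers above is as per-component scaling of a 4-dimensional action; the split below is an assumption for illustration only, not the scenario's actual action handling.

import torch

# Hypothetical helper (not part of the scenario): split a (batch, 4) action
# into force, rotation and shooting components and scale each with the
# multipliers defined above.
def split_and_scale_action(
    u: torch.Tensor,
    u_multiplier: float = 0.1,
    u_rot_multiplier: float = 0.0003,
    u_shoot_multiplier: float = 0.6,
):
    force = u[:, :2] * u_multiplier         # 2D movement force
    torque = u[:, 2:3] * u_rot_multiplier   # rotation action
    shoot = u[:, 3:4] * u_shoot_multiplier  # shooting intensity
    return force, torque, shoot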
@@ -131,12 +102,16 @@ def init_params(self, **kwargs):
         self.dense_reward = kwargs.pop("dense_reward", True)
         self.pos_shaping_factor_ball_goal = kwargs.pop(
             "pos_shaping_factor_ball_goal", 10.0
-        )
+        )  # Reward for moving the ball towards the opponents' goal. This can be annealed in a curriculum.
         self.pos_shaping_factor_agent_ball = kwargs.pop(
             "pos_shaping_factor_agent_ball", 0.1
-        )
+        )  # Reward for moving the closest agent of a team closer to the ball.
+        # This is useful for exploration and can be annealed in a curriculum.
+        # This reward does not trigger if the agent is less than distance_to_ball_trigger from the ball, or if the ball is moving
         self.distance_to_ball_trigger = kwargs.pop("distance_to_ball_trigger", 0.4)
-        self.scoring_reward = kwargs.pop("scoring_reward", 100.0)
+        self.scoring_reward = kwargs.pop(
+            "scoring_reward", 100.0
+        )  # Discrete reward for scoring

         # Observations
         self.observe_teammates = kwargs.pop("observe_teammates", True)
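
The two pos_shaping factors above are coefficients of distance-based shaping terms. A minimal sketch of this kind of potential-based shaping follows; it is an assumed form for illustration (the scenario's actual reward code is not shown in this diff) and the function and argument names are hypothetical.

import torch

# Reward the per-step decrease of the ball-to-goal distance, scaled by
# pos_shaping_factor_ball_goal. `prev_dist` is the distance cached from the
# previous step.
def ball_to_goal_shaping(ball_pos, goal_pos, prev_dist, factor=10.0):
    dist = torch.linalg.vector_norm(ball_pos - goal_pos, dim=-1)  # shape (batch,)
    reward = factor * (prev_dist - dist)  # positive when the ball moved closer
    return reward, dist  # the caller stores `dist` as the next step's prev_dist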
@@ -150,6 +125,57 @@ def init_params(self, **kwargs):
         )
         ScenarioUtils.check_kwargs_consumed(kwargs)

+    def make_world(self, batch_dim: int, device: torch.device, **kwargs):
+        self.init_params(**kwargs)
+        world = self.init_world(batch_dim, device)
+        self.init_agents(world)
+        self.init_ball(world)
+        self.init_background(world)
+        self.init_walls(world)
+        self.init_goals(world)
+        self.init_traj_pts(world)
+
+        # Cached values
+        self.left_goal_pos = torch.tensor(
+            [-self.pitch_length / 2 - self.ball_size / 2, 0],
+            device=device,
+            dtype=torch.float,
+        )
+        self.right_goal_pos = -self.left_goal_pos
+        self._done = torch.zeros(batch_dim, device=device, dtype=torch.bool)
+        self._sparse_reward_blue = torch.zeros(
+            batch_dim, device=device, dtype=torch.float32
+        )
+        self._sparse_reward_red = self._sparse_reward_blue.clone()
+        self._render_field = True
+        self.min_agent_dist_to_ball_blue = None
+        self.min_agent_dist_to_ball_red = None
+
+        self._reset_agent_range = torch.tensor(
+            [self.pitch_length / 2, self.pitch_width],
+            device=device,
+        )
+        self._reset_agent_offset_blue = torch.tensor(
+            [-self.pitch_length / 2 + self.agent_size, -self.pitch_width / 2],
+            device=device,
+        )
+        self._reset_agent_offset_red = torch.tensor(
+            [-self.agent_size, -self.pitch_width / 2], device=device
+        )
+        self._agents_rel_pos_to_ball = None
+        return world
+
+    def reset_world_at(self, env_index: int = None):
+        self.reset_agents(env_index)
+        self.reset_ball(env_index)
+        self.reset_walls(env_index)
+        self.reset_goals(env_index)
+        self.reset_controllers(env_index)
+        if env_index is None:
+            self._done[:] = False
+        else:
+            self._done[env_index] = False
+
     def init_world(self, batch_dim: int, device: torch.device):
         # Make world
         world = World(
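
For reference, make_world consumes exactly the kwargs popped in init_params, so the scenario can be exercised directly; in normal use the VMAS environment wrapper drives these calls. The values below are arbitrary examples, not defaults recommended by this commit.

import torch

scenario = Scenario()
world = scenario.make_world(
    batch_dim=32,
    device=torch.device("cpu"),
    n_blue_agents=5,
    n_red_agents=5,
    ai_red_agents=True,          # red team driven by the heuristic
    physically_different=True,   # requires 5 blue agents (see comment above)
    enable_shooting=True,        # extra rotation/shooting actions for non-AI agents
)
scenario.reset_world_at()        # env_index=None resets every environment in the batch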