
Commit d2769df

MrSyee authored and whikwon committed
Convert code to python 2.7 (#35)
* Convert code format to python2.7 (SAC)
* Convert code format python2.7 (TD3, all fD)
* Remove no use import and black setting
* Change SAC param
* Change env name Reacher-v2 to v1
* Remove old version reacher training script
* Convert code format python2.7
* Modify .travis.yml
* Add install command python3.6 & black on Makefile
* Fix seperator to tab on Makefile
* Modify Makefile
* Fix little error
* Change td3 gamma parameter
1 parent 81a9d86 commit d2769df

39 files changed, +154 -286 lines

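Most of the changes below follow a single pattern: Python 3.6 type annotations are removed from function signatures so the modules parse under Python 2.7, with the type information kept in docstrings instead. A minimal sketch of that pattern, using a hypothetical helper rather than code from this commit:

import numpy as np

# Python 3.6 form removed by this commit:
#     def scale_action(action: np.ndarray, low: float, high: float) -> np.ndarray:
# Python 2.7-compatible form: no annotations; types documented in the docstring.
def scale_action(action, low, high):
    """Rescale an action from (-1, 1) to (low, high).

    Args:
        action (np.ndarray): action in the range (-1, 1).
        low (float): lower bound of the action space.
        high (float): upper bound of the action space.

    Returns:
        np.ndarray: action rescaled to (low, high).
    """
    return low + (action + 1.0) * 0.5 * (high - low)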
.travis.yml

Lines changed: 2 additions & 1 deletion
@@ -1,8 +1,9 @@
 dist: xenial
 
 language: python
+
 python:
-- "3.6"
+- "2.7"
 
 install:
 - make dep

Makefile

Lines changed: 7 additions & 2 deletions
@@ -1,12 +1,17 @@
 test:
-	pytest --flake8 # --cov=algorithms
+	env PYTHONPATH=./scripts pytest --flake8 # --cov=algorithms
 
 format:
-	black .
 	isort -y
+	python3.6 -m black -t py27 .
 
 dev:
 	pip install -r scripts/requirements-dev.txt
+	sudo add-apt-repository -y ppa:deadsnakes/ppa
+	sudo apt-get update
+	sudo apt-get install -y python3.6
+	sudo apt-get install -y python3-pip
+	python3.6 -m pip install black
 	pre-commit install
 
 dep:

__init__.py

Whitespace-only changes.

scripts/__init__.py

Whitespace-only changes.

scripts/algorithms/__init__.py

Whitespace-only changes.

scripts/algorithms/common/__init__.py

Whitespace-only changes.

scripts/algorithms/common/abstract/__init__.py

Whitespace-only changes.

scripts/algorithms/common/abstract/agent.py

Lines changed: 12 additions & 12 deletions
@@ -5,18 +5,16 @@
 
 """
 
-import argparse
 import os
 import subprocess
-from abc import ABC, abstractmethod
-from typing import Tuple
+from abc import ABCMeta, abstractmethod
 
 import gym
 import numpy as np
 import torch
 
 
-class AbstractAgent(ABC):
+class AbstractAgent:
     """Abstract Agent used for all agents.
 
     Attributes:
@@ -27,7 +25,9 @@ class AbstractAgent(ABC):
 
     """
 
-    def __init__(self, env: gym.Env, args: argparse.Namespace):
+    __metaclass__ = ABCMeta
+
+    def __init__(self, env, args):
         """Initialization.
 
         Args:
@@ -52,11 +52,11 @@ def __init__(self, env: gym.Env, args: argparse.Namespace):
         )
 
     @abstractmethod
-    def select_action(self, state: np.ndarray):
+    def select_action(self, state):
         pass
 
     @abstractmethod
-    def step(self, action: torch.Tensor) -> Tuple[np.ndarray, np.float64, bool]:
+    def step(self, action):
         pass
 
     @abstractmethod
@@ -68,7 +68,7 @@ def load_params(self, *args):
         pass
 
     @abstractmethod
-    def save_params(self, params: dict, n_episode: int):
+    def save_params(self, params, n_episode):
         if not os.path.exists("./save"):
             os.mkdir("./save")
 
@@ -77,7 +77,7 @@ def save_params(self, params: dict, n_episode: int):
         path = os.path.join("./save/" + save_name + "_ep_" + str(n_episode) + ".pt")
         torch.save(params, path)
 
-        print("[INFO] Saved the model and optimizer to", path)
+        print ("[INFO] Saved the model and optimizer to", path)
 
     @abstractmethod
     def write_log(self, *args):
@@ -106,7 +106,7 @@ def test(self):
             score += reward
             step += 1
 
-        print(
+        print (
             "[INFO] episode %d\tstep: %d\ttotal score: %d"
             % (i_episode, step, score)
         )
@@ -118,7 +118,7 @@ def test(self):
 class NormalizedActions(gym.ActionWrapper):
     """Rescale and relocate the actions."""
 
-    def action(self, action: np.ndarray) -> np.ndarray:
+    def action(self, action):
         """Change the range (-1, 1) to (low, high)."""
         low = self.action_space.low
         high = self.action_space.high
@@ -131,7 +131,7 @@ def action(self, action: np.ndarray) -> np.ndarray:
 
         return action
 
-    def reverse_action(self, action: np.ndarray) -> np.ndarray:
+    def reverse_action(self, action):
         """Change the range (low, high) to (-1, 1)."""
         low = self.action_space.low
         high = self.action_space.high

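The AbstractAgent change above replaces the Python 3-only abc.ABC base class with the Python 2 __metaclass__ = ABCMeta declaration. A minimal, self-contained sketch of that idiom with toy class names (not from the repository):

from abc import ABCMeta, abstractmethod


class Base:
    """Abstract base declared the Python 2.7 way, without class Base(ABC)."""

    # Setting the metaclass as a class attribute is the Python 2 idiom;
    # Python 3 ignores this attribute, which is why the code used abc.ABC before.
    __metaclass__ = ABCMeta

    @abstractmethod
    def act(self):
        pass


class Impl(Base):
    def act(self):
        return "acted"


# Under Python 2.7, Base() raises TypeError while Impl() works.
print (Impl().act())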
scripts/algorithms/common/buffer/__init__.py

Whitespace-only changes.

scripts/algorithms/common/buffer/priortized_replay_buffer.py

Lines changed: 16 additions & 34 deletions
@@ -8,7 +8,6 @@
 """
 
 import random
-from typing import Tuple
 
 import numpy as np
 import torch
@@ -35,7 +34,7 @@ class PrioritizedReplayBuffer(ReplayBuffer):
 
     """
 
-    def __init__(self, buffer_size: int, batch_size: int, alpha: float = 0.6):
+    def __init__(self, buffer_size, batch_size, alpha=0.6):
         """Initialization.
 
         Args:
@@ -59,27 +58,22 @@ def __init__(self, buffer_size: int, batch_size: int, alpha: float = 0.6):
         self.min_tree = MinSegmentTree(tree_capacity)
         self._max_priority = 1.0
 
-    def add(
-        self,
-        state: np.ndarray,
-        action: np.ndarray,
-        reward: np.float64,
-        next_state: np.ndarray,
-        done: bool,
-    ):
+    def add(self, state, action, reward, next_state, done):
         """Add experience and priority."""
         idx = self.tree_idx
         self.tree_idx = (self.tree_idx + 1) % self.buffer_size
-        super().add(state, action, reward, next_state, done)
+        super(PrioritizedReplayBuffer, self).add(
+            state, action, reward, next_state, done
+        )
 
         self.sum_tree[idx] = self._max_priority ** self.alpha
         self.min_tree[idx] = self._max_priority ** self.alpha
 
-    def extend(self, transitions: list):
+    def extend(self, transitions):
         """Add experiences to memory."""
         raise NotImplementedError
 
-    def _sample_proportional(self, batch_size: int) -> list:
+    def _sample_proportional(self, batch_size):
         """Sample indices based on proportional."""
         indices = []
         p_total = self.sum_tree.sum(0, len(self.buffer) - 1)
@@ -92,7 +86,7 @@ def _sample_proportional(self, batch_size: int) -> list:
             indices.append(idx)
         return indices
 
-    def sample(self, beta: float = 0.4) -> Tuple[torch.Tensor, ...]:
+    def sample(self, beta=0.4):
         """Sample a batch of experiences."""
         assert beta > 0
 
@@ -127,7 +121,7 @@ def sample(self, beta: float = 0.4) -> Tuple[torch.Tensor, ...]:
 
         return experiences
 
-    def update_priorities(self, indices: list, priorities: np.ndarray):
+    def update_priorities(self, indices, priorities):
         """Update priorities of sampled transitions."""
         assert len(indices) == len(priorities)
 
@@ -153,14 +147,7 @@ class PrioritizedReplayBufferfD(PrioritizedReplayBuffer):
         epsilon_d (float) : epsilon_d parameter to update priority using demo
     """
 
-    def __init__(
-        self,
-        buffer_size: int,
-        batch_size: int,
-        demo: list,
-        alpha: float = 0.6,
-        epsilon_d: float = 1.0,
-    ):
+    def __init__(self, buffer_size, batch_size, demo, alpha=0.6, epsilon_d=1.0):
         """Initialization.
         Args:
             buffer_size (int): size of replay buffer for experience
@@ -181,30 +168,25 @@ def __init__(
         self.min_tree[self.tree_idx] = self._max_priority ** self.alpha
         self.tree_idx += 1
 
-    def add(
-        self,
-        state: np.ndarray,
-        action: np.ndarray,
-        reward: np.float64,
-        next_state: np.ndarray,
-        done: bool,
-    ):
+    def add(self, state, action, reward, next_state, done):
         """Add experience and priority."""
         idx = self.tree_idx
         # buffer is full
         if (self.tree_idx + 1) % (self.buffer_size + self.demo_size) == 0:
             self.tree_idx = self.demo_size
         else:
             self.tree_idx = self.tree_idx + 1
-        super().add(state, action, reward, next_state, done)
+        super(PrioritizedReplayBuffer, self).add(
+            state, action, reward, next_state, done
+        )
 
         self.sum_tree[idx] = self._max_priority ** self.alpha
         self.min_tree[idx] = self._max_priority ** self.alpha
 
         # update current total size
         self.total_size = self.demo_size + len(self.buffer)
 
-    def sample(self, beta: float = 0.4) -> Tuple[torch.Tensor, ...]:
+    def sample(self, beta=0.4):
         """Sample a batch of experiences."""
         assert beta > 0
 
@@ -266,7 +248,7 @@ def sample(self, beta: float = 0.4) -> Tuple[torch.Tensor, ...]:
 
         return experiences
 
-    def update_priorities(self, indices: list, priorities: np.ndarray):
+    def update_priorities(self, indices, priorities):
         """Update priorities of sampled transitions."""
         assert len(indices) == len(priorities)

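The replay-buffer changes above rewrite the zero-argument super().add(...) calls, which are Python 3-only syntax, into the explicit two-argument form. A small runnable sketch of the difference, with toy buffer classes standing in for the repository's:

class ReplayBuffer(object):
    """Toy stand-in for the repository's ReplayBuffer."""

    def __init__(self):
        self.buffer = []

    def add(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))


class PrioritizedBuffer(ReplayBuffer):
    """Toy subclass showing the Python 2.7-compatible super call."""

    def add(self, state, action, reward, next_state, done):
        # Python 3 only: super().add(state, action, reward, next_state, done)
        # Works on 2.7 and 3.x: name the class and the instance explicitly.
        super(PrioritizedBuffer, self).add(state, action, reward, next_state, done)


buf = PrioritizedBuffer()
buf.add([0.0], [1.0], 1.0, [0.1], False)
print (len(buf.buffer))  # -> 1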