
Commit d2769df

MrSyee authored and whikwon committed
Convert code to python 2.7 (#35)
* Convert code format to python2.7 (SAC)
* Convert code format python2.7 (TD3, all fD)
* Remove no use import and black setting
* Change SAC param
* Change env name Reacher-v2 to v1
* Remove old version reacher training script
* Convert code format python2.7
* Modify .travis.yml
* Add install command python3.6 & black on Makefile
* Fix seperator to tab on Makefile
* Modify Makefile
* Fix little error
* Change td3 gamma parameter
1 parent 81a9d86 commit d2769df

39 files changed, +154 -286 lines

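Most of the changes below follow a single pattern: Python 3.6 type annotations are removed from function signatures so the modules parse under Python 2.7, with the type information kept in docstrings instead. A minimal sketch of that pattern, using a hypothetical helper rather than code from this commit:

import numpy as np

# Python 3.6 form removed by this commit:
#     def scale_action(action: np.ndarray, low: float, high: float) -> np.ndarray:
# Python 2.7-compatible form: no annotations; types documented in the docstring.
def scale_action(action, low, high):
    """Rescale an action from (-1, 1) to (low, high).

    Args:
        action (np.ndarray): action in the range (-1, 1).
        low (float): lower bound of the action space.
        high (float): upper bound of the action space.

    Returns:
        np.ndarray: action rescaled to (low, high).
    """
    return low + (action + 1.0) * 0.5 * (high - low)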
.travis.yml

Lines changed: 2 additions & 1 deletion
@@ -1,8 +1,9 @@
 dist: xenial
 
 language: python
+
 python:
-- "3.6"
+- "2.7"
 
 install:
 - make dep

Makefile

Lines changed: 7 additions & 2 deletions
@@ -1,12 +1,17 @@
 test:
-	pytest --flake8 # --cov=algorithms
+	env PYTHONPATH=./scripts pytest --flake8 # --cov=algorithms
 
 format:
-	black .
 	isort -y
+	python3.6 -m black -t py27 .
 
 dev:
 	pip install -r scripts/requirements-dev.txt
+	sudo add-apt-repository -y ppa:deadsnakes/ppa
+	sudo apt-get update
+	sudo apt-get install -y python3.6
+	sudo apt-get install -y python3-pip
+	python3.6 -m pip install black
 	pre-commit install
 
 dep:

__init__.py

Whitespace-only changes.

scripts/__init__.py

Whitespace-only changes.

scripts/algorithms/__init__.py

Whitespace-only changes.

scripts/algorithms/common/__init__.py

Whitespace-only changes.

scripts/algorithms/common/abstract/__init__.py

Whitespace-only changes.

scripts/algorithms/common/abstract/agent.py

Lines changed: 12 additions & 12 deletions
@@ -5,18 +5,16 @@
 
 """
 
-import argparse
 import os
 import subprocess
-from abc import ABC, abstractmethod
-from typing import Tuple
+from abc import ABCMeta, abstractmethod
 
 import gym
 import numpy as np
 import torch
 
 
-class AbstractAgent(ABC):
+class AbstractAgent:
     """Abstract Agent used for all agents.
 
     Attributes:
@@ -27,7 +25,9 @@ class AbstractAgent(ABC):
 
     """
 
-    def __init__(self, env: gym.Env, args: argparse.Namespace):
+    __metaclass__ = ABCMeta
+
+    def __init__(self, env, args):
         """Initialization.
 
         Args:
@@ -52,11 +52,11 @@ def __init__(self, env: gym.Env, args: argparse.Namespace):
         )
 
     @abstractmethod
-    def select_action(self, state: np.ndarray):
+    def select_action(self, state):
         pass
 
     @abstractmethod
-    def step(self, action: torch.Tensor) -> Tuple[np.ndarray, np.float64, bool]:
+    def step(self, action):
         pass
 
     @abstractmethod
@@ -68,7 +68,7 @@ def load_params(self, *args):
         pass
 
     @abstractmethod
-    def save_params(self, params: dict, n_episode: int):
+    def save_params(self, params, n_episode):
         if not os.path.exists("./save"):
             os.mkdir("./save")
 
@@ -77,7 +77,7 @@ def save_params(self, params: dict, n_episode: int):
         path = os.path.join("./save/" + save_name + "_ep_" + str(n_episode) + ".pt")
         torch.save(params, path)
 
-        print("[INFO] Saved the model and optimizer to", path)
+        print ("[INFO] Saved the model and optimizer to", path)
 
     @abstractmethod
     def write_log(self, *args):
@@ -106,7 +106,7 @@ def test(self):
             score += reward
             step += 1
 
-        print(
+        print (
             "[INFO] episode %d\tstep: %d\ttotal score: %d"
             % (i_episode, step, score)
         )
@@ -118,7 +118,7 @@ def test(self):
 class NormalizedActions(gym.ActionWrapper):
     """Rescale and relocate the actions."""
 
-    def action(self, action: np.ndarray) -> np.ndarray:
+    def action(self, action):
         """Change the range (-1, 1) to (low, high)."""
         low = self.action_space.low
         high = self.action_space.high
@@ -131,7 +131,7 @@ def action(self, action: np.ndarray) -> np.ndarray:
 
         return action
 
-    def reverse_action(self, action: np.ndarray) -> np.ndarray:
+    def reverse_action(self, action):
         """Change the range (low, high) to (-1, 1)."""
         low = self.action_space.low
         high = self.action_space.high

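The AbstractAgent change above replaces the Python 3-only abc.ABC base class with the Python 2 __metaclass__ = ABCMeta declaration. A minimal, self-contained sketch of that idiom with toy class names (not from the repository):

from abc import ABCMeta, abstractmethod


class Base:
    """Abstract base declared the Python 2.7 way, without class Base(ABC)."""

    # Setting the metaclass as a class attribute is the Python 2 idiom;
    # Python 3 ignores this attribute, which is why the code used abc.ABC before.
    __metaclass__ = ABCMeta

    @abstractmethod
    def act(self):
        pass


class Impl(Base):
    def act(self):
        return "acted"


# Under Python 2.7, Base() raises TypeError while Impl() works.
print (Impl().act())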
scripts/algorithms/common/buffer/__init__.py

Whitespace-only changes.

scripts/algorithms/common/buffer/priortized_replay_buffer.py

Lines changed: 16 additions & 34 deletions
@@ -8,7 +8,6 @@
 """
 
 import random
-from typing import Tuple
 
 import numpy as np
 import torch
@@ -35,7 +34,7 @@ class PrioritizedReplayBuffer(ReplayBuffer):
 
     """
 
-    def __init__(self, buffer_size: int, batch_size: int, alpha: float = 0.6):
+    def __init__(self, buffer_size, batch_size, alpha=0.6):
         """Initialization.
 
         Args:
@@ -59,27 +58,22 @@ def __init__(self, buffer_size: int, batch_size: int, alpha: float = 0.6):
         self.min_tree = MinSegmentTree(tree_capacity)
         self._max_priority = 1.0
 
-    def add(
-        self,
-        state: np.ndarray,
-        action: np.ndarray,
-        reward: np.float64,
-        next_state: np.ndarray,
-        done: bool,
-    ):
+    def add(self, state, action, reward, next_state, done):
         """Add experience and priority."""
         idx = self.tree_idx
         self.tree_idx = (self.tree_idx + 1) % self.buffer_size
-        super().add(state, action, reward, next_state, done)
+        super(PrioritizedReplayBuffer, self).add(
+            state, action, reward, next_state, done
+        )
 
         self.sum_tree[idx] = self._max_priority ** self.alpha
         self.min_tree[idx] = self._max_priority ** self.alpha
 
-    def extend(self, transitions: list):
+    def extend(self, transitions):
         """Add experiences to memory."""
         raise NotImplementedError
 
-    def _sample_proportional(self, batch_size: int) -> list:
+    def _sample_proportional(self, batch_size):
         """Sample indices based on proportional."""
         indices = []
         p_total = self.sum_tree.sum(0, len(self.buffer) - 1)
@@ -92,7 +86,7 @@ def _sample_proportional(self, batch_size: int) -> list:
             indices.append(idx)
         return indices
 
-    def sample(self, beta: float = 0.4) -> Tuple[torch.Tensor, ...]:
+    def sample(self, beta=0.4):
         """Sample a batch of experiences."""
         assert beta > 0
 
@@ -127,7 +121,7 @@ def sample(self, beta: float = 0.4) -> Tuple[torch.Tensor, ...]:
 
         return experiences
 
-    def update_priorities(self, indices: list, priorities: np.ndarray):
+    def update_priorities(self, indices, priorities):
         """Update priorities of sampled transitions."""
         assert len(indices) == len(priorities)
 
@@ -153,14 +147,7 @@ class PrioritizedReplayBufferfD(PrioritizedReplayBuffer):
         epsilon_d (float) : epsilon_d parameter to update priority using demo
     """
 
-    def __init__(
-        self,
-        buffer_size: int,
-        batch_size: int,
-        demo: list,
-        alpha: float = 0.6,
-        epsilon_d: float = 1.0,
-    ):
+    def __init__(self, buffer_size, batch_size, demo, alpha=0.6, epsilon_d=1.0):
         """Initialization.
         Args:
             buffer_size (int): size of replay buffer for experience
@@ -181,30 +168,25 @@ def __init__(
         self.min_tree[self.tree_idx] = self._max_priority ** self.alpha
         self.tree_idx += 1
 
-    def add(
-        self,
-        state: np.ndarray,
-        action: np.ndarray,
-        reward: np.float64,
-        next_state: np.ndarray,
-        done: bool,
-    ):
+    def add(self, state, action, reward, next_state, done):
         """Add experience and priority."""
         idx = self.tree_idx
         # buffer is full
         if (self.tree_idx + 1) % (self.buffer_size + self.demo_size) == 0:
             self.tree_idx = self.demo_size
         else:
             self.tree_idx = self.tree_idx + 1
-        super().add(state, action, reward, next_state, done)
+        super(PrioritizedReplayBuffer, self).add(
+            state, action, reward, next_state, done
+        )
 
         self.sum_tree[idx] = self._max_priority ** self.alpha
         self.min_tree[idx] = self._max_priority ** self.alpha
 
         # update current total size
         self.total_size = self.demo_size + len(self.buffer)
 
-    def sample(self, beta: float = 0.4) -> Tuple[torch.Tensor, ...]:
+    def sample(self, beta=0.4):
         """Sample a batch of experiences."""
         assert beta > 0
 
@@ -266,7 +248,7 @@ def sample(self, beta: float = 0.4) -> Tuple[torch.Tensor, ...]:
 
         return experiences
 
-    def update_priorities(self, indices: list, priorities: np.ndarray):
+    def update_priorities(self, indices, priorities):
         """Update priorities of sampled transitions."""
         assert len(indices) == len(priorities)

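The replay-buffer changes above rewrite the zero-argument super().add(...) calls, which are Python 3-only syntax, into the explicit two-argument form. A small runnable sketch of the difference, with toy buffer classes standing in for the repository's:

class ReplayBuffer(object):
    """Toy stand-in for the repository's ReplayBuffer."""

    def __init__(self):
        self.buffer = []

    def add(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))


class PrioritizedBuffer(ReplayBuffer):
    """Toy subclass showing the Python 2.7-compatible super call."""

    def add(self, state, action, reward, next_state, done):
        # Python 3 only: super().add(state, action, reward, next_state, done)
        # Works on 2.7 and 3.x: name the class and the instance explicitly.
        super(PrioritizedBuffer, self).add(state, action, reward, next_state, done)


buf = PrioritizedBuffer()
buf.add([0.0], [1.0], 1.0, [0.1], False)
print (len(buf.buffer))  # -> 1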