Reinforcement Learning: Coin Catcher custom environment with Gym toolkit

DevOpsThinh · DevOpsThinh · commit bcc038485f09 · 2023-05-28T14:34:33.000+07:00
diff --git a/ml/rl_in_robotics/coin_catcher/airplane.png b/ml/rl_in_robotics/coin_catcher/airplane.png
diff --git a/ml/rl_in_robotics/coin_catcher/coin_catcher_rl_problem.py b/ml/rl_in_robotics/coin_catcher/coin_catcher_rl_problem.py
@@ -0,0 +1,123 @@
+# Learner: Nguyen Truong Thinh
+# Contact me: nguyentruongthinhvn2020@gmail.com || +84393280504
+#
+# Topic: Reinforcement Learning (RL): Coin Catcher RL problem
+import collections
+import sys
+from math import ceil
+from random import random, randint
+from time import sleep
+
+import gym
+
+from coin_catcher_screen import CoinCatcherScreen
+
+
+class CoinCatcherEnv(gym.Env):
+    """
+    Coin Catcher environment in Gym: catch coins (worth 1-3) that scroll
+    down a grid by moving left/right along its oldest row.
+    """
+    metadata = {"render.modes": ["human", "ascii"]}
+
+    def __init__(self, display_width=10, display_height=10, density=.8):
+        """Set the grid size and `density`, the chance a new row has a coin."""
+        self.display_width = display_width
+        self.display_height = display_height
+        self.density = density
+        # Bounded deque: appending a new row evicts the oldest one (row 0).
+        self.display = collections.deque(maxlen=display_height)
+        self.last_action = None
+        self.last_reward = None
+        self.total_score = 0
+        self.v_position = 0
+        self.game_scr = None  # created lazily by the first "human" render
+
+    def step(self, action):
+        """
+        Move by `action` (-1 left, 0 stay, +1 right; clamped to the grid),
+        collect the value under the agent in row 0, then scroll one row.
+        :return: ((display, v_position), reward, done, info); `display` is
+            the live deque, not a copy, and `done` is always False.
+        """
+        self.last_action = action
+        self.v_position = min(max(self.v_position + action, 0), self.display_width - 1)
+        reward = self.display[0][self.v_position]
+        self.last_reward = reward
+        self.total_score += reward
+        self.display.append(self.line_generator())
+        return (self.display, self.v_position), reward, False, {}
+
+    def _render_human(self):
+        """The Pygame graphic rendering; flashes the window after a reward."""
+        if not self.game_scr:
+            self.game_scr = CoinCatcherScreen(h=self.display_height, w=self.display_width)
+        if self.last_reward:
+            self.game_scr.plus()
+            sleep(.1)
+        self.game_scr.update(self.display, self.v_position, self.total_score)
+
+    def _render_ascii(self):
+        """The ASCII rendering: newest row first, the agent's row last."""
+        area = []
+        for i, line in enumerate(reversed(self.display)):
+            row = []
+            for j, p in enumerate(line):
+                if p > 0:
+                    row.append(str(p))
+                    # Trail ('|' then '.') in the blank cells above the coin.
+                    if i > 0 and area[-1][j] == ' ':
+                        area[-1][j] = '|'
+                    if i > 1 and area[-2][j] == ' ':
+                        area[-2][j] = '.'
+                else:
+                    row.append(' ')
+            area.append(row)
+
+        pos_line = ['_'] * self.display_width
+        pos_line[self.v_position] = str(self.last_reward) if self.last_reward else 'V'
+        area.append(pos_line)
+        sys.stdout.write(f"\nTotal score: {self.total_score} \n")
+        sys.stdout.write("\n".join("  ".join(line) for line in area) + "\n")
+
+    def render(self, mode="ascii"):
+        """
+        Render the current state as "human" (pygame) or "ascii" (stdout).
+
+        :raises NotImplementedError: for any other `mode`
+        """
+        if mode == "human":
+            self._render_human()
+        elif mode == "ascii":
+            self._render_ascii()
+        else:
+            raise NotImplementedError("Not Implemented!")
+
+    def line_generator(self):
+        """
+        Build one row of zeros holding, with probability `density`, a single
+        coin worth 1 (60%), 2 (30%) or 3 (10%) in a random column.
+        """
+        line = [0] * self.display_width
+        if random() > (1 - self.density):
+            ran = random()
+            v = 1 if ran < .6 else 2 if ran < .9 else 3
+            line[randint(0, self.display_width - 1)] = v
+        return line
+
+    def reset(self):
+        """
+        Start a new episode: clear the score and the last action/reward,
+        refill the grid and centre the agent.
+
+        :return: the initial state, (display, v_position)
+        """
+        self.last_action = None
+        self.last_reward = None
+        self.total_score = 0
+        self.display.clear()
+        for _ in range(self.display_height):
+            self.display.append(self.line_generator())
+        # Floor division keeps the column in range (ceil() overshot for width 1).
+        self.v_position = self.display_width // 2
+        return self.display, self.v_position
diff --git a/ml/rl_in_robotics/coin_catcher/coin_catcher_screen.py b/ml/rl_in_robotics/coin_catcher/coin_catcher_screen.py
@@ -0,0 +1,58 @@
+# Learner: Nguyen Truong Thinh
+# Contact me: nguyentruongthinhvn2020@gmail.com || +84393280504
+#
+# Topic: Reinforcement Learning (RL): Coin Catcher RL problem
+#    Ref: https://www.pygame.org/docs
+
+import os
+
+import pygame as game
+
+# Colors as (R, G, B) tuples
+BLACK = (0, 0, 0)
+RED = (255, 0, 0)
+GREEN = (0, 255, 0)
+BLUE = (0, 0, 255)
+WHITE = (255, 255, 255)
+
+
+class CoinCatcherScreen:
+    """Pygame window that draws the coin grid, the plane and the score."""
+    img_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'airplane.png')
+    rct_size = 50  # pixels per grid cell
+
+    def __init__(self, h=5, w=5) -> None:
+        """Open a window sized for a grid of `h` rows by `w` columns."""
+        super().__init__()
+        game.init()
+        self.h = h
+        self.w = w
+        scr_size = ((w + 2) * self.rct_size, (h + 3) * self.rct_size)
+        self.scr = game.display.set_mode(scr_size, 0, 32)
+        self.img = game.image.load(CoinCatcherScreen.img_path)
+        self.font = game.font.SysFont("arial", 48)
+        game.display.set_caption('Catch Coins Game')
+
+    def plus(self):
+        """Flash the whole window green."""
+        self.scr.fill(GREEN)
+        game.display.update()
+
+    def update(self, display, plane_pos, total_score):
+        """Redraw the coins (last row of `display` on top), score and plane."""
+        game.event.pump()  # keep the OS from flagging the window as hung
+        self.scr.fill(WHITE)
+        for i, line in enumerate(reversed(display)):
+            for j, p in enumerate(line):
+                if p > 0:
+                    coord = ((j + 1) * self.rct_size, (i + 1) * self.rct_size)
+                    self.scr.blit(self.font.render(str(p), True, BLACK), coord)
+        self.scr.blit(self.font.render(f'Total: {total_score}', True, BLACK), (10, 10))
+        self.scr.blit(self.img, (self.rct_size * plane_pos + 30, (self.h + 1) * self.rct_size))
+        game.display.update()
+
+    @classmethod
+    def render(cls, display, plane_pos, total_score):
+        """Draw one frame in a new window sized to fit `display`."""
+        scr = cls(len(display) or 5, len(display[0]) if display else 5)
+        scr.update(display, plane_pos, total_score)
diff --git a/ml/rl_in_robotics/coin_catcher/random_agent.py b/ml/rl_in_robotics/coin_catcher/random_agent.py
@@ -0,0 +1,23 @@
+# Learner: Nguyen Truong Thinh
+# Contact me: nguyentruongthinhvn2020@gmail.com || +84393280504
+#
+# Topic: Reinforcement Learning (RL): Coin Catcher RL problem
+
+import random
+from time import sleep
+from coin_catcher_rl_problem import CoinCatcherEnv
+
+# Hyperparameters we can adjust
+EPISODES = 1000     # number of steps the agent plays
+MODE = "human"      # or "ascii"
+SLEEP = .3          # seconds to pause after each step
+# -1: left, 0: stay, +1: right
+action_space = [-1, 0, 1]
+
+env = CoinCatcherEnv()
+env.reset()
+for _ in range(EPISODES):
+    env.render(MODE)
+    env.step(random.choice(action_space))  # a fresh random action per step
+    sleep(SLEEP)
+env.close()
diff --git a/ml/rl_in_robotics/gym_environments/right_mountain_car.py b/ml/rl_in_robotics/gym_environments/right_mountain_car.py
@@ -9,7 +9,7 @@
 from ml.rl_in_robotics.utility import gym_rl_tasks, init_environment
 
 # Hyperparameters we can adjust
-EPISODES = 30
+EPISODES = 10
 
 env = init_environment("MountainCar-v0")
 
diff --git a/ml/rl_in_robotics/images/coin_catcher.gif b/ml/rl_in_robotics/images/coin_catcher.gif
diff --git a/ml/rl_in_robotics/utility.py b/ml/rl_in_robotics/utility.py
@@ -9,6 +9,39 @@
 from time import sleep
 
 import gym
+import pygame
+
+
+def gym_rl_custom_tasks(env, episodes, action, mode, duration):
+    """Reset `env`, run `episodes` render/step iterations, then close it."""
+    init_reset_environment(env)
+    try:
+        gym_customize_tasks(env, episodes, action, mode, duration)
+    finally:
+        # Close even when the loop raises (e.g. Ctrl+C), so `env.close()`
+        # is never skipped.
+        env.close()
+
+
+def init_reset_environment(env):
+    """Reset `env` to its initial state and return the same `env` object."""
+    env.reset()
+    return env
+
+
+def gym_customize_tasks(env, episodes, action, mode="human", duration=1):
+    """
+    Render and step `env` `episodes` times, sleeping `duration` s per step.
+    `action` is a fixed action or a zero-arg callable polled before each
+    step; only mode "human" is forwarded, other modes use env's default.
+    """
+    render_args = (mode,) if mode == "human" else ()
+    for _ in range(episodes):
+        env.render(*render_args)
+        _, _, done, _ = env.step(action() if callable(action) else action)
+        if done:
+            env.reset()  # Gym requires a reset once an episode has ended.
+        sleep(duration)
 
 
 def gym_rl_tasks_with_seed(env, episodes, action):
@@ -59,6 +92,21 @@ def init_environment(rl_problem):
     return env
 
 
+# Utility functions
+
+
+def get_system_fonts():
+    """
+    Print a numbered list of every font pygame reports, then return it.
+
+    :return: The list of font names from ``pygame.font.get_fonts()``.
+    """
+    fonts = pygame.font.get_fonts()
+    for index, font in enumerate(fonts, start=1):
+        print(f'{index}. {font},')
+    return fonts
+
+
 def check_list_of_environments():
     """
     Get a list of all pre-installed environments of Gym Toolkit