-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplayground.py
123 lines (77 loc) · 2.66 KB
/
playground.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import numpy as np
class Env():
def __init__(self, n, m, k=4):
self.n, self.m, self.k = n, m, k
self.c = 0 # count of moves
self.board = [[0]*m for _ in range(n)] # init
self.end = False
self.state = (0, )*m
self.act_ind = [0]*m # possible col index
self.actions = [i for i in range(self.m)]
def __str__(self):
return str(np.array(self.board[::-1])) # reverse to match the actual game
#return str(np.array(self.board))
def _reward(self, i, j):
c = 0 # c: count
t = self.board[i][j] # t: target
B = [0]*7 # B: buffer
for p, (u, v) in enumerate([(1, -1), (0, -1), (-1, -1), (-1, 0), (-1, 1), (0, 1), (1, 1)]): # check 7 directions
if i+u >= self.n or i+u < 0 or j+v >= self.m or j+v < 0:
continue
s = self.board[i+u][j+v] # s: sample
if not s: # no stone
continue
k = 1
while 0<= i+k*u < self.n and 0<= j+k*v < self.m \
and self.board[i+k*u][j+k*v] == s and k <= self.k-1:
c += 1
k += 1
B[p] = (k-1) * [-1, 1][s == t]
if k == self.k and s == t: # winning bonus!
c += self.m * self.n * 2
elif k == self.k-1:
if s != t:
c += (self.m * self.n) // 4 # block bonus
else:
c += (self.m * self.n) // 2 # almost win bonus
# deal with the case that the stone is in the middle of the line
for p in range(3):
if B[p] + B[p+4] + 1 >= self.k: # may over count if the last move is the end of conn 4, but that is ok
c += self.m * self.n * 2
return c
def _i_state(self, j):
b = 1 # start from 1
i = 0
while i<self.n and self.board[i][j]:
b = (b << 1) + (self.board[i][j]-1)
i += 1
return b
def step(self, act):
# put a stone at col act
i, j = self.act_ind[act], act
b = self.c % 2 # b: stone bit
self.board[i][j] = b + 1 # grid = 0: empty, 1: black, 2: white
# n means no more room for stone
if self.act_ind[act] == self.n - 1:
self.actions = [x for x in self.actions if x != act] # remove act from action space
self.act_ind[act] += 1
self.state = self.state[:j] + (self._i_state(j),) + self.state[j+1:]
self.c += 1
r = self._reward(i, j) # find reward
self.end = (r >= self.n*self.m*2) or (not self.actions) # find if game ends
return r, self.end, {} # return reward, if end, info for debug
def reset(self, k=4, board=None):
if not board:
self.__init__(self.n, self.m, self.k)
else:
self.n, self.m, self.k = len(board), len(board[0]), k
self.board = board
self.end = False
self.c = 0
for j in range(self.m):
i = 0
while board[i][j]:
i += 1
self.c += i # update moves
self.act_ind[j] = i # update action index
self.actions = [i for i in range(self.m)]