-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoracle.py
164 lines (129 loc) · 6.17 KB
/
oracle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
from enum import Enum, auto
from copy import deepcopy
from settings import BOARD_LENGTH
from square_board import SquareBoard
from settings import BOARD_LENGTH
class ColumnClassification(Enum):
    """Verdict an oracle can assign to a column of the board.

    The numeric values grow with how desirable the column is.
    """

    # The column is full: playing there is impossible.
    FULL = -1
    # Playing there leads to an imminent defeat.
    LOSE = 1
    # Very undesirable.
    BAD = 5
    # Undesirable, but playable.
    MAYBE = 10
    # The best option: a winning move.
    WIN = 100
class ColumnRecommendation():
    """Pairs a column index with the classification an oracle gave it.

    Two recommendations compare equal when their classifications are equal;
    the column index is deliberately ignored by the comparison.
    """

    def __init__(self, index, classification):
        """Store the column ``index`` and its ``classification``."""
        self.index = index
        self.classification = classification

    def __eq__(self, other):
        """Return True when ``other`` is a recommendation with the same classification."""
        # Objects of a different class are never equal; returning
        # NotImplemented lets Python try the reflected comparison and
        # finally fall back to False for ``==``.
        if not isinstance(other, self.__class__):
            return NotImplemented
        # Only the classification matters.
        return self.classification == other.classification

    def __hash__(self) -> int:
        """Return a hash consistent with ``__eq__``.

        Equality ignores ``index``, so the hash must ignore it as well;
        otherwise two equal recommendations could land in different buckets
        of a set or dict.
        """
        return hash(self.classification)

    def __repr__(self):
        """Return a debugging representation showing the classification."""
        return f'{self.__class__}:{self.classification}'
class BaseOracle():
def get_recommendation(self, board, player):
# Returns a list of ColumnRecommendations
recommendations = []
for i in range(len(board)):
recommendations.append(
self._get_column_recommendation(board, i, player))
return recommendations
def _get_column_recommendation(self, board, index, player):
# Classifies a column as either FULL or MAYBE and returns an ColumnRecommendation
classification = ColumnClassification.MAYBE
if board._columns[index].is_full():
classification = ColumnClassification.FULL
return ColumnRecommendation(index, classification)
def no_good_options(self, board, player):
#detecta que todas laas clasificaciones sean BAD o FULL
# obtener las clasificaciones
columnRecomendations = self.get_recommendation(board, player)
# comprobamos que todas sean del tipo correcto
result = True
for rec in columnRecomendations:
if (rec.classification == ColumnClassification.WIN) or (rec.classification == ColumnClassification.MAYBE):
result = False
break
return result
# metodos que han de ser sobre escritos por mis subclases
def update_to_bad(self, move):
pass
def backtrack(self, list_of_moves):
pass
class SmartOracle(BaseOracle):
    """Oracle that refines the base verdicts by simulating moves on a copy of the board."""

    def _get_column_recommendation(self, board, index, player):
        """Refine the parent's classification of column ``index``.

        A MAYBE column is upgraded to WIN when playing there wins at once,
        or downgraded to LOSE when it hands the opponent a winning move.
        Any other classification is returned untouched.
        """
        recommendation = super()._get_column_recommendation(board, index, player)
        if recommendation.classification == ColumnClassification.MAYBE:
            # There is room for a sharper verdict; WIN is checked first so
            # that an immediate victory is never reported as LOSE.
            if self._is_wining_move(board, index, player):
                recommendation.classification = ColumnClassification.WIN
            elif self._is_losing_move(board, index, player):
                recommendation.classification = ColumnClassification.LOSE
        return recommendation

    def _is_losing_move(self, board, index, player):
        """Return True if playing ``index`` gives the opponent a winning move.

        After ``player`` plays in ``index`` on a copy of the board, every
        column is tried for ``player.opponent``; one winning reply is enough.
        """
        tmp = self._play_on_tmp_board(board, index, player)
        # Scan the columns the board actually has (same range used by
        # BaseOracle.get_recommendation) instead of a module-level constant.
        return any(
            self._is_wining_move(tmp, column, player.opponent)
            for column in range(len(tmp))
        )

    def _is_wining_move(self, board, index, player):
        """Return True if playing ``index`` makes ``player`` win immediately."""
        # Play on a copy so the real board is left untouched, then ask the
        # copy whether it now holds a victory for this player's character.
        tmp = self._play_on_tmp_board(board, index, player)
        return tmp.is_victory(player.char)

    def _play_on_tmp_board(self, board, index, player):
        """Return a deep copy of ``board`` after adding ``player.char`` at ``index``."""
        tmp = deepcopy(board)
        tmp.add(player.char, index)
        return tmp
class MemoizingOracle(SmartOracle):
    """SmartOracle whose ``get_recommendation`` is memoized.

    Each call stores its result in a dictionary keyed by the board code and
    the player, so a repeated position is answered from the cache.
    """

    def __init__(self) -> None:
        """Initialise the parent oracle and start with an empty cache."""
        super().__init__()
        self._past_recommendations = {}

    def _make_key(self, board_code, player):
        """Build the cache key from ``board_code.raw_code`` and ``player.char``."""
        # The key combines board and player in the simplest possible way.
        return f'{board_code.raw_code}@{player.char}'

    def get_recommendation(self, board, player):
        """Return the recommendations for ``board`` and ``player``, using the cache.

        On a cache miss the parent's result is computed, stored and returned;
        on a hit the stored list itself is returned.
        """
        cache_key = self._make_key(board.as_code(), player)
        known = self._past_recommendations
        try:
            return known[cache_key]
        except KeyError:
            pass
        # Not seen before: compute once and remember it.
        known[cache_key] = super().get_recommendation(board, player)
        return known[cache_key]
class LearningOracle(MemoizingOracle):
    """MemoizingOracle that rewrites cached verdicts after reviewing played moves."""

    def update_to_bad(self, move):
        """Reclassify the column played in ``move`` as BAD in the cache.

        ``move`` must expose ``board_code``, ``player`` and ``position``.
        """
        cache_key = self._make_key(move.board_code, move.player)
        # Fetch the (mistaken) recommendations for the position of the move.
        replayed_board = SquareBoard.fromBoardCode(move.board_code)
        verdicts = self.get_recommendation(replayed_board, move.player)
        # Correct the entry of the column that was played ...
        verdicts[move.position] = ColumnRecommendation(
            move.position, ColumnClassification.BAD)
        # ... and store the corrected list back under the move's key.
        self._past_recommendations[cache_key] = verdicts

    def backtrack(self, list_of_moves):
        """Walk through ``list_of_moves`` marking moves as BAD.

        Every visited move is reclassified as BAD. The walk goes on to the
        next move only while the position of the current one is left with no
        good options; otherwise it stops.
        NOTE(review): the original comment says the moves come in reverse
        order (latest first) - confirm against the caller.
        """
        print('Learning....')
        for move in list_of_moves:
            # Reclassify the move as BAD.
            self.update_to_bad(move)
            # Check whether everything is lost after the reclassification.
            position = SquareBoard.fromBoardCode(move.board_code)
            if self.no_good_options(position, move.player):
                # Everything was lost here too: keep going with the next move.
                continue
            # There was still a decent option, so stop here.
            break
        print(f'Size of knowledgebase: {len(self._past_recommendations)}')