-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathbacterium.py
64 lines (52 loc) · 2.36 KB
/
bacterium.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
__author__ = 'gkour'
from creatures.abstractcreature import AbstractCreature
from creature_actions import Actions
from config import ConfigBiology, ConfigBrain
from brains.braindqntf import BrainDQN
import utils
from evolution import DNA
class Bacterium(AbstractCreature):
    """Creature race whose members all think with one shared ``BrainDQN``.

    The brain is stored on the class (``_master_brain``) and is built the
    first time any instance asks for it, so every ``Bacterium`` created
    afterwards reuses the same brain object.
    """

    # Brain shared by every instance; created lazily in get_master_brain().
    _master_brain = None

    # Unnormalized innate action weights; race_fitrah() normalizes them.
    # NOTE(review): presumably aligned index-for-index with get_actions()
    # (both have six entries) -- confirm against AbstractCreature.
    Fitrah = [0, 0, 0, 0, 1, 1]

    def __init__(self, universe, id, dna, age=0, energy=ConfigBiology.INITIAL_ENERGY, parents=None):
        """Initialize the creature through the base class and attach the shared brain.

        All arguments are forwarded unchanged to ``AbstractCreature.__init__``.
        ``energy`` defaults to ``ConfigBiology.INITIAL_ENERGY``.
        """
        super(Bacterium, self).__init__(universe, id, dna, age, energy, parents)
        self._brain = self.get_master_brain()

    def get_master_brain(self):
        """Return the class-wide brain, creating it on first use.

        The brain is constructed from the ``ConfigBrain`` base parameters and
        this instance's ``observation_shape()`` / ``num_actions()``, under the
        scope name ``'master' + race_name()``.
        """
        if Bacterium._master_brain is None:
            Bacterium._master_brain = BrainDQN(
                lr=ConfigBrain.BASE_LEARNING_RATE,
                observation_shape=self.observation_shape(),
                num_actions=self.num_actions(),
                h_size=ConfigBrain.BASE_BRAIN_STRUCTURE_PARAM,
                reward_discount=ConfigBrain.BASE_REWARD_DISCOUNT,
                scope='master' + self.race_name(),
            )
        # Single exit point: the freshly built and the cached brain are the
        # same class attribute, so no separate return is needed in the branch.
        return Bacterium._master_brain

    @staticmethod
    def get_actions():
        """Return the list of actions available to this race."""
        return [Actions.LEFT, Actions.RIGHT, Actions.UP, Actions.DOWN, Actions.EAT, Actions.DIVIDE]

    @staticmethod
    def race_basic_dna():
        """Return a ``DNA`` built from the base config values and the race fitrah."""
        return DNA(
            ConfigBiology.BASE_MEMORY_SIZE,
            ConfigBrain.BASE_LEARNING_RATE,
            ConfigBrain.BASE_BRAIN_STRUCTURE_PARAM,
            ConfigBiology.BASE_LEARN_FREQ,
            ConfigBiology.BASE_LIFE_EXPECTANCY,
            ConfigBrain.BASE_REWARD_DISCOUNT,
            Bacterium.race_fitrah(),
        )

    @staticmethod
    def race_name():
        """Return the race's name, ``'Bacterium'``."""
        return 'Bacterium'

    def get_race(self):
        """Return the race class of this creature (``Bacterium`` itself)."""
        return Bacterium

    @staticmethod
    def self_race_enemy():
        """Return ``True``; how callers interpret the flag is defined elsewhere."""
        return True

    @staticmethod
    def race_fitrah():
        """Return ``Fitrah`` passed through ``utils.normalize_dist``."""
        return utils.normalize_dist(Bacterium.Fitrah)

    def decide(self, state):
        """Choose an action for ``state``.

        The brain's output for ``state`` is added to the creature's fitrah,
        normalized, and handed to ``utils.epsilon_greedy`` together with an
        age-dependent epsilon.
        """
        # Epsilon falls linearly with age and is floored at BASE_EPSILON.
        # NOTE(review): divides by learning_frequency() * MATURITY_AGE, so a
        # zero value for either would raise ZeroDivisionError -- confirm that
        # both are always positive.
        maturity_span = self.learning_frequency() * ConfigBiology.MATURITY_AGE
        eps = max(ConfigBrain.BASE_EPSILON, 1 - (self.age() / maturity_span))

        brain_actions_prob = self._brain.think(state)
        # NOTE(review): assumes both operands add element-wise (e.g. numpy
        # arrays); with plain lists `+` would concatenate -- confirm the types
        # returned by think() and fitrah().
        action_prob = utils.normalize_dist(brain_actions_prob + self.fitrah())
        return utils.epsilon_greedy(eps, dist=action_prob)