#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Utility functions: explained variance, reward discounting, and a simple
replay memory for reinforcement learning experiments.
'''
import random
from typing import NamedTuple, Union

import numpy as np
import scipy.signal as signal


def explained_variance_1d(ypred, y):
    """
    Fraction of the variance of y explained by ypred:
        1 - Var[y - ypred] / Var[y]
    Returns NaN when Var[y] is zero.
    https://www.quora.com/What-is-the-meaning-proportion-of-variance-explained-in-linear-regression
    """
    assert y.ndim == 1 and ypred.ndim == 1
    vary = np.var(y)
    return np.nan if vary == 0 else 1 - np.var(y - ypred) / vary


def discount(x, gamma):
    """
    Compute the discounted sum of future values along axis 0:
        out[i] = x[i] + gamma * x[i+1] + gamma^2 * x[i+2] + ...
    """
    return signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]


def flatten(list_of_lists):
    """Flatten one level of nesting: [[a, b], [c]] -> [a, b, c]."""
    result = []
    for sub in list_of_lists:
        for ele in sub:
            result.append(ele)
    return result


def get_discount_reward(rewards, gamma):
    '''
    Compute the discounted return at every time step.

    Args:
        rewards (list of float): per-step rewards
        gamma (float): discount factor in [0, 1]

    Returns:
        list of float: result[i] = rewards[i] + gamma * rewards[i+1] + ...
    '''
    result = [0] * len(rewards)
    rew = 0
    for i in reversed(range(len(rewards))):
        rew = rew * gamma + rewards[i]
        result[i] = rew
    return result


# A single transition (state, action, next_state, reward) stored in replay memory.
class Transition(NamedTuple):
    state: np.ndarray
    action: Union[np.ndarray, int]
    next_state: np.ndarray
    reward: float


# Replay memory: fixed-capacity buffer of transitions with uniform sampling.
class ReplayMemory(object):
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []

    def push(self, transition):
        # Drop the oldest transition once the buffer is full.
        if len(self.memory) >= self.capacity:
            self.memory.pop(0)
        self.memory.append(transition)

    def sample(self, batch_size):
        # Uniformly sample a batch of stored transitions without replacement.
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

    def clear(self):
        self.memory = []
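

# A minimal usage sketch (not part of the original module): it exercises the
# discounting helpers and the replay memory with made-up numbers, assuming the
# file is run directly as a script.
if __name__ == "__main__":
    rewards = [1.0, 0.0, 2.0]
    gamma = 0.9

    # Both discounting helpers should agree on the per-step returns.
    returns_loop = get_discount_reward(rewards, gamma)
    returns_filt = discount(np.array(rewards), gamma)
    print("returns (loop):   ", returns_loop)
    print("returns (lfilter):", returns_filt)

    # A perfect prediction explains all of the variance (value 1.0).
    y = np.array(returns_loop)
    print("explained variance:", explained_variance_1d(y, y))

    # Fill a small replay memory past its capacity and draw a batch.
    memory = ReplayMemory(capacity=2)
    for t in range(3):
        memory.push(Transition(state=np.array([t]), action=t,
                               next_state=np.array([t + 1]), reward=rewards[t]))
    print("memory size:", len(memory))  # capacity is 2, so the oldest entry was dropped
    print("sample:", memory.sample(batch_size=1))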