-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutility.py
177 lines (146 loc) · 5.85 KB
/
utility.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import numpy as np
import torch
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
# Data reader
def read_data(file_name="Data/sxp.csv"):
    """Read a real option dataset from a CSV file.

    Input:
        file_name [str]: path to the CSV. Must contain the columns
            symbol, days_to_expire, stock_price, riskfree_rate,
            strike_price (raw value is price * 1000) and best_bid.
    Output:
        data [object array of trajectories]: one entry per option symbol;
            each entry is a (timesteps x 5) matrix with columns
            [days_to_expire, stock_price, riskfree_rate, strike_price, best_bid]
    """
    df = pd.read_csv(file_name)
    data = []
    # Group by symbols (i.e. different options)
    for option_id, traj_data in df.groupby('symbol'):
        # Remove trajectories with less than 10 data points
        if len(traj_data) < 10:
            continue
        # Fields that we need
        days_to_expire = traj_data["days_to_expire"].values
        stock_price = traj_data["stock_price"].values
        riskfree_rate = traj_data["riskfree_rate"].values
        # Raw strike is quoted at 1000x the price unit — rescale to match stock_price
        strike_price = traj_data["strike_price"].values / 1000
        bid = traj_data["best_bid"].values
        # For debugging, remove during actual training:
        # skip options whose initial strike is >25% away from the spot price
        if abs(strike_price[0] - stock_price[0]) / stock_price[0] > 0.25:
            continue
        traj = np.vstack((days_to_expire, stock_price, riskfree_rate,
                          strike_price, bid)).T
        data.append(traj)
    # dtype=object because trajectories have different lengths
    return np.array(data, dtype=object)
# Data generator
def gen_data():
    """Generate a synthetic dataset (not implemented yet).

    Intended to call gen_traj() repeatedly with several different option
    specifications to build a diverse training set, e.g.:
        X, S0, r, sigma, T, M, N, transition = 40, 36, 0.06, 0.2, 1, 100, 800, BrownianMotion
    """
    pass
def gen_traj(X, S0, r, sigma, T, M, N, transition):
""" Generate N trajectories based on the specifications (i.e. all other inputs)
Input:
X, S0, r, sigma, T [scalar]: specifies the option
M, N [scalar]: number of time steps, number of trajectories
transition [class]: a transition function
"""
brownian = transition(S0, r, sigma, T, M, N)
data = []
trajs = brownian.simulate()
for i in range(trajs.shape[1]):
traj = trajs[:,i]
traj = np.vstack( (np.linspace(0, T*365, M+1),
traj,
X*np.ones(M+1),
r*np.ones(M+1),
sigma*np.ones(M+1)) )
traj = traj.T
data.append(traj)
data = np.array(data)
return data
# Dataset
class SimDataset(Dataset):
    """Thin torch Dataset wrapper around a sequence of trajectories."""

    def __init__(self, data):
        # Stored as-is; indexing simply delegates to the underlying sequence
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
# Dataloader
def data_load(data, lookback, train_test_split=0.8):
    """Prepare LSTM train/test tensors from a list of trajectories.

    Uses traj_load() as a helper function.
    Input:
        data [array of N trajectories, each ? x 5]: at each time step
            there are 4 state variables and 1 output price
        lookback [scalar]: number of lookback steps
        train_test_split [scalar]: ratio of train : test dataset
    Output:
        train_set, test_set [float tensors]: lookback windows from the
            train/test trajectories. Windows from all trajectories are
            flattened together — the model doesn't need to know which
            trajectory each window came from.
    """
    # NOTE: shuffles the caller's array in place
    np.random.shuffle(data)
    n_train = int(np.round(train_test_split * data.shape[0]))

    def _windows(trajs):
        # Collect every lookback window across the given trajectories
        collected = []
        for traj in trajs:
            collected.extend(traj_load(traj, lookback))
        return torch.from_numpy(np.array(collected)).type(torch.Tensor)

    return _windows(data[:n_train]), _windows(data[n_train:])
def traj_load(traj, lookback):
    """Slice a single trajectory into overlapping LSTM lookback windows.

    Helper function to data_load().
    Input:
        traj [matrix of size M x ?]: M is the number of timesteps
        lookback [scalar]: number of lookback steps
    Output:
        data [(M - lookback) x lookback x ?]: stacked sliding windows
    """
    n_windows = traj.shape[0] - lookback
    return np.array([traj[start:start + lookback] for start in range(n_windows)])
"""
Utility functions to load and save torch model checkpoints
"""
def load_checkpoint(net, optimizer=None, step='max', save_dir='checkpoints'):
os.makedirs(save_dir, exist_ok=True)
checkpoints = [x for x in os.listdir(save_dir) if not x.startswith('events')]
# Get rid of DS Store
checkpoints = list(filter(lambda x: "DS_Store" not in x, checkpoints))
if step == 'max':
step = 0
if checkpoints:
step, last_checkpoint = max([(int(x.split('.')[0]), x) for x in checkpoints])
else:
last_checkpoint = str(step) + '.pth'
if step:
save_path = os.path.join(save_dir, last_checkpoint)
state = torch.load(save_path, map_location='cpu')
net.load_state_dict(state['net'])
if optimizer:
optimizer.load_state_dict(state['optimizer'])
print('Loaded checkpoint %s' % save_path)
return step
def save_checkpoint(net, optimizer, step, save_dir='checkpoints'):
    """Save net and optimizer state_dicts to <save_dir>/<step>.pth."""
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, str(step) + '.pth')
    state = {'net': net.state_dict(), 'optimizer': optimizer.state_dict()}
    torch.save(state, save_path)
    print('Saved checkpoint %s' % save_path)
if __name__ == "__main__":
read_data()