-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathregressor_tools.py
174 lines (149 loc) · 5.83 KB
/
regressor_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import math
from time import time
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tqdm import tqdm
from utils.data_processor import uniform_scaling
# Tag printed as a prefix in this module's console messages.
name = "RegressorTools"

# Model identifiers grouped by family.
classical_ml_models = ["xgboost", "svr", "random_forest"]
deep_learning_models = ["fcn", "resnet", "inception"]
tsc_models = ["rocket"]
linear_models = ["lr", "ridge"]

# Classical, deep learning and TSC groups concatenated in that order.
# NOTE: linear_models is not included in this list.
all_models = [*classical_ml_models, *deep_learning_models, *tsc_models]
def fit_regressor(output_directory, regressor_name, X_train, y_train,
                  X_val=None, y_val=None, itr=1):
    """
    Create the regressor identified by ``regressor_name`` and fit it on the given data.

    :param output_directory: forwarded to ``create_regressor``
    :param regressor_name: name of the model to create
    :param X_train: training inputs; ``X_train.shape[1:]`` is used as the model input shape
    :param y_train: training targets
    :param X_val: optional validation inputs, only forwarded to deep learning models
    :param y_val: optional validation targets, forwarded together with ``X_val``
    :param itr: iteration number, forwarded to ``create_regressor`` as ``itr``
    :return: the regressor object after ``fit`` has been called on it
    """
    print("[{}] Fitting regressor".format(name))
    start_time = time()

    input_shape = X_train.shape[1:]

    # ``itr`` must be passed by keyword: the fourth positional parameter of
    # create_regressor is ``verbose``, so a positional ``itr`` would be taken
    # as the verbosity level and the real ``itr`` would stay at its default.
    regressor = create_regressor(regressor_name, input_shape, output_directory, itr=itr)

    # Only the deep learning models receive the validation data.
    if (X_val is not None) and (regressor_name in deep_learning_models):
        regressor.fit(X_train, y_train, X_val, y_val)
    else:
        regressor.fit(X_train, y_train)

    elapsed_time = time() - start_time
    print("[{}] Regressor fitted, took {}s".format(name, elapsed_time))
    return regressor
def create_regressor(regressor_name, input_shape, output_directory, verbose=1, itr=1):
    """
    Create the regression model identified by ``regressor_name``.

    Each model module is imported inside its own branch, so only the module of
    the requested model is loaded.

    :param regressor_name: one of "resnet", "fcn", "inception", "rocket", "xgboost",
        "random_forest", "svr", "lr" or "ridge"
    :param input_shape: passed to the deep learning regressors; unused by the others
    :param output_directory: passed to every regressor constructor
    :param verbose: verbosity level passed to the models that accept one
    :param itr: iteration number; ``itr - 1`` is used as ``random_state`` for
        "xgboost" and "random_forest"
    :return: a newly constructed regressor object
    :raises ValueError: if ``regressor_name`` is not one of the known names
    """
    print("[{}] Creating regressor".format(name))

    # SOTA TSC deep learning
    if regressor_name == "resnet":
        from models.deep_learning import resnet
        return resnet.ResNetRegressor(output_directory, input_shape, verbose)
    if regressor_name == "fcn":
        from models.deep_learning import fcn
        return fcn.FCNRegressor(output_directory, input_shape, verbose)
    if regressor_name == "inception":
        from models.deep_learning import inception
        return inception.InceptionTimeRegressor(output_directory, input_shape, verbose)
    if regressor_name == "rocket":
        from models import rocket
        return rocket.RocketRegressor(output_directory)

    # classical ML models
    if regressor_name == "xgboost":
        from models.classical_models import XGBoostRegressor
        # The key must be exactly "verbosity" (no trailing whitespace) to name
        # the xgboost parameter.
        # NOTE(review): presumably XGBoostRegressor forwards these kwargs to
        # xgboost unchanged - confirm.
        kwargs = {"n_estimators": 100,
                  "n_jobs": 0,
                  "learning_rate": 0.1,
                  "random_state": itr - 1,
                  "verbosity": verbose}
        return XGBoostRegressor(output_directory, verbose, kwargs)
    if regressor_name == "random_forest":
        from models.classical_models import RFRegressor
        kwargs = {"n_estimators": 100,
                  "n_jobs": -1,
                  "random_state": itr - 1,
                  "verbose": verbose}
        return RFRegressor(output_directory, verbose, kwargs)
    if regressor_name == "svr":
        from models.classical_models import SVRRegressor
        return SVRRegressor(output_directory, verbose)

    # linear models
    # NOTE(review): recent scikit-learn releases no longer accept a ``normalize``
    # argument on LinearRegression/Ridge; confirm how LinearRegressor consumes
    # these kwargs before upgrading.
    if regressor_name == "lr":
        from models.classical_models import LinearRegressor
        kwargs = {"fit_intercept": True,
                  "normalize": False,
                  "n_jobs": -1}
        return LinearRegressor(output_directory, kwargs, type=regressor_name)
    if regressor_name == "ridge":
        from models.classical_models import LinearRegressor
        kwargs = {"fit_intercept": True,
                  "normalize": False}
        return LinearRegressor(output_directory, kwargs, type=regressor_name)

    # Fail loudly instead of implicitly returning None, which would only
    # surface later as an AttributeError when the caller tries to fit.
    raise ValueError("Unknown regressor name: {!r}".format(regressor_name))
def process_data(X, min_len, normalise=None):
    """
    Turn a dataframe of series into a numpy array.

    Every row of ``X`` is handled independently: missing values in each
    dimension are linearly interpolated, dimensions shorter than the longest
    one in the row are stretched with ``uniform_scaling``, the row is cut to
    at most ``min_len`` time steps and optionally normalised.

    :param X: dataframe accessed with ``.iloc``; each cell is expected to be a
        pandas Series (``isnull`` and ``interpolate`` are called on it)
    :param min_len: maximum number of time steps kept per row
    :param normalise: "standard" for StandardScaler, "minmax" for MinMaxScaler,
        any other value leaves the data unscaled
    :return: numpy array built from one (time step, dimension) array per row
    """
    processed = []
    for row_idx in tqdm(range(len(X))):
        row = X.iloc[row_idx, :].copy(deep=True)

        # 1. longest dimension within this row
        target_len = max(len(dim) for dim in row)

        # 2. bring every dimension of the row to that length
        dims = []
        for dim in row:
            # 2.1 fill missing values
            if dim.isnull().any():
                dim = dim.interpolate(method='linear', limit_direction='both')

            # 2.2 stretch the shorter dimensions up to the longest one
            if len(dim) < target_len:
                dim = uniform_scaling(dim, target_len)
            dims.append(dim)

        # rows become time steps, columns become dimensions
        series = np.array(np.transpose(dims))

        # 3. chop off everything past min_len time steps
        series = series[:min_len, :]

        # 4. per-row normalisation, fitted on the row itself
        if normalise == "standard":
            series = StandardScaler().fit(series).transform(series)
        elif normalise == "minmax":
            series = MinMaxScaler().fit(series).transform(series)

        processed.append(series)

    return np.array(processed)
def calculate_regression_metrics(y_true, y_pred, y_true_val=None, y_pred_val=None):
    """
    Calculate RMSE and MAE for a set of predictions.

    :param y_true: ground-truth targets
    :param y_pred: predicted targets
    :param y_true_val: optional ground-truth targets of a validation set
    :param y_pred_val: optional predictions on the validation set; used only
        when ``y_true_val`` is given
    :return: single-row DataFrame (index 0) with columns 'rmse' and 'mae',
        plus 'rmse_val' and 'mae_val' when ``y_true_val`` is not None
    """
    # Build the columns directly from the computed values. This avoids the
    # ``np.float`` alias, which no longer exists in current NumPy releases.
    metrics = {
        'rmse': [math.sqrt(mean_squared_error(y_true, y_pred))],
        'mae': [mean_absolute_error(y_true, y_pred)],
    }

    if y_true_val is not None:
        # this is useful when transfer learning is used with cross validation
        metrics['rmse_val'] = [math.sqrt(mean_squared_error(y_true_val, y_pred_val))]
        metrics['mae_val'] = [mean_absolute_error(y_true_val, y_pred_val)]

    return pd.DataFrame(metrics, index=[0])