-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathmodel_with_rolling_mean.py
95 lines (75 loc) · 3.87 KB
/
model_with_rolling_mean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Standard modules
import os
import progressbar
import pandas as pd
import numpy as np
from matplotlib import pyplot
__author__ = 'Shawn Polson'
__contact__ = '[email protected]'
def model_with_rolling_mean(ts, window, ds_name, var_name='Value', verbose=False, calc_errors=False):
    """Model the time series data with a rolling mean.

    Besides returning the model, this function writes two files:
        ./save/datasets/<ds_name>/rolling mean/data/<ds_name>_with_rolling_mean.csv
        ./save/datasets/<ds_name>/rolling mean/plots/<ds_name>_with_rolling_mean.png
    The directories are created when they do not exist yet.

    Inputs:
        ts [pd Series]: A pandas Series with a DatetimeIndex and a column for numerical values.
        window [int]:   Window size; the number of samples to include in the rolling mean.
        ds_name [str]:  Name of the dataset {bus voltage, etc.}

    Optional Inputs:
        var_name [str]:     The name of the dependent variable in the time series.
                            Default is 'Value'.
        verbose [bool]:     When True, a plot of the rolling mean will be displayed.
                            Default is False.
        calc_errors [bool]: Whether or not to calculate and return errors between data and rolling mean.
                            Default is False.

    Outputs:
        rolling_mean [pd Series]: The rolling mean, as a pandas Series sharing the index of ts.
                                  The first 'window' samples all hold the mean of those samples.

    Optional Outputs:
        errors [pd Series]: The absolute errors |rolling mean - data| at each point, as a pandas Series
                            sharing the index of ts. Only returned when calc_errors is True.

    Raises:
        ValueError: If window is not greater than 0.

    Example:
        rolling_mean = model_with_rolling_mean(time_series, window_size, 'BusVoltage')
        rolling_mean, errors = model_with_rolling_mean(time_series, window_size, 'BusVoltage', calc_errors=True)
    """

    # Validate before touching the file system or creating any figure
    if window <= 0:
        raise ValueError('\'window\' must be given a value greater than 0 when using rolling mean.')

    # Gather statistics
    rolling_mean = ts.rolling(window=window, center=False).mean()
    # The rolling mean is undefined (NaN) until a full window is available, so fill the first 'window'
    # samples with the mean of those samples. A positional slice is used on purpose: integer keys on a
    # DatetimeIndex are ambiguous, and a slice simply truncates when 'window' exceeds the series length.
    rolling_mean.iloc[:window] = ts.iloc[:window].mean()

    # Save data to proper directory with encoded file name
    column_names = [var_name, 'Rolling Mean']  # column order
    ts_with_rolling_mean = pd.DataFrame({var_name: ts, 'Rolling Mean': rolling_mean}, columns=column_names)
    ts_with_rolling_mean.rename_axis('Time', axis='index', inplace=True)  # name index 'Time'
    data_filename = ds_name + '_with_rolling_mean.csv'
    data_path = './save/datasets/' + ds_name + '/rolling mean/data/'
    os.makedirs(data_path, exist_ok=True)  # no exists() pre-check, which would be racy
    ts_with_rolling_mean.to_csv(data_path + data_filename)

    # Save plot to proper directory with encoded file name.
    # Draw on a dedicated figure: pandas Series plots otherwise reuse whatever axes are current, which
    # would overlay this plot on figures left open by earlier calls or by the caller.
    figure, ax = pyplot.subplots(figsize=(14, 6))
    try:
        ts.plot(ax=ax, color='#192C87', title=ds_name + ' with Rolling Mean', label=var_name)
        rolling_mean.plot(ax=ax, color='#0CCADC', label='Rolling Mean', linewidth=2.5)  # 61AEFF is a nice baby blue
        ax.set(xlabel='Time', ylabel=var_name)
        ax.legend(loc='best')
        plot_filename = ds_name + '_with_rolling_mean.png'
        plot_path = './save/datasets/' + ds_name + '/rolling mean/plots/'
        os.makedirs(plot_path, exist_ok=True)
        figure.savefig(plot_path + plot_filename, dpi=500)
        if verbose:
            pyplot.show()
    finally:
        # Release the figure so repeated calls do not accumulate open figures
        pyplot.close(figure)

    if calc_errors:
        # Absolute error at every sample, computed in one vectorized step on the raw values
        # (both series share ts.index, so no label alignment is needed).
        errors = pd.Series(np.abs(rolling_mean.values - ts.values), index=ts.index)
        return rolling_mean, errors

    return rolling_mean