-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconverter_online.py
367 lines (287 loc) · 15.7 KB
/
converter_online.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
# Exponential smoothing algorithm for paro data (barometer)
# Tristan Liang
# 1/19/2023
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime
from matplotlib.ticker import StrMethodFormatter
from statsmodels.tsa.arima.model import ARIMA
from scipy.signal import butter, lfilter, detrend, welch, spectrogram
from scipy.signal.windows import hamming
from influxdb_client.client.write_api import SYNCHRONOUS
from datetime import datetime, timedelta
# from apiikey_access import api_key
from influxdb_client import InfluxDBClient, Point
from influxdb_client.client.write_api import SYNCHRONOUS
import matplotlib.dates as mdates
# Insturction !!
# 1st step: do the manual setting, where you can changes the value to what you want
# 2nd step: uncomment the plot function in the main function (only uncomment one at a time)
""""""""""""""""""""""""""""" manual setting """""""""""""""""""""""""""""""""
event_name = "Turkey–Syria earthquake" # Name of the event, show in the title of the graph
start_time = "2023-02-06T00:01:00" # the starting time in the plot
format_data = "20%y-%m-%dT%H:%M:%S" # time format of start_time
box = "paros1" # which paros box
sensor_id = '141905' # barometer id
box_hz = 20 # 20 Hz is the sampling rate of the paro
sample_time = 10 # miuntes, time duration of the plot
sample_rate = 1200 # sample/min, sampling rate of the plot
alpha = 0.03 # smoothing factor of exponential smoothing, 1 > alpha > 0
p,d,q = 10,1,10 # p, d, q value of ARIMA model
A, B = 6, 2 # Fast Fourier transform (FFT) Window size: (2^A) and Shift size: (2^B)
dB = True # plot power in dB, True or False
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
""""""""""""""""""""""""""""" api_token setting """""""""""""""""""""""""""""""""
influxdb_apikey = # download your api_token from influxdB. Remember not to save it in the public file!
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
device_name = box + "-" + sensor_id
starting_date = datetime.strptime(start_time, format_data)
number_of_samples = sample_rate*sample_time
# Convert start_time from string to datetime object
start_time_obj = datetime.fromisoformat(start_time)
# Calculate end_time by adding sample_time (in minutes) to start_time
end_time_obj = start_time_obj + timedelta(minutes=sample_time)
# Convert end_time from datetime object to string in the same format as start_time
end_time = end_time_obj.isoformat()
def data_download():
influxdb_org = "paros"
influxdb_url = "https://influxdb.paros.casa.umass.edu/"
# create influxdb client and API objects
influxdb_client = InfluxDBClient(
url=influxdb_url,
token=influxdb_apikey,
org=influxdb_org,
verify_ssl=False
)
influxdb_query_api = influxdb_client.query_api()
influxdb_sensorid_tagkey = "anemometer"
# Main Query
"""idb_query = 'from(bucket:"' + "paros-live-datastream" + '")'\
'|> range(start:' + start_time + 'Z, stop:' + end_time + 'Z)'\
'|> filter(fn: (r) => r["_measurement"] ==' + box + ')'\
'|> filter(fn: (r) => r["sensor_id"] ==' + sensor_id + ')'"""
idb_query = 'from(bucket:"' + "paros-live-datastream" + '")'\
'|> range(start:' + start_time + 'Z, stop:' + end_time + 'Z)'\
'|> filter(fn: (r) => r["_measurement"] == "' + box + '" )'\
'|> filter(fn: (r) => r["sensor_id"] == "' + sensor_id + '")'
device_result = influxdb_query_api.query(org=influxdb_org, query=idb_query)
#print(device_result)
data_list = []
time_list = []
for table in device_result:
for record in table.records:
data_list.append(record.get_value())
time_list.append(record.get_time()) # Assuming there is a method get_time()
# Create a DataFrame for easy manipulation
df = pd.DataFrame(data=data_list, index=time_list, columns=['value'])
# Filter out rows with string values in the 'value' column
df = df.loc[df['value'].apply(lambda x: isinstance(x, float))]
# Convert 'value' column to numeric type
df['value'] = pd.to_numeric(df['value'])
return df
## function of calculate the moving average (exponantial smoothing)
# Formula : https://en.wikipedia.org/wiki/Exponential_smoothing
def exp_smoothing_from_raw(df):
start_pressure = df['value'].iloc[0] # pressure of the starting time.
raw = [] # raw data list
exp_smooth = [] # list for the exponential smoothing algorithm
exp_smooth.append(start_pressure)
raw.append(float(start_pressure))
interval = int((1/sample_rate) * 60 * box_hz)
time_points = [df.index[0]] # list to store time points
for i in range(1, len(df), interval):
a = i
b = int((i-1) / interval)
x_t = df['value'].iloc[a]
raw.append(float(x_t))
predict = alpha * float(x_t) + (1 - alpha) * float(exp_smooth[b])
exp_smooth.append(predict)
time_points.append(df.index[a])
# Creating a new DataFrame for the exponentially smoothed data
smoothed_df = pd.DataFrame(data=exp_smooth, index=time_points, columns=['ESValue'])
raw_df = pd.DataFrame(data=raw, index=time_points, columns=['RawValue'])
# Calculating the residuals
residual = [raw_val - smooth_val for raw_val, smooth_val in zip(raw, exp_smooth)]
residual_df = pd.DataFrame(data=residual, index=time_points, columns=['Residual'])
return raw_df, smoothed_df, residual_df
## function of ARIMA model
def arima_model(raw_df):
model = ARIMA(raw_df['RawValue'], order=(p,d,q))
results_ARIMA = model.fit()
y3_series = pd.Series(results_ARIMA.predict())
y3 = np.roll(y3_series, -1) # move left by one sample
# Create a DataFrame for easy manipulation
arima_df = pd.DataFrame(data=y3[:-1], index=raw_df.index[:-1], columns=['ARIMA'])
return arima_df
def calculate_derivatives(df):
# Calculate change in 'value'
delta_value = df['RawValue'].diff()
# Calculate change in 'time' (in seconds)
delta_time = df.index.to_series().diff().dt.total_seconds()
# Calculate first derivative (change in 'value' divided by change in 'time')
first_derivative = delta_value / delta_time
# Calculate second derivative
second_derivative = first_derivative.diff() / delta_time[1:]
# Create new DataFrames for the first and second derivatives
first_derivative_df = pd.DataFrame(first_derivative.fillna(0), columns=['FirstDerivative'])
second_derivative_df = pd.DataFrame(second_derivative.fillna(0), columns=['SecondDerivative'])
return first_derivative_df, second_derivative_df
## function of calculating the FFT of the residual(raw - ES)
def fft_ac(df):
data = df.values.flatten()
# Calculate the number of windows
n_windows = max(0, int((len(data) - 2**A) / 2**B) + 1)
# Create an empty array to store the squared sums
squared_sums = np.zeros(n_windows)
# Loop through each window
for i in range(n_windows):
# Extract the current window from the signal
window = data[i*2**B:i*2**B+2**A]
# Apply FFT to the window
fft = np.fft.fft(window)
# Extract the positive frequencies
positive_freqs = fft[:2**A//2]
# Take the absolute values
abs_values = np.abs(positive_freqs)
# Calculate the squared sum
squared_sums[i] = np.sum(abs_values)**2
# Convert to dB if desired
if dB:
squared_sums = 10 * np.log10(squared_sums)
# Create output DataFrame
fft_df = pd.DataFrame(data=squared_sums, index=[df.index[i*2**B] for i in range(n_windows)], columns=['FFTValue'])
# print(fft_df)
return fft_df
## function of calculating the bandpass filtered (0.5Hz to 3Hz) pressure
def bpf_pressure(raw_df):
pressure = raw_df['RawValue'].values
n = len(pressure) # number of data points
timestep = 1 / (sample_rate/60) # time between data points (in seconds)
freq = np.fft.fftfreq(n, d=timestep) # frequency array
# apply FFT
pressure_fft = np.fft.fft(pressure)
# apply bandpass filter
min_freq = 0.5 # Hz
max_freq = 3 # Hz
pressure_fft_filtered = pressure_fft.copy() # create copy of FFT data
pressure_fft_filtered[np.abs(freq) < min_freq] = 0 # set frequencies below min_freq to zero
pressure_fft_filtered[np.abs(freq) > max_freq] = 0 # set frequencies above max_freq to zero
# convert back to time domain
pressure_filtered = np.fft.ifft(pressure_fft_filtered).real.tolist() # convert back to list
# Create new DataFrame with the same index
bpf_raw_df = pd.DataFrame(data=pressure_filtered, index=raw_df.index, columns=['BPF_RawValue'])
return bpf_raw_df
## function of plotting the raw data, exp smoothing, and ARIMA prediction for single barometer
def gen_main_graphs(raw, smooth, ARIMA):
fig_main = plt.figure(figsize=(15, 10))
st = "%s/%s/%s %s:%s"%(starting_date.year, starting_date.month, starting_date.day, starting_date.hour, starting_date.minute)
fig_main.suptitle("%s\nRaw vs ARIMA prediction vs Exponential Smoothing \n (starting time is %s:00)"%(event_name, st), fontsize=18, fontweight = 'bold')
fig_main.supxlabel("Timestamp (UTC)", fontsize=20,fontweight = 'bold')
fig_main.supylabel("Pressure (hPa)", fontsize=20, fontweight = 'bold')
title_2 = 'ARIMA (p,d,q = %s,%s,%s)'%(p,d,q)
title_1 = 'E.S. (alpha = %s)'%(alpha)
plt.plot(raw.index, raw['RawValue'], label ='raw - %s'%(device_name), linewidth=0.8, color = 'black')
plt.plot(smooth.index, smooth['ESValue'], label = title_1, linewidth= 0.8, color = 'red')
plt.plot(ARIMA.index, ARIMA['ARIMA'], label = title_2, linewidth=1, color = 'orange')
# Draw a vertical line at chosen_time
#plt.axvline(x=earthquake_time, color='blue', linestyle='--')
# plt.yticks(np.arange(min(y2), max(y2), 0.2), fontsize=12)
# plt.xticks([])
plt.legend(fontsize=15, bbox_to_anchor=[0.5, 1], loc='center', ncol=3, facecolor='white', framealpha=1)
# plt.ylim(min(y2)-0.02, max(y2)+0.02)
# plt.ticklabel_format(useOffset=False, style='plain')
plt.gca().yaxis.set_major_formatter(StrMethodFormatter('{x:,.2f}'))
plt.show()
return ()
## function of plotting the residual (raw - ES), first derivative, second derivative for single barometer
def gen_diff_graphs(first_derivative, second_derivative, residual):
fig = plt.figure(figsize=(15, 10))
st = "%s/%s/%s %s:%s"%(starting_date.year, starting_date.month, starting_date.day, starting_date.hour, starting_date.minute)
fig.suptitle("%s\nResiduals (raw - E.S.) and Derivative of Raw Data\n (starting time is %s:00)"%(event_name, st), fontsize=18, fontweight = 'bold')
fig.supxlabel("Timestamp (UTC)", fontsize=20,fontweight = 'bold')
fig.supylabel("Pressure (hPa)", fontsize=20, fontweight = 'bold')
title_1 = 'First Derivative'
title_2 = 'Second Derivative'
title_3 = 'Residuals (raw - E.S.)'
plt.plot(first_derivative.index, first_derivative['FirstDerivative'], label = title_1, linewidth=1, color = "blue") # plot First Derivative
plt.plot(second_derivative.index, second_derivative['SecondDerivative'], label = title_2, linewidth=1, color = "green") # plot Second Derivative
plt.plot(residual.index, residual['Residual'], label = title_3, linewidth=1, color = "orange") # plot Residuals (raw - E.S.)
plt.text(0.1, 0.1, device_name, ha="center", va="center", transform=plt.gca().transAxes, fontsize = 18, fontweight = 'bold')
plt.legend(fontsize=12, bbox_to_anchor=[0.5, 1], loc='center', ncol=3, facecolor='white', framealpha=1)
# plt.ticklabel_format(useOffset=False, style='plain')
plt.gca().yaxis.set_major_formatter(StrMethodFormatter('{x:,.2f}'))
plt.xticks(fontsize=16)
# plt.ticklabel_format(useOffset=False, style='plain')
# formatter = matplotlib.ticker.FuncFormatter(lambda ms, x: time.strftime('%M:%S', time.gmtime(ms // 1000)))
# plt.gca().xaxis.set_major_formatter(formatter)
plt.gca().yaxis.set_major_formatter(StrMethodFormatter('{x:,.2f}'))
plt.show()
return()
# function of plotting the FFT of the residual(raw - ES) for single barometer
def gen_fft_ac_graph(fft):
fft_window_size = 2**A
fig_main = plt.figure(figsize=(15, 10))
st = "%s/%s/%s %s:%s"%(starting_date.year, starting_date.month, starting_date.day, starting_date.hour, starting_date.minute)
fig_main.suptitle("%s\nPower (magnitude of the sum of the FFT)\n(starting time is %s:00)"%(event_name, st), fontsize=18, fontweight = 'bold')
fig_main.supxlabel("Timestamp (UTC)", fontsize=20,fontweight = 'bold')
if dB == True:
fig_main.supylabel("Power (Pa/Hz) in dB", fontsize=20, fontweight = 'bold')
else:
fig_main.supylabel("Power (Pa/Hz)", fontsize=20, fontweight = 'bold')
title_1 = 'Sum of the FFT in the window of %s samples shift by %s'%(fft_window_size,2**B)
plt.plot(fft.index, fft['FFTValue'], label = title_1, linewidth=1, color = 'magenta')
plt.text(0.1, 0.1, device_name, ha="center", va="center", transform=plt.gca().transAxes, fontsize = 18, fontweight = 'bold')
plt.legend(fontsize=12, bbox_to_anchor=[0.5, 1], loc='center', ncol=3, framealpha=1)
plt.xticks(fontsize=16)
plt.show()
return()
## function of plotting the bandpass filtered (0.5Hz to 3Hz) pressure
def gen_bpf_graph(bpf):
fig_main = plt.figure(figsize=(15, 10))
st = "%s/%s/%s %s:%s"%(starting_date.year, starting_date.month, starting_date.day, starting_date.hour, starting_date.minute)
fig_main.suptitle("%s\nPressure (bandpass filtered to 0.5Hz-3Hz)\n(starting time is %s:00)"%( event_name, st), fontsize=18, fontweight = 'bold')
fig_main.supxlabel("Timestamp (UTC)", fontsize=20,fontweight = 'bold')
fig_main.supylabel("Pressure (hPa)", fontsize=20, fontweight = 'bold')
plt.plot(bpf.index, bpf['BPF_RawValue'], linewidth=1, color = 'magenta')
plt.text(0.1, 0.1, device_name, ha="center", va="center", transform=plt.gca().transAxes, fontsize = 18, fontweight = 'bold')
plt.xticks(fontsize=16)
plt.show()
return()
def main():
origninal_raw_data = data_download()
raw, exp, residual = exp_smoothing_from_raw(origninal_raw_data)
Quit = False
again = "y"
while again == "y":
while Quit == False:
print("Which plot do you want to generate?\n 1. raw data, exp smoothing, and ARIMA prediction\n 2. residual(raw - ES), first derivative, second derivative\n 3. power fft\n 4. power fft with bandpass filter")
plot_option = input("Enter your choice.(1, 2, 3 or 4): ")
if plot_option == "1": # generate plot for raw data, exp smoothing, and ARIMA prediction for single paro
arima = arima_model(raw)
gen_main_graphs(raw, exp, arima)
break
elif plot_option == "2": # generate plot for raw - ES, first derivative, second derivative for single paro
first_derivative_df, second_derivative_df = calculate_derivatives(raw)
gen_diff_graphs(first_derivative_df, second_derivative_df, residual)
break
elif plot_option == "3": # generate power fft over time for single barometer
fft_sum = fft_ac(residual)
gen_fft_ac_graph(fft_sum)
break
elif plot_option == "4": # generate power fft over time with bandpass filter for single barometer
bpf = bpf_pressure(raw)
gen_bpf_graph(bpf)
else:
print("Invalid option. Please enter again")
again = input("generate another plot(y/n): ")
if again == "y":
None
elif again == "n":
Quit == True
else:
print("Please enter again")
again = input("generate another plot(y/n): ")
return()
if __name__ == "__main__":
main()#