forked from e-sensing/sits
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapi_dtw.R
128 lines (128 loc) · 5.19 KB
/
api_dtw.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# ---- Distances ----
#' @title Calculate the DTW distance between temporal patterns and time-series.
#' @name .dtw_distance
#' @description This function calculates the DTW distance between label patterns
#' and real data (e.g., sample data, data cube data). The distance is calculated
#' without a window. It's use is recommended for big datasets.
#' @keywords internal
#' @noRd
.dtw_distance <- function(data, patterns) {
# Prepare input data
data <- as.matrix(.ts_values(data))
# Calculate the DTW distance between `data` and `patterns`
purrr::map_dfc(patterns, function(pattern) {
# Prepare pattern data
pattern_ts <- as.matrix(.ts_values(pattern))
# Calculate distance
stats::setNames(
data.frame(distance = dtw_distance(data, pattern_ts)),
pattern[["label"]][[1]]
)
})
}
#' @title Calculate the DTW distance between temporal patterns and time-series.
#' @name .dtw_distance_windowed
#' @description This function calculates the DTW distance between label patterns
#' and real data (e.g., sample data, data cube data). The distance is calculated
#' using windows.
#' @keywords internal
#' @noRd
.dtw_distance_windowed <- function(data, patterns, windows) {
# Calculate the DTW distance between `data` and `patterns`
purrr::map_dfc(patterns, function(pattern) {
# Get pattern data
pattern_ts <- as.matrix(.ts_values(pattern))
# Windowed search
distances <- purrr::map_df(windows, function(window) {
# Get time-series in the window
data_in_window <-
dplyr::filter(data,
.data[["Index"]] >= window[["start"]],
.data[["Index"]] <= window[["end"]])
# Remove the time reference column
data_in_window <- as.matrix(.ts_values(data_in_window))
# Calculate distance
data.frame(distance = dtw_distance(data_in_window, pattern_ts))
})
# Associate the pattern name with the distances
stats::setNames(distances, pattern[["label"]][[1]])
})
}
# ---- Operation mode ----
#' @title Search for events in time series using complete data (no windowing).
#' @name .dtw_complete_ts
#' @description This function searches for events in time series without
#' windowing.
#' @keywords internal
#' @noRd
.dtw_complete_ts <- function(values, patterns, threshold, ...) {
# Do the change detection for each time-series
purrr::map_vec(values, function(value_row) {
# Search for the patterns
patterns_distances <- .dtw_distance(
data = value_row,
patterns = patterns
)
# Remove distances out the user-defined threshold
as.numeric(any(patterns_distances <= threshold))
})
}
#' @title Search for events in time series using windowing.
#' @name .dtw_windowed_ts
#' @description This function searches for events in time series with windowing.
#' @keywords internal
#' @noRd
.dtw_windowed_ts <- function(values, patterns, window, threshold) {
# Extract dates
dates_min <- .ts_min_date(values[[1]])
dates_max <- .ts_max_date(values[[1]])
# Assume time-series are regularized, then use the period
# as the step of the moving window. As a result, we have
# one step per iteration.
dates_step <- lubridate::as.period(
lubridate::int_diff(.ts_index(values[[1]]))
)
dates_step <- dates_step[[1]]
# Create comparison windows
comparison_windows <- .period_windows(
period = window,
step = dates_step,
start_date = dates_min,
end_date = dates_max
)
# Do the change detection for each time-series
purrr::map(values, function(value_row) {
# Search for the patterns
patterns_distances <- .dtw_distance_windowed(
data = value_row,
patterns = patterns,
windows = comparison_windows
)
# Remove distances out the user-defined threshold
patterns_distances[patterns_distances > threshold] <- NA
# Define where each label was detected. For this, first
# get from each label the minimal distance
detections_idx <- apply(patterns_distances, 2, which.min)
detections_name <- names(detections_idx)
# For each label, extract the metadata where they had
# minimal distance
purrr::map_df(seq_len(length(detections_idx)), function(idx) {
# Extract detection name and inced
detection_name <- detections_name[idx]
detection_idx <- detections_idx[idx]
# Extract detection distance (min one defined above)
detection_distance <- patterns_distances[detection_idx,]
detection_distance <- detection_distance[detection_name]
detection_distance <- as.numeric(detection_distance)
# Extract detection dates
detection_dates <- comparison_windows[[detection_idx]]
# Prepare result and return it!
tibble::tibble(
change = detection_name,
distance = detection_distance,
from = detection_dates[["start"]],
to = detection_dates[["end"]]
)
})
})
}