|
1 |
| - |
2 |
| -#' @title Extract temporal pattern of samples using temporal median. |
3 |
| -#' @name .pattern_temporal_median |
| 1 | +# ---- Distances ---- |
| 2 | +#' @title Calculate the DTW distance between temporal patterns and time-series. |
| 3 | +#' @name .dtw_distance |
| 4 | +#' @description This function calculates the DTW distance between label patterns |
| 5 | +#' and real data (e.g., sample data, data cube data). The distance is calculated |
| 6 | +#' without a window. It's use is recommended for big datasets. |
4 | 7 | #' @keywords internal
|
5 | 8 | #' @noRd
|
6 |
| -.pattern_temporal_median <- function(samples) { |
7 |
| - samples |> |
8 |
| - dplyr::group_by(.data[["label"]]) |> |
9 |
| - dplyr::group_map(function(data, name) { |
10 |
| - ts_median <- dplyr::bind_rows(data[["time_series"]]) |> |
11 |
| - dplyr::group_by(.data[["Index"]]) |> |
12 |
| - dplyr::summarize(dplyr::across(dplyr::everything(), |
13 |
| - stats::median, na.rm = TRUE)) |> |
14 |
| - dplyr::select(-.data[["Index"]]) |
15 |
| - |
16 |
| - ts_median["label"] <- name |
17 |
| - ts_median |
18 |
| - }) |
| 9 | +.dtw_distance <- function(data, patterns) { |
| 10 | + # Prepare input data |
| 11 | + data <- as.matrix(.ts_values(data)) |
| 12 | + # Calculate the DTW distance between `data` and `patterns` |
| 13 | + purrr::map_dfc(patterns, function(pattern) { |
| 14 | + # Prepare pattern data |
| 15 | + pattern_ts <- as.matrix(.ts_values(pattern)) |
| 16 | + # Calculate distance |
| 17 | + stats::setNames( |
| 18 | + data.frame(distance = dtw_distance(data, pattern_ts)), |
| 19 | + pattern[["label"]][[1]] |
| 20 | + ) |
| 21 | + }) |
19 | 22 | }
|
20 |
| - |
21 |
| -#' @title Calculate the DTW distance between label patterns and sample data. |
22 |
| -#' @name .pattern_distance_dtw |
| 23 | +#' @title Calculate the DTW distance between temporal patterns and time-series. |
| 24 | +#' @name .dtw_distance_windowed |
23 | 25 | #' @description This function calculates the DTW distance between label patterns
|
24 |
| -#' and sample data in a given temporal window. |
| 26 | +#' and real data (e.g., sample data, data cube data). The distance is calculated |
| 27 | +#' using windows. |
25 | 28 | #' @keywords internal
|
26 | 29 | #' @noRd
|
27 |
| -.pattern_distance_dtw <- function(data, patterns, windows) { |
| 30 | +.dtw_distance_windowed <- function(data, patterns, windows) { |
28 | 31 | # Calculate the DTW distance between `data` and `patterns`
|
29 |
| - purrr::map_dfc(1:length(patterns), function(pattern_index) { |
30 |
| - # Get pattern metadata |
31 |
| - pattern <- patterns[pattern_index][[1]] |
32 |
| - pattern_label <- unique(pattern[["label"]]) |
| 32 | + purrr::map_dfc(patterns, function(pattern) { |
33 | 33 | # Get pattern data
|
34 |
| - pattern_ts <- dplyr::select(pattern, -.data[["label"]]) |
35 |
| - pattern_ts <- as.matrix(pattern_ts) |
| 34 | + pattern_ts <- as.matrix(.ts_values(pattern)) |
36 | 35 | # Windowed search
|
37 | 36 | distances <- purrr::map_df(windows, function(window) {
|
38 | 37 | # Get time-series in the window
|
|
41 | 40 | .data[["Index"]] >= window[["start"]],
|
42 | 41 | .data[["Index"]] <= window[["end"]])
|
43 | 42 | # Remove the time reference column
|
44 |
| - data_in_window <- dplyr::select(data_in_window, -.data[["Index"]]) |
45 |
| - # Transform values in matrix (as expected in the cpp code) |
46 |
| - data_in_window <- as.matrix(data_in_window) |
47 |
| - data_in_window <- data_in_window |
| 43 | + data_in_window <- as.matrix(.ts_values(data_in_window)) |
48 | 44 | # Calculate distance
|
49 |
| - distance_from_pattern <- dtw_distance(data_in_window, pattern_ts) |
50 |
| - # Prepare result and return it |
51 |
| - data.frame(distance = distance_from_pattern) |
| 45 | + data.frame(distance = dtw_distance(data_in_window, pattern_ts)) |
52 | 46 | })
|
53 | 47 | # Associate the pattern name with the distances
|
54 |
| - stats::setNames(distances, pattern_label) |
| 48 | + stats::setNames(distances, pattern[["label"]][[1]]) |
| 49 | + }) |
| 50 | +} |
| 51 | +# ---- Operation mode ---- |
| 52 | +#' @title Search for events in time series using complete data (no windowing). |
| 53 | +#' @name .dtw_complete_ts |
| 54 | +#' @description This function searches for events in time series without |
| 55 | +#' windowing. |
| 56 | +#' @keywords internal |
| 57 | +#' @noRd |
| 58 | +.dtw_complete_ts <- function(values, patterns, threshold, ...) { |
| 59 | + # Do the change detection for each time-series |
| 60 | + purrr::map_vec(values, function(value_row) { |
| 61 | + # Search for the patterns |
| 62 | + patterns_distances <- .dtw_distance( |
| 63 | + data = value_row, |
| 64 | + patterns = patterns |
| 65 | + ) |
| 66 | + # Remove distances out the user-defined threshold |
| 67 | + as.numeric(any(patterns_distances <= threshold)) |
| 68 | + }) |
| 69 | +} |
| 70 | +#' @title Search for events in time series using windowing. |
| 71 | +#' @name .dtw_windowed_ts |
| 72 | +#' @description This function searches for events in time series with windowing. |
| 73 | +#' @keywords internal |
| 74 | +#' @noRd |
| 75 | +.dtw_windowed_ts <- function(values, patterns, window, threshold) { |
| 76 | + # Extract dates |
| 77 | + dates_min <- .ts_min_date(values[[1]]) |
| 78 | + dates_max <- .ts_max_date(values[[1]]) |
| 79 | + # Assume time-series are regularized, then use the period |
| 80 | + # as the step of the moving window. As a result, we have |
| 81 | + # one step per iteration. |
| 82 | + dates_step <- lubridate::as.period( |
| 83 | + lubridate::int_diff(.ts_index(values[[1]])) |
| 84 | + ) |
| 85 | + dates_step <- dates_step[[1]] |
| 86 | + # Create comparison windows |
| 87 | + comparison_windows <- .period_windows( |
| 88 | + period = window, |
| 89 | + step = dates_step, |
| 90 | + start_date = dates_min, |
| 91 | + end_date = dates_max |
| 92 | + ) |
| 93 | + # Do the change detection for each time-series |
| 94 | + purrr::map(values, function(value_row) { |
| 95 | + # Search for the patterns |
| 96 | + patterns_distances <- .dtw_distance_windowed( |
| 97 | + data = value_row, |
| 98 | + patterns = patterns, |
| 99 | + windows = comparison_windows |
| 100 | + ) |
| 101 | + # Remove distances out the user-defined threshold |
| 102 | + patterns_distances[patterns_distances > threshold] <- NA |
| 103 | + # Define where each label was detected. For this, first |
| 104 | + # get from each label the minimal distance |
| 105 | + detections_idx <- apply(patterns_distances, 2, which.min) |
| 106 | + detections_name <- names(detections_idx) |
| 107 | + # For each label, extract the metadata where they had |
| 108 | + # minimal distance |
| 109 | + purrr::map_df(seq_len(length(detections_idx)), function(idx) { |
| 110 | + # Extract detection name and inced |
| 111 | + detection_name <- detections_name[idx] |
| 112 | + detection_idx <- detections_idx[idx] |
| 113 | + # Extract detection distance (min one defined above) |
| 114 | + detection_distance <- patterns_distances[detection_idx,] |
| 115 | + detection_distance <- detection_distance[detection_name] |
| 116 | + detection_distance <- as.numeric(detection_distance) |
| 117 | + # Extract detection dates |
| 118 | + detection_dates <- comparison_windows[[detection_idx]] |
| 119 | + # Prepare result and return it! |
| 120 | + tibble::tibble( |
| 121 | + change = detection_name, |
| 122 | + distance = detection_distance, |
| 123 | + from = detection_dates[["start"]], |
| 124 | + to = detection_dates[["end"]] |
| 125 | + ) |
| 126 | + }) |
55 | 127 | })
|
56 | 128 | }
|
0 commit comments