|
1 | | - |
2 | | -#' @title Extract temporal pattern of samples using temporal median. |
3 | | -#' @name .pattern_temporal_median |
| 1 | +# ---- Distances ---- |
| 2 | +#' @title Calculate the DTW distance between temporal patterns and time-series. |
| 3 | +#' @name .dtw_distance |
| 4 | +#' @description This function calculates the DTW distance between label patterns |
| 5 | +#' and real data (e.g., sample data, data cube data). The distance is calculated |
| 6 | +#' without a window. It's use is recommended for big datasets. |
4 | 7 | #' @keywords internal |
5 | 8 | #' @noRd |
6 | | -.pattern_temporal_median <- function(samples) { |
7 | | - samples |> |
8 | | - dplyr::group_by(.data[["label"]]) |> |
9 | | - dplyr::group_map(function(data, name) { |
10 | | - ts_median <- dplyr::bind_rows(data[["time_series"]]) |> |
11 | | - dplyr::group_by(.data[["Index"]]) |> |
12 | | - dplyr::summarize(dplyr::across(dplyr::everything(), |
13 | | - stats::median, na.rm = TRUE)) |> |
14 | | - dplyr::select(-.data[["Index"]]) |
15 | | - |
16 | | - ts_median["label"] <- name |
17 | | - ts_median |
18 | | - }) |
| 9 | +.dtw_distance <- function(data, patterns) { |
| 10 | + # Prepare input data |
| 11 | + data <- as.matrix(.ts_values(data)) |
| 12 | + # Calculate the DTW distance between `data` and `patterns` |
| 13 | + purrr::map_dfc(patterns, function(pattern) { |
| 14 | + # Prepare pattern data |
| 15 | + pattern_ts <- as.matrix(.ts_values(pattern)) |
| 16 | + # Calculate distance |
| 17 | + stats::setNames( |
| 18 | + data.frame(distance = dtw_distance(data, pattern_ts)), |
| 19 | + pattern[["label"]][[1]] |
| 20 | + ) |
| 21 | + }) |
19 | 22 | } |
20 | | - |
21 | | -#' @title Calculate the DTW distance between label patterns and sample data. |
22 | | -#' @name .pattern_distance_dtw |
| 23 | +#' @title Calculate the DTW distance between temporal patterns and time-series. |
| 24 | +#' @name .dtw_distance_windowed |
23 | 25 | #' @description This function calculates the DTW distance between label patterns |
24 | | -#' and sample data in a given temporal window. |
| 26 | +#' and real data (e.g., sample data, data cube data). The distance is calculated |
| 27 | +#' using windows. |
25 | 28 | #' @keywords internal |
26 | 29 | #' @noRd |
27 | | -.pattern_distance_dtw <- function(data, patterns, windows) { |
| 30 | +.dtw_distance_windowed <- function(data, patterns, windows) { |
28 | 31 | # Calculate the DTW distance between `data` and `patterns` |
29 | | - purrr::map_dfc(1:length(patterns), function(pattern_index) { |
30 | | - # Get pattern metadata |
31 | | - pattern <- patterns[pattern_index][[1]] |
32 | | - pattern_label <- unique(pattern[["label"]]) |
| 32 | + purrr::map_dfc(patterns, function(pattern) { |
33 | 33 | # Get pattern data |
34 | | - pattern_ts <- dplyr::select(pattern, -.data[["label"]]) |
35 | | - pattern_ts <- as.matrix(pattern_ts) |
| 34 | + pattern_ts <- as.matrix(.ts_values(pattern)) |
36 | 35 | # Windowed search |
37 | 36 | distances <- purrr::map_df(windows, function(window) { |
38 | 37 | # Get time-series in the window |
|
41 | 40 | .data[["Index"]] >= window[["start"]], |
42 | 41 | .data[["Index"]] <= window[["end"]]) |
43 | 42 | # Remove the time reference column |
44 | | - data_in_window <- dplyr::select(data_in_window, -.data[["Index"]]) |
45 | | - # Transform values in matrix (as expected in the cpp code) |
46 | | - data_in_window <- as.matrix(data_in_window) |
47 | | - data_in_window <- data_in_window |
| 43 | + data_in_window <- as.matrix(.ts_values(data_in_window)) |
48 | 44 | # Calculate distance |
49 | | - distance_from_pattern <- dtw_distance(data_in_window, pattern_ts) |
50 | | - # Prepare result and return it |
51 | | - data.frame(distance = distance_from_pattern) |
| 45 | + data.frame(distance = dtw_distance(data_in_window, pattern_ts)) |
52 | 46 | }) |
53 | 47 | # Associate the pattern name with the distances |
54 | | - stats::setNames(distances, pattern_label) |
| 48 | + stats::setNames(distances, pattern[["label"]][[1]]) |
| 49 | + }) |
| 50 | +} |
| 51 | +# ---- Operation mode ---- |
| 52 | +#' @title Search for events in time series using complete data (no windowing). |
| 53 | +#' @name .dtw_complete_ts |
| 54 | +#' @description This function searches for events in time series without |
| 55 | +#' windowing. |
| 56 | +#' @keywords internal |
| 57 | +#' @noRd |
| 58 | +.dtw_complete_ts <- function(values, patterns, threshold, ...) { |
| 59 | + # Do the change detection for each time-series |
| 60 | + purrr::map_vec(values, function(value_row) { |
| 61 | + # Search for the patterns |
| 62 | + patterns_distances <- .dtw_distance( |
| 63 | + data = value_row, |
| 64 | + patterns = patterns |
| 65 | + ) |
| 66 | + # Remove distances out the user-defined threshold |
| 67 | + as.numeric(any(patterns_distances <= threshold)) |
| 68 | + }) |
| 69 | +} |
| 70 | +#' @title Search for events in time series using windowing. |
| 71 | +#' @name .dtw_windowed_ts |
| 72 | +#' @description This function searches for events in time series with windowing. |
| 73 | +#' @keywords internal |
| 74 | +#' @noRd |
| 75 | +.dtw_windowed_ts <- function(values, patterns, window, threshold) { |
| 76 | + # Extract dates |
| 77 | + dates_min <- .ts_min_date(values[[1]]) |
| 78 | + dates_max <- .ts_max_date(values[[1]]) |
| 79 | + # Assume time-series are regularized, then use the period |
| 80 | + # as the step of the moving window. As a result, we have |
| 81 | + # one step per iteration. |
| 82 | + dates_step <- lubridate::as.period( |
| 83 | + lubridate::int_diff(.ts_index(values[[1]])) |
| 84 | + ) |
| 85 | + dates_step <- dates_step[[1]] |
| 86 | + # Create comparison windows |
| 87 | + comparison_windows <- .period_windows( |
| 88 | + period = window, |
| 89 | + step = dates_step, |
| 90 | + start_date = dates_min, |
| 91 | + end_date = dates_max |
| 92 | + ) |
| 93 | + # Do the change detection for each time-series |
| 94 | + purrr::map(values, function(value_row) { |
| 95 | + # Search for the patterns |
| 96 | + patterns_distances <- .dtw_distance_windowed( |
| 97 | + data = value_row, |
| 98 | + patterns = patterns, |
| 99 | + windows = comparison_windows |
| 100 | + ) |
| 101 | + # Remove distances out the user-defined threshold |
| 102 | + patterns_distances[patterns_distances > threshold] <- NA |
| 103 | + # Define where each label was detected. For this, first |
| 104 | + # get from each label the minimal distance |
| 105 | + detections_idx <- apply(patterns_distances, 2, which.min) |
| 106 | + detections_name <- names(detections_idx) |
| 107 | + # For each label, extract the metadata where they had |
| 108 | + # minimal distance |
| 109 | + purrr::map_df(seq_len(length(detections_idx)), function(idx) { |
| 110 | + # Extract detection name and inced |
| 111 | + detection_name <- detections_name[idx] |
| 112 | + detection_idx <- detections_idx[idx] |
| 113 | + # Extract detection distance (min one defined above) |
| 114 | + detection_distance <- patterns_distances[detection_idx,] |
| 115 | + detection_distance <- detection_distance[detection_name] |
| 116 | + detection_distance <- as.numeric(detection_distance) |
| 117 | + # Extract detection dates |
| 118 | + detection_dates <- comparison_windows[[detection_idx]] |
| 119 | + # Prepare result and return it! |
| 120 | + tibble::tibble( |
| 121 | + change = detection_name, |
| 122 | + distance = detection_distance, |
| 123 | + from = detection_dates[["start"]], |
| 124 | + to = detection_dates[["end"]] |
| 125 | + ) |
| 126 | + }) |
55 | 127 | }) |
56 | 128 | } |
0 commit comments