e-sensing · gilbertocamara · May 18, 2024 · May 18, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -148,6 +148,7 @@ Collate:
     'api_mosaic.R'
     'api_opensearch.R'
     'api_parallel.R'
+    'api_patterns.R'
     'api_period.R'
     'api_plot_time_series.R'
     'api_plot_raster.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -339,6 +339,7 @@ S3method(sits_cube,local_cube)
 S3method(sits_cube,sar_cube)
 S3method(sits_cube,stac_cube)
 S3method(sits_detect_change,default)
+S3method(sits_detect_change,raster_cube)
 S3method(sits_detect_change,sits)
 S3method(sits_get_data,csv)
 S3method(sits_get_data,data.frame)

diff --git a/R/api_detect_changes.R b/R/api_detect_changes.R
@@ -25,20 +25,211 @@
     # Divide samples in chunks to parallel processing
     parts <- .pred_create_partition(pred = samples, partitions = multicores)
     # Detect changes!
-    detections <- .jobs_map_parallel_dfr(parts, function(part) {
+    .jobs_map_parallel_dfr(parts, function(part) {
         # Get samples
         values <- .pred_part(part)
         # Detect changes! For detection, models can be time-aware. So, the
         # complete data, including dates, must be passed as argument.
-        detections <- cd_method(values[["time_series"]])
+        detections <- cd_method(values[["time_series"]], "ts")
         detections <- tibble::tibble(detections)
         # Prepare result
         result <- tibble::tibble(data.frame(values, detections = detections))
         class(result) <- class(values)
         # return
         result
-
     }, progress = progress)
+}
 
-    return(detections)
+#' @title Detect changes from a chunk of raster data using multicores
+#' @name .detect_change_tile
+#' @keywords internal
+#' @noRd
+#' @param  tile            Single tile of a data cube.
+#' @param  band            Band to be produced.
+#' @param  cd_method       Change Detection Model.
+#' @param  block           Optimized block to be read into memory.
+#' @param  roi             Region of interest.
+#' @param  filter_fn       Smoothing filter function to be applied to the data.
+#' @param  impute_fn       Imputation function.
+#' @param  output_dir      Output directory.
+#' @param  version         Version of result.
+#' @param  verbose         Print processing information?
+#' @param  progress        Show progress bar?
+#' @return List of the classified raster layers.
+.detect_change_tile <- function(tile,
+                           band,
+                           cd_method,
+                           block,
+                           roi,
+                           filter_fn,
+                           impute_fn,
+                           output_dir,
+                           version,
+                           verbose,
+                           progress) {
+    # Output file
+    out_file <- .file_derived_name(
+        tile = tile,
+        band = band,
+        version = version,
+        output_dir = output_dir
+    )
+    # Resume feature
+    if (file.exists(out_file)) {
+        if (.check_messages()) {
+            .check_recovery(out_file)
+        }
+        detected_changes_tile <- .tile_derived_from_file(
+            file = out_file,
+            band = band,
+            base_tile = tile,
+            labels = .ml_labels_code(cd_method),
+            derived_class = "detections_cube",
+            update_bbox = TRUE
+        )
+        return(detected_changes_tile)
+    }
+    # Show initial time for tile classification
+    tile_start_time <- .tile_classif_start(
+        tile = tile,
+        verbose = verbose
+    )
+    # Tile timeline
+    tile_timeline <- .tile_timeline(tile)
+    # Create chunks as jobs
+    chunks <- .tile_chunks_create(
+        tile = tile,
+        overlap = 0,
+        block = block
+    )
+    # By default, update_bbox is FALSE
+    update_bbox <- FALSE
+    if (.has(roi)) {
+        # How many chunks there are in tile?
+        nchunks <- nrow(chunks)
+        # Intersecting chunks with ROI
+        chunks <- .chunks_filter_spatial(
+            chunks = chunks,
+            roi = roi
+        )
+        # Should bbox of resulting tile be updated?
+        update_bbox <- nrow(chunks) != nchunks
+    }
+    # Process jobs in parallel
+    block_files <- .jobs_map_parallel_chr(chunks, function(chunk) {
+        # Job block
+        block <- .block(chunk)
+        # Block file name
+        block_file <- .file_block_name(
+            pattern = .file_pattern(out_file),
+            block = block,
+            output_dir = output_dir
+        )
+        # Resume processing in case of failure
+        if (.raster_is_valid(block_file)) {
+            return(block_file)
+        }
+        # Read and preprocess values
+        values <- .classify_data_read(
+            tile = tile,
+            block = block,
+            bands = .ml_bands(cd_method),
+            ml_model = cd_method,
+            impute_fn = impute_fn,
+            filter_fn = filter_fn
+        )
+        # Get mask of NA pixels
+        na_mask <- C_mask_na(values)
+        # Fill with zeros remaining NA pixels
+        values <- C_fill_na(values, 0)
+        # Used to check values (below)
+        n_input_pixels <- nrow(values)
+        # Include names in cube predictors
+        colnames(values) <- .pred_features_name(
+            .ml_bands(cd_method), tile_timeline
+        )
+        # Prepare values
+        values <- .pred_as_ts(values, .ml_bands(cd_method), tile_timeline) |>
+                    tidyr::nest(.by = "sample_id", .key = "time_series")
+        # Log here
+        .debug_log(
+            event = "start_block_data_detection",
+            key = "model",
+            value = .ml_class(cd_method)
+        )
+        # Detect changes!
+        values <- cd_method(values[["time_series"]], "cube") |>
+                    dplyr::as_tibble()
+        # Are the results consistent with the data input?
+        .check_processed_values(
+            values = values,
+            n_input_pixels = n_input_pixels
+        )
+        # Log here
+        .debug_log(
+            event = "end_block_data_detection",
+            key = "model",
+            value = .ml_class(cd_method)
+        )
+        # Prepare probability to be saved
+        band_conf <- .conf_derived_band(
+            derived_class = "detections_cube",
+            band = band
+        )
+        offset <- .offset(band_conf)
+        if (.has(offset) && offset != 0) {
+            values <- values - offset
+        }
+        scale <- .scale(band_conf)
+        if (.has(scale) && scale != 1) {
+            values <- values / scale
+        }
+        # Mask NA pixels with same probabilities for all classes
+        values[na_mask, ] <- 0  # event detection = 1, no event = 0
+        # Log
+        .debug_log(
+            event = "start_block_data_save",
+            key = "file",
+            value = block_file
+        )
+        # Prepare and save results as raster
+        .raster_write_block(
+            files = block_file,
+            block = block,
+            bbox = .bbox(chunk),
+            values = values,
+            data_type = .data_type(band_conf),
+            missing_value = .miss_value(band_conf),
+            crop_block = NULL
+        )
+        # Log
+        .debug_log(
+            event = "end_block_data_save",
+            key = "file",
+            value = block_file
+        )
+        # Free memory
+        gc()
+        # Returned block file
+        block_file
+    }, progress = progress)
+    # Merge blocks into a new detections_cube tile
+    detections_tile <- .tile_derived_merge_blocks(
+        file = out_file,
+        band = band,
+        labels = .ml_labels_code(cd_method),
+        base_tile = tile,
+        block_files = block_files,
+        derived_class = "detections_cube",
+        multicores = .jobs_multicores(),
+        update_bbox = update_bbox
+    )
+    # show final time for detection
+    .tile_classif_end(
+        tile = tile,
+        start_time = tile_start_time,
+        verbose = verbose
+    )
+    # Return detections tile
+    detections_tile
 }
diff --git a/R/api_dtw.R b/R/api_dtw.R
@@ -1,38 +1,37 @@
-
-#' @title Extract temporal pattern of samples using temporal median.
-#' @name .pattern_temporal_median
+# ---- Distances ----
+#' @title Calculate the DTW distance between temporal patterns and time-series.
+#' @name .dtw_distance
+#' @description This function calculates the DTW distance between label patterns
+#' and real data (e.g., sample data, data cube data). The distance is calculated
+#' without a window. It's use is recommended for big datasets.
 #' @keywords internal
 #' @noRd
-.pattern_temporal_median <- function(samples) {
-    samples |>
-        dplyr::group_by(.data[["label"]]) |>
-        dplyr::group_map(function(data, name) {
-            ts_median <- dplyr::bind_rows(data[["time_series"]]) |>
-                dplyr::group_by(.data[["Index"]]) |>
-                dplyr::summarize(dplyr::across(dplyr::everything(),
-                                               stats::median, na.rm = TRUE)) |>
-                dplyr::select(-.data[["Index"]])
-
-            ts_median["label"] <- name
-            ts_median
-        })
+.dtw_distance <- function(data, patterns) {
+    # Prepare input data
+    data <- as.matrix(.ts_values(data))
+    # Calculate the DTW distance between `data` and `patterns`
+    purrr::map_dfc(patterns, function(pattern) {
+        # Prepare pattern data
+        pattern_ts <- as.matrix(.ts_values(pattern))
+        # Calculate distance
+        stats::setNames(
+            data.frame(distance = dtw_distance(data, pattern_ts)),
+            pattern[["label"]][[1]]
+        )
+    })
 }
-
-#' @title Calculate the DTW distance between label patterns and sample data.
-#' @name .pattern_distance_dtw
+#' @title Calculate the DTW distance between temporal patterns and time-series.
+#' @name .dtw_distance_windowed
 #' @description This function calculates the DTW distance between label patterns
-#' and sample data in a given temporal window.
+#' and real data (e.g., sample data, data cube data). The distance is calculated
+#' using windows.
 #' @keywords internal
 #' @noRd
-.pattern_distance_dtw <- function(data, patterns, windows) {
+.dtw_distance_windowed <- function(data, patterns, windows) {
     # Calculate the DTW distance between `data` and `patterns`
-    purrr::map_dfc(1:length(patterns), function(pattern_index) {
-        # Get pattern metadata
-        pattern <- patterns[pattern_index][[1]]
-        pattern_label <- unique(pattern[["label"]])
+    purrr::map_dfc(patterns, function(pattern) {
         # Get pattern data
-        pattern_ts <- dplyr::select(pattern, -.data[["label"]])
-        pattern_ts <- as.matrix(pattern_ts)
+        pattern_ts <- as.matrix(.ts_values(pattern))
         # Windowed search
         distances <- purrr::map_df(windows, function(window) {
             # Get time-series in the window
@@ -41,16 +40,89 @@
                               .data[["Index"]] >= window[["start"]],
                               .data[["Index"]] <= window[["end"]])
             # Remove the time reference column
-            data_in_window <- dplyr::select(data_in_window, -.data[["Index"]])
-            # Transform values in matrix (as expected in the cpp code)
-            data_in_window <- as.matrix(data_in_window)
-            data_in_window <- data_in_window
+            data_in_window <- as.matrix(.ts_values(data_in_window))
             # Calculate distance
-            distance_from_pattern <- dtw_distance(data_in_window, pattern_ts)
-            # Prepare result and return it
-            data.frame(distance = distance_from_pattern)
+            data.frame(distance = dtw_distance(data_in_window, pattern_ts))
         })
         # Associate the pattern name with the distances
-        stats::setNames(distances, pattern_label)
+        stats::setNames(distances, pattern[["label"]][[1]])
+    })
+}
+# ---- Operation mode ----
+#' @title Search for events in time series using complete data (no windowing).
+#' @name .dtw_complete_ts
+#' @description This function searches for events in time series without
+#' windowing.
+#' @keywords internal
+#' @noRd
+.dtw_complete_ts <- function(values, patterns, threshold, ...) {
+    # Do the change detection for each time-series
+    purrr::map_vec(values, function(value_row) {
+        # Search for the patterns
+        patterns_distances <- .dtw_distance(
+            data       = value_row,
+            patterns   = patterns
+        )
+        # Remove distances out the user-defined threshold
+        as.numeric(any(patterns_distances <= threshold))
+    })
+}
+#' @title Search for events in time series using windowing.
+#' @name .dtw_windowed_ts
+#' @description This function searches for events in time series with windowing.
+#' @keywords internal
+#' @noRd
+.dtw_windowed_ts <- function(values, patterns, window, threshold) {
+    # Extract dates
+    dates_min  <- .ts_min_date(values[[1]])
+    dates_max  <- .ts_max_date(values[[1]])
+    # Assume time-series are regularized, then use the period
+    # as the step of the moving window. As a result, we have
+    # one step per iteration.
+    dates_step <- lubridate::as.period(
+        lubridate::int_diff(.ts_index(values[[1]]))
+    )
+    dates_step <- dates_step[[1]]
+    # Create comparison windows
+    comparison_windows <- .period_windows(
+        period = window,
+        step = dates_step,
+        start_date = dates_min,
+        end_date = dates_max
+    )
+    # Do the change detection for each time-series
+    purrr::map(values, function(value_row) {
+        # Search for the patterns
+        patterns_distances <- .dtw_distance_windowed(
+            data       = value_row,
+            patterns   = patterns,
+            windows    = comparison_windows
+        )
+        # Remove distances out the user-defined threshold
+        patterns_distances[patterns_distances > threshold] <- NA
+        # Define where each label was detected. For this, first
+        # get from each label the minimal distance
+        detections_idx <- apply(patterns_distances, 2, which.min)
+        detections_name <- names(detections_idx)
+        # For each label, extract the metadata where they had
+        # minimal distance
+        purrr::map_df(seq_len(length(detections_idx)), function(idx) {
+            # Extract detection name and inced
+            detection_name <- detections_name[idx]
+            detection_idx <- detections_idx[idx]
+            # Extract detection distance (min one defined above)
+            detection_distance <- patterns_distances[detection_idx,]
+            detection_distance <- detection_distance[detection_name]
+            detection_distance <- as.numeric(detection_distance)
+            # Extract detection dates
+            detection_dates <- comparison_windows[[detection_idx]]
+            # Prepare result and return it!
+            tibble::tibble(
+                change = detection_name,
+                distance = detection_distance,
+                from = detection_dates[["start"]],
+                to = detection_dates[["end"]]
+            )
+        })
     })
 }