Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

detections api: include raster cube support #1130

Merged
merged 1 commit into from
May 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ Collate:
'api_mosaic.R'
'api_opensearch.R'
'api_parallel.R'
'api_patterns.R'
'api_period.R'
'api_plot_time_series.R'
'api_plot_raster.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ S3method(sits_cube,local_cube)
S3method(sits_cube,sar_cube)
S3method(sits_cube,stac_cube)
S3method(sits_detect_change,default)
S3method(sits_detect_change,raster_cube)
S3method(sits_detect_change,sits)
S3method(sits_get_data,csv)
S3method(sits_get_data,data.frame)
Expand Down
199 changes: 195 additions & 4 deletions R/api_detect_changes.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,211 @@
# Divide samples in chunks to parallel processing
parts <- .pred_create_partition(pred = samples, partitions = multicores)
# Detect changes!
detections <- .jobs_map_parallel_dfr(parts, function(part) {
.jobs_map_parallel_dfr(parts, function(part) {
# Get samples
values <- .pred_part(part)
# Detect changes! For detection, models can be time-aware. So, the
# complete data, including dates, must be passed as argument.
detections <- cd_method(values[["time_series"]])
detections <- cd_method(values[["time_series"]], "ts")
detections <- tibble::tibble(detections)
# Prepare result
result <- tibble::tibble(data.frame(values, detections = detections))
class(result) <- class(values)
# return
result

}, progress = progress)
}

return(detections)
#' @title Detect changes from a chunk of raster data using multicores
#' @name .detect_change_tile
#' @keywords internal
#' @noRd
#' @param tile Single tile of a data cube.
#' @param band Band to be produced.
#' @param cd_method Change Detection Model.
#' @param block Optimized block to be read into memory.
#' @param roi Region of interest.
#' @param filter_fn Smoothing filter function to be applied to the data.
#' @param impute_fn Imputation function.
#' @param output_dir Output directory.
#' @param version Version of result.
#' @param verbose Print processing information?
#' @param progress Show progress bar?
#' @return List of the classified raster layers.
.detect_change_tile <- function(tile,
band,
cd_method,
block,
roi,
filter_fn,
impute_fn,
output_dir,
version,
verbose,
progress) {
# Output file
out_file <- .file_derived_name(
tile = tile,
band = band,
version = version,
output_dir = output_dir
)
# Resume feature
if (file.exists(out_file)) {
if (.check_messages()) {
.check_recovery(out_file)
}
detected_changes_tile <- .tile_derived_from_file(
file = out_file,
band = band,
base_tile = tile,
labels = .ml_labels_code(cd_method),
derived_class = "detections_cube",
update_bbox = TRUE
)
return(detected_changes_tile)
}
# Show initial time for tile classification
tile_start_time <- .tile_classif_start(
tile = tile,
verbose = verbose
)
# Tile timeline
tile_timeline <- .tile_timeline(tile)
# Create chunks as jobs
chunks <- .tile_chunks_create(
tile = tile,
overlap = 0,
block = block
)
# By default, update_bbox is FALSE
update_bbox <- FALSE
if (.has(roi)) {
# How many chunks there are in tile?
nchunks <- nrow(chunks)
# Intersecting chunks with ROI
chunks <- .chunks_filter_spatial(
chunks = chunks,
roi = roi
)
# Should bbox of resulting tile be updated?
update_bbox <- nrow(chunks) != nchunks
}
# Process jobs in parallel
block_files <- .jobs_map_parallel_chr(chunks, function(chunk) {
# Job block
block <- .block(chunk)
# Block file name
block_file <- .file_block_name(
pattern = .file_pattern(out_file),
block = block,
output_dir = output_dir
)
# Resume processing in case of failure
if (.raster_is_valid(block_file)) {
return(block_file)
}
# Read and preprocess values
values <- .classify_data_read(
tile = tile,
block = block,
bands = .ml_bands(cd_method),
ml_model = cd_method,
impute_fn = impute_fn,
filter_fn = filter_fn
)
# Get mask of NA pixels
na_mask <- C_mask_na(values)
# Fill with zeros remaining NA pixels
values <- C_fill_na(values, 0)
# Used to check values (below)
n_input_pixels <- nrow(values)
# Include names in cube predictors
colnames(values) <- .pred_features_name(
.ml_bands(cd_method), tile_timeline
)
# Prepare values
values <- .pred_as_ts(values, .ml_bands(cd_method), tile_timeline) |>
tidyr::nest(.by = "sample_id", .key = "time_series")
# Log here
.debug_log(
event = "start_block_data_detection",
key = "model",
value = .ml_class(cd_method)
)
# Detect changes!
values <- cd_method(values[["time_series"]], "cube") |>
dplyr::as_tibble()
# Are the results consistent with the data input?
.check_processed_values(
values = values,
n_input_pixels = n_input_pixels
)
# Log here
.debug_log(
event = "end_block_data_detection",
key = "model",
value = .ml_class(cd_method)
)
# Prepare probability to be saved
band_conf <- .conf_derived_band(
derived_class = "detections_cube",
band = band
)
offset <- .offset(band_conf)
if (.has(offset) && offset != 0) {
values <- values - offset
}
scale <- .scale(band_conf)
if (.has(scale) && scale != 1) {
values <- values / scale
}
# Mask NA pixels with same probabilities for all classes
values[na_mask, ] <- 0 # event detection = 1, no event = 0
# Log
.debug_log(
event = "start_block_data_save",
key = "file",
value = block_file
)
# Prepare and save results as raster
.raster_write_block(
files = block_file,
block = block,
bbox = .bbox(chunk),
values = values,
data_type = .data_type(band_conf),
missing_value = .miss_value(band_conf),
crop_block = NULL
)
# Log
.debug_log(
event = "end_block_data_save",
key = "file",
value = block_file
)
# Free memory
gc()
# Returned block file
block_file
}, progress = progress)
# Merge blocks into a new detections_cube tile
detections_tile <- .tile_derived_merge_blocks(
file = out_file,
band = band,
labels = .ml_labels_code(cd_method),
base_tile = tile,
block_files = block_files,
derived_class = "detections_cube",
multicores = .jobs_multicores(),
update_bbox = update_bbox
)
# show final time for detection
.tile_classif_end(
tile = tile,
start_time = tile_start_time,
verbose = verbose
)
# Return detections tile
detections_tile
}
142 changes: 107 additions & 35 deletions R/api_dtw.R
Original file line number Diff line number Diff line change
@@ -1,38 +1,37 @@

#' @title Extract temporal pattern of samples using temporal median.
#' @name .pattern_temporal_median
# ---- Distances ----
#' @title Calculate the DTW distance between temporal patterns and time-series.
#' @name .dtw_distance
#' @description This function calculates the DTW distance between label patterns
#' and real data (e.g., sample data, data cube data). The distance is calculated
#' without a window. It's use is recommended for big datasets.
#' @keywords internal
#' @noRd
.pattern_temporal_median <- function(samples) {
samples |>
dplyr::group_by(.data[["label"]]) |>
dplyr::group_map(function(data, name) {
ts_median <- dplyr::bind_rows(data[["time_series"]]) |>
dplyr::group_by(.data[["Index"]]) |>
dplyr::summarize(dplyr::across(dplyr::everything(),
stats::median, na.rm = TRUE)) |>
dplyr::select(-.data[["Index"]])

ts_median["label"] <- name
ts_median
})
.dtw_distance <- function(data, patterns) {
# Prepare input data
data <- as.matrix(.ts_values(data))
# Calculate the DTW distance between `data` and `patterns`
purrr::map_dfc(patterns, function(pattern) {
# Prepare pattern data
pattern_ts <- as.matrix(.ts_values(pattern))
# Calculate distance
stats::setNames(
data.frame(distance = dtw_distance(data, pattern_ts)),
pattern[["label"]][[1]]
)
})
}

#' @title Calculate the DTW distance between label patterns and sample data.
#' @name .pattern_distance_dtw
#' @title Calculate the DTW distance between temporal patterns and time-series.
#' @name .dtw_distance_windowed
#' @description This function calculates the DTW distance between label patterns
#' and sample data in a given temporal window.
#' and real data (e.g., sample data, data cube data). The distance is calculated
#' using windows.
#' @keywords internal
#' @noRd
.pattern_distance_dtw <- function(data, patterns, windows) {
.dtw_distance_windowed <- function(data, patterns, windows) {
# Calculate the DTW distance between `data` and `patterns`
purrr::map_dfc(1:length(patterns), function(pattern_index) {
# Get pattern metadata
pattern <- patterns[pattern_index][[1]]
pattern_label <- unique(pattern[["label"]])
purrr::map_dfc(patterns, function(pattern) {
# Get pattern data
pattern_ts <- dplyr::select(pattern, -.data[["label"]])
pattern_ts <- as.matrix(pattern_ts)
pattern_ts <- as.matrix(.ts_values(pattern))
# Windowed search
distances <- purrr::map_df(windows, function(window) {
# Get time-series in the window
Expand All @@ -41,16 +40,89 @@
.data[["Index"]] >= window[["start"]],
.data[["Index"]] <= window[["end"]])
# Remove the time reference column
data_in_window <- dplyr::select(data_in_window, -.data[["Index"]])
# Transform values in matrix (as expected in the cpp code)
data_in_window <- as.matrix(data_in_window)
data_in_window <- data_in_window
data_in_window <- as.matrix(.ts_values(data_in_window))
# Calculate distance
distance_from_pattern <- dtw_distance(data_in_window, pattern_ts)
# Prepare result and return it
data.frame(distance = distance_from_pattern)
data.frame(distance = dtw_distance(data_in_window, pattern_ts))
})
# Associate the pattern name with the distances
stats::setNames(distances, pattern_label)
stats::setNames(distances, pattern[["label"]][[1]])
})
}
# ---- Operation mode ----
#' @title Search for events in time series using complete data (no windowing).
#' @name .dtw_complete_ts
#' @description This function searches for events in time series without
#' windowing.
#' @keywords internal
#' @noRd
.dtw_complete_ts <- function(values, patterns, threshold, ...) {
# Do the change detection for each time-series
purrr::map_vec(values, function(value_row) {
# Search for the patterns
patterns_distances <- .dtw_distance(
data = value_row,
patterns = patterns
)
# Remove distances out the user-defined threshold
as.numeric(any(patterns_distances <= threshold))
})
}
#' @title Search for events in time series using windowing.
#' @name .dtw_windowed_ts
#' @description This function searches for events in time series with windowing.
#' @keywords internal
#' @noRd
.dtw_windowed_ts <- function(values, patterns, window, threshold) {
# Extract dates
dates_min <- .ts_min_date(values[[1]])
dates_max <- .ts_max_date(values[[1]])
# Assume time-series are regularized, then use the period
# as the step of the moving window. As a result, we have
# one step per iteration.
dates_step <- lubridate::as.period(
lubridate::int_diff(.ts_index(values[[1]]))
)
dates_step <- dates_step[[1]]
# Create comparison windows
comparison_windows <- .period_windows(
period = window,
step = dates_step,
start_date = dates_min,
end_date = dates_max
)
# Do the change detection for each time-series
purrr::map(values, function(value_row) {
# Search for the patterns
patterns_distances <- .dtw_distance_windowed(
data = value_row,
patterns = patterns,
windows = comparison_windows
)
# Remove distances out the user-defined threshold
patterns_distances[patterns_distances > threshold] <- NA
# Define where each label was detected. For this, first
# get from each label the minimal distance
detections_idx <- apply(patterns_distances, 2, which.min)
detections_name <- names(detections_idx)
# For each label, extract the metadata where they had
# minimal distance
purrr::map_df(seq_len(length(detections_idx)), function(idx) {
# Extract detection name and inced
detection_name <- detections_name[idx]
detection_idx <- detections_idx[idx]
# Extract detection distance (min one defined above)
detection_distance <- patterns_distances[detection_idx,]
detection_distance <- detection_distance[detection_name]
detection_distance <- as.numeric(detection_distance)
# Extract detection dates
detection_dates <- comparison_windows[[detection_idx]]
# Prepare result and return it!
tibble::tibble(
change = detection_name,
distance = detection_distance,
from = detection_dates[["start"]],
to = detection_dates[["end"]]
)
})
})
}
Loading
Loading