Skip to content

Commit ee6ecb4

Browse files
committed
include raster support in the detections api
1 parent a4d7211 commit ee6ecb4

12 files changed

+575
-122
lines changed

DESCRIPTION

+1
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ Collate:
148148
'api_mosaic.R'
149149
'api_opensearch.R'
150150
'api_parallel.R'
151+
'api_patterns.R'
151152
'api_period.R'
152153
'api_plot_time_series.R'
153154
'api_plot_raster.R'

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ S3method(sits_cube,local_cube)
339339
S3method(sits_cube,sar_cube)
340340
S3method(sits_cube,stac_cube)
341341
S3method(sits_detect_change,default)
342+
S3method(sits_detect_change,raster_cube)
342343
S3method(sits_detect_change,sits)
343344
S3method(sits_get_data,csv)
344345
S3method(sits_get_data,data.frame)

R/api_detect_changes.R

+195-4
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,211 @@
2525
# Divide samples in chunks to parallel processing
2626
parts <- .pred_create_partition(pred = samples, partitions = multicores)
2727
# Detect changes!
28-
detections <- .jobs_map_parallel_dfr(parts, function(part) {
28+
.jobs_map_parallel_dfr(parts, function(part) {
2929
# Get samples
3030
values <- .pred_part(part)
3131
# Detect changes! For detection, models can be time-aware. So, the
3232
# complete data, including dates, must be passed as argument.
33-
detections <- cd_method(values[["time_series"]])
33+
detections <- cd_method(values[["time_series"]], "ts")
3434
detections <- tibble::tibble(detections)
3535
# Prepare result
3636
result <- tibble::tibble(data.frame(values, detections = detections))
3737
class(result) <- class(values)
3838
# return
3939
result
40-
4140
}, progress = progress)
41+
}
4242

43-
return(detections)
43+
#' @title Detect changes from a chunk of raster data using multicores
44+
#' @name .detect_change_tile
45+
#' @keywords internal
46+
#' @noRd
47+
#' @param tile Single tile of a data cube.
48+
#' @param band Band to be produced.
49+
#' @param cd_method Change Detection Model.
50+
#' @param block Optimized block to be read into memory.
51+
#' @param roi Region of interest.
52+
#' @param filter_fn Smoothing filter function to be applied to the data.
53+
#' @param impute_fn Imputation function.
54+
#' @param output_dir Output directory.
55+
#' @param version Version of result.
56+
#' @param verbose Print processing information?
57+
#' @param progress Show progress bar?
58+
#' @return List of the classified raster layers.
59+
.detect_change_tile <- function(tile,
60+
band,
61+
cd_method,
62+
block,
63+
roi,
64+
filter_fn,
65+
impute_fn,
66+
output_dir,
67+
version,
68+
verbose,
69+
progress) {
70+
# Output file
71+
out_file <- .file_derived_name(
72+
tile = tile,
73+
band = band,
74+
version = version,
75+
output_dir = output_dir
76+
)
77+
# Resume feature
78+
if (file.exists(out_file)) {
79+
if (.check_messages()) {
80+
.check_recovery(out_file)
81+
}
82+
detected_changes_tile <- .tile_derived_from_file(
83+
file = out_file,
84+
band = band,
85+
base_tile = tile,
86+
labels = .ml_labels_code(cd_method),
87+
derived_class = "detections_cube",
88+
update_bbox = TRUE
89+
)
90+
return(detected_changes_tile)
91+
}
92+
# Show initial time for tile classification
93+
tile_start_time <- .tile_classif_start(
94+
tile = tile,
95+
verbose = verbose
96+
)
97+
# Tile timeline
98+
tile_timeline <- .tile_timeline(tile)
99+
# Create chunks as jobs
100+
chunks <- .tile_chunks_create(
101+
tile = tile,
102+
overlap = 0,
103+
block = block
104+
)
105+
# By default, update_bbox is FALSE
106+
update_bbox <- FALSE
107+
if (.has(roi)) {
108+
# How many chunks there are in tile?
109+
nchunks <- nrow(chunks)
110+
# Intersecting chunks with ROI
111+
chunks <- .chunks_filter_spatial(
112+
chunks = chunks,
113+
roi = roi
114+
)
115+
# Should bbox of resulting tile be updated?
116+
update_bbox <- nrow(chunks) != nchunks
117+
}
118+
# Process jobs in parallel
119+
block_files <- .jobs_map_parallel_chr(chunks, function(chunk) {
120+
# Job block
121+
block <- .block(chunk)
122+
# Block file name
123+
block_file <- .file_block_name(
124+
pattern = .file_pattern(out_file),
125+
block = block,
126+
output_dir = output_dir
127+
)
128+
# Resume processing in case of failure
129+
if (.raster_is_valid(block_file)) {
130+
return(block_file)
131+
}
132+
# Read and preprocess values
133+
values <- .classify_data_read(
134+
tile = tile,
135+
block = block,
136+
bands = .ml_bands(cd_method),
137+
ml_model = cd_method,
138+
impute_fn = impute_fn,
139+
filter_fn = filter_fn
140+
)
141+
# Get mask of NA pixels
142+
na_mask <- C_mask_na(values)
143+
# Fill with zeros remaining NA pixels
144+
values <- C_fill_na(values, 0)
145+
# Used to check values (below)
146+
n_input_pixels <- nrow(values)
147+
# Include names in cube predictors
148+
colnames(values) <- .pred_features_name(
149+
.ml_bands(cd_method), tile_timeline
150+
)
151+
# Prepare values
152+
values <- .pred_as_ts(values, .ml_bands(cd_method), tile_timeline) |>
153+
tidyr::nest(.by = "sample_id", .key = "time_series")
154+
# Log here
155+
.debug_log(
156+
event = "start_block_data_detection",
157+
key = "model",
158+
value = .ml_class(cd_method)
159+
)
160+
# Detect changes!
161+
values <- cd_method(values[["time_series"]], "cube") |>
162+
dplyr::as_tibble()
163+
# Are the results consistent with the data input?
164+
.check_processed_values(
165+
values = values,
166+
n_input_pixels = n_input_pixels
167+
)
168+
# Log here
169+
.debug_log(
170+
event = "end_block_data_detection",
171+
key = "model",
172+
value = .ml_class(cd_method)
173+
)
174+
# Prepare probability to be saved
175+
band_conf <- .conf_derived_band(
176+
derived_class = "detections_cube",
177+
band = band
178+
)
179+
offset <- .offset(band_conf)
180+
if (.has(offset) && offset != 0) {
181+
values <- values - offset
182+
}
183+
scale <- .scale(band_conf)
184+
if (.has(scale) && scale != 1) {
185+
values <- values / scale
186+
}
187+
# Mask NA pixels with same probabilities for all classes
188+
values[na_mask, ] <- 0 # event detection = 1, no event = 0
189+
# Log
190+
.debug_log(
191+
event = "start_block_data_save",
192+
key = "file",
193+
value = block_file
194+
)
195+
# Prepare and save results as raster
196+
.raster_write_block(
197+
files = block_file,
198+
block = block,
199+
bbox = .bbox(chunk),
200+
values = values,
201+
data_type = .data_type(band_conf),
202+
missing_value = .miss_value(band_conf),
203+
crop_block = NULL
204+
)
205+
# Log
206+
.debug_log(
207+
event = "end_block_data_save",
208+
key = "file",
209+
value = block_file
210+
)
211+
# Free memory
212+
gc()
213+
# Returned block file
214+
block_file
215+
}, progress = progress)
216+
# Merge blocks into a new detections_cube tile
217+
detections_tile <- .tile_derived_merge_blocks(
218+
file = out_file,
219+
band = band,
220+
labels = .ml_labels_code(cd_method),
221+
base_tile = tile,
222+
block_files = block_files,
223+
derived_class = "detections_cube",
224+
multicores = .jobs_multicores(),
225+
update_bbox = update_bbox
226+
)
227+
# show final time for detection
228+
.tile_classif_end(
229+
tile = tile,
230+
start_time = tile_start_time,
231+
verbose = verbose
232+
)
233+
# Return detections tile
234+
detections_tile
44235
}

R/api_dtw.R

+107-35
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,37 @@
1-
2-
#' @title Extract temporal pattern of samples using temporal median.
3-
#' @name .pattern_temporal_median
1+
# ---- Distances ----
2+
#' @title Calculate the DTW distance between temporal patterns and time-series.
3+
#' @name .dtw_distance
4+
#' @description This function calculates the DTW distance between label patterns
5+
#' and real data (e.g., sample data, data cube data). The distance is calculated
6+
#' without a window. It's use is recommended for big datasets.
47
#' @keywords internal
58
#' @noRd
6-
.pattern_temporal_median <- function(samples) {
7-
samples |>
8-
dplyr::group_by(.data[["label"]]) |>
9-
dplyr::group_map(function(data, name) {
10-
ts_median <- dplyr::bind_rows(data[["time_series"]]) |>
11-
dplyr::group_by(.data[["Index"]]) |>
12-
dplyr::summarize(dplyr::across(dplyr::everything(),
13-
stats::median, na.rm = TRUE)) |>
14-
dplyr::select(-.data[["Index"]])
15-
16-
ts_median["label"] <- name
17-
ts_median
18-
})
9+
.dtw_distance <- function(data, patterns) {
10+
# Prepare input data
11+
data <- as.matrix(.ts_values(data))
12+
# Calculate the DTW distance between `data` and `patterns`
13+
purrr::map_dfc(patterns, function(pattern) {
14+
# Prepare pattern data
15+
pattern_ts <- as.matrix(.ts_values(pattern))
16+
# Calculate distance
17+
stats::setNames(
18+
data.frame(distance = dtw_distance(data, pattern_ts)),
19+
pattern[["label"]][[1]]
20+
)
21+
})
1922
}
20-
21-
#' @title Calculate the DTW distance between label patterns and sample data.
22-
#' @name .pattern_distance_dtw
23+
#' @title Calculate the DTW distance between temporal patterns and time-series.
24+
#' @name .dtw_distance_windowed
2325
#' @description This function calculates the DTW distance between label patterns
24-
#' and sample data in a given temporal window.
26+
#' and real data (e.g., sample data, data cube data). The distance is calculated
27+
#' using windows.
2528
#' @keywords internal
2629
#' @noRd
27-
.pattern_distance_dtw <- function(data, patterns, windows) {
30+
.dtw_distance_windowed <- function(data, patterns, windows) {
2831
# Calculate the DTW distance between `data` and `patterns`
29-
purrr::map_dfc(1:length(patterns), function(pattern_index) {
30-
# Get pattern metadata
31-
pattern <- patterns[pattern_index][[1]]
32-
pattern_label <- unique(pattern[["label"]])
32+
purrr::map_dfc(patterns, function(pattern) {
3333
# Get pattern data
34-
pattern_ts <- dplyr::select(pattern, -.data[["label"]])
35-
pattern_ts <- as.matrix(pattern_ts)
34+
pattern_ts <- as.matrix(.ts_values(pattern))
3635
# Windowed search
3736
distances <- purrr::map_df(windows, function(window) {
3837
# Get time-series in the window
@@ -41,16 +40,89 @@
4140
.data[["Index"]] >= window[["start"]],
4241
.data[["Index"]] <= window[["end"]])
4342
# Remove the time reference column
44-
data_in_window <- dplyr::select(data_in_window, -.data[["Index"]])
45-
# Transform values in matrix (as expected in the cpp code)
46-
data_in_window <- as.matrix(data_in_window)
47-
data_in_window <- data_in_window
43+
data_in_window <- as.matrix(.ts_values(data_in_window))
4844
# Calculate distance
49-
distance_from_pattern <- dtw_distance(data_in_window, pattern_ts)
50-
# Prepare result and return it
51-
data.frame(distance = distance_from_pattern)
45+
data.frame(distance = dtw_distance(data_in_window, pattern_ts))
5246
})
5347
# Associate the pattern name with the distances
54-
stats::setNames(distances, pattern_label)
48+
stats::setNames(distances, pattern[["label"]][[1]])
49+
})
50+
}
51+
# ---- Operation mode ----
52+
#' @title Search for events in time series using complete data (no windowing).
53+
#' @name .dtw_complete_ts
54+
#' @description This function searches for events in time series without
55+
#' windowing.
56+
#' @keywords internal
57+
#' @noRd
58+
.dtw_complete_ts <- function(values, patterns, threshold, ...) {
59+
# Do the change detection for each time-series
60+
purrr::map_vec(values, function(value_row) {
61+
# Search for the patterns
62+
patterns_distances <- .dtw_distance(
63+
data = value_row,
64+
patterns = patterns
65+
)
66+
# Remove distances out the user-defined threshold
67+
as.numeric(any(patterns_distances <= threshold))
68+
})
69+
}
70+
#' @title Search for events in time series using windowing.
71+
#' @name .dtw_windowed_ts
72+
#' @description This function searches for events in time series with windowing.
73+
#' @keywords internal
74+
#' @noRd
75+
.dtw_windowed_ts <- function(values, patterns, window, threshold) {
76+
# Extract dates
77+
dates_min <- .ts_min_date(values[[1]])
78+
dates_max <- .ts_max_date(values[[1]])
79+
# Assume time-series are regularized, then use the period
80+
# as the step of the moving window. As a result, we have
81+
# one step per iteration.
82+
dates_step <- lubridate::as.period(
83+
lubridate::int_diff(.ts_index(values[[1]]))
84+
)
85+
dates_step <- dates_step[[1]]
86+
# Create comparison windows
87+
comparison_windows <- .period_windows(
88+
period = window,
89+
step = dates_step,
90+
start_date = dates_min,
91+
end_date = dates_max
92+
)
93+
# Do the change detection for each time-series
94+
purrr::map(values, function(value_row) {
95+
# Search for the patterns
96+
patterns_distances <- .dtw_distance_windowed(
97+
data = value_row,
98+
patterns = patterns,
99+
windows = comparison_windows
100+
)
101+
# Remove distances out the user-defined threshold
102+
patterns_distances[patterns_distances > threshold] <- NA
103+
# Define where each label was detected. For this, first
104+
# get from each label the minimal distance
105+
detections_idx <- apply(patterns_distances, 2, which.min)
106+
detections_name <- names(detections_idx)
107+
# For each label, extract the metadata where they had
108+
# minimal distance
109+
purrr::map_df(seq_len(length(detections_idx)), function(idx) {
110+
# Extract detection name and inced
111+
detection_name <- detections_name[idx]
112+
detection_idx <- detections_idx[idx]
113+
# Extract detection distance (min one defined above)
114+
detection_distance <- patterns_distances[detection_idx,]
115+
detection_distance <- detection_distance[detection_name]
116+
detection_distance <- as.numeric(detection_distance)
117+
# Extract detection dates
118+
detection_dates <- comparison_windows[[detection_idx]]
119+
# Prepare result and return it!
120+
tibble::tibble(
121+
change = detection_name,
122+
distance = detection_distance,
123+
from = detection_dates[["start"]],
124+
to = detection_dates[["end"]]
125+
)
126+
})
55127
})
56128
}

0 commit comments

Comments
 (0)