Skip to content

Commit ea05a26

Browse files
Merge pull request #1181 from M3nin0/fix/segs-base-classify
2 parents 434d9c5 + 8aae5e2 commit ea05a26

File tree

4 files changed

+157
-63
lines changed

4 files changed

+157
-63
lines changed

R/api_classify.R

+6
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,8 @@
227227
#' in the classified images for each corresponding year.
228228
#'
229229
#' @param tile Single tile of a data cube.
230+
#' @param bands Bands to extract time series
231+
#' @param base_bands Base bands to extract values
230232
#' @param ml_model Model trained by \code{\link[sits]{sits_train}}.
231233
#' @param block Optimized block to be read into memory.
232234
#' @param roi Region of interest.
@@ -241,6 +243,8 @@
241243
#' @param progress Show progress bar?
242244
#' @return List of the classified raster layers.
243245
.classify_vector_tile <- function(tile,
246+
bands,
247+
base_bands,
244248
ml_model,
245249
block,
246250
roi,
@@ -322,6 +326,8 @@
322326
# Extract segments time series
323327
segments_ts <- .segments_poly_read(
324328
tile = tile,
329+
bands = bands,
330+
base_bands = base_bands,
325331
chunk = chunk,
326332
n_sam_pol = n_sam_pol,
327333
impute_fn = impute_fn

R/api_segments.R

+78-59
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@
264264
y = data,
265265
by = c(pol_id = "polygon_id")
266266
) |>
267-
dplyr::filter(.data[["pol_id"]] %in% unique(data[["polygon_id"]]))
267+
dplyr::filter(.data[["pol_id"]] %in% unique(data[["polygon_id"]]))
268268
}
269269
#'
270270
#' @name .segments_data_read
@@ -273,78 +273,45 @@
273273
#' @description Using the segments as polygons, get all time series
274274
#'
275275
#' @param tile tile of regular data cube
276+
#' @param bands Bands to extract time series
277+
#' @param base_bands Base bands to extract values
276278
#' @param chunk A chunk to be read.
277279
#' @param n_sam_pol Number of samples per polygon to be read.
278280
#' @param impute_fn Imputation function to remove NA
279281
#'
280282
#' @return samples associated to segments
281-
.segments_poly_read <- function(tile, chunk, n_sam_pol, impute_fn) {
283+
.segments_poly_read <- function(
284+
tile, bands, base_bands, chunk, n_sam_pol, impute_fn
285+
) {
286+
# define bands variables
287+
ts_bands <- NULL
288+
ts_bands_base <- NULL
282289
# For cubes that have a time limit to expire (MPC cubes only)
283290
tile <- .cube_token_generator(cube = tile)
284-
# Read and preprocess values of cloud
285-
# Get tile bands
286-
tile_bands <- .tile_bands(
287-
tile = tile,
288-
add_cloud = FALSE
289-
)
290291
# Read and preprocess values of each band
291-
ts_bands <- purrr::map(tile_bands, function(band) {
292+
ts_bands <- purrr::map(bands, function(band) {
292293
# extract band values
293-
values <- .tile_extract_segments(
294+
.tile_read_segments(
294295
tile = tile,
295296
band = band,
296-
chunk = chunk
297-
)
298-
pol_id <- values[, "pol_id"]
299-
values <- values[, -1:0]
300-
# Correct missing, minimum, and maximum values and
301-
# apply scale and offset.
302-
band_conf <- .tile_band_conf(
303-
tile = tile,
304-
band = band
297+
chunk = chunk,
298+
impute_fn = impute_fn
305299
)
306-
miss_value <- .miss_value(band_conf)
307-
if (.has(miss_value)) {
308-
values[values == miss_value] <- NA
309-
}
310-
min_value <- .min_value(band_conf)
311-
if (.has(min_value)) {
312-
values[values < min_value] <- NA
313-
}
314-
max_value <- .max_value(band_conf)
315-
if (.has(max_value)) {
316-
values[values > max_value] <- NA
317-
}
318-
scale <- .scale(band_conf)
319-
if (.has(scale) && scale != 1) {
320-
values <- values * scale
321-
}
322-
offset <- .offset(band_conf)
323-
if (.has(offset) && offset != 0) {
324-
values <- values + offset
325-
}
326-
# are there NA values? interpolate them
327-
if (anyNA(values)) {
328-
values <- impute_fn(values)
329-
}
330-
# Returning extracted time series
331-
return(list(pol_id, c(t(unname(values)))))
332300
})
333301
# extract the pol_id information from the first element of the list
334302
pol_id <- ts_bands[[1]][[1]]
335303
# remove the first element of the each list and retain the second
336304
ts_bands <- purrr::map(ts_bands, function(ts_band) ts_band[[2]])
337305
# rename the resulting list
338-
names(ts_bands) <- tile_bands
306+
names(ts_bands) <- bands
339307
# transform the list to a tibble
340308
ts_bands <- tibble::as_tibble(ts_bands)
341309
# retrieve the dates of the tile
342310
n_dates <- length(.tile_timeline(tile))
343311
# find how many samples have been extracted from the tile
344312
n_samples <- nrow(ts_bands) / n_dates
345313
# include sample_id information
346-
ts_bands[["sample_id"]] <- rep(seq_len(n_samples),
347-
each = n_dates)
314+
ts_bands[["sample_id"]] <- rep(seq_len(n_samples), each = n_dates)
348315
# include timeline
349316
ts_bands[["Index"]] <- rep(
350317
.tile_timeline(tile),
@@ -353,23 +320,70 @@
353320
# nest the values by bands
354321
ts_bands <- tidyr::nest(
355322
ts_bands,
356-
time_series = c("Index", dplyr::all_of(tile_bands))
323+
time_series = c("Index", dplyr::all_of(bands))
357324
)
325+
# if `base_bands` is available, transform it to the same structure as
326+
# `time_series`
327+
if (.has(base_bands)) {
328+
# read base data values
329+
ts_bands_base <- purrr::map(base_bands, function(band) {
330+
.tile_read_segments(
331+
tile = .tile_base_info(tile),
332+
band = band,
333+
chunk = chunk,
334+
impute_fn = impute_fn
335+
)
336+
})
337+
# remove polygon ids
338+
ts_bands_base <- purrr::map(ts_bands_base,
339+
function(ts_band) ts_band[[2]])
340+
# name band values
341+
names(ts_bands_base) <- base_bands
342+
# merge band values
343+
ts_bands_base <- dplyr::bind_cols(ts_bands_base)
344+
# include time reference in the data
345+
ts_bands_base[["Index"]] <- rep(
346+
.tile_timeline(.tile_base_info(tile)),
347+
times = n_samples
348+
)
349+
# include base bands data
350+
ts_bands <- tibble::add_column(ts_bands, ts_bands_base)
351+
# nest base data
352+
ts_bands <- tidyr::nest(
353+
ts_bands,
354+
base_data = c("Index", dplyr::all_of(base_bands))
355+
)
356+
}
358357
# include the ids of the polygons
359358
ts_bands[["polygon_id"]] <- pol_id
360-
# we do the unnest again because we do not know the polygon id index
361-
ts_bands <- tidyr::unnest(ts_bands, "time_series")
362-
# remove pixels where all timeline was NA
363-
ts_bands <- tidyr::drop_na(ts_bands)
364-
# nest the values by bands
365-
ts_bands <- tidyr::nest(
366-
ts_bands,
367-
time_series = c("Index", dplyr::all_of(tile_bands))
368-
)
359+
# define which columns must be checked to drop na values
360+
drop_na_colums <- list("time_series" = bands)
361+
# if `base_bands` is available, to `base_data` column is used
362+
if (.has(base_bands)) {
363+
drop_na_colums[["base_data"]] <- base_bands
364+
}
365+
# drop na values
366+
for (colname in names(drop_na_colums)) {
367+
# we do the unnest again because we do not know the polygon id index
368+
ts_bands <- tidyr::unnest(ts_bands, colname)
369+
# remove pixels where all timeline was NA
370+
ts_bands <- tidyr::drop_na(ts_bands)
371+
# nest the values by bands
372+
ts_bands <- tidyr::nest(
373+
ts_bands,
374+
!!colname := c("Index", dplyr::all_of(drop_na_colums[[colname]]))
375+
)
376+
}
377+
# define columns used in the points nest
378+
points_nest <- c("sample_id", "time_series")
379+
# if `base_bands` is available, include it in the nest operation
380+
if (.has(base_bands)) {
381+
points_nest <- c(points_nest, "base_data")
382+
}
369383
# nest the values by sample_id and time_series
370384
ts_bands <- tidyr::nest(
371385
ts_bands,
372-
points = c("sample_id", "time_series")
386+
points = points_nest
373387
)
374388
# retrieve the segments
375389
segments <- .vector_read_vec(chunk[["segments"]][[1]])
@@ -404,5 +418,10 @@
404418
samples <- .discard(samples, "sample_id")
405419
# set sits class
406420
class(samples) <- c("sits", class(samples))
421+
# define `sits_base` if applicable
422+
if (.has(base_bands)) {
423+
class(samples) <- c("sits_base", class(samples))
424+
}
425+
# return!
407426
return(samples)
408427
}

R/api_tile.R

+61-4
Original file line numberDiff line numberDiff line change
@@ -1443,11 +1443,11 @@ NULL
14431443
#' @keywords internal
14441444
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
14451445
#'
1446-
#' @description Given a data cube, retrieve the time series of XY locations
1446+
#' @description Given a tile and a band, return a set of values for segments
14471447
#'
1448-
#' @param tile ... TODO: document
1449-
#' @param band ...
1450-
#' @param chunk ...
1448+
#' @param tile Metadata about a data cube (one tile)
1449+
#' @param band Name of the band to the retrieved
1450+
#' @param chunk Chunk from where segments data will be extracted
14511451
#'
14521452
#' @return Data.frame with values per polygon.
14531453
.tile_extract_segments <- function(tile, band, chunk) {
@@ -1472,6 +1472,63 @@ NULL
14721472
# Return values
14731473
return(as.matrix(values))
14741474
}
1475+
#' @title Given a tile and a band, return a set of values for segments ready to
1476+
#' be used
1477+
#' @name .tile_extract_segments
1478+
#' @noRd
1479+
#' @keywords internal
1480+
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
1481+
#'
1482+
#' @description Given a tile and a band, return a set of values for segments
1483+
#' ready to be used (e.g., scale transformation, offset, and so on).
1484+
#'
1485+
#' @param tile Metadata about a data cube (one tile)
1486+
#' @param band Name of the band to the retrieved
1487+
#' @param chunk Chunk from where segments data will be extracted
1488+
#' @param impute_fn Imputation function to remove NA
1489+
#'
1490+
#' @return Data.frame with values per polygon.
1491+
.tile_read_segments <- function(tile, band, chunk, impute_fn) {
1492+
values <- .tile_extract_segments(
1493+
tile = tile,
1494+
band = band,
1495+
chunk = chunk
1496+
)
1497+
pol_id <- values[, "pol_id"]
1498+
values <- values[, -1:0]
1499+
# Correct missing, minimum, and maximum values and
1500+
# apply scale and offset.
1501+
band_conf <- .tile_band_conf(
1502+
tile = tile,
1503+
band = band
1504+
)
1505+
miss_value <- .miss_value(band_conf)
1506+
if (.has(miss_value)) {
1507+
values[values == miss_value] <- NA
1508+
}
1509+
min_value <- .min_value(band_conf)
1510+
if (.has(min_value)) {
1511+
values[values < min_value] <- NA
1512+
}
1513+
max_value <- .max_value(band_conf)
1514+
if (.has(max_value)) {
1515+
values[values > max_value] <- NA
1516+
}
1517+
scale <- .scale(band_conf)
1518+
if (.has(scale) && scale != 1) {
1519+
values <- values * scale
1520+
}
1521+
offset <- .offset(band_conf)
1522+
if (.has(offset) && offset != 0) {
1523+
values <- values + offset
1524+
}
1525+
# are there NA values? interpolate them
1526+
if (anyNA(values)) {
1527+
values <- impute_fn(values)
1528+
}
1529+
# Returning extracted time series
1530+
return(list(pol_id, c(t(unname(values)))))
1531+
}
14751532
#' @title Check if tile contains cloud band
14761533
#' @keywords internal
14771534
#' @noRd

R/sits_classify.R

+12
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,16 @@ sits_classify.segs_cube <- function(data,
429429
}
430430
if (.has(filter_fn))
431431
.check_filter_fn(filter_fn)
432+
# By default, base bands is null.
433+
base_bands <- NULL
434+
if (.cube_is_base(data)) {
435+
# Get base bands
436+
base_bands <- intersect(
437+
.ml_bands(ml_model), .cube_bands(.cube_base_info(data))
438+
)
439+
}
440+
# get non-base bands
441+
bands <- setdiff(.ml_bands(ml_model), base_bands)
432442
# Check memory and multicores
433443
# Get block size
434444
block <- .raster_file_blocksize(.raster_open_rast(.tile_path(data)))
@@ -471,6 +481,8 @@ sits_classify.segs_cube <- function(data,
471481
# Classify all the segments for each tile
472482
class_vector <- .classify_vector_tile(
473483
tile = tile,
484+
bands = bands,
485+
base_bands = base_bands,
474486
ml_model = ml_model,
475487
block = block,
476488
roi = roi,

0 commit comments

Comments
 (0)