|
264 | 264 | y = data,
|
265 | 265 | by = c(pol_id = "polygon_id")
|
266 | 266 | ) |>
|
267 |
| - dplyr::filter(.data[["pol_id"]] %in% unique(data[["polygon_id"]])) |
| 267 | + dplyr::filter(.data[["pol_id"]] %in% unique(data[["polygon_id"]])) |
268 | 268 | }
|
269 | 269 | #'
|
270 | 270 | #' @name .segments_data_read
|
|
273 | 273 | #' @description Using the segments as polygons, get all time series
|
274 | 274 | #'
|
275 | 275 | #' @param tile tile of regular data cube
|
| 276 | +#' @param bands Bands to extract time series |
| 277 | +#' @param base_bands Base bands to extract values |
276 | 278 | #' @param chunk A chunk to be read.
|
277 | 279 | #' @param n_sam_pol Number of samples per polygon to be read.
|
278 | 280 | #' @param impute_fn Imputation function to remove NA
|
279 | 281 | #'
|
280 | 282 | #' @return samples associated to segments
|
281 |
| -.segments_poly_read <- function(tile, chunk, n_sam_pol, impute_fn) { |
| 283 | +.segments_poly_read <- function( |
| 284 | + tile, bands, base_bands, chunk, n_sam_pol, impute_fn |
| 285 | +) { |
| 286 | + # define bands variables |
| 287 | + ts_bands <- NULL |
| 288 | + ts_bands_base <- NULL |
282 | 289 | # For cubes that have a time limit to expire (MPC cubes only)
|
283 | 290 | tile <- .cube_token_generator(cube = tile)
|
284 |
| - # Read and preprocess values of cloud |
285 |
| - # Get tile bands |
286 |
| - tile_bands <- .tile_bands( |
287 |
| - tile = tile, |
288 |
| - add_cloud = FALSE |
289 |
| - ) |
290 | 291 | # Read and preprocess values of each band
|
291 |
| - ts_bands <- purrr::map(tile_bands, function(band) { |
| 292 | + ts_bands <- purrr::map(bands, function(band) { |
292 | 293 | # extract band values
|
293 |
| - values <- .tile_extract_segments( |
| 294 | + .tile_read_segments( |
294 | 295 | tile = tile,
|
295 | 296 | band = band,
|
296 |
| - chunk = chunk |
297 |
| - ) |
298 |
| - pol_id <- values[, "pol_id"] |
299 |
| - values <- values[, -1:0] |
300 |
| - # Correct missing, minimum, and maximum values and |
301 |
| - # apply scale and offset. |
302 |
| - band_conf <- .tile_band_conf( |
303 |
| - tile = tile, |
304 |
| - band = band |
| 297 | + chunk = chunk, |
| 298 | + impute_fn = impute_fn |
305 | 299 | )
|
306 |
| - miss_value <- .miss_value(band_conf) |
307 |
| - if (.has(miss_value)) { |
308 |
| - values[values == miss_value] <- NA |
309 |
| - } |
310 |
| - min_value <- .min_value(band_conf) |
311 |
| - if (.has(min_value)) { |
312 |
| - values[values < min_value] <- NA |
313 |
| - } |
314 |
| - max_value <- .max_value(band_conf) |
315 |
| - if (.has(max_value)) { |
316 |
| - values[values > max_value] <- NA |
317 |
| - } |
318 |
| - scale <- .scale(band_conf) |
319 |
| - if (.has(scale) && scale != 1) { |
320 |
| - values <- values * scale |
321 |
| - } |
322 |
| - offset <- .offset(band_conf) |
323 |
| - if (.has(offset) && offset != 0) { |
324 |
| - values <- values + offset |
325 |
| - } |
326 |
| - # are there NA values? interpolate them |
327 |
| - if (anyNA(values)) { |
328 |
| - values <- impute_fn(values) |
329 |
| - } |
330 |
| - # Returning extracted time series |
331 |
| - return(list(pol_id, c(t(unname(values))))) |
332 | 300 | })
|
333 | 301 | # extract the pol_id information from the first element of the list
|
334 | 302 | pol_id <- ts_bands[[1]][[1]]
|
335 | 303 | # remove the first element of the each list and retain the second
|
336 | 304 | ts_bands <- purrr::map(ts_bands, function(ts_band) ts_band[[2]])
|
337 | 305 | # rename the resulting list
|
338 |
| - names(ts_bands) <- tile_bands |
| 306 | + names(ts_bands) <- bands |
339 | 307 | # transform the list to a tibble
|
340 | 308 | ts_bands <- tibble::as_tibble(ts_bands)
|
341 | 309 | # retrieve the dates of the tile
|
342 | 310 | n_dates <- length(.tile_timeline(tile))
|
343 | 311 | # find how many samples have been extracted from the tile
|
344 | 312 | n_samples <- nrow(ts_bands) / n_dates
|
345 | 313 | # include sample_id information
|
346 |
| - ts_bands[["sample_id"]] <- rep(seq_len(n_samples), |
347 |
| - each = n_dates) |
| 314 | + ts_bands[["sample_id"]] <- rep(seq_len(n_samples), each = n_dates) |
348 | 315 | # include timeline
|
349 | 316 | ts_bands[["Index"]] <- rep(
|
350 | 317 | .tile_timeline(tile),
|
|
353 | 320 | # nest the values by bands
|
354 | 321 | ts_bands <- tidyr::nest(
|
355 | 322 | ts_bands,
|
356 |
| - time_series = c("Index", dplyr::all_of(tile_bands)) |
| 323 | + time_series = c("Index", dplyr::all_of(bands)) |
357 | 324 | )
|
| 325 | + # if `base_bands` is available, transform it to the same structure as |
| 326 | + # `time_series` |
| 327 | + if (.has(base_bands)) { |
| 328 | + # read base data values |
| 329 | + ts_bands_base <- purrr::map(base_bands, function(band) { |
| 330 | + .tile_read_segments( |
| 331 | + tile = .tile_base_info(tile), |
| 332 | + band = band, |
| 333 | + chunk = chunk, |
| 334 | + impute_fn = impute_fn |
| 335 | + ) |
| 336 | + }) |
| 337 | + # remove polygon ids |
| 338 | + ts_bands_base <- purrr::map(ts_bands_base, |
| 339 | + function(ts_band) ts_band[[2]]) |
| 340 | + # name band values |
| 341 | + names(ts_bands_base) <- base_bands |
| 342 | + # merge band values |
| 343 | + ts_bands_base <- dplyr::bind_cols(ts_bands_base) |
| 344 | + # include time reference in the data |
| 345 | + ts_bands_base[["Index"]] <- rep( |
| 346 | + .tile_timeline(.tile_base_info(tile)), |
| 347 | + times = n_samples |
| 348 | + ) |
| 349 | + # include base bands data |
| 350 | + ts_bands <- tibble::add_column(ts_bands, ts_bands_base) |
| 351 | + # nest base data |
| 352 | + ts_bands <- tidyr::nest( |
| 353 | + ts_bands, |
| 354 | + base_data = c("Index", dplyr::all_of(base_bands)) |
| 355 | + ) |
| 356 | + } |
358 | 357 | # include the ids of the polygons
|
359 | 358 | ts_bands[["polygon_id"]] <- pol_id
|
360 |
| - # we do the unnest again because we do not know the polygon id index |
361 |
| - ts_bands <- tidyr::unnest(ts_bands, "time_series") |
362 |
| - # remove pixels where all timeline was NA |
363 |
| - ts_bands <- tidyr::drop_na(ts_bands) |
364 |
| - # nest the values by bands |
365 |
| - ts_bands <- tidyr::nest( |
366 |
| - ts_bands, |
367 |
| - time_series = c("Index", dplyr::all_of(tile_bands)) |
368 |
| - ) |
| 359 | + # define which columns must be checked to drop na values |
| 360 | + drop_na_colums <- list("time_series" = bands) |
| 361 | + # if `base_bands` is available, to `base_data` column is used |
| 362 | + if (.has(base_bands)) { |
| 363 | + drop_na_colums[["base_data"]] <- base_bands |
| 364 | + } |
| 365 | + # drop na values |
| 366 | + for (colname in names(drop_na_colums)) { |
| 367 | + # we do the unnest again because we do not know the polygon id index |
| 368 | + ts_bands <- tidyr::unnest(ts_bands, colname) |
| 369 | + # remove pixels where all timeline was NA |
| 370 | + ts_bands <- tidyr::drop_na(ts_bands) |
| 371 | + # nest the values by bands |
| 372 | + ts_bands <- tidyr::nest( |
| 373 | + ts_bands, |
| 374 | + !!colname := c("Index", dplyr::all_of(drop_na_colums[[colname]])) |
| 375 | + ) |
| 376 | + } |
| 377 | + # define columns used in the points nest |
| 378 | + points_nest <- c("sample_id", "time_series") |
| 379 | + # if `base_bands` is available, include it in the nest operation |
| 380 | + if (.has(base_bands)) { |
| 381 | + points_nest <- c(points_nest, "base_data") |
| 382 | + } |
369 | 383 | # nest the values by sample_id and time_series
|
370 | 384 | ts_bands <- tidyr::nest(
|
371 | 385 | ts_bands,
|
372 |
| - points = c("sample_id", "time_series") |
| 386 | + points = points_nest |
373 | 387 | )
|
374 | 388 | # retrieve the segments
|
375 | 389 | segments <- .vector_read_vec(chunk[["segments"]][[1]])
|
|
404 | 418 | samples <- .discard(samples, "sample_id")
|
405 | 419 | # set sits class
|
406 | 420 | class(samples) <- c("sits", class(samples))
|
| 421 | + # define `sits_base` if applicable |
| 422 | + if (.has(base_bands)) { |
| 423 | + class(samples) <- c("sits_base", class(samples)) |
| 424 | + } |
| 425 | + # return! |
407 | 426 | return(samples)
|
408 | 427 | }
|
0 commit comments