Skip to content

Commit be8f102

Browse files
improve stratified sample
1 parent 00a4fb0 commit be8f102

7 files changed

Lines changed: 97 additions & 57 deletions

File tree

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: sits
22
Type: Package
3-
Version: 1.6.0
3+
Version: 2.0.0
44
Title: Satellite Image Time Series Analysis for Earth Observation Data Cubes
55
Authors@R: c(person('Rolf', 'Simoes', role = c('aut'), email = 'rolfsimoes@gmail.com'),
66
person('Gilberto', 'Camara', role = c('aut', 'cre', 'ths'), email = 'gilberto.camara.inpe@gmail.com'),
@@ -296,3 +296,4 @@ Collate:
296296
'sits_xlsx.R'
297297
'zzz.R'
298298
Config/roxygen2/version: 8.0.0
299+
RoxygenNote: 7.3.3

R/api_check.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1895,6 +1895,19 @@
18951895
.check_set_caller(".check_smoothness")
18961896
.check_that(length(smoothness) == 1L || length(smoothness) == nlabels)
18971897
}
1898+
#' @title Check samples_per_class parameter
1899+
#' @name .check_samples_per_class
1900+
#' @param samples_per_class a vector with the number of values to
1901+
#' allocate per class
1902+
#' @param labels labels of the cube
1903+
#' @return Called for side effects.
1904+
#' @keywords internal
1905+
#' @noRd
1906+
.check_samples_per_class <- function(samples_per_class, labels) {
1907+
.check_set_caller(".check_samples_per_class")
1908+
.check_that(length(samples_per_class) == 1L ||
1909+
length(samples_per_class) == length(labels))
1910+
}
18981911
#' @title Check if data contains predicted and reference values
18991912
#' @name .check_pred_ref_match
19001913
#' @param reference vector with reference labels

R/api_samples.R

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,7 @@
314314
#' @author Felipe Carlos, \email{efelipecarlos@@gmail.com}
315315
#' @author Felipe Carvalho, \email{felipe.carvalho@@inpe.br}
316316
#' @param cube Classified data cube (raster or vector)
317-
#' @param samples_class Matrix with sampling design to be allocated
318-
#' @param alloc Allocation method chosen
317+
#' @param samples_per_class Number of samples allocated per class
319318
#' @param dots Other params for the function
320319
#' @param multicores Number of cores to work in parallel
321320
#' @param block Optimized block to be read into memory (used only in
@@ -325,21 +324,21 @@
325324
#' @keywords internal
326325
#' @noRd
327326
.samples_alloc_strata <- function(cube,
328-
samples_class,
329-
alloc, ...) {
327+
samples_per_class, ...) {
330328
UseMethod(".samples_alloc_strata", cube)
331329
}
332330
#' @export
333331
.samples_alloc_strata.class_cube <- function(cube,
334-
samples_class,
335-
alloc, ...,
332+
samples_per_class, ...,
336333
block,
337334
progress = progress) {
338335
# estimate size
339-
size <- samples_class[[alloc]]
336+
size <- unname(samples_per_class)
340337
size <- ceiling(max(size) / nrow(cube))
341338
# get labels
342-
labels <- samples_class[["label"]]
339+
labels <- names(samples_per_class)
340+
names(labels) <- c(1:length(labels))
341+
covers <- names(labels)
343342
# Create assets as jobs
344343
cube_assets <- .cube_split_assets(cube)
345344
# Process each asset in parallel
@@ -400,7 +399,7 @@
400399
cell_xy <- .raster_open_vect(cell_xy, crs = .raster_crs(tile_raster))
401400
# Return as sf
402401
sf::st_as_sf(x = cbind(cell_xy, cells[, 2, drop = FALSE])) |>
403-
dplyr::left_join(samples_class, by = c("cover" = "label_id")) |>
402+
dplyr::mutate(label = labels[.data[["cover"]]]) |>
404403
dplyr::select("label", "geometry") |>
405404
sf::st_transform(crs = "EPSG:4326")
406405
})
@@ -409,10 +408,7 @@
409408
# Process labels
410409
samples <- .map_dfr(labels, function(lab) {
411410
# get metadata for the current label
412-
samples_label <- samples_class |>
413-
dplyr::filter(.data[["label"]] == lab)
414-
# extract alloc strategy
415-
samples_label <- unique(samples_label[[alloc]])
411+
samples_label <- samples_per_class[[lab]]
416412
# filter data
417413
samples |>
418414
dplyr::filter(.data[["label"]] == lab) |>
@@ -423,8 +419,7 @@
423419
}
424420
#' @export
425421
.samples_alloc_strata.class_vector_cube <- function(cube,
426-
samples_class,
427-
alloc, ...,
422+
samples_per_class, ...,
428423
multicores = 2,
429424
progress = progress) {
430425
# Open segments and transform them to tibble
@@ -438,10 +433,7 @@
438433
# prepare class name
439434
class <- class[["class"]]
440435
# get metadata for the current label
441-
samples_label <- samples_class |>
442-
dplyr::filter(.data[["label"]] == class)
443-
# extract alloc strategy
444-
samples_label <- samples_label[[alloc]]
436+
samples_label <- samples_per_class(cl)
445437
# extract samples
446438
samples_label <- sf::st_sample(cl, samples_label)
447439
# prepare extracted samples
@@ -497,14 +489,13 @@
497489
)
498490
# merge sampling design with samples metadata to ensure reference to the
499491
# correct class / values from the cube
500-
samples_class <- dplyr::inner_join(
492+
samples_per_class <- dplyr::inner_join(
501493
x = sampling_design,
502494
y = labels,
503495
by = "labels"
504496
) |>
505497
dplyr::select("labels", "label_id", dplyr::all_of(alloc)) |>
506498
dplyr::rename("label" = "labels")
507499
# include overhead
508-
samples_class[alloc] <- ceiling(unlist(samples_class[[alloc]]) * overhead)
509-
500+
samples_per_class <- ceiling(unlist(samples_per_class[[alloc]]) * overhead)
510501
}

R/sits_sample_functions.R

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -532,24 +532,26 @@ sits_sampling_design <- function(cube,
532532
#' with a number of samples per class and allocates a set of
533533
#' locations for each class.
534534
#'
535-
#' (b) When the parameter "n_alloc_class" is available, the method selects
536-
#' a set of locations based on a classified cube based on the number of
537-
#' samples per class in this parameter. The "n_alloc_class" parameter
535+
#' (b) When the parameter "sampling_design" is not provided, the method selects
536+
#' a set of locations based on a classified cube based on the parameter
537+
#' "samples_per_class". This parameter
538538
#' should either be a named vector (names are the labels of the cube)
539-
#' or a single value. In the latter case, this value is used to retrieve
539+
#' or a single value. In the latter case, the same value is used to retrieve
540540
#' the samples for all classes.
541541
#'
542542
#' @param cube Classified cube
543543
#' @param sampling_design Result of sits_sampling_design
544544
#' @param alloc Allocation method chosen
545+
#' @param samples_per_class Number of samples per class
546+
#' (in case sampling_design is NULL)
545547
#' @param overhead Additional percentage to account
546548
#' for border points
547549
#' @param multicores Number of cores that will be used to
548550
#' sample the images in parallel.
549551
#' @param memsize Memory available for sampling.
550552
#' @param shp_file Name of shapefile to be saved (optional)
551553
#' @param progress Show progress bar? Default is TRUE.
552-
#' @return samples Point sf object with required samples
554+
#' @return samples Point sf object with required samples and label
553555
#'
554556
#' @examples
555557
#' if (sits_run_examples()) {
@@ -571,24 +573,31 @@ sits_sampling_design <- function(cube,
571573
#' probs_cube,
572574
#' output_dir = tempdir()
573575
#' )
576+
#' # Option 1 - select samples based on sampling design
574577
#' # estimated UA for classes
575578
#' expected_ua <- c(
576579
#' Cerrado = 0.95, Forest = 0.95,
577580
#' Pasture = 0.95, Soy_Corn = 0.95
578581
#' )
579582
#' # design sampling
580583
#' sampling_design <- sits_sampling_design(label_cube, expected_ua)
581-
#' # select samples
584+
#' # select samples using the sampling design
582585
#' samples <- sits_stratified_sampling(
583586
#' label_cube,
584-
#' sampling_design, "alloc_prop"
587+
#' sampling_design = sampling_design,
588+
#' alloc = "alloc_prop"
589+
#' )
590+
#' # Option 2 - Select samples based on a fixed number of samples per class
591+
#' samples <- sits_stratified_sampling(
592+
#' label_cube,
593+
#' samples_per_class = 100
585594
#' )
586595
#' }
587596
#' @export
588597
sits_stratified_sampling <- function(cube,
589598
sampling_design = NULL,
590599
alloc = "alloc_prop",
591-
n_alloc_class = 100,
600+
samples_per_class = 100,
592601
overhead = 1.2,
593602
multicores = 2L,
594603
memsize = 2L,
@@ -605,12 +614,21 @@ sits_stratified_sampling <- function(cube,
605614
# get the labels
606615
labels <- .cube_labels(cube)
607616
n_labels <- length(labels)
608-
# check number of labels
609-
.check_smoothness(n_alloc_class, n_labels)
610-
# Prepare smoothness parameter
611-
if (length(n_alloc_class) == 1L) {
612-
n_alloc_class <- rep(n_alloc_class, n_labels)
617+
# check progress
618+
progress <- .message_progress(progress)
619+
# check samples_per_class parameter is either 1
620+
# or is a named vector with the cube labels
621+
.check_samples_per_class(samples_per_class, labels)
622+
# Prepare samples_per_class parameter
623+
if (length(samples_per_class) == 1L) {
624+
samples_per_class <- rep(samples_per_class, n_labels)
625+
names(samples_per_class) <- labels
626+
} else{
627+
.check_that(all(names(samples_per_class) %in% labels),
628+
msg = .conf("messages",
629+
"sits_stratified_sampling_wrong_labels"))
613630
}
631+
# if a sampling_design parameter exists, use it
614632
if (.has(sampling_design)) {
615633
.check_that(nrow(sampling_design) <= n_labels)
616634
# check names of labels
@@ -619,15 +637,14 @@ sits_stratified_sampling <- function(cube,
619637
.check_that(alloc %in% colnames(sampling_design),
620638
msg = .conf("messages", "sits_stratified_sampling_alloc")
621639
)
640+
622641
# check samples by class
623-
n_alloc_class <- unlist(sampling_design[, alloc])
642+
samples_per_class <- .samples_by_design(sampling_design,
643+
labels,
644+
alloc,
645+
overhead)
624646
}
625-
.check_int_parameter(n_alloc_class,
626-
is_named = TRUE,
627-
msg = .conf("messages", "sits_stratified_sampling_samples")
628-
)
629-
# check progress
630-
progress <- .message_progress(progress)
647+
631648
# The following functions define optimal parameters for parallel processing
632649
# Get block size
633650
block <- .raster_file_blocksize(.raster_open_rast(.tile_path(cube)))
@@ -656,14 +673,11 @@ sits_stratified_sampling <- function(cube,
656673
if (.parallel_start(workers = multicores)) {
657674
on.exit(.parallel_stop(), add = TRUE)
658675
}
659-
samples_class <- .samples_by_design(sampling_design,
660-
labels,
661-
alloc,
662-
overhead)
676+
663677
# call function to allocate sample per strata
664678
samples <- .samples_alloc_strata(
665679
cube = cube,
666-
samples_class = samples_class,
680+
samples_per_class = samples_per_class,
667681
alloc = alloc,
668682
block = block,
669683
progress = progress

inst/extdata/config_messages.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@
118118
.check_samples_embeddings_index: "missing colunm Index in embeddings data"
119119
.check_samples_embeddings_bands: "all embeddings should have the same dimensions (bands)"
120120
.check_samples_embeddings_range: "all embeddings should have finite values"
121+
.check_samples_per_class: "samples_per_class must be either one value or a named vector with a value for each label"
121122
.check_samples_tile_match_bands: "tile bands do not match samples bands"
122123
.check_samples_tile_match_timeline: "tile timeline does not match samples timeline"
123124
.check_samples_timeline: "samples contain timelines with different lengths"
@@ -555,6 +556,7 @@ sits_som_map_grid_size: "recommended values for grid_xdim and grid_ydim are "
555556
sits_stratified_sampling: "labels in sampling design do not match labels in cube"
556557
sits_stratified_sampling_alloc: "allocation method is not included in sampling design"
557558
sits_stratified_sampling_samples: "number of samples per allocation method should be integer"
559+
sits_stratified_sampling_wrong_labels: "label names requested do not match those of the data cube"
558560
sits_stratified_sampling_shp: "invalid shapefile name"
559561
sits_stratified_sampling_shp_save: "saved allocation in shapefile"
560562
sits_svm: "wrong input parameters - see example in documentation"

man/sits-package.Rd

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/sits_stratified_sampling.Rd

Lines changed: 28 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)