Skip to content

Commit 87a94c3

Browse files
closes #1158
1 parent 63bb426 commit 87a94c3

10 files changed

+147
-66
lines changed

NAMESPACE

+3
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ S3method(.raster_yres,terra)
129129
S3method(.reg_s2tile_convert,dem_cube)
130130
S3method(.reg_s2tile_convert,grd_cube)
131131
S3method(.reg_s2tile_convert,rtc_cube)
132+
S3method(.samples_alloc_strata,class_cube)
133+
S3method(.samples_alloc_strata,class_vector_cube)
132134
S3method(.slice_dfr,numeric)
133135
S3method(.source_collection_access_test,"mpc_cube_sentinel-1-grd")
134136
S3method(.source_collection_access_test,cdse_cube)
@@ -198,6 +200,7 @@ S3method(.source_tile_get_bbox,stac_cube)
198200
S3method(.tile,default)
199201
S3method(.tile,raster_cube)
200202
S3method(.tile_area_freq,class_cube)
203+
S3method(.tile_area_freq,class_vector_cube)
201204
S3method(.tile_area_freq,default)
202205
S3method(.tile_area_freq,raster_cube)
203206
S3method(.tile_as_sf,default)

R/api_cube.R

-13
Original file line numberDiff line numberDiff line change
@@ -143,23 +143,10 @@ NULL
143143
#'
144144
#' @return A \code{vector} with the areas of the cube labels.
145145
.cube_class_areas <- function(cube) {
146-
.check_is_class_cube(cube)
147-
labels_cube <- sits_labels(cube)
148-
149146
# Get area for each class for each row of the cube
150147
freq_lst <- slider::slide(cube, function(tile) {
151148
# Get the frequency count and value for each labelled image
152149
freq <- .tile_area_freq(tile)
153-
# pixel area
154-
# convert the area to hectares
155-
# assumption: spatial resolution unit is meters
156-
area <- freq[["count"]] * .tile_xres(tile) * .tile_yres(tile) / 10000
157-
# Include class names
158-
freq <- dplyr::mutate(
159-
freq,
160-
area = area,
161-
class = labels_cube[as.character(freq[["value"]])]
162-
)
163150
return(freq)
164151
})
165152
# Get a tibble by binding the row (duplicated labels with different counts)

R/api_samples.R

+79
Original file line numberDiff line numberDiff line change
@@ -203,3 +203,82 @@
203203
})
204204
})
205205
}
206+
#' @title Allocate points for stratified sampling for accuracy estimation
207+
#' @name .samples_alloc_strata
208+
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
209+
#' @param cube Classified data cube (raster or vector)
210+
#' @param samples_class Matrix with sampling design to be allocated
211+
#' @param dots Other params for the function
212+
#' @param multicores Number of cores to work in parallel
213+
#' @param progress Show progress bar?
214+
#'
215+
#' @return Points resulting from stratified sampling
216+
#' @keywords internal
217+
#' @noRd
218+
.samples_alloc_strata <- function(cube,
219+
samples_class, ...,
220+
multicores = 2,
221+
progress = TRUE){
222+
UseMethod(".samples_alloc_strata", cube)
223+
}
224+
#' @export
225+
.samples_alloc_strata.class_cube <- function(cube,
226+
samples_class, ...,
227+
multicores = 2,
228+
progress = TRUE){
229+
# estimate size
230+
size <- ceiling(max(samples_class) / nrow(cube))
231+
# get labels
232+
labels <- names(samples_class)
233+
n_labels <- length(labels)
234+
# Prepare parallel processing
235+
.parallel_start(workers = multicores)
236+
on.exit(.parallel_stop(), add = TRUE)
237+
# Create assets as jobs
238+
cube_assets <- .cube_split_assets(cube)
239+
# Process each asset in parallel
240+
samples <- .jobs_map_parallel_dfr(cube_assets, function(tile) {
241+
robj <- .raster_open_rast(.tile_path(tile))
242+
cls <- data.frame(id = 1:n_labels,
243+
cover = labels)
244+
levels(robj) <- cls
245+
samples_sv <- terra::spatSample(
246+
x = robj,
247+
size = size,
248+
method = "stratified",
249+
as.points = TRUE
250+
)
251+
samples_sf <- sf::st_as_sf(samples_sv)
252+
samples_sf <- dplyr::mutate(samples_sf,
253+
label = labels[.data[["cover"]]])
254+
samples_sf <- sf::st_transform(samples_sf, crs = "EPSG:4326")
255+
}, progress = progress)
256+
257+
samples <- purrr::map_dfr(labels, function(lab) {
258+
samples_class <- samples |>
259+
dplyr::filter(.data[["label"]] == lab) |>
260+
dplyr::slice_sample(n = samples_class[lab])
261+
})
262+
263+
return(samples)
264+
}
265+
#' @export
266+
.samples_alloc_strata.class_vector_cube <- function(cube,
267+
samples_class, ...,
268+
multicores = 2,
269+
progress = TRUE){
270+
271+
segments_cube <- slider::slide_dfr(cube, function(tile){
272+
# Open segments and transform them to tibble
273+
segments <- .segments_read_vec(tile)
274+
})
275+
# Retrieve the required number of segments per class
276+
samples_lst <- segments_cube |>
277+
dplyr::group_by(.data[["class"]]) |>
278+
dplyr::group_map(function(cl, class){
279+
samples_class <- sf::st_sample(cl, samples_class[class[["class"]]])
280+
sf_samples <- sf::st_sf(class, geometry = samples_class)
281+
})
282+
samples <- dplyr::bind_rows(samples_lst)
283+
return(samples)
284+
}

R/api_tile.R

+27
Original file line numberDiff line numberDiff line change
@@ -1314,6 +1314,33 @@ NULL
13141314
r_obj <- .raster_open_rast(.tile_path(tile))
13151315
# Retrieve the frequency
13161316
freq <- tibble::as_tibble(.raster_freq(r_obj))
1317+
# get labels
1318+
labels <- .tile_labels(tile)
1319+
# pixel area
1320+
# convert the area to hectares
1321+
# assumption: spatial resolution unit is meters
1322+
area <- freq[["count"]] * .tile_xres(tile) * .tile_yres(tile) / 10000
1323+
# Include class names
1324+
freq <- dplyr::mutate(
1325+
freq,
1326+
area = area,
1327+
class = labels[as.character(freq[["value"]])]
1328+
)
1329+
# Return frequencies
1330+
freq
1331+
}
1332+
#' @export
1333+
.tile_area_freq.class_vector_cube <- function(tile) {
1334+
# Open segments
1335+
segments <- .segments_read_vec(tile)
1336+
segments[["area"]] <- sf::st_area(segments)
1337+
segments <- sf::st_drop_geometry(segments)
1338+
segments <- units::drop_units(segments)
1339+
# Retrieve the area
1340+
freq <- segments |>
1341+
dplyr::group_by(class) |>
1342+
dplyr::summarise(area = sum(.data[["area"]])) |>
1343+
dplyr::select(c(dplyr::all_of("area"), dplyr::all_of("class")))
13171344
# Return frequencies
13181345
freq
13191346
}

R/sits_classify.R

+3-1
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,8 @@ sits_classify.raster_cube <- function(data,
253253
.check_filter_fn(filter_fn)
254254
# Retrieve the samples from the model
255255
samples <- .ml_samples(ml_model)
256+
# Retrieve the bands from the model
257+
bands <- .ml_bands(ml_model)
256258
# Do the samples and tile match their timeline length?
257259
.check_samples_tile_match_timeline(samples = samples, tile = data)
258260
# Do the samples and tile match their bands?
@@ -263,7 +265,7 @@ sits_classify.raster_cube <- function(data,
263265
# Check minimum memory needed to process one block
264266
job_memsize <- .jobs_memsize(
265267
job_size = .block_size(block = block, overlap = 0),
266-
npaths = length(.tile_paths(data)) + length(.ml_labels(ml_model)),
268+
npaths = length(.tile_paths(data, bands)) + length(.ml_labels(ml_model)),
267269
nbytes = 8,
268270
proc_bloat = proc_bloat
269271
)

R/sits_plot.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,7 @@ plot.vector_cube <- function(x, ...,
524524
tile = x[["tile"]][[1]],
525525
dates = NULL,
526526
seg_color = "black",
527-
line_width = 1,
527+
line_width = 0.3,
528528
palette = "RdYlGn",
529529
rev = FALSE,
530530
scale = 1.0,

R/sits_sample_functions.R

+26-45
Original file line numberDiff line numberDiff line change
@@ -284,15 +284,14 @@ sits_reduce_imbalance <- function(samples,
284284
#' sampling_design <- sits_sampling_design(label_cube, expected_ua)
285285
#' }
286286
#' @export
287-
sits_sampling_design <- function(cube,
287+
sits_sampling_design <- function(cube, ...,
288288
expected_ua = 0.75,
289289
std_err = 0.01,
290290
rare_class_prop = 0.1) {
291291
.check_set_caller("sits_sampling_design")
292292
# check the cube is valid
293-
.check_raster_cube_files(cube)
294-
# check cube is class cube
295-
.check_is_class_cube(cube)
293+
.check_that(inherits(cube, "class_cube") ||
294+
inherits(cube, "class_vector_cube"))
296295
# get the labels
297296
labels <- .cube_labels(cube)
298297
n_labels <- length(labels)
@@ -304,19 +303,23 @@ sits_sampling_design <- function(cube,
304303
.check_that(length(expected_ua) == n_labels)
305304
# check names of labels
306305
.check_that(all(labels %in% names(expected_ua)))
307-
# adjust names to match cube labels
308-
expected_ua <- expected_ua[labels]
309306
# get cube class areas
310307
class_areas <- .cube_class_areas(cube)
308+
# check that names of class areas are contained in the labels
309+
.check_that(all(names(class_areas) %in% labels),
310+
msg = .conf("messages", "sits_sampling_design_labels"))
311+
# adjust names to match cube labels
312+
expected_ua <- expected_ua[names(class_areas)]
311313
# calculate proportion of class areas
312314
prop <- class_areas / sum(class_areas)
313315
# standard deviation of the stratum
314316
std_dev <- signif(sqrt(expected_ua * (1 - expected_ua)), 3)
315317
# calculate sample size
316318
sample_size <- round((sum(prop * std_dev) / std_err) ^ 2)
317319
# determine "Equal" allocation
318-
equal <- rep(round(sample_size / n_labels), n_labels)
319-
names(equal) <- labels
320+
n_classes <- length(class_areas)
321+
equal <- rep(round(sample_size / n_classes), n_classes)
322+
names(equal) <- names(class_areas)
320323
# find out the classes which are rare
321324
rare_classes <- prop[prop <= rare_class_prop]
322325
# Determine allocation possibilities
@@ -418,66 +421,44 @@ sits_stratified_sampling <- function(cube,
418421
.check_set_caller("sits_stratified_sampling")
419422
# check the cube is valid
420423
.check_raster_cube_files(cube)
421-
# check cube is class cube
422-
.check_is_class_cube(cube)
424+
# check the cube is valid
425+
.check_that(inherits(cube, "class_cube") ||
426+
inherits(cube, "class_vector_cube"))
423427
# get the labels
424428
labels <- .cube_labels(cube)
425429
n_labels <- length(labels)
426430
# check number of labels
427-
.check_that(nrow(sampling_design) == n_labels)
431+
.check_that(nrow(sampling_design) <= n_labels)
428432
# check names of labels
429433
.check_that(all(rownames(sampling_design) %in% labels))
430434
# check allocation method
431435
.check_that(alloc %in% colnames(sampling_design),
432-
msg = .conf("messages", "sits_sampling_design_alloc"))
436+
msg = .conf("messages", "sits_stratified_sampling_alloc"))
433437
# retrieve samples class
434438
samples_class <- unlist(sampling_design[, alloc])
435439
# check samples class
436440
.check_int_parameter(samples_class, is_named = TRUE,
437-
msg = .conf("messages", "sits_sampling_design_samples")
441+
msg = .conf("messages", "sits_stratified_sampling_samples")
438442
)
439443
.check_int_parameter(multicores, min = 1, max = 2048)
440444
.check_progress(progress)
441445
# name samples class
442-
names(samples_class) <- rownames(sampling_design)
446+
# names(samples_class) <- rownames(sampling_design)
443447
# include overhead
444448
samples_class <- ceiling(samples_class * overhead)
445-
# estimate size
446-
size <- ceiling(max(samples_class) / nrow(cube))
447-
# Prepare parallel processing
448-
.parallel_start(workers = multicores)
449-
on.exit(.parallel_stop(), add = TRUE)
450-
# Create assets as jobs
451-
cube_assets <- .cube_split_assets(cube)
452-
# Process each asset in parallel
453-
samples <- .jobs_map_parallel_dfr(cube_assets, function(tile) {
454-
robj <- .raster_open_rast(.tile_path(tile))
455-
cls <- data.frame(id = 1:n_labels,
456-
cover = labels)
457-
levels(robj) <- cls
458-
samples_sv <- terra::spatSample(
459-
x = robj,
460-
size = size,
461-
method = "stratified",
462-
as.points = TRUE
463-
)
464-
samples_sf <- sf::st_as_sf(samples_sv)
465-
samples_sf <- dplyr::mutate(samples_sf,
466-
label = labels[.data[["cover"]]])
467-
samples_sf <- sf::st_transform(samples_sf, crs = "EPSG:4326")
468-
}, progress = progress)
469449

470-
samples <- purrr::map_dfr(labels, function(lab) {
471-
samples_class <- samples |>
472-
dplyr::filter(.data[["label"]] == lab) |>
473-
dplyr::slice_sample(n = samples_class[lab])
474-
})
450+
# call function to allocate sample per strata
451+
samples <- .samples_alloc_strata(
452+
cube = cube,
453+
samples_class = samples_class,
454+
multicores = multicores)
455+
475456
if (.has(shp_file)) {
476457
.check_that(tools::file_ext(shp_file) == "shp",
477-
msg = .conf("messages", "sits_sampling_design_shp")
458+
msg = .conf("messages", "sits_stratified_sampling_shp")
478459
)
479460
sf::st_write(samples, shp_file, append = FALSE)
480-
message(.conf("messages", "sits_sampling_design_shp_save"), shp_file)
461+
message(.conf("messages", "sits_stratified_sampling_shp_save"), shp_file)
481462
}
482463
return(samples)
483464
}

inst/extdata/config_messages.yml

+6-5
Original file line numberDiff line numberDiff line change
@@ -410,11 +410,8 @@ sits_reduce_imbalance_samples: "number of samples to undersample for large class
410410
sits_resnet: "wrong input parameters - see example in documentation"
411411
sits_rfor: "wrong input parameters - see example in documentation"
412412
sits_sample: "invalid frac parameter - values should be btw 0.0 and 2.0"
413-
sits_sampling_design: "expected values of user's accuracy should contain names of labels"
414-
sits_sampling_design_alloc: "allocation method is not included in sampling design"
415-
sits_sampling_design_samples: "number of samples per allocation method should be integer"
416-
sits_sampling_design_shp: "invalid shapefile name"
417-
sits_sampling_design_shp_save: "saved allocation in shapefile"
413+
sits_sampling_design: "sampling design only runs in classified cubes"
414+
sits_sampling_design_labels: "names of classes in cube do not match labels in expected_ua"
418415
sits_select: "input should be a valid set of training samples or a non-classified data cube"
419416
sits_segment: "wrong input parameters - see example in documentation"
420417
sits_slic: "wrong input parameters - see example in documentation"
@@ -426,6 +423,10 @@ sits_som_evaluate_cluster: "wrong input data; please run sits_som_map first"
426423
sits_som_map: "number of samples should be greater than number of neurons"
427424
sits_som_map_grid_size: "recommended values for grid_xdim and grid_ydim are "
428425
sits_stratified_sampling: "labels in sampling design do not match labels in cube"
426+
sits_stratified_sampling_alloc: "allocation method is not included in sampling design"
427+
sits_stratified_sampling_samples: "number of samples per allocation method should be integer"
428+
sits_stratified_sampling_shp: "invalid shapefile name"
429+
sits_stratified_sampling_shp_save: "saved allocation in shapefile"
429430
sits_svm: "wrong input parameters - see example in documentation"
430431
sits_tae: "wrong input parameters - see example in documentation"
431432
sits_tempcnn: "wrong input parameters - see example in documentation"

man/plot.vector_cube.Rd

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/sits_sampling_design.Rd

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)