31
31
# ' @param min_uncert Minimum uncertainty value to select a sample.
32
32
# ' @param sampling_window Window size for collecting points (in pixels).
33
33
# ' The minimum window size is 10.
34
+ # ' @param multicores Number of workers for parallel processing
35
+ # ' (integer, min = 1, max = 2048).
36
+ # ' @param memsize Maximum overall memory (in GB) to run the
37
+ # ' function.
34
38
# '
35
39
# ' @return
36
40
# ' A tibble with longitude and latitude in WGS84 with locations
75
79
sits_uncertainty_sampling <- function (uncert_cube ,
76
80
n = 100L ,
77
81
min_uncert = 0.4 ,
78
- sampling_window = 10L ) {
82
+ sampling_window = 10L ,
83
+ multicores = 1L ,
84
+ memsize = 1L ) {
79
85
.check_set_caller(" sits_uncertainty_sampling" )
80
-
81
86
# Pre-conditions
82
87
.check_is_uncert_cube(uncert_cube )
83
88
.check_int_parameter(n , min = 1 , max = 10000 )
84
89
.check_num_parameter(min_uncert , min = 0.2 , max = 1.0 )
85
90
.check_int_parameter(sampling_window , min = 10L )
86
-
91
+ .check_int_parameter(multicores , min = 1 , max = 2048 )
92
+ .check_int_parameter(memsize , min = 1 , max = 16384 )
93
+ # Get block size
94
+ block <- .raster_file_blocksize(.raster_open_rast(.tile_path(uncert_cube )))
95
+ # Overlapping pixels
96
+ overlap <- ceiling(sampling_window / 2 ) - 1
97
+ # Check minimum memory needed to process one block
98
+ job_memsize <- .jobs_memsize(
99
+ job_size = .block_size(block = block , overlap = overlap ),
100
+ npaths = sampling_window ,
101
+ nbytes = 8 ,
102
+ proc_bloat = .conf(" processing_bloat_cpu" )
103
+ )
104
+ # Update multicores parameter
105
+ multicores <- .jobs_max_multicores(
106
+ job_memsize = job_memsize ,
107
+ memsize = memsize ,
108
+ multicores = multicores
109
+ )
110
+ # Update block parameter
111
+ block <- .jobs_optimal_block(
112
+ job_memsize = job_memsize ,
113
+ block = block ,
114
+ image_size = .tile_size(.tile(uncert_cube )),
115
+ memsize = memsize ,
116
+ multicores = multicores
117
+ )
118
+ # Prepare parallel processing
119
+ .parallel_start(workers = multicores )
120
+ on.exit(.parallel_stop(), add = TRUE )
87
121
# Slide on cube tiles
88
122
samples_tb <- slider :: slide_dfr(uncert_cube , function (tile ) {
89
- path <- .tile_path(tile )
123
+ # Create chunks as jobs
124
+ chunks <- .tile_chunks_create(
125
+ tile = tile ,
126
+ overlap = overlap ,
127
+ block = block
128
+ )
129
+ # Tile path
130
+ tile_path <- .tile_path(tile )
90
131
# Get a list of values of high uncertainty
91
- top_values <- .raster_open_rast(path ) | >
132
+ # Process jobs in parallel
133
+ top_values <- .jobs_map_parallel_dfr(chunks , function (chunk ) {
134
+ # Read and preprocess values
135
+ .raster_open_rast(tile_path ) | >
92
136
.raster_get_top_values(
93
- band = 1 ,
94
- n = n ,
137
+ block = .block(chunk ),
138
+ band = 1 ,
139
+ n = n ,
95
140
sampling_window = sampling_window
96
141
) | >
97
142
dplyr :: mutate(
@@ -105,6 +150,7 @@ sits_uncertainty_sampling <- function(uncert_cube,
105
150
c(" longitude" , " latitude" , " value" )
106
151
)) | >
107
152
tibble :: as_tibble()
153
+ })
108
154
# All the cube's uncertainty images have the same start & end dates.
109
155
top_values [[" start_date" ]] <- .tile_start_date(tile )
110
156
top_values [[" end_date" ]] <- .tile_end_date(tile )
@@ -174,6 +220,10 @@ sits_uncertainty_sampling <- function(uncert_cube,
174
220
# ' @param min_margin Minimum margin of confidence to select a sample
175
221
# ' @param sampling_window Window size for collecting points (in pixels).
176
222
# ' The minimum window size is 10.
223
+ # ' @param multicores Number of workers for parallel processing
224
+ # ' (integer, min = 1, max = 2048).
225
+ # ' @param memsize Maximum overall memory (in GB) to run the
226
+ # ' function.
177
227
# '
178
228
# ' @return
179
229
# ' A tibble with longitude and latitude in WGS84 with locations
@@ -204,54 +254,92 @@ sits_uncertainty_sampling <- function(uncert_cube,
204
254
sits_confidence_sampling <- function (probs_cube ,
205
255
n = 20L ,
206
256
min_margin = 0.90 ,
207
- sampling_window = 10L ) {
257
+ sampling_window = 10L ,
258
+ multicores = 1L ,
259
+ memsize = 1L ) {
208
260
.check_set_caller(" sits_confidence_sampling" )
209
-
210
261
# Pre-conditions
211
262
.check_is_probs_cube(probs_cube )
212
263
.check_int_parameter(n , min = 20 )
213
264
.check_num_parameter(min_margin , min = 0.01 , max = 1.0 )
214
265
.check_int_parameter(sampling_window , min = 10 )
215
-
266
+ .check_int_parameter(multicores , min = 1 , max = 2048 )
267
+ .check_int_parameter(memsize , min = 1 , max = 16384 )
268
+ # Get block size
269
+ block <- .raster_file_blocksize(.raster_open_rast(.tile_path(probs_cube )))
270
+ # Overlapping pixels
271
+ overlap <- ceiling(sampling_window / 2 ) - 1
272
+ # Check minimum memory needed to process one block
273
+ job_memsize <- .jobs_memsize(
274
+ job_size = .block_size(block = block , overlap = overlap ),
275
+ npaths = sampling_window ,
276
+ nbytes = 8 ,
277
+ proc_bloat = .conf(" processing_bloat_cpu" )
278
+ )
279
+ # Update multicores parameter
280
+ multicores <- .jobs_max_multicores(
281
+ job_memsize = job_memsize ,
282
+ memsize = memsize ,
283
+ multicores = multicores
284
+ )
285
+ # Update block parameter
286
+ block <- .jobs_optimal_block(
287
+ job_memsize = job_memsize ,
288
+ block = block ,
289
+ image_size = .tile_size(.tile(probs_cube )),
290
+ memsize = memsize ,
291
+ multicores = multicores
292
+ )
293
+ # Prepare parallel processing
294
+ .parallel_start(workers = multicores )
295
+ on.exit(.parallel_stop(), add = TRUE )
216
296
# get labels
217
297
labels <- sits_labels(probs_cube )
218
-
219
298
# Slide on cube tiles
220
299
samples_tb <- slider :: slide_dfr(probs_cube , function (tile ) {
221
- # Open raster
222
- r_obj <- .raster_open_rast(.tile_path(tile ))
223
-
224
- # Get samples for each label
225
- purrr :: map2_dfr(labels , seq_along(labels ), function (lab , i ) {
226
- # Get a list of values of high confidence & apply threshold
227
- top_values <- r_obj | >
228
- .raster_get_top_values(
229
- band = i ,
230
- n = n ,
231
- sampling_window = sampling_window
232
- ) | >
233
- dplyr :: mutate(
234
- value = .data [[" value" ]] *
235
- .conf(" probs_cube_scale_factor" )
236
- ) | >
237
- dplyr :: filter(
238
- .data [[" value" ]] > = min_margin
239
- ) | >
240
- dplyr :: select(dplyr :: matches(
241
- c(" longitude" , " latitude" , " value" )
242
- )) | >
243
- tibble :: as_tibble()
300
+ # Create chunks as jobs
301
+ chunks <- .tile_chunks_create(
302
+ tile = tile ,
303
+ overlap = overlap ,
304
+ block = block
305
+ )
306
+ # Tile path
307
+ tile_path <- .tile_path(tile )
308
+ # Get a list of values of high uncertainty
309
+ # Process jobs in parallel
310
+ .jobs_map_parallel_dfr(chunks , function (chunk ) {
311
+ # Get samples for each label
312
+ purrr :: map2_dfr(labels , seq_along(labels ), function (lab , i ) {
313
+ # Get a list of values of high confidence & apply threshold
314
+ top_values <- .raster_open_rast(tile_path ) | >
315
+ .raster_get_top_values(
316
+ block = .block(chunk ),
317
+ band = i ,
318
+ n = n ,
319
+ sampling_window = sampling_window
320
+ ) | >
321
+ dplyr :: mutate(
322
+ value = .data [[" value" ]] *
323
+ .conf(" probs_cube_scale_factor" )
324
+ ) | >
325
+ dplyr :: filter(
326
+ .data [[" value" ]] > = min_margin
327
+ ) | >
328
+ dplyr :: select(dplyr :: matches(
329
+ c(" longitude" , " latitude" , " value" )
330
+ )) | >
331
+ tibble :: as_tibble()
244
332
245
- # All the cube's uncertainty images have the same start &
246
- # end dates.
247
- top_values [[" start_date" ]] <- .tile_start_date(tile )
248
- top_values [[" end_date" ]] <- .tile_end_date(tile )
249
- top_values [[" label" ]] <- lab
333
+ # All the cube's uncertainty images have the same start &
334
+ # end dates.
335
+ top_values [[" start_date" ]] <- .tile_start_date(tile )
336
+ top_values [[" end_date" ]] <- .tile_end_date(tile )
337
+ top_values [[" label" ]] <- lab
250
338
251
- return (top_values )
339
+ return (top_values )
340
+ })
252
341
})
253
342
})
254
-
255
343
# Slice result samples
256
344
result_tb <- samples_tb | >
257
345
dplyr :: group_by(.data [[" label" ]]) | >
0 commit comments