240
240
# ' value of `clobberable_versions_start` does not fully trust these empty
241
241
# ' updates, and assumes that any version `>= max(x$version)` could be
242
242
# ' clobbered.) If `nrow(x) == 0`, then this argument is mandatory.
243
+ # ' @param compactify_tol double. the tolerance used to detect approximate equality for compactification
243
244
# ' @return An `epi_archive` object.
244
245
# '
245
246
# ' @importFrom data.table as.data.table key setkeyv
@@ -295,15 +296,16 @@ new_epi_archive <- function(
295
296
additional_metadata ,
296
297
compactify ,
297
298
clobberable_versions_start ,
298
- versions_end ) {
299
+ versions_end ,
300
+ compactify_tol = .Machine $ double.eps ^ 0.5 ) {
299
301
# Create the data table; if x was an un-keyed data.table itself,
300
302
# then the call to as.data.table() will fail to set keys, so we
301
303
# need to check this, then do it manually if needed
302
304
key_vars <- c(" geo_value" , " time_value" , other_keys , " version" )
303
- DT <- as.data.table(x , key = key_vars ) # nolint: object_name_linter
304
- if (! identical(key_vars , key(DT ))) setkeyv(DT , cols = key_vars )
305
+ data_table <- as.data.table(x , key = key_vars ) # nolint: object_name_linter
306
+ if (! identical(key_vars , key(data_table ))) setkeyv(data_table , cols = key_vars )
305
307
306
- if (anyDuplicated(DT , by = key(DT )) != 0L ) {
308
+ if (anyDuplicated(data_table , by = key(data_table )) != 0L ) {
307
309
cli_abort(" `x` must have one row per unique combination of the key variables. If you
308
310
have additional key variables other than `geo_value`, `time_value`, and
309
311
`version`, such as an age group column, please specify them in `other_keys`.
@@ -313,15 +315,15 @@ new_epi_archive <- function(
313
315
)
314
316
}
315
317
316
- nrow_before_compactify <- nrow(DT )
318
+ nrow_before_compactify <- nrow(data_table )
317
319
# Runs compactify on data frame
318
320
if (is.null(compactify ) || compactify == TRUE ) {
319
- DT <- compactify_tibble(DT , key_vars ) # nolint: object_name_linter
321
+ data_table <- compactify_tibble(data_table , key_vars , compactify_tol )
320
322
}
321
323
# Warns about redundant rows if the number of rows decreased, and we didn't
322
324
# explicitly say to compactify
323
- if (is.null(compactify ) && nrow(DT ) < nrow_before_compactify ) {
324
- elim <- removed_by_compactify(DT , key_vars )
325
+ if (is.null(compactify ) && nrow(data_table ) < nrow_before_compactify ) {
326
+ elim <- removed_by_compactify(data_table , key_vars , compactify_tol )
325
327
warning_intro <- cli :: format_inline(
326
328
" Found rows that appear redundant based on
327
329
last (version of each) observation carried forward;
@@ -343,7 +345,7 @@ new_epi_archive <- function(
343
345
344
346
structure(
345
347
list (
346
- DT = DT ,
348
+ DT = data_table ,
347
349
geo_type = geo_type ,
348
350
time_type = time_type ,
349
351
additional_metadata = additional_metadata ,
@@ -362,51 +364,51 @@ new_epi_archive <- function(
362
364
# ' changed, and so is kept.
363
365
# ' @keywords internal
364
366
# ' @importFrom dplyr filter
365
- compactify_tibble <- function (df , keys ) {
367
+ compactify_tibble <- function (df , keys , tolerance = .Machine $ double.eps ^ .5 ) {
366
368
df %> %
367
369
arrange(!!! keys ) %> %
368
370
filter(if_any(
369
371
c(everything(), - version ), # all non-version columns
370
- ~ ! is_locf(. )
372
+ ~ ! is_locf(. , tolerance )
371
373
))
372
374
}
373
375
374
376
# ' get the entries that `compactify_tibble` would remove
375
377
# ' @keywords internal
376
378
# ' @importFrom dplyr filter if_all everything
377
- removed_by_compactify <- function (df , keys ) {
379
+ removed_by_compactify <- function (df , keys , tolerance ) {
378
380
df %> %
379
381
arrange(!!! keys ) %> %
380
- filter(if_all (
382
+ filter(if_any (
381
383
c(everything(), - version ),
382
- ~ is_locf(. )
384
+ ~ is_locf(. , tolerance )
383
385
)) # nolint: object_usage_linter
384
386
}
385
387
386
388
# ' Checks to see if a value in a vector is LOCF
387
389
# ' @description
388
390
# ' LOCF meaning last observation carried forward. lags the vector by 1, then
389
- # ' compares with itself. For floats it uses float comparison, otherwise it
390
- # ' uses equality.
391
- # ' `NA`'s are considered equal to `NA`'s, while nan's are not .
391
+ # ' compares with itself. For doubles it uses float comparison via
392
+ # ' [`dplyr::near`], otherwise it uses equality. `NA`'s and `NaN`'s are
393
+ # ' considered equal to themselves and each other .
392
394
# ' @importFrom dplyr lag if_else near
393
395
# ' @keywords internal
394
- is_locf <- function (vec ) { # nolint: object_usage_linter
395
- lag_vec <- lag(vec )
396
+ is_locf <- function (vec , tolerance ) { # nolint: object_usage_linter
397
+ lag_vec <- dplyr :: lag(vec )
396
398
if (typeof(vec ) == " double" ) {
397
- if_else(
399
+ res <- if_else(
398
400
! is.na(vec ) & ! is.na(lag_vec ),
399
- near(vec , lag_vec ),
401
+ near(vec , lag_vec , tol = tolerance ),
400
402
is.na(vec ) & is.na(lag_vec )
401
- ) % > %
402
- return ()
403
+ )
404
+ return (res )
403
405
} else {
404
- if_else(
406
+ res <- if_else(
405
407
! is.na(vec ) & ! is.na(lag_vec ),
406
408
vec == lag_vec ,
407
409
is.na(vec ) & is.na(lag_vec )
408
- ) % > %
409
- return ()
410
+ )
411
+ return (res )
410
412
}
411
413
}
412
414
0 commit comments