Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to secretbase::siphash13() #1262

Merged
merged 13 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Description: Pipeline tools coordinate the pieces of computationally
The methodology in this package
borrows from GNU 'Make' (2015, ISBN:978-9881443519)
and 'drake' (2018, <doi:10.21105/joss.00550>).
Version: 1.6.0.9000
Version: 1.6.0.9001
License: MIT + file LICENSE
URL: https://docs.ropensci.org/targets/, https://github.com/ropensci/targets
BugReports: https://github.com/ropensci/targets/issues
Expand Down Expand Up @@ -60,13 +60,12 @@ Imports:
cli (>= 2.0.2),
codetools (>= 0.2.16),
data.table (>= 1.12.8),
digest (>= 0.6.25),
igraph (>= 2.0.0),
knitr (>= 1.34),
ps,
R6 (>= 2.4.1),
rlang (>= 1.0.0),
secretbase,
secretbase (>= 0.4.0),
stats,
tibble (>= 3.0.1),
tidyselect (>= 1.1.0),
Expand Down
10 changes: 5 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

S3method(future_value_target,condition)
S3method(future_value_target,tar_target)
S3method(hash_object,"function")
S3method(hash_object,character)
S3method(hash_object,default)
S3method(hash_import_object,"function")
S3method(hash_import_object,character)
S3method(hash_import_object,default)
S3method(imports_init,default)
S3method(imports_init,tar_imports)
S3method(pipeline_from_list,default)
Expand Down Expand Up @@ -532,8 +532,6 @@ importFrom(data.table,fread)
importFrom(data.table,fwrite)
importFrom(data.table,rbindlist)
importFrom(data.table,set)
importFrom(digest,digest)
importFrom(digest,getVDigest)
importFrom(igraph,V)
importFrom(igraph,adjacent_vertices)
importFrom(igraph,as_edgelist)
Expand All @@ -557,6 +555,7 @@ importFrom(rlang,is_installed)
importFrom(rlang,quo_squash)
importFrom(rlang,warn)
importFrom(secretbase,sha3)
importFrom(secretbase,siphash13)
importFrom(stats,complete.cases)
importFrom(stats,runif)
importFrom(tibble,as_tibble)
Expand All @@ -573,6 +572,7 @@ importFrom(tidyselect,starts_with)
importFrom(tools,file_path_sans_ext)
importFrom(utils,browseURL)
importFrom(utils,capture.output)
importFrom(utils,compareVersion)
importFrom(utils,data)
importFrom(utils,globalVariables)
importFrom(utils,head)
Expand Down
9 changes: 8 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# targets 1.6.0.9000
# targets 1.6.0.9001

## Invalidating changes

* Use `secretbase::siphash13()` instead of `digest(algo = "xxhash64", serializationVersion = 3)` so hashes of in-memory objects no longer depend on serialization version 3 headers (#1244, @shikokuchuo). Unfortunately, pipelines built with earlier versions of `targets` will need to rerun.

## Other improvements

* Inform and prompt the user when the pipeline was built with an old version of `targets` and changes to the package will cause the current work to rerun (#1244). In the `tar_make*()` functions, a `utils::menu()` prompt gives the user a chance to downgrade if necessary.
* For type safety in the internal database class, read all columns as character vectors in `data.table::fread()`, then convert them to the correct types afterwards.

# targets 1.6.0

Expand Down
2 changes: 1 addition & 1 deletion R/class_aws.R
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ store_upload_object_aws <- function(store) {
invert = TRUE
)
store$file$path <- c(path, paste0("version=", head$VersionId))
store$file$hash <- digest_chr64(head$ETag)
store$file$hash <- hash_object(head$ETag)
invisible()
}

Expand Down
2 changes: 1 addition & 1 deletion R/class_command.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ command_init <- function(
expr <- as.expression(expr)
deps <- deps %|||% deps_function(embody_expr(expr))
string <- string %|||% mask_pointers(tar_deparse_safe(expr))
hash <- digest_chr64(string)
hash <- hash_object(string)
command_new(expr, packages, library, deps, seed, string, hash)
}

Expand Down
4 changes: 3 additions & 1 deletion R/class_crew.R
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,9 @@ database_crew <- function(path_store) {
database_init(
path = file.path(path_meta_dir(path_store), "crew"),
subkey = file.path(basename(path_meta("")), "crew"),
header = c("controller", "worker", "seconds", "targets")
header = c("controller", "worker", "seconds", "targets"),
integer_columns = "targets",
numeric_columns = "seconds"
)
}

Expand Down
74 changes: 69 additions & 5 deletions R/class_database.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ database_init <- function(
path = tempfile(),
subkey = basename(tempfile()),
header = "name",
logical_columns = character(0L),
integer_columns = character(0L),
numeric_columns = character(0L),
list_columns = character(0L),
list_column_modes = character(0L),
repository = tar_options$get_repository_meta(),
Expand All @@ -19,6 +22,9 @@ database_init <- function(
path = path,
key = key,
header = header,
logical_columns = logical_columns,
integer_columns = integer_columns,
numeric_columns = numeric_columns,
list_columns = list_columns,
list_column_modes = list_column_modes,
resources = resources
Expand All @@ -28,6 +34,9 @@ database_init <- function(
path = path,
key = key,
header = header,
logical_columns = logical_columns,
integer_columns = integer_columns,
numeric_columns = numeric_columns,
list_columns = list_columns,
list_column_modes = list_column_modes,
resources = resources
Expand All @@ -37,6 +46,9 @@ database_init <- function(
path = path,
key = key,
header = header,
logical_columns = logical_columns,
integer_columns = integer_columns,
numeric_columns = numeric_columns,
list_columns = list_columns,
list_column_modes = list_column_modes,
resources = resources
Expand All @@ -59,6 +71,9 @@ database_class <- R6::R6Class(
path = NULL,
key = NULL,
header = NULL,
logical_columns = NULL,
integer_columns = NULL,
numeric_columns = NULL,
list_columns = NULL,
list_column_modes = NULL,
resources = NULL,
Expand All @@ -69,6 +84,9 @@ database_class <- R6::R6Class(
path = NULL,
key = NULL,
header = NULL,
logical_columns = NULL,
integer_columns = NULL,
numeric_columns = NULL,
list_columns = NULL,
list_column_modes = NULL,
resources = NULL,
Expand All @@ -78,6 +96,9 @@ database_class <- R6::R6Class(
self$path <- path
self$key <- key
self$header <- header
self$logical_columns <- logical_columns
self$integer_columns <- integer_columns
self$numeric_columns <- numeric_columns
self$list_columns <- list_columns
self$list_column_modes <- list_column_modes
self$resources <- resources
Expand Down Expand Up @@ -275,9 +296,28 @@ database_class <- R6::R6Class(
sep = database_sep_outer,
fill = TRUE,
na.strings = "",
encoding = encoding
encoding = encoding,
colClasses = "character"
)
out <- as_data_frame(out)
for (name in self$logical_columns) {
value <- out[[name]]
if (!is.null(value)) {
out[[name]] <- as.logical(value)
}
}
for (name in self$integer_columns) {
value <- out[[name]]
if (!is.null(value)) {
out[[name]] <- as.integer(value)
}
}
for (name in self$numeric_columns) {
value <- out[[name]]
if (!is.null(value)) {
out[[name]] <- as.numeric(value)
}
}
if (nrow(out) < 1L) {
return(out)
}
Expand Down Expand Up @@ -387,9 +427,23 @@ database_class <- R6::R6Class(
invisible()
}
},
validate_columns = function(header, list_columns) {
if (!all(list_columns %in% header)) {
tar_throw_validate("all list columns must be in the header")
validate_columns = function(
header,
logical_columns,
integer_columns,
numeric_columns,
list_columns
) {
special_columns <- c(
logical_columns,
integer_columns,
numeric_columns,
list_columns
)
if (!all(special_columns %in% header)) {
tar_throw_validate(
"all logical/integer/numeric/list columns must be in the header"
)
}
if (!is.null(header) && !("name" %in% header)) {
tar_throw_validate("header must have a column called \"name\"")
Expand All @@ -416,7 +470,6 @@ database_class <- R6::R6Class(
},
validate = function() {
memory_validate(self$memory)
self$validate_columns(self$header, self$list_columns)
self$validate_file()
tar_assert_chr(self$path)
tar_assert_scalar(self$path)
Expand All @@ -427,7 +480,18 @@ database_class <- R6::R6Class(
tar_assert_none_na(self$key)
tar_assert_nzchar(self$key)
tar_assert_chr(self$header)
tar_assert_chr(self$logical_columns)
tar_assert_chr(self$integer_columns)
tar_assert_chr(self$numeric_columns)
tar_assert_chr(self$list_columns)
tar_assert_chr(self$list_column_modes)
self$validate_columns(
self$header,
self$logical_columns,
self$integer_columns,
self$numeric_columns,
self$list_columns
)
}
)
)
Expand Down
6 changes: 6 additions & 0 deletions R/class_database_aws.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ database_aws_new <- function(
path = NULL,
key = NULL,
header = NULL,
logical_columns = NULL,
integer_columns = NULL,
numeric_columns = NULL,
list_columns = NULL,
list_column_modes = NULL,
buffer = NULL,
Expand All @@ -15,6 +18,9 @@ database_aws_new <- function(
path = path,
key = key,
header = header,
logical_columns = logical_columns,
integer_columns = integer_columns,
numeric_columns = numeric_columns,
list_columns = list_columns,
list_column_modes = list_column_modes,
buffer = buffer,
Expand Down
6 changes: 6 additions & 0 deletions R/class_database_gcp.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ database_gcp_new <- function(
path = NULL,
key = NULL,
header = NULL,
logical_columns = NULL,
integer_columns = NULL,
numeric_columns = NULL,
list_columns = NULL,
list_column_modes = NULL,
buffer = NULL,
Expand All @@ -15,6 +18,9 @@ database_gcp_new <- function(
path = path,
key = key,
header = header,
logical_columns = logical_columns,
integer_columns = integer_columns,
numeric_columns = numeric_columns,
list_columns = list_columns,
list_column_modes = list_column_modes,
buffer = buffer,
Expand Down
6 changes: 6 additions & 0 deletions R/class_database_local.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ database_local_new <- function(
path = NULL,
key = NULL,
header = NULL,
logical_columns = NULL,
integer_columns = NULL,
numeric_columns = NULL,
list_columns = NULL,
list_column_modes = NULL,
resources = NULL,
Expand All @@ -13,6 +16,9 @@ database_local_new <- function(
path = path,
key = key,
header = header,
logical_columns = logical_columns,
integer_columns = integer_columns,
numeric_columns = numeric_columns,
list_columns = list_columns,
list_column_modes = list_column_modes,
resources = resources,
Expand Down
8 changes: 4 additions & 4 deletions R/class_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,13 +150,13 @@ file_list_files <- function(path) {
file_hash <- function(files) {
n <- length(files)
if (identical(n, 0L)) {
return(null64)
return(hash_null)
}
hash <- digest_file64(files)
hash <- map_chr(x = files, f = hash_file, USE.NAMES = FALSE)
if (identical(n, 1L)) {
return(hash)
}
digest_chr64(paste(hash, collapse = ""))
hash_object(paste(hash, collapse = ""))
}

file_info <- function(files) {
Expand Down Expand Up @@ -189,7 +189,7 @@ file_bytes <- function(info) {
}

file_size <- function(bytes) {
digest_obj64(bytes)
hash_object(bytes)
}

file_diff_chr <- function(dbl) {
Expand Down
2 changes: 1 addition & 1 deletion R/class_gcp.R
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ store_upload_object_gcp <- function(store) {
invert = TRUE
)
store$file$path <- c(path, paste0("version=", head$generation))
store$file$hash <- digest_chr64(head$md5)
store$file$hash <- hash_object(head$md5)
invisible()
}

Expand Down
2 changes: 1 addition & 1 deletion R/class_inventory_aws.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ inventory_aws_class <- R6::R6Class(
)
for (key in names(results)) {
name <- self$get_name(key = key, bucket = bucket)
self$cache[[name]] <- digest_chr64(results[[key]])
self$cache[[name]] <- hash_object(results[[key]])
}
}
)
Expand Down
2 changes: 1 addition & 1 deletion R/class_inventory_gcp.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ inventory_gcp_class <- R6::R6Class(
)
for (key in names(results)) {
name <- self$get_name(key = key, bucket = bucket)
self$cache[[name]] <- digest_chr64(results[[key]])
self$cache[[name]] <- hash_object(results[[key]])
}
}
)
Expand Down
2 changes: 1 addition & 1 deletion R/class_mermaid.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ mermaid_class <- R6::R6Class(
produce_mermaid_vertices = function(data) {
sprintf(
"%s%s%s%s:::%s",
sprintf("x%s", as.character(map_chr(data$name, digest_chr64))),
sprintf("x%s", as.character(map_chr(data$name, hash_object))),
data$open,
sprintf("\"%s\"", data$label),
data$close,
Expand Down
4 changes: 3 additions & 1 deletion R/class_meta.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ meta_class <- R6::R6Class(
)
hashes <- hashes[nzchar(hashes)]
string <- paste(c(names(hashes), hashes), collapse = "")
digest_chr64(string)
hash_object(string)
},
produce_depend = function(target, pipeline) {
self$hash_deps(target$command$deps, pipeline)
Expand Down Expand Up @@ -160,6 +160,8 @@ database_meta <- function(path_store) {
path = path_meta(path_store = path_store),
subkey = file.path(basename(path_meta("")), "meta"),
header = header_meta(),
integer_columns = "seed",
numeric_columns = c("bytes", "seconds"),
list_columns = c("path", "children"),
list_column_modes = c("character", "character")
)
Expand Down
Loading
Loading