Skip to content

Commit

Permalink
Add tar_format_nanoparquet()
Browse files Browse the repository at this point in the history
  • Loading branch information
wlandau committed Sep 25, 2024
1 parent 3ac3c2a commit 14606ee
Show file tree
Hide file tree
Showing 9 changed files with 208 additions and 1 deletion.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Imports:
Suggests:
curl (>= 4.3),
knitr (>= 1.28),
nanoparquet,
quarto (>= 1.4),
rmarkdown (>= 2.1),
testthat (>= 3.0.0),
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ export(tar_force)
export(tar_force_change)
export(tar_format_aws_feather)
export(tar_format_feather)
export(tar_format_nanoparquet)
export(tar_fst)
export(tar_fst_dt)
export(tar_fst_tbl)
Expand Down Expand Up @@ -91,6 +92,9 @@ export(tar_map2_size)
export(tar_map2_size_raw)
export(tar_map_rep)
export(tar_map_rep_raw)
export(tar_nanoparquet_convert)
export(tar_nanoparquet_read)
export(tar_nanoparquet_write)
export(tar_parquet)
export(tar_plan)
export(tar_qs)
Expand Down Expand Up @@ -201,6 +205,7 @@ importFrom(targets,tar_deparse_safe)
importFrom(targets,tar_dir)
importFrom(targets,tar_envir)
importFrom(targets,tar_exist_meta)
importFrom(targets,tar_format)
importFrom(targets,tar_group)
importFrom(targets,tar_load)
importFrom(targets,tar_meta)
Expand Down
96 changes: 96 additions & 0 deletions R/tar_format_nanoparquet.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#' @title Nanoparquet format
#' @export
#' @keywords storage formats
#' @description Nanoparquet storage format for data frames.
#'   Uses [nanoparquet::read_parquet()] and [nanoparquet::write_parquet()]
#'   to read and write data frames returned by targets in a pipeline.
#'   Note: attributes such as `dplyr` row groupings and `posterior`
#'   draws info are dropped during the writing process.
#' @details The format is assembled from three exported helpers:
#'   [tar_nanoparquet_read()], [tar_nanoparquet_write()], and
#'   [tar_nanoparquet_convert()]. The convert step assigns
#'   `c(class, "data.frame")` as the class of the target's return value.
#'   Requires the `nanoparquet` package, which is checked with
#'   `rlang::check_installed()` when this function is called.
#' @return A [targets::tar_format()] storage format specification string
#'   that can be directly supplied to the `format` argument of
#'   [targets::tar_target()] or [targets::tar_option_set()].
#' @param compression Character string, compression type for saving the
#'   data. See the `compression` argument of [nanoparquet::write_parquet()]
#'   for details.
#' @param class Character vector with the data frame subclasses to assign.
#'   See the `class` argument of [nanoparquet::parquet_options()] for details.
#' @examples
#' if (identical(Sys.getenv("TAR_LONG_EXAMPLES"), "true")) {
#' targets::tar_dir({ # tar_dir() runs code from a temporary directory.
#' targets::tar_script({
#'   library(targets)
#'   library(tarchetypes)
#'   list(
#'     tar_target(
#'       name = data,
#'       command = airquality,
#'       format = tar_format_nanoparquet()
#'     )
#'   )
#' })
#' targets::tar_make()
#' targets::tar_read(data)
#' })
#' }
tar_format_nanoparquet <- function(compression = "snappy", class = "tbl") {
  rlang::check_installed("nanoparquet")
  # Each method starts as an empty function whose body is then replaced.
  # substitute() inlines the *values* of `class` / `compression` into the
  # call, so the resulting functions do not refer to variables local to this
  # call and every helper is reached through an explicit `tarchetypes::`
  # prefix.
  read <- function(path) {}
  body(read) <- substitute(
    tarchetypes::tar_nanoparquet_read(path, class),
    env = list(class = class)
  )
  write <- function(object, path) {}
  body(write) <- substitute(
    tarchetypes::tar_nanoparquet_write(object, path, compression),
    env = list(compression = compression)
  )
  convert <- function(object) {}
  body(convert) <- substitute(
    tarchetypes::tar_nanoparquet_convert(object, class),
    env = list(class = class)
  )
  targets::tar_format(read = read, write = write, convert = convert)
}

#' @title Nanoparquet read method
#' @export
#' @keywords internal
#' @description Internal function. Reads a parquet file with
#'   [nanoparquet::read_parquet()], passing `class` and
#'   `use_arrow_metadata = TRUE` through [nanoparquet::parquet_options()].
#' @return The value returned by [nanoparquet::read_parquet()].
#' @param path Path to the data.
#' @param class S3 classes to assign to the returned object.
tar_nanoparquet_read <- function(path, class) {
  read_options <- nanoparquet::parquet_options(
    class = class,
    use_arrow_metadata = TRUE
  )
  nanoparquet::read_parquet(file = path, options = read_options)
}

#' @title Nanoparquet write method
#' @export
#' @keywords internal
#' @description Internal function. Saves an object to a parquet file with
#'   [nanoparquet::write_parquet()], passing `write_arrow_metadata = TRUE`
#'   through [nanoparquet::parquet_options()].
#' @return The value returned by [nanoparquet::write_parquet()].
#' @param object R object to save.
#' @param path Path to the data.
#' @param compression Compression type.
tar_nanoparquet_write <- function(object, path, compression) {
  write_options <- nanoparquet::parquet_options(write_arrow_metadata = TRUE)
  nanoparquet::write_parquet(
    x = object,
    file = path,
    compression = compression,
    options = write_options
  )
}

#' @title Nanoparquet convert method
#' @export
#' @keywords internal
#' @description Internal function. Replaces the class of `object` with
#'   the requested classes followed by `"data.frame"`.
#' @return `object` with its class set to `c(class, "data.frame")`.
#' @param object R object to convert.
#' @param class S3 classes to assign to the returned object.
tar_nanoparquet_convert <- function(object, class) {
  # "data.frame" always comes last, after the caller-supplied subclasses.
  new_classes <- c(class, "data.frame")
  class(object) <- new_classes
  object
}
2 changes: 1 addition & 1 deletion R/tar_package.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#' tar_assert_scalar tar_assert_target tar_assert_target_list
#' tar_assert_true tar_assert_unique tar_assert_unique_targets
#' tar_cue tar_definition tar_deparse_language tar_deparse_safe
#' tar_dir tar_envir tar_exist_meta tar_group tar_load tar_meta
#' tar_dir tar_envir tar_exist_meta tar_format tar_group tar_load tar_meta
#' tar_option_get tar_option_set tar_path_target tar_read
#' tar_runtime_object tar_script tar_seed_create tar_seed_set
#' tar_target tar_target_raw tar_test tar_tidy_eval
Expand Down
3 changes: 3 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ reference:
- title: Target factories for storage formats
contents:
- 'tar_formats'
- title: Storage formats
contents:
- 'tar_format_nanoparquet'
- title: Simple files
contents:
- 'tar_file_read'
Expand Down
49 changes: 49 additions & 0 deletions man/tar_format_nanoparquet.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/tar_nanoparquet_convert.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/tar_nanoparquet_read.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/tar_nanoparquet_write.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 14606ee

Please sign in to comment.