From 8d5d3f639270e3e79bed4d191896031a47defb2d Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 7 Feb 2025 10:58:40 +0100 Subject: [PATCH 1/8] draft stat_chain --- R/stat-chain.R | 127 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 R/stat-chain.R diff --git a/R/stat-chain.R b/R/stat-chain.R new file mode 100644 index 0000000000..c7840048e3 --- /dev/null +++ b/R/stat-chain.R @@ -0,0 +1,127 @@ +stat_chain <- function( + mapping = NULL, + data = NULL, + geom = "path", + position = "identity", + ..., + stats = "identity", + stat.params = list(), + redirect = list(), + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE) { + + layer( + data = data, + mapping = mapping, + stat = StatChain, + geom = geom, + position = position, + show.legend = show.legend, + inherit.aes = inherit.aes, + params = list2( + na.rm = na.rm, + stats = stats, + stat.params = stat.params, + redirect = redirect, + ... + ) + ) +} + +StatChain <- ggproto( + "StatChain", Stat, + + extra_params = c("na.rm", "stats", "stat.params", "redirect"), + + setup_params = function(data, params) { + params$stats <- lapply(params$stats, validate_subclass, subclass = "Stat") + n_stats <- length(params$stats) + + params$stat.params <- force_length( + params$stat.params, n_stats, + warn_longer = TRUE, arg = "stat.params" + ) + + params$redirect <- force_length( + params$redirect, n_stats, + warn_longer = TRUE, arg = "redirect" + ) + + params + }, + + compute_layer = function(self, data, params, layout) { + + n_stats <- length(params$stats) + + for (i in seq_len(n_stats)) { + stat <- params$stats[[i]] + param <- params$stat.params[[i]] + + # We repeat the `layer()` duty of rejecting unknown parameters + valid <- stat$parameters(TRUE) + extra_param <- setdiff(names(param), valid) + if (length(extra_param) > 0) { + cli::cli_warn("Ignoring unknown parameters: {.arg {extra_param}}.") + } + param <- param[intersect(names(param), valid)] + if (length(param) < 1) { + param <- list() + } + + # Repeat `Layer$compute_statistic()` duty + computed_param <- stat$setup_params(data, param) + computed_param$na.rm <- computed_param$na.rm %||% params$na.rm + data <- stat$setup_data(data, computed_param) + data <- stat$compute_layer(data, computed_param, layout) + if (nrow(data) < 1) { + return(data) + } + + # Repeat `Layer$map_statistic()` duty, skipping backtransforms and such + aes <- stat$default_aes[is_calculated_aes(stat$default_aes)] + aes <- aes[setdiff(names(aes), names(data))] + aes <- compact(defaults(params$redirect[[i]], aes)) + if (length(aes) == 0) { + next + } + new <- eval_aesthetics(substitute_aes(aes), data) + check_nondata_cols( + new, aes, + problem = "Aesthetics must be valid computed stats.", + hint = "Did you specify the `redirect` argument correctly?" + ) + data[names(new)] <- new + } + + data + } +) + +force_length <- function(x, n = length(x), padding = list(NULL), + warn_longer = FALSE, warn_shorter = FALSE, + arg = caller_arg(x)) { + force(arg) + nx <- length(x) + if (nx == n) { + return(x) + } + n_pad <- n - nx + if (n_pad > 0) { + x <- c(x, rep(padding, length = n_pad)) + if (isTRUE(warn_shorter)) { + cli::cli_warn( + "Padded {.arg {arg}} with {n_pad} element{?s}." + ) + } + } else if (n_pad < 0) { + x <- x[seq_len(n)] + if (isTRUE(warn_longer)) { + cli::cli_warn( + "Dropped {abs(n_pad)} excess element{?s} from {.arg {arg}}." + ) + } + } + x +} From 0e75964a32eafc504acd2ea39d20cff907737d61 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 7 Feb 2025 11:38:13 +0100 Subject: [PATCH 2/8] document --- DESCRIPTION | 1 + NAMESPACE | 2 + R/stat-chain.R | 38 +++++++++++ man/ggplot2-ggproto.Rd | 11 ++-- man/stat_chain.Rd | 145 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 192 insertions(+), 5 deletions(-) create mode 100644 man/stat_chain.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 753e7dd49a..d85c7f6c98 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -247,6 +247,7 @@ Collate: 'stat-bindot.R' 'stat-binhex.R' 'stat-boxplot.R' + 'stat-chain.R' 'stat-contour.R' 'stat-count.R' 'stat-density-2d.R' diff --git a/NAMESPACE b/NAMESPACE index b58765ecc1..e34f2fd24f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -257,6 +257,7 @@ export(StatBin2d) export(StatBindot) export(StatBinhex) export(StatBoxplot) +export(StatChain) export(StatContour) export(StatContourFilled) export(StatCount) @@ -684,6 +685,7 @@ export(stat_bin_2d) export(stat_bin_hex) export(stat_binhex) export(stat_boxplot) +export(stat_chain) export(stat_contour) export(stat_contour_filled) export(stat_count) diff --git a/R/stat-chain.R b/R/stat-chain.R index c7840048e3..313f54a2bd 100644 --- a/R/stat-chain.R +++ b/R/stat-chain.R @@ -1,3 +1,37 @@ +#' Chain statistic computation +#' +#' This statistic layer can take multiple stats and chain these together +#' to transform the data in a series of computations. +#' +#' @inheritParams layer +#' @inheritParams geom_point +#' @param stats A character vector or list of statistical transformations to use +#' for this layer. Every element needs to be one of the following: +#' * A `Stat` ggproto subclass, for example `StatCount` +#' * A string naming the stat. To give the stat as a string, strip the +#' function name of the `stat_` prefix. For example, to use `stat_count()`, +#' give the stat as `"count"`. +#' @param stat.params A list of parameters parallel to the `stats` argument. +#' Use `NULL` elements to declare no parameters. +#' @param redirect A list of mappings parallel to the `stats` argument that +#' are evaluated after the stat has been computed. +#' +#' @export +#' +#' @examples +#' p <- ggplot(mpg, aes(displ, colour = drv)) +#' # Binning unique observations +#' p + stat_chain(stats = c("unique", "bin")) +#' # Controlling parameters +#' p + stat_chain( +#' stats = c("unique", "bin"), +#' stat.params = list(NULL, list(bins = 10)) +#' ) +#' # Evaluate expressions after computing stats +#' p + stat_chain( +#' stats = c("unique", "bin"), +#' redirect = list(aes(x = x + 1), aes(y = density)) +#' ) stat_chain <- function( mapping = NULL, data = NULL, @@ -29,6 +63,10 @@ stat_chain <- function( ) } +#' @rdname ggplot2-ggproto +#' @format NULL +#' @usage NULL +#' @export StatChain <- ggproto( "StatChain", Stat, diff --git a/man/ggplot2-ggproto.Rd b/man/ggplot2-ggproto.Rd index 6658fdafb9..8f7227f890 100644 --- a/man/ggplot2-ggproto.Rd +++ b/man/ggplot2-ggproto.Rd @@ -21,11 +21,11 @@ % R/position-stack.R, R/scale-.R, R/scale-binned.R, R/scale-continuous.R, % R/scale-date.R, R/scale-discrete-.R, R/scale-identity.R, R/stat-align.R, % R/stat-bin.R, R/stat-summary-2d.R, R/stat-bin2d.R, R/stat-bindot.R, -% R/stat-binhex.R, R/stat-boxplot.R, R/stat-contour.R, R/stat-count.R, -% R/stat-density-2d.R, R/stat-density.R, R/stat-ecdf.R, R/stat-ellipse.R, -% R/stat-function.R, R/stat-identity.R, R/stat-manual.R, R/stat-qq-line.R, -% R/stat-qq.R, R/stat-quantilemethods.R, R/stat-smooth.R, R/stat-sum.R, -% R/stat-summary-bin.R, R/stat-summary-hex.R, R/stat-summary.R, +% R/stat-binhex.R, R/stat-boxplot.R, R/stat-chain.R, R/stat-contour.R, +% R/stat-count.R, R/stat-density-2d.R, R/stat-density.R, R/stat-ecdf.R, +% R/stat-ellipse.R, R/stat-function.R, R/stat-identity.R, R/stat-manual.R, +% R/stat-qq-line.R, R/stat-qq.R, R/stat-quantilemethods.R, R/stat-smooth.R, +% R/stat-sum.R, R/stat-summary-bin.R, R/stat-summary-hex.R, R/stat-summary.R, % R/stat-unique.R, R/stat-ydensity.R \docType{data} \name{ggplot2-ggproto} @@ -131,6 +131,7 @@ \alias{StatBindot} \alias{StatBinhex} \alias{StatBoxplot} +\alias{StatChain} \alias{StatContour} \alias{StatContourFilled} \alias{StatCount} diff --git a/man/stat_chain.Rd b/man/stat_chain.Rd new file mode 100644 index 0000000000..d5e3532880 --- /dev/null +++ b/man/stat_chain.Rd @@ -0,0 +1,145 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stat-chain.R +\name{stat_chain} +\alias{stat_chain} +\title{Chain statistic computation} +\usage{ +stat_chain( + mapping = NULL, + data = NULL, + geom = "path", + position = "identity", + ..., + stats = "identity", + stat.params = list(), + redirect = list(), + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE +) +} +\arguments{ +\item{mapping}{Set of aesthetic mappings created by \code{\link[=aes]{aes()}}. If specified and +\code{inherit.aes = TRUE} (the default), it is combined with the default mapping +at the top level of the plot. You must supply \code{mapping} if there is no plot +mapping.} + +\item{data}{The data to be displayed in this layer. There are three +options: + +If \code{NULL}, the default, the data is inherited from the plot +data as specified in the call to \code{\link[=ggplot]{ggplot()}}. + +A \code{data.frame}, or other object, will override the plot +data. All objects will be fortified to produce a data frame. See +\code{\link[=fortify]{fortify()}} for which variables will be created. + +A \code{function} will be called with a single argument, +the plot data. The return value must be a \code{data.frame}, and +will be used as the layer data. A \code{function} can be created +from a \code{formula} (e.g. \code{~ head(.x, 10)}).} + +\item{geom}{The geometric object to use to display the data for this layer. +When using a \verb{stat_*()} function to construct a layer, the \code{geom} argument +can be used to override the default coupling between stats and geoms. The +\code{geom} argument accepts the following: +\itemize{ +\item A \code{Geom} ggproto subclass, for example \code{GeomPoint}. +\item A string naming the geom. To give the geom as a string, strip the +function name of the \code{geom_} prefix. For example, to use \code{geom_point()}, +give the geom as \code{"point"}. +\item For more information and other ways to specify the geom, see the +\link[=layer_geoms]{layer geom} documentation. +}} + +\item{position}{A position adjustment to use on the data for this layer. This +can be used in various ways, including to prevent overplotting and +improving the display. The \code{position} argument accepts the following: +\itemize{ +\item The result of calling a position function, such as \code{position_jitter()}. +This method allows for passing extra arguments to the position. +\item A string naming the position adjustment. To give the position as a +string, strip the function name of the \code{position_} prefix. For example, +to use \code{position_jitter()}, give the position as \code{"jitter"}. +\item For more information and other ways to specify the position, see the +\link[=layer_positions]{layer position} documentation. +}} + +\item{...}{Other arguments passed on to \code{\link[=layer]{layer()}}'s \code{params} argument. These +arguments broadly fall into one of 4 categories below. Notably, further +arguments to the \code{position} argument, or aesthetics that are required +can \emph{not} be passed through \code{...}. Unknown arguments that are not part +of the 4 categories below are ignored. +\itemize{ +\item Static aesthetics that are not mapped to a scale, but are at a fixed +value and apply to the layer as a whole. For example, \code{colour = "red"} +or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} +section that lists the available options. The 'required' aesthetics +cannot be passed on to the \code{params}. Please note that while passing +unmapped aesthetics as vectors is technically possible, the order and +required length is not guaranteed to be parallel to the input data. +\item When constructing a layer using +a \verb{stat_*()} function, the \code{...} argument can be used to pass on +parameters to the \code{geom} part of the layer. An example of this is +\code{stat_density(geom = "area", outline.type = "both")}. The geom's +documentation lists which parameters it can accept. +\item Inversely, when constructing a layer using a +\verb{geom_*()} function, the \code{...} argument can be used to pass on parameters +to the \code{stat} part of the layer. An example of this is +\code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation +lists which parameters it can accept. +\item The \code{key_glyph} argument of \code{\link[=layer]{layer()}} may also be passed on through +\code{...}. This can be one of the functions described as +\link[=draw_key]{key glyphs}, to change the display of the layer in the legend. +}} + +\item{stats}{A character vector or list of statistical transformations to use +for this layer. Every element needs to be one of the following: +\itemize{ +\item A \code{Stat} ggproto subclass, for example \code{StatCount} +\item A string naming the stat. To give the stat as a string, strip the +function name of the \code{stat_} prefix. For example, to use \code{stat_count()}, +give the stat as \code{"count"}. +}} + +\item{stat.params}{A list of parameters parallel to the \code{stats} argument. +Use \code{NULL} elements to declare no parameters.} + +\item{redirect}{A list of mappings parallel to the \code{stats} argument that +are evaluated after the stat has been computed.} + +\item{na.rm}{If \code{FALSE}, the default, missing values are removed with +a warning. If \code{TRUE}, missing values are silently removed.} + +\item{show.legend}{logical. Should this layer be included in the legends? +\code{NA}, the default, includes if any aesthetics are mapped. +\code{FALSE} never includes, and \code{TRUE} always includes. +It can also be a named logical vector to finely select the aesthetics to +display. To include legend keys for all levels, even +when no data exists, use \code{TRUE}. If \code{NA}, all levels are shown in legend, +but unobserved levels are omitted.} + +\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, +rather than combining with them. This is most useful for helper functions +that define both data and aesthetics and shouldn't inherit behaviour from +the default plot specification, e.g. \code{\link[=borders]{borders()}}.} +} +\description{ +This statistic layer can take multiple stats and chain these together +to transform the data in a series of computations. +} +\examples{ +p <- ggplot(mpg, aes(displ, colour = drv)) +# Binning unique observations +p + stat_chain(stats = c("unique", "bin")) +# Controlling parameters +p + stat_chain( + stats = c("unique", "bin"), + stat.params = list(NULL, list(bins = 10)) +) +# Evaluate expressions after computing stats +p + stat_chain( + stats = c("unique", "bin"), + redirect = list(aes(x = x + 1), aes(y = density)) +) +} From 5b906b9ff9b341972c41029a535aafabeaed0395 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 7 Feb 2025 11:38:34 +0100 Subject: [PATCH 3/8] add test --- tests/testthat/test-stat-chain.R | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/testthat/test-stat-chain.R diff --git a/tests/testthat/test-stat-chain.R b/tests/testthat/test-stat-chain.R new file mode 100644 index 0000000000..c3af412997 --- /dev/null +++ b/tests/testthat/test-stat-chain.R @@ -0,0 +1,31 @@ +test_that("stat_chain can chain multiple stats", { + + df <- data.frame(x = c(1, 1.9, 2.1, 3, 3, 3)) + + p <- ggplot(df, aes(x)) + + stat_chain( + stats = "bin", stat.params = list(list(breaks = c(0.5:3.5))) + ) + + stat_chain( + stats = c("unique", "bin"), + stat.params = list(NULL, list(breaks = 0.5:3.5)) + ) + + stat_chain( + stats = c("unique", "bin"), + stat.params = list(NULL, list(breaks = 0.5:3.5)), + redirect = list(NULL, aes(y = -count)) + ) + p <- ggplot_build(p) + + ld <- get_layer_data(p, 1L) + expect_equal(ld$x, 1:3) + expect_equal(ld$y, 1:3) + + ld <- get_layer_data(p, 2L) + expect_equal(ld$x, 1:3) + expect_equal(ld$y, c(1, 2, 1)) + + ld <- get_layer_data(p, 3L) + expect_equal(ld$x, 1:3) + expect_equal(ld$y, c(-1, -2, -1)) +}) From bc2f32c33c0727db1cf385acd916309ea554f32b Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 7 Feb 2025 13:50:22 +0100 Subject: [PATCH 4/8] rethink API --- NAMESPACE | 1 + R/stat-chain.R | 124 +++++++++++++++---------------- man/link_stat.Rd | 36 +++++++++ man/stat_chain.Rd | 12 +-- tests/testthat/test-stat-chain.R | 9 +-- 5 files changed, 102 insertions(+), 80 deletions(-) create mode 100644 man/link_stat.Rd diff --git a/NAMESPACE b/NAMESPACE index e34f2fd24f..5d94ac4a5c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -499,6 +499,7 @@ export(layer_grob) export(layer_scales) export(layer_sf) export(lims) +export(link_stat) export(map_data) export(margin) export(margin_auto) diff --git a/R/stat-chain.R b/R/stat-chain.R index 313f54a2bd..7d4fe1cfcf 100644 --- a/R/stat-chain.R +++ b/R/stat-chain.R @@ -11,11 +11,9 @@ #' * A string naming the stat. To give the stat as a string, strip the #' function name of the `stat_` prefix. For example, to use `stat_count()`, #' give the stat as `"count"`. -#' @param stat.params A list of parameters parallel to the `stats` argument. -#' Use `NULL` elements to declare no parameters. -#' @param redirect A list of mappings parallel to the `stats` argument that -#' are evaluated after the stat has been computed. +#' * The result of [`link_stat()`] to pass parameters or mapping instructions. #' +#' @seealso [link_stat()] #' @export #' #' @examples @@ -39,8 +37,6 @@ stat_chain <- function( position = "identity", ..., stats = "identity", - stat.params = list(), - redirect = list(), na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) { @@ -56,13 +52,52 @@ stat_chain <- function( params = list2( na.rm = na.rm, stats = stats, - stat.params = stat.params, - redirect = redirect, ... ) ) } +#' Parameterise a statistic computation +#' +#' This is a helper function for [`stat_chain()`] to pass parameters and declare +#' mappings. +#' +#' @param stat The statistical transformation to use on the data. The `stat` +#' argument accepts the following: +#' * A `Stat` ggproto subclass, for example `StatCount`. +#' * A string naming the stat. To give the stat as a string, strip the +#' function name of the `stat_` prefix. For example, for `stat_count()`, give +#' the string `"count"`. +#' @param ... Other arguments passed to the stat as a parameter. +#' @param mapping Set of aesthetic mappings created by [`aes()`] to be +#' evaluated only after the stat has been computed. +#' +#' @seealso [stat_chain()] +#' @returns A list bundling the stat, parameters and mapping. +#' @export +#' +#' @examples +#' # See `?stat_chain` +link_stat <- function(stat, ..., mapping = aes()) { + if (inherits(stat, "linked_stat")) { + return(stat) + } + + stat <- validate_subclass(stat, "Stat") + + params <- list2(...) + extra <- setdiff(names(params), stat$parameters(TRUE)) + if (length(extra) > 0) { + cli::cli_warn("Ignoring unknown parameters: {.arg {extra}}.") + params <- params[setdiff(names(params), extra)] + } + + structure( + list(stat = stat, params = params, mapping = validate_mapping(mapping)), + class = "linked_stat" + ) +} + #' @rdname ggplot2-ggproto #' @format NULL #' @usage NULL @@ -70,46 +105,28 @@ stat_chain <- function( StatChain <- ggproto( "StatChain", Stat, - extra_params = c("na.rm", "stats", "stat.params", "redirect"), + extra_params = c("na.rm", "stats"), setup_params = function(data, params) { - params$stats <- lapply(params$stats, validate_subclass, subclass = "Stat") - n_stats <- length(params$stats) - - params$stat.params <- force_length( - params$stat.params, n_stats, - warn_longer = TRUE, arg = "stat.params" - ) - - params$redirect <- force_length( - params$redirect, n_stats, - warn_longer = TRUE, arg = "redirect" - ) + if (inherits(params$stats, "linked_stat")) { + # When a single linked stat is passed outside a list, repair to list + # When using a single stat, using the appropriate `stat_*()` constructor + # is better, but we should consider programmatic use too. + params$stats <- list(params$stats) + } + params$stats <- lapply(params$stats, link_stat) params }, compute_layer = function(self, data, params, layout) { - n_stats <- length(params$stats) - - for (i in seq_len(n_stats)) { - stat <- params$stats[[i]] - param <- params$stat.params[[i]] - - # We repeat the `layer()` duty of rejecting unknown parameters - valid <- stat$parameters(TRUE) - extra_param <- setdiff(names(param), valid) - if (length(extra_param) > 0) { - cli::cli_warn("Ignoring unknown parameters: {.arg {extra_param}}.") - } - param <- param[intersect(names(param), valid)] - if (length(param) < 1) { - param <- list() - } + for (i in seq_along(params$stats)) { + link <- params$stats[[i]] + stat <- link$stat # Repeat `Layer$compute_statistic()` duty - computed_param <- stat$setup_params(data, param) + computed_param <- stat$setup_params(data, link$params) computed_param$na.rm <- computed_param$na.rm %||% params$na.rm data <- stat$setup_data(data, computed_param) data <- stat$compute_layer(data, computed_param, layout) @@ -119,8 +136,10 @@ StatChain <- ggproto( # Repeat `Layer$map_statistic()` duty, skipping backtransforms and such aes <- stat$default_aes[is_calculated_aes(stat$default_aes)] + # TODO: ideally we'd have access to Layer$computed_mapping to properly + # not touch user-specified mappings. aes <- aes[setdiff(names(aes), names(data))] - aes <- compact(defaults(params$redirect[[i]], aes)) + aes <- compact(defaults(link$mapping, aes)) if (length(aes) == 0) { next } @@ -136,30 +155,3 @@ StatChain <- ggproto( data } ) - -force_length <- function(x, n = length(x), padding = list(NULL), - warn_longer = FALSE, warn_shorter = FALSE, - arg = caller_arg(x)) { - force(arg) - nx <- length(x) - if (nx == n) { - return(x) - } - n_pad <- n - nx - if (n_pad > 0) { - x <- c(x, rep(padding, length = n_pad)) - if (isTRUE(warn_shorter)) { - cli::cli_warn( - "Padded {.arg {arg}} with {n_pad} element{?s}." - ) - } - } else if (n_pad < 0) { - x <- x[seq_len(n)] - if (isTRUE(warn_longer)) { - cli::cli_warn( - "Dropped {abs(n_pad)} excess element{?s} from {.arg {arg}}." - ) - } - } - x -} diff --git a/man/link_stat.Rd b/man/link_stat.Rd new file mode 100644 index 0000000000..31fa69655b --- /dev/null +++ b/man/link_stat.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stat-chain.R +\name{link_stat} +\alias{link_stat} +\title{Parameterise a statistic computation} +\usage{ +link_stat(stat, ..., mapping = aes()) +} +\arguments{ +\item{stat}{The statistical transformation to use on the data. The \code{stat} +argument accepts the following: +\itemize{ +\item A \code{Stat} ggproto subclass, for example \code{StatCount}. +\item A string naming the stat. To give the stat as a string, strip the +function name of the \code{stat_} prefix. For example, for \code{stat_count()}, give +the string \code{"count"}. +}} + +\item{...}{Other arguments passed to the stat as a parameter.} + +\item{mapping}{Set of aesthetic mappings created by \code{\link[=aes]{aes()}} to be +evaluated only after the stat has been computed.} +} +\value{ +A list bundling the stat, parameters and mapping. +} +\description{ +This is a helper function for \code{\link[=stat_chain]{stat_chain()}} to pass parameters and declare +mappings. +} +\examples{ +# See `?stat_chain` +} +\seealso{ +\code{\link[=stat_chain]{stat_chain()}} +} diff --git a/man/stat_chain.Rd b/man/stat_chain.Rd index d5e3532880..6c61739ce0 100644 --- a/man/stat_chain.Rd +++ b/man/stat_chain.Rd @@ -11,8 +11,6 @@ stat_chain( position = "identity", ..., stats = "identity", - stat.params = list(), - redirect = list(), na.rm = FALSE, show.legend = NA, inherit.aes = TRUE @@ -100,14 +98,9 @@ for this layer. Every element needs to be one of the following: \item A string naming the stat. To give the stat as a string, strip the function name of the \code{stat_} prefix. For example, to use \code{stat_count()}, give the stat as \code{"count"}. +\item The result of \code{\link[=link_stat]{link_stat()}} to pass parameters or mapping instructions. }} -\item{stat.params}{A list of parameters parallel to the \code{stats} argument. -Use \code{NULL} elements to declare no parameters.} - -\item{redirect}{A list of mappings parallel to the \code{stats} argument that -are evaluated after the stat has been computed.} - \item{na.rm}{If \code{FALSE}, the default, missing values are removed with a warning. If \code{TRUE}, missing values are silently removed.} @@ -143,3 +136,6 @@ p + stat_chain( redirect = list(aes(x = x + 1), aes(y = density)) ) } +\seealso{ +\code{\link[=link_stat]{link_stat()}} +} diff --git a/tests/testthat/test-stat-chain.R b/tests/testthat/test-stat-chain.R index c3af412997..924eb7be5c 100644 --- a/tests/testthat/test-stat-chain.R +++ b/tests/testthat/test-stat-chain.R @@ -4,16 +4,13 @@ test_that("stat_chain can chain multiple stats", { p <- ggplot(df, aes(x)) + stat_chain( - stats = "bin", stat.params = list(list(breaks = c(0.5:3.5))) + stats = list(link_stat("bin", breaks = 0.5:3.5)) ) + stat_chain( - stats = c("unique", "bin"), - stat.params = list(NULL, list(breaks = 0.5:3.5)) + stats = list("unique", link_stat("bin", breaks = 0.5:3.5)), ) + stat_chain( - stats = c("unique", "bin"), - stat.params = list(NULL, list(breaks = 0.5:3.5)), - redirect = list(NULL, aes(y = -count)) + stats = list("unique", link_stat("bin", breaks = 0.5:3.5, mapping = aes(y = -count))) ) p <- ggplot_build(p) From 3ad38f50ff35d1cf6cb941a482a4fac857572a0c Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 7 Feb 2025 14:04:11 +0100 Subject: [PATCH 5/8] fix pkgdown complaints --- R/stat-chain.R | 1 + _pkgdown.yml | 1 + man/link_stat.Rd | 1 + 3 files changed, 3 insertions(+) diff --git a/R/stat-chain.R b/R/stat-chain.R index 7d4fe1cfcf..93d5f1339b 100644 --- a/R/stat-chain.R +++ b/R/stat-chain.R @@ -75,6 +75,7 @@ stat_chain <- function( #' @seealso [stat_chain()] #' @returns A list bundling the stat, parameters and mapping. #' @export +#' @keywords internal #' #' @examples #' # See `?stat_chain` diff --git a/_pkgdown.yml b/_pkgdown.yml index 0259312234..2ed79eca41 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -69,6 +69,7 @@ reference: - stat_unique - stat_sf_coordinates - stat_manual + - stat_chain - after_stat - subtitle: Position adjustment diff --git a/man/link_stat.Rd b/man/link_stat.Rd index 31fa69655b..55dc1f6d9d 100644 --- a/man/link_stat.Rd +++ b/man/link_stat.Rd @@ -34,3 +34,4 @@ mappings. \seealso{ \code{\link[=stat_chain]{stat_chain()}} } +\keyword{internal} From b7913107fd6f0b5db9a1b53169fc2fcd1082a765 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 26 Feb 2025 11:08:17 +0100 Subject: [PATCH 6/8] rename `link_stat(mapping)` to `after.stat` --- R/stat-chain.R | 8 ++++---- man/link_stat.Rd | 4 ++-- tests/testthat/test-stat-chain.R | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/stat-chain.R b/R/stat-chain.R index 93d5f1339b..b6d11ea6fe 100644 --- a/R/stat-chain.R +++ b/R/stat-chain.R @@ -69,7 +69,7 @@ stat_chain <- function( #' function name of the `stat_` prefix. For example, for `stat_count()`, give #' the string `"count"`. #' @param ... Other arguments passed to the stat as a parameter. -#' @param mapping Set of aesthetic mappings created by [`aes()`] to be +#' @param after.stat Set of aesthetic mappings created by [`aes()`] to be #' evaluated only after the stat has been computed. #' #' @seealso [stat_chain()] @@ -79,7 +79,7 @@ stat_chain <- function( #' #' @examples #' # See `?stat_chain` -link_stat <- function(stat, ..., mapping = aes()) { +link_stat <- function(stat, ..., after.stat = aes()) { if (inherits(stat, "linked_stat")) { return(stat) } @@ -94,7 +94,7 @@ link_stat <- function(stat, ..., mapping = aes()) { } structure( - list(stat = stat, params = params, mapping = validate_mapping(mapping)), + list(stat = stat, params = params, after_stat = validate_mapping(after.stat)), class = "linked_stat" ) } @@ -140,7 +140,7 @@ StatChain <- ggproto( # TODO: ideally we'd have access to Layer$computed_mapping to properly # not touch user-specified mappings. aes <- aes[setdiff(names(aes), names(data))] - aes <- compact(defaults(link$mapping, aes)) + aes <- compact(defaults(link$after_stat, aes)) if (length(aes) == 0) { next } diff --git a/man/link_stat.Rd b/man/link_stat.Rd index 55dc1f6d9d..77a3d33009 100644 --- a/man/link_stat.Rd +++ b/man/link_stat.Rd @@ -4,7 +4,7 @@ \alias{link_stat} \title{Parameterise a statistic computation} \usage{ -link_stat(stat, ..., mapping = aes()) +link_stat(stat, ..., after.stat = aes()) } \arguments{ \item{stat}{The statistical transformation to use on the data. The \code{stat} @@ -18,7 +18,7 @@ the string \code{"count"}. \item{...}{Other arguments passed to the stat as a parameter.} -\item{mapping}{Set of aesthetic mappings created by \code{\link[=aes]{aes()}} to be +\item{after.stat}{Set of aesthetic mappings created by \code{\link[=aes]{aes()}} to be evaluated only after the stat has been computed.} } \value{ diff --git a/tests/testthat/test-stat-chain.R b/tests/testthat/test-stat-chain.R index 924eb7be5c..2d3fd18367 100644 --- a/tests/testthat/test-stat-chain.R +++ b/tests/testthat/test-stat-chain.R @@ -10,7 +10,7 @@ test_that("stat_chain can chain multiple stats", { stats = list("unique", link_stat("bin", breaks = 0.5:3.5)), ) + stat_chain( - stats = list("unique", link_stat("bin", breaks = 0.5:3.5, mapping = aes(y = -count))) + stats = list("unique", link_stat("bin", breaks = 0.5:3.5, after.stat = aes(y = -count))) ) p <- ggplot_build(p) From 495c7fa9d1adc21d1aa9c72bd918bdcbb1b3b350 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 26 Feb 2025 11:26:52 +0100 Subject: [PATCH 7/8] explain procedure in details --- R/stat-chain.R | 15 +++++++++++++++ man/stat_chain.Rd | 15 +++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/R/stat-chain.R b/R/stat-chain.R index b6d11ea6fe..4a93affab9 100644 --- a/R/stat-chain.R +++ b/R/stat-chain.R @@ -14,6 +14,21 @@ #' * The result of [`link_stat()`] to pass parameters or mapping instructions. #' #' @seealso [link_stat()] +#' @details +#' The procedure in which stats are chained are as follows. First, the +#' layer-level, undelayed aesthetics in the `mapping` argument are evaluated. +#' The data that results from that evaluation is passed to the first stat in +#' the `stats` argument to perform that stat's computation. If that first stat +#' is a [`link_stat`] with an `after.stat` component, the `after.stat` component +#' is evaluated before passing on the data to the next stat in the `stats` +#' argument. The next components in the `stats` argument work the same: the +#' data is passed on to compute the stat, then `after.stat` is evaluated. In +#' essence, the `after.stat` allows control over how computed variables are +#' passed to the next stat in the chain. Finally, once all components in the +#' `stats` arguments have been handled, the staged after stat components of +#' the layer-level `mapping` is evaluated. Per usual, the data are then handled +#' by the position and geom parts of a layer. +#' #' @export #' #' @examples diff --git a/man/stat_chain.Rd b/man/stat_chain.Rd index 6c61739ce0..8ec89366ce 100644 --- a/man/stat_chain.Rd +++ b/man/stat_chain.Rd @@ -121,6 +121,21 @@ the default plot specification, e.g. \code{\link[=borders]{borders()}}.} This statistic layer can take multiple stats and chain these together to transform the data in a series of computations. } +\details{ +The procedure in which stats are chained are as follows. First, the +layer-level, undelayed aesthetics in the \code{mapping} argument are evaluated. +The data that results from that evaluation is passed to the first stat in +the \code{stats} argument to perform that stat's computation. If that first stat +is a \code{\link{link_stat}} with an \code{after.stat} component, the \code{after.stat} component +is evaluated before passing on the data to the next stat in the \code{stats} +argument. The next components in the \code{stats} argument work the same: the +data is passed on to compute the stat, then \code{after.stat} is evaluated. In +essence, the \code{after.stat} allows control over how computed variables are +passed to the next stat in the chain. Finally, once all components in the +\code{stats} arguments have been handled, the staged after stat components of +the layer-level \code{mapping} is evaluated. Per usual, the data are then handled +by the position and geom parts of a layer. +} \examples{ p <- ggplot(mpg, aes(displ, colour = drv)) # Binning unique observations From f64a8450e99b3f5401f77ae01f01966ceeb4b732 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 26 Feb 2025 12:02:29 +0100 Subject: [PATCH 8/8] adapt examples --- R/stat-chain.R | 14 ++++++++++---- man/stat_chain.Rd | 14 ++++++++++---- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/R/stat-chain.R b/R/stat-chain.R index 4a93affab9..6ae1bfc35d 100644 --- a/R/stat-chain.R +++ b/R/stat-chain.R @@ -37,13 +37,19 @@ #' p + stat_chain(stats = c("unique", "bin")) #' # Controlling parameters #' p + stat_chain( -#' stats = c("unique", "bin"), -#' stat.params = list(NULL, list(bins = 10)) +#' stats = list("unique", link_stat("bin", bins = 10)) #' ) #' # Evaluate expressions after computing stats +#' p + stat_chain(stats = list( +#' link_stat("unique", after.stat = aes(x = x + 1)), +#' link_stat("density", after.stat = aes(y = density)) +#' )) +#' # Note that the last `after.stat` argument serves the same role as the +#' # `after_stat()` function in the layer mapping, so the following is +#' # equivalent to the previous plot #' p + stat_chain( -#' stats = c("unique", "bin"), -#' redirect = list(aes(x = x + 1), aes(y = density)) +#' mapping = aes(y = after_stat(density)), +#' stats = list(link_stat("unique", after.stat = aes(x = x + 1)), "density") #' ) stat_chain <- function( mapping = NULL, diff --git a/man/stat_chain.Rd b/man/stat_chain.Rd index 8ec89366ce..a0917bda3b 100644 --- a/man/stat_chain.Rd +++ b/man/stat_chain.Rd @@ -142,13 +142,19 @@ p <- ggplot(mpg, aes(displ, colour = drv)) p + stat_chain(stats = c("unique", "bin")) # Controlling parameters p + stat_chain( - stats = c("unique", "bin"), - stat.params = list(NULL, list(bins = 10)) + stats = list("unique", link_stat("bin", bins = 10)) ) # Evaluate expressions after computing stats +p + stat_chain(stats = list( + link_stat("unique", after.stat = aes(x = x + 1)), + link_stat("density", after.stat = aes(y = density)) +)) +# Note that the last `after.stat` argument serves the same role as the +# `after_stat()` function in the layer mapping, so the following is +# equivalent to the previous plot p + stat_chain( - stats = c("unique", "bin"), - redirect = list(aes(x = x + 1), aes(y = density)) + mapping = aes(y = after_stat(density)), + stats = list(link_stat("unique", after.stat = aes(x = x + 1)), "density") ) } \seealso{