From 21df89ff3f78fa69a1a2ff72eb0accc56524ae23 Mon Sep 17 00:00:00 2001 From: Yunuuuu Date: Tue, 7 Jan 2025 23:56:58 +0800 Subject: [PATCH] re-design `Align` --- R/align-.R | 153 +-------------- R/align-dendrogram.R | 77 +++----- R/align-group.R | 32 ++-- R/align-hclust.R | 276 ++++++++++++++------------- R/align-kmeans.R | 22 +-- R/align-order.R | 90 ++++----- R/align-reorder.R | 82 ++++++-- R/dendrogram.R | 13 +- R/utils-assert.R | 9 +- man/align.Rd | 9 +- man/align_kmeans.Rd | 17 +- man/fortify_data_frame.dendrogram.Rd | 13 +- tests/testthat/_snaps/plot-align.md | 8 +- 13 files changed, 333 insertions(+), 468 deletions(-) diff --git a/R/align-.R b/R/align-.R index d4656910..1a957730 100644 --- a/R/align-.R +++ b/R/align-.R @@ -8,14 +8,11 @@ #' #' @param align An `Align` object. #' @param ... Additional fields passed to the `align` object. -#' @param params A list of parameters for `align`. #' @param plot A ggplot object. #' @inheritParams ggalign #' @param schemes Options for `schemes`: #' - `NULL`: Used when `align` do not add a plot. #' - [`waiver()`][ggplot2::waiver]: Try to infer `schemes` based on `data`. -#' @param check.param Logical; if `TRUE`, checks parameters and provides -#' warnings as necessary. #' @param call The `call` used to construct the `Align` object, for reporting #' messages. #' @@ -32,11 +29,9 @@ #' @importFrom ggplot2 ggproto #' @export #' @keywords internal -align <- function(align, data, ..., - params = list(), plot = NULL, +align <- function(align, data = NULL, ..., plot = NULL, size = NULL, schemes = NULL, no_axes = NULL, - active = NULL, check.param = TRUE, - call = caller_call()) { + active = NULL, call = caller_call()) { if (override_call(call)) { call <- current_call() } @@ -47,18 +42,6 @@ align <- function(align, data, ..., getOption(sprintf("%s.align_no_axes", pkg_nm()), default = TRUE) schemes <- schemes %|w|% default_schemes(data) - # Warn about extra params or missing parameters --------------- - all <- align$parameters() - input <- names(params) %||% character() - if (isTRUE(check.param)) { - if (length(extra_param <- vec_set_difference(input, all))) { # nolint - cli_warn("Ignoring unknown parameters: {.arg {extra_param}}") - } - if (length(missing <- vec_set_difference(all, input))) { # nolint - cli_abort("missing parameters: {missing}", call = call) - } - } - input_params <- params[vec_set_intersect(input, all)] new_ggalign_plot( align = align, @@ -70,18 +53,13 @@ align <- function(align, data, ..., # and will be saved and accessed across the plot rendering process direction = NULL, position = NULL, - params = NULL, # `$setup_params` method - data = NULL, # $setup_data method + data = NULL, # Used to save the modified `input_data` statistics = NULL, # `$compute` method labels = NULL, # the original `vec_names()` of the `input_data` - # use `NULL` if this align don't require any data - # use `waiver()` to inherit from the layout data + # the input data input_data = data, - # collect parameters - input_params = input_params, - # object slots plot = plot, active = active, @@ -113,109 +91,16 @@ align <- function(align, data, ..., #' @rdname align #' @include plot-.R Align <- ggproto("Align", AlignProto, - parameters = function(self) { - c( - align_method_params( - self$compute, - align_method_params(Align$compute) - ), - align_method_params( - self$align, - align_method_params(Align$align) - ), - self$extra_params - ) - }, interact_layout = function(self, layout) { - layout_name <- self$layout_name - object_name <- object_name(self) # check plot is compatible with the layout if (is_layout_continuous(layout)) { + layout_name <- self$layout_name # `Align` object is special for discrete variables cli_abort(c( - sprintf("Cannot add %s to %s", object_name, layout_name), + sprintf("Cannot add %s to %s", object_name(self), layout_name), i = sprintf("%s cannot align discrete variables", layout_name) )) } - input_data <- self$input_data - input_params <- self$input_params - layout_data <- layout@data - design <- layout@design - layout_nobs <- .subset2(design, "nobs") - # we must have the same observations across all plots - # 1. if `Align` require data, the `nobs` should be nrow(data) - # 2. if not, we run `nobs()` method to initialize the layout nobs - if (!is.null(input_data)) { # this `Align` object require data - if (is.waive(input_data)) { # inherit from the layout - if (is.null(data <- layout_data)) { - cli_abort(c( - sprintf( - "you must provide {.arg data} in %s", - object_name - ), - i = sprintf("no data was found in %s", layout_name) - )) - } - } else { - if (is.function(input_data)) { - if (is.null(layout_data)) { - cli_abort(c( - sprintf( - "{.arg data} in %s cannot be a function", - object_name - ), - i = sprintf("no data was found in %s", layout_name) - )) - } - data <- input_data(layout_data) - } else { - data <- input_data - } - } - # we always regard rows as the observations - if (is.null(layout_nobs)) { - layout_nobs <- NROW(data) - if (layout_nobs == 0L) { - cli_abort("{.arg data} cannot be empty", call = self$call) - } - } else if (NROW(data) != layout_nobs) { - cli_abort(sprintf( - "%s (nobs: %d) is not compatible with the %s (nobs: %d)", - object_name, NROW(data), layout_name, layout_nobs - )) - } - - # save the labels - self$labels <- vec_names(data) %||% vec_names(layout_data) - params <- self$setup_params(layout_nobs, input_params) - self$data <- ggalign_attr_restore( - self$setup_data(params, data), - layout_data - ) - } else { # this `Align` object doesn't require any data - # we keep the names from the layout data for usage - self$labels <- vec_names(layout_data) - # If `nobs` is `NULL`, it means we don't initialize the layout - # observations, we initialize `nobs` with the `Align` obect - if (is.null(layout_nobs)) { - layout_nobs <- self$nobs(input_params) - if (!(is_scalar(layout_nobs) && is.integer(layout_nobs))) { - cli_abort(c( - sprintf( - "invalid {.field nobs} defined by %s", - object_name(self) - ), - i = "{.field nobs} must be a single integer" - )) - } - } - params <- self$setup_params(layout_nobs, input_params) - } - design["nobs"] <- list(layout_nobs) - layout@design <- design - - # save the parameters into the object ------------ - self$params <- params layout }, setup_design = function(self, design) { @@ -223,16 +108,10 @@ Align <- ggproto("Align", AlignProto, old_index <- .subset2(design, "index") # prepare the data ------------------------------- # compute statistics --------------------------------- - self$statistics <- align_inject( - self$compute, - c(list(panel = old_panel, index = old_index), self$params) - ) + self$statistics <- self$compute(panel = old_panel, index = old_index) # make the new layout ------------------------------- - panel_and_index <- align_inject( - self$align, - c(list(panel = old_panel, index = old_index), self$params) - ) + panel_and_index <- self$align(panel = old_panel, index = old_index) # check panel layout_name <- self$layout_name @@ -335,22 +214,6 @@ Align <- ggproto("Align", AlignProto, discrete_design(panel, index, nobs) }, - # Most parameters for the `Align` are taken automatically from `compute()`, - # `align()` and `build_plot()`. However, some additional parameters may be - # removed in `setup_params`. You should put these paramters here, otherwise, - # they won't be collected. - extra_params = character(), - setup_params = function(nobs, params) params, - setup_data = function(params, data) data, - - # You must provide `nobs()` function or data shouldn't be `NULL` - # If this `Align` doesn't initialize the layout observations, we should - # return `NULL`, in this way, you cannot use this `Align` object to - # initialize the layout panel or index. We always ensure the panel and index - # number is equal to `nobs`. If you want to indicates no obervations, you - # must return `0L`. - nobs = function(params) NULL, - # Following fields should be defined for the new `Align` object. # argument name in these function doesn't matter. compute = function(self, panel, index) NULL, diff --git a/R/align-dendrogram.R b/R/align-dendrogram.R index f67f0305..8483a63d 100644 --- a/R/align-dendrogram.R +++ b/R/align-dendrogram.R @@ -49,33 +49,15 @@ align_dendro <- function(mapping = aes(), ..., center = FALSE, type = "rectangle", size = NULL, data = NULL, no_axes = NULL, active = NULL) { - reorder_dendrogram <- allow_lambda(reorder_dendrogram) - if (!rlang::is_bool(reorder_dendrogram) && - !is.null(reorder_dendrogram) && - !is.function(reorder_dendrogram)) { - cli_abort( - "{.arg reorder_dendrogram} must be a single boolean value or a function" - ) - } - - assert_number_whole(k, allow_null = TRUE) - assert_number_decimal(h, allow_null = TRUE) - assert_bool(plot_cut_height, allow_null = TRUE, arg = "plot_cut_height") + assert_bool(plot_cut_height, allow_null = TRUE) assert_bool(merge_dendrogram) - assert_bool(reorder_group) - cutree <- allow_lambda(cutree) - assert_(cutree, is.function, "a function", allow_null = TRUE) - assert_bool(plot_dendrogram) - assert_mapping(mapping) - if (is.null(data)) { - if (inherits(method, "hclust") || inherits(method, "dendrogram")) { - data <- NULL # no need for data - } else { - data <- waiver() - } - } - assert_active(active) - active <- update_active(active, new_active(use = TRUE)) + + # setup the default value for `plot_cut_height` + plot_cut_height <- plot_cut_height %||% ( + # we by default don't draw the height of the user-provided cutree + # since function like `dynamicTreeCut` will merge tree + (!is.null(k) || !is.null(h)) && is.null(cutree) + ) plot <- ggplot(mapping = mapping) if (plot_dendrogram) { plot <- plot + ggplot2::geom_segment( @@ -88,29 +70,28 @@ align_dendro <- function(mapping = aes(), ..., data = function(data) ggalign_attr(data, "edge") ) } - align( + assert_active(active) + active <- update_active(active, new_active(use = TRUE)) + .align_hclust( align = AlignDendro, - params = list( - distance = distance, method = method, use_missing = use_missing, - k = k, h = h, plot_cut_height = plot_cut_height, - center = center, type = type, root = root, - reorder_dendrogram = reorder_dendrogram, - merge_dendro = merge_dendrogram, - reorder_group = reorder_group, - cutree = cutree - ), - no_axes = no_axes, active = active, - size = size, + distance = distance, + method = method, + use_missing = use_missing, + merge_dendro = merge_dendrogram, + plot_cut_height = plot_cut_height, + type = type, root = root, center = center, + reorder_dendrogram = reorder_dendrogram, + reorder_group = reorder_group, schemes = default_schemes(th = theme_no_strip()), - data = data, - plot = plot + k = k, h = h, cutree = cutree, data = data, active = active, + size = size, no_axes = no_axes, plot = plot ) } +#' @importFrom ggplot2 aes ggplot +#' @importFrom rlang inject #' @include align-hclust.R AlignDendro <- ggproto("AlignDendro", AlignHclust, - #' @importFrom ggplot2 aes ggplot - #' @importFrom rlang inject setup_plot = function(self, plot) { ggadd_default(plot, aes(x = .data$x, y = .data$y)) + switch_direction( self$direction, @@ -118,15 +99,12 @@ AlignDendro <- ggproto("AlignDendro", AlignHclust, ggplot2::labs(y = "height") ) }, - # other arguments - extra_params = c("plot_cut_height", "center", "type", "root"), build_plot = function(self, plot, design, extra_design = NULL, previous_design = NULL) { - params <- self$params - plot_cut_height <- .subset2(params, "plot_cut_height") - center <- .subset2(params, "center") - type <- .subset2(params, "type") - root <- .subset2(params, "root") + plot_cut_height <- self$plot_cut_height + center <- self$center + type <- self$type + root <- self$root panel <- .subset2(design, "panel") index <- .subset2(design, "index") @@ -210,6 +188,7 @@ AlignDendro <- ggproto("AlignDendro", AlignHclust, ) node <- rename(node, c(x = "y", y = "x")) } + # we do some tricks, since ggplot2 won't remove the attributes # we attach the `edge` data plot <- gguse_data(plot, ggalign_attr_set(node, list(edge = edge))) diff --git a/R/align-group.R b/R/align-group.R index 71eacd48..a702c100 100644 --- a/R/align-group.R +++ b/R/align-group.R @@ -16,32 +16,32 @@ #' @export align_group <- function(group, active = NULL) { assert_active(active) + if (vec_size(group) == 0L) { + cli_abort("{.arg group} cannot be empty") + } active <- update_active(active, new_active(use = FALSE)) align( align = AlignGroup, - params = list(group = group), - data = NULL, active = active, + group = group, + active = active, check.param = TRUE ) } #' @importFrom ggplot2 ggproto AlignGroup <- ggproto("AlignGroup", Align, - nobs = function(self, params) { - nobs <- vec_size(.subset2(params, "group")) - if (nobs == 0L) { - cli_abort("{.arg group} cannot be empty", call = self$call) + interact_layout = function(self, layout) { + layout <- ggproto_parent(Align, self)$interact_layout(layout) + if (is.null(layout_nobs <- .subset2(layout@design, "nobs"))) { + layout@design["nobs"] <- list(vec_size(self$group)) + } else { + assert_mismatch_nobs( + self, layout_nobs, vec_size(self$group), + arg = "group" + ) } - nobs + layout }, - setup_params = function(self, nobs, params) { - assert_mismatch_nobs( - self, nobs, self$nobs(params), - action = "must be an atomic vector", - arg = "group" - ) - params - }, - align = function(self, panel, index, group) list(group, index), + align = function(self, panel, index) list(self$group, index), summary_align = function(self) c(FALSE, TRUE) ) diff --git a/R/align-hclust.R b/R/align-hclust.R index a46a9c23..b8d7d9a1 100644 --- a/R/align-hclust.R +++ b/R/align-hclust.R @@ -42,146 +42,159 @@ align_hclust <- function(distance = "euclidean", reorder_group = FALSE, k = NULL, h = NULL, cutree = NULL, data = NULL, active = NULL) { + assert_active(active) + active <- update_active(active, new_active(use = FALSE)) + .align_hclust( + align = AlignHclust, + distance = distance, + method = method, + use_missing = use_missing, + reorder_dendrogram = reorder_dendrogram, + reorder_group = reorder_group, + k = k, h = h, cutree = cutree, data = data, active = active + ) +} + +.align_hclust <- function(align, ..., plot = NULL, + distance = "euclidean", + method = "complete", + use_missing = "pairwise.complete.obs", + reorder_dendrogram = FALSE, + reorder_group = FALSE, + k = NULL, h = NULL, cutree = NULL, + data = NULL, schemes = NULL, active = NULL, + call = caller_call()) { reorder_dendrogram <- allow_lambda(reorder_dendrogram) if (!rlang::is_bool(reorder_dendrogram) && !is.null(reorder_dendrogram) && !is.function(reorder_dendrogram)) { cli_abort( - "{.arg reorder_dendrogram} must be a single boolean value or a function" + "{.arg reorder_dendrogram} must be a single boolean value or a function", + call = call ) } - assert_number_whole(k, allow_null = TRUE) - assert_number_decimal(h, allow_null = TRUE) - assert_bool(reorder_group) + assert_number_whole(k, allow_null = TRUE, call = call) + assert_number_decimal(h, allow_null = TRUE, call = call) + assert_bool(reorder_group, call = call) cutree <- allow_lambda(cutree) - assert_(cutree, is.function, "a function", allow_null = TRUE) - if (is.null(data)) { - if (inherits(method, "hclust") || inherits(method, "dendrogram")) { - data <- NULL # no need for data - } else { - data <- waiver() + assert_(cutree, is.function, "a function", allow_null = TRUE, call = call) + if (inherits(method, "hclust")) { + if (vec_size(.subset2(method, "order")) == 0L) { + cli_abort("{.cls hclust} defined in {.arg method} cannot be empty", + call = call + ) + } + } else if (inherits(method, "dendrogram")) { + if (stats::nobs(method) == 0L) { + cli_abort( + "{.cls dendrogram} defined in {.arg method} cannot be empty", + call = call + ) } } - assert_active(active) - active <- update_active(active, new_active(use = FALSE)) + + if (isTRUE(reorder_dendrogram)) { + reorder_dendrogram <- function(tree, data) { + if (!inherits(tree, "dendrogram")) { + tree <- stats::as.dendrogram(tree) + } + reorder(x = tree, wts = rowMeans(data), agglo.FUN = mean) + } + } else if (is.function(reorder_dendrogram)) { + user_reorder <- reorder_dendrogram + reorder_dendrogram <- function(tree, data) { + # we ensure, what we input for user is a `hclust` object. + if (!inherits(tree, "hclust")) tree <- stats::as.hclust(tree) + ans <- user_reorder(tree, data) + if (!inherits(ans, "hclust") && + !inherits(ans, "dendrogram")) { + cli_abort( + "{.fn reorder_dendrogram} must return a {.cls hclust} or {.cls dendrogram} object", + call = call + ) + } + ans + } + } + align( - align = AlignHclust, - params = list( - distance = distance, method = method, use_missing = use_missing, - k = k, h = h, - reorder_dendrogram = reorder_dendrogram, - reorder_group = reorder_group, - cutree = cutree, - # used by align_dendro - plot_cut_height = FALSE, - merge_dendro = FALSE - ), + align = align, + distance = distance, method = method, + use_missing = use_missing, + reorder_dendrogram = reorder_dendrogram, + reorder_group = reorder_group, + k = k, h = h, cutree = cutree, active = active, - schemes = default_schemes(), + ..., # additional fields to be added, used by align_dendro + schemes = schemes %||% default_schemes(), data = data, - plot = NULL + plot = plot, + call = call ) } #' @importFrom ggplot2 ggproto aes AlignHclust <- ggproto("AlignHclust", Align, - - #' @importFrom stats reorder - setup_params = function(self, nobs, params) { - # setup the default value for `plot_cut_height` - params$plot_cut_height <- .subset2(params, "plot_cut_height") %||% ( - # we by default don't draw the height of the user-provided cutree - # since function like `dynamicTreeCut` will merge tree - (!is.null(params$k) || !is.null(params$h)) && is.null(params$cutree) - ) - # setup the default value for `plot_cut_height` - if (isTRUE(params$reorder_dendrogram)) { - params$reorder_dendrogram <- function(tree, data) { - if (!inherits(tree, "dendrogram")) { - tree <- stats::as.dendrogram(tree) - } - reorder(x = tree, wts = rowMeans(data), agglo.FUN = mean) + interact_layout = function(self, layout) { + if (inherits(self$method, "hclust") || + inherits(self$method, "dendrogram")) { + layout <- ggproto_parent(Align, self)$interact_layout(layout) + if (inherits(self$method, "hclust")) { + nobs <- vec_size(.subset2(self$method, "order")) + } else { + nobs <- stats::nobs(self$method) } - } else if (is.function(params$reorder_dendrogram)) { - user_reorder <- params$reorder_dendrogram - params$reorder_dendrogram <- function(tree, data) { - # we ensure, what we input for user is a hclust object. - if (!inherits(tree, "hclust")) tree <- stats::as.hclust(tree) - ans <- user_reorder(tree, data) - if (!inherits(ans, "hclust") && - !inherits(ans, "dendrogram")) { - cli_abort( - "{.fn reorder_dendrogram} must return a {.cls hclust} or {.cls dendrogram} object", - call = self$call - ) - } - ans + + if (is.null(layout_nobs <- .subset2(layout@design, "nobs"))) { + layout@design["nobs"] <- list(nobs) + } else { + assert_mismatch_nobs(self, layout_nobs, nobs, arg = "method") } + } else { + layout <- ggproto_parent(AlignReorder, self)$interact_layout(layout) } + # initialize the internal parameters self$multiple_tree <- FALSE self$height <- NULL self$panel <- NULL - params + layout }, - setup_data = function(self, params, data) { - ans <- fortify_matrix(data) - assert_( - ans, function(x) is.numeric(x), - "numeric", - arg = "data", - call = .subset2(self, "call") - ) - ans - }, - nobs = function(self, params) { - if (inherits(tree <- .subset2(params, "method"), "hclust")) { - self$labels <- .subset2(tree, "labels") - nobs <- length(.subset2(tree, "order")) - } else { # a dendrogram - self$labels <- labels(tree) - nobs <- stats::nobs(tree) - } - if (nobs == 0L) { - cli_abort("tree defined in {.arg method} cannot be empty", - call = self$call - ) - } - nobs - }, - compute = function(self, panel, index, distance, method, use_missing, - reorder_dendrogram, k = NULL, h = NULL, cutree = NULL) { - data <- .subset2(self, "data") - if (!is.null(data) && nrow(data) < 2L) { + compute = function(self, panel, index) { + if (!is.null(self$data) && vec_size(self$data) < 2L) { cli_abort(c( "Cannot do Hierarchical Clustering", i = "must have >= 2 observations to cluster" - ), call = .subset2(self, "call")) + ), call = self$call) } + # if the old panel exist, we do sub-clustering - if (!is.null(panel) && is.null(k) && is.null(h) && is.null(cutree)) { - if (is.null(data)) { - cli_abort(c( - "Cannot do sub-clustering", - i = "Try to provide the {.arg data}" - ), call = .subset2(self, "call")) + if (!is.null(panel) && is.null(self$k) && is.null(self$h) && + is.null(self$cutree)) { + # in this way, we prevent sub-clustering + if (inherits(self$method, "hclust") || + inherits(self$method, "dendrogram")) { + cli_abort( + "{.arg method} cannot be a {.cls hclust} or {.cls dendrogram} when previous layout panel groups exist", + call = self$call + ) } children <- vector("list", nlevels(panel)) names(children) <- levels(panel) - labels <- vec_names(data) + labels <- vec_names(self$data) # we do clustering within each group --------------- for (g in levels(panel)) { idx <- which(panel == g) - gdata <- vec_slice(data, idx) - if (nrow(gdata) == 1L) { + gdata <- vec_slice(self$data, idx) + if (vec_size(gdata) == 1L) { children[[g]] <- tree_one_node(idx, .subset(labels, idx)) } else { child <- stats::as.dendrogram(hclust2( gdata, - distance = distance, - method = method, - use_missing = use_missing + distance = self$distance, + method = self$method, + use_missing = self$use_missing )) # we restore the actual index of the original matrix child <- stats::dendrapply(child, function(x) { @@ -193,41 +206,40 @@ AlignHclust <- ggproto("AlignHclust", Align, x } }) - if (is.function(reorder_dendrogram)) { - child <- reorder_dendrogram(child, gdata) + if (is.function(self$reorder_dendrogram)) { + child <- self$reorder_dendrogram(child, gdata) } children[[g]] <- child } } return(children) # can be a list of `dendrogram` or `hclust` or mix } - hclust2(data, distance, method, use_missing) + hclust2(self$data, self$distance, self$method, self$use_missing) }, #' @importFrom stats order.dendrogram - align = function(self, panel, index, distance, method, use_missing, - reorder_dendrogram, merge_dendro, reorder_group, - k, h, cutree, plot_cut_height) { - statistics <- .subset2(self, "statistics") - if (!is.null(panel) && is.null(k) && is.null(h) && is.null(cutree)) { + align = function(self, panel, index) { + statistics <- self$statistics + if (!is.null(panel) && is.null(self$k) && is.null(self$h) && + is.null(self$cutree)) { # reordering the dendrogram ------------------------ - if (nlevels(panel) > 1L && reorder_group) { - data <- .subset2(self, "data") + if (nlevels(panel) > 1L && self$reorder_group) { parent_levels <- levels(panel) parent_data <- t(sapply(parent_levels, function(g) { - colMeans(vec_slice(data, panel == g), na.rm = TRUE) + colMeans(vec_slice(self$data, panel == g), na.rm = TRUE) })) rownames(parent_data) <- parent_levels parent <- hclust2( parent_data, - distance = distance, method = method, - use_missing = use_missing + distance = self$distance, + method = self$method, + use_missing = self$use_missing ) # reorder parent based on the parent tree - if (is.function(reorder_dendrogram)) { - parent <- reorder_dendrogram(parent, parent_data) + if (is.function(self$reorder_dendrogram)) { + parent <- self$reorder_dendrogram(parent, parent_data) } # we always ensure the parent is a dendrogram - # since we'll use `merge_dendrogram()` which requires a + # since we'll call `merge_dendrogram()` which requires a # dendrogram parent <- stats::as.dendrogram(parent) panel <- factor(panel, parent_levels[order.dendrogram(parent)]) @@ -240,9 +252,9 @@ AlignHclust <- ggproto("AlignHclust", Align, # merge children tree ------------------------------ if (nlevels(panel) == 1L) { statistics <- .subset2(statistics, 1L) - } else if (merge_dendro) { + } else if (self$merge_dendro) { # we have a function named merge_dendrogram(), so we use - # parameter `merge_dendro` + # `merge_dendro` as the argument name # `merge_dendrogram` will follow the order of the parent statistics <- lapply(statistics, stats::as.dendrogram) statistics <- merge_dendrogram(parent, statistics) @@ -255,31 +267,29 @@ AlignHclust <- ggproto("AlignHclust", Align, self$multiple_tree <- TRUE } } else { + # hclust2() will attach the distance used distance <- attr(statistics, "distance") - if (is.function(reorder_dendrogram)) { - statistics <- reorder_dendrogram( - statistics, .subset2(self, "data") - ) + if (is.function(self$reorder_dendrogram)) { + statistics <- self$reorder_dendrogram(statistics, self$data) } - if (!is.null(k) || !is.null(h) || !is.null(cutree)) { - if (is.null(cutree)) { - cutree <- function(tree, dist, k, h) { + if (!is.null(self$k) || !is.null(self$h) || !is.null(self$cutree)) { + if (is.null(self$cutree)) { + self$cutree <- function(tree, dist, k, h) { if (!is.null(k)) { stats::cutree(tree, k = k) } else { stats::cutree(tree, h = h) } } - # For `cutree`, we always respect the height user specified - # For user defined function, we always calculate - # height from the number of `panels` - if (is.null(k) && plot_cut_height) self$height <- h } # we need `hclust` object to cutree statistics <- stats::as.hclust(statistics) - panel <- cutree(statistics, distance, k, h) - if (is.null(self$height) && plot_cut_height) { - self$height <- cutree_k_to_h( + panel <- self$cutree(statistics, distance, self$k, self$h) + # For `cutree`, we always respect the height user specified + # For user defined function, we always calculate + # height from the number of `panels` + if (isTRUE(self$plot_cut_height)) { + self$height <- self$h %||% cutree_k_to_h( statistics, vec_unique_count(panel) ) } @@ -301,10 +311,6 @@ AlignHclust <- ggproto("AlignHclust", Align, list(panel, index) }, summary_align = function(self, ...) { - params <- self$input_params - c( - TRUE, - !is.null(.subset2(params, "k")) || !is.null(.subset2(params, "h")) - ) + c(TRUE, !is.null(self$k) || !is.null(self$h) || !is.null(self$cutree)) } ) diff --git a/R/align-kmeans.R b/R/align-kmeans.R index 4b20e096..58a4624c 100644 --- a/R/align-kmeans.R +++ b/R/align-kmeans.R @@ -6,8 +6,9 @@ #' Aligns and groups observations based on k-means clustering, enabling #' observation splits by cluster groups. #' -#' @inheritParams stats::kmeans #' @inheritDotParams stats::kmeans -x -centers +#' @param data A numeric matrix to be used by k-means. By default, it will +#' inherit from the layout matrix. #' @inheritParams align #' @inheritSection align Discrete Axis Alignment #' @examples @@ -16,30 +17,25 @@ #' align_kmeans(3L) #' @importFrom rlang list2 #' @export -align_kmeans <- function(centers, ..., data = NULL, active = NULL) { +align_kmeans <- function(..., data = NULL, active = NULL) { assert_active(active) active <- update_active(active, new_active(use = FALSE)) align( align = AlignKmeans, - params = list(centers = centers, params = list2(...)), + params = list2(...), active = active, - data = data %||% waiver() + data = data ) } #' @importFrom ggplot2 ggproto #' @importFrom rlang inject AlignKmeans <- ggproto("AlignKmeans", Align, - setup_data = function(self, params, data) { - ans <- fortify_matrix(data) - assert_( - ans, is.numeric, "a numeric matrix", - arg = "data", call = self$call - ) - ans + interact_layout = function(self, layout) { + ggproto_parent(AlignReorder, self)$interact_layout(layout) }, - compute = function(self, panel, index, centers, params) { - inject(stats::kmeans(x = self$data, centers = centers, !!!params)) + compute = function(self, panel, index) { + inject(stats::kmeans(x = self$data, !!!self$params)) }, align = function(self, panel, index) { list(.subset2(self$statistics, "cluster"), index) diff --git a/R/align-order.R b/R/align-order.R index 5a705b81..4f98e181 100644 --- a/R/align-order.R +++ b/R/align-order.R @@ -39,21 +39,21 @@ align_order <- function(weights = rowMeans, ..., # vec_duplicate_any is slight faster than `anyDuplicated` if (vec_any_missing(weights) || vec_duplicate_any(weights)) { cli_abort(paste( - "{.arg order} must be an ordering numeric or character", + "{.arg weights} must be an ordering numeric or character", "without missing value or ties" )) } else if (is.numeric(weights)) { weights <- vec_cast(weights, integer()) } - if (!is.null(data) && !is.waive(data)) { + if (vec_size(weights) == 0L) { + cli_abort("{.arg weights} cannot be empty") + } + if (!is.null(data)) { cli_warn(c( "{.arg data} won't be used", - i = "{.arg order} is not a {.cls function}" + i = "{.arg weights} is not a {.cls function}" )) } - # we always inherit from parent layout - # in this way, we obtain the names of the layout data - data <- NULL } else { weights <- rlang::as_function(weights) data <- data %||% waiver() @@ -64,14 +64,11 @@ align_order <- function(weights = rowMeans, ..., active <- update_active(active, new_active(use = FALSE)) align( align = AlignOrder, - params = list( - weights = weights, - weights_params = list2(...), - reverse = reverse, - strict = strict - ), + weights = weights, + params = list2(...), + reverse = reverse, + strict = strict, active = active, - check.param = TRUE, data = data ) } @@ -79,37 +76,35 @@ align_order <- function(weights = rowMeans, ..., #' @importFrom ggplot2 ggproto #' @importFrom rlang inject is_atomic AlignOrder <- ggproto("AlignOrder", Align, - nobs = function(self, params) { - nobs <- length(.subset2(params, "weights")) - if (nobs == 0L) { - cli_abort("{.arg weights} cannot be empty", call = self$call) - } - nobs - }, - setup_params = function(self, nobs, params) { - if (!is.function(weights <- .subset2(params, "weights"))) { - assert_mismatch_nobs( - self, nobs, length(weights), - action = "must be an ordering integer or character index of", - arg = "weights" - ) + interact_layout = function(self, layout) { + if (is.function(self$weights)) { + layout <- ggproto_parent(AlignReorder, self)$interact_layout(layout) + } else { + layout <- ggproto_parent(Align, self)$interact_layout(layout) + if (is.null(layout_nobs <- .subset2(layout@design, "nobs"))) { + layout@design["nobs"] <- list(vec_size(self$weights)) + } else { + assert_mismatch_nobs( + self, layout_nobs, vec_size(self$weights), + arg = "weights" + ) + } + self$labels <- vec_names(layout@data) } - params + layout }, - compute = function(self, panel, index, weights, weights_params, strict) { - assert_reorder(self, panel, strict) - if (is.function(weights)) { - data <- .subset2(self, "data") - ans <- inject(weights(data, !!!weights_params)) + compute = function(self, panel, index) { + assert_reorder(self, panel, self$strict) + if (is.function(self$weights)) { + ans <- inject(self$weights(self$data, !!!self$params)) if (!is_atomic(ans)) { cli_abort( "{.arg weights} must return an atomic weights", - call = .subset2(self, "call") + call = self$call ) } assert_mismatch_nobs( - self, nrow(data), length(ans), - action = "must return weights with", + self, vec_size(ans), vec_size(ans), arg = "weights" ) } else { @@ -117,24 +112,19 @@ AlignOrder <- ggproto("AlignOrder", Align, } ans }, - align = function(self, panel, index, weights, reverse) { - if (is.function(weights)) { - index <- order(.subset2(self, "statistics")) + align = function(self, panel, index) { + if (is.function(self$weights)) { + index <- order(self$statistics) } else { - index <- vec_as_location(weights, - n = length(weights), - names = .subset2(self, "labels"), + index <- vec_as_location( + self$weights, + n = vec_size(self$weights), + names = self$labels, missing = "error", - call = .subset2(self, "call") + call = self$call ) - if (vec_duplicate_any(index)) { - cli_abort( - "find ties in the ordering index {.arg weights}", - call = .subset2(self, "call") - ) - } } - if (reverse) index <- rev(index) + if (self$reverse) index <- rev(index) list(panel, index) }, summary_align = function(self) c(TRUE, FALSE) diff --git a/R/align-reorder.R b/R/align-reorder.R index c643ab90..79c3d9b8 100644 --- a/R/align-reorder.R +++ b/R/align-reorder.R @@ -42,36 +42,84 @@ align_reorder <- function(stat, ..., reverse = FALSE, active <- update_active(active, new_active(use = FALSE)) align( align = AlignReorder, - params = list( - stat = stat, - stat_params = list2(...), - reverse = reverse, - strict = strict - ), + stat = stat, + params = list2(...), + reverse = reverse, + strict = strict, active = active, - data = data %||% waiver() + data = data ) } #' @importFrom ggplot2 ggproto #' @importFrom rlang inject AlignReorder <- ggproto("AlignReorder", Align, - compute = function(self, panel, index, stat, stat_params, strict) { - assert_reorder(self, panel, strict) - inject(stat(self$data, !!!stat_params)) + interact_layout = function(self, layout) { + layout <- ggproto_parent(Align, self)$interact_layout(layout) + layout_data <- layout@data + if (is.null(input_data <- self$input_data) || + is.waive(input_data)) { # inherit from the layout + if (is.null(data <- layout_data)) { + cli_abort(c( + sprintf( + "you must provide {.arg data} in %s", + object_name(self) + ), + i = sprintf("no data was found in %s", self$layout_name) + )) + } + } else if (is.function(input_data)) { + if (is.null(layout_data)) { + cli_abort(c( + sprintf( + "{.arg data} in %s cannot be a function", + object_name(self) + ), + i = sprintf("no data was found in %s", self$layout_name) + )) + } + data <- input_data(layout_data) + } else { + data <- input_data + } + + design <- layout@design + layout_nobs <- .subset2(design, "nobs") + + # we always regard rows as the observations + if (is.null(layout_nobs)) { + layout_nobs <- vec_size(data) + if (layout_nobs == 0L) { + cli_abort("{.arg data} cannot be empty", call = self$call) + } + design["nobs"] <- list(layout_nobs) + layout@design <- design + } else if (vec_size(data) != layout_nobs) { + cli_abort(sprintf( + "%s (nobs: %d) is not compatible with the %s (nobs: %d)", + object_name, vec_size(data), layout_name, layout_nobs + )) + } + + # save the labels + self$labels <- vec_names(data) %||% vec_names(layout_data) + self$data <- ggalign_attr_restore(data, layout_data) + layout + }, + compute = function(self, panel, index) { + assert_reorder(self, panel, self$strict) + inject(self$stat(self$data, !!!self$params)) }, - align = function(self, panel, index, reverse) { + align = function(self, panel, index) { index <- vec_cast( - order2(.subset2(self, "statistics")), integer(), - x_arg = "stat", - call = self$call + order2(self$statistics), integer(), + x_arg = "stat", call = self$call ) assert_mismatch_nobs( - self, NROW(self$data), length(index), - action = "must return a statistic with", + self, vec_size(self$data), vec_size(index), arg = "stat" ) - if (reverse) index <- rev(index) + if (self$reverse) index <- rev(index) list(panel, index) }, summary_align = function(self) c(TRUE, FALSE) diff --git a/R/dendrogram.R b/R/dendrogram.R index 1ea9f7cc..1cd22014 100644 --- a/R/dendrogram.R +++ b/R/dendrogram.R @@ -113,11 +113,6 @@ make_dist <- function(matrix, distance, use_missing, #' #' @param data A [`hclust`][stats::hclust] or a #' [`dendrogram`][stats::as.dendrogram] object. -#' @param priority A string of "left" or "right". if we draw from right to left, -#' the left will override the right, so we take the `"left"` as the priority. If -#' we draw from `left` to `right`, the right will override the left, so we take -#' the `"right"` as priority. This is used by [align_dendro()] to provide -#' support of facet operation in ggplot2. #' @param center A boolean value. if `TRUE`, nodes are plotted centered with #' respect to all leaves/tips in the branch. Otherwise (default), plot them in #' the middle of the direct child nodes. @@ -131,10 +126,16 @@ make_dist <- function(matrix, distance, use_missing, #' @param branch_gap A single numeric value indicates the gap between different #' branches. #' @param root A length one string or numeric indicates the root branch. +#' @param priority A string of "left" or "right". if we draw from `right` to +#' `left`, the left will override the right, so we take the `"left"` as the +#' priority. If we draw from `left` to `right`, the right will override the +#' left, so we take the `"right"` as priority. This is used by +#' [`align_dendro()`] to provide support of facet operation in ggplot2. #' @param double A single logical value indicating whether horizontal lines #' should be doubled when segments span multiple branches. If `TRUE`, the #' horizontal lines will be repeated for each branch that the segment spans. If -#' `FALSE`, only one horizontal line will be drawn. +#' `FALSE`, only one horizontal line will be drawn. This is used by +#' [`align_dendro()`] to provide support of facet operation in ggplot2. #' @return A `data frame` with the node coordinates: #' - `.panel`: Similar with `panel` column, but always give the correct #' branch for usage of the ggplot facet. diff --git a/R/utils-assert.R b/R/utils-assert.R index d2cdffa6..ceb5a0bf 100644 --- a/R/utils-assert.R +++ b/R/utils-assert.R @@ -16,13 +16,12 @@ assert_mapping <- function(mapping, arg = caller_arg(mapping), } } -assert_mismatch_nobs <- function(align, n, nobs, action, arg) { +assert_mismatch_nobs <- function(align, n, nobs, arg) { if (n != nobs) { cli_abort(sprintf( - "{.arg %s} of %s %s the same length of layout %s-axis (%d)", - arg, object_name(align), action, - to_coord_axis(align$direction), n - ), call = align$call) + "{.arg %s} (nobs: %d) of %s is not compatible with the %s (nobs: %d)", + arg, nobs, object_name(align), align$layout_name, n + )) } } diff --git a/man/align.Rd b/man/align.Rd index c951b0be..68b5d113 100644 --- a/man/align.Rd +++ b/man/align.Rd @@ -8,15 +8,13 @@ \usage{ align( align, - data, + data = NULL, ..., - params = list(), plot = NULL, size = NULL, schemes = NULL, no_axes = NULL, active = NULL, - check.param = TRUE, call = caller_call() ) } @@ -35,8 +33,6 @@ data, please use \code{\link[=scheme_data]{scheme_data()}}. \item{...}{Additional fields passed to the \code{align} object.} -\item{params}{A list of parameters for \code{align}.} - \item{plot}{A ggplot object.} \item{size}{The relative size of the plot, can be specified as a @@ -58,9 +54,6 @@ default, will use the option- \item{active}{A \code{\link[=active]{active()}} object that defines the context settings when added to a layout.} -\item{check.param}{Logical; if \code{TRUE}, checks parameters and provides -warnings as necessary.} - \item{call}{The \code{call} used to construct the \code{Align} object, for reporting messages.} } diff --git a/man/align_kmeans.Rd b/man/align_kmeans.Rd index 203ae479..b592f4ba 100644 --- a/man/align_kmeans.Rd +++ b/man/align_kmeans.Rd @@ -4,13 +4,9 @@ \alias{align_kmeans} \title{Split observations by k-means clustering groups.} \usage{ -align_kmeans(centers, ..., data = NULL, active = NULL) +align_kmeans(..., data = NULL, active = NULL) } \arguments{ -\item{centers}{either the number of clusters, say \eqn{k}, or a set of - initial (distinct) cluster centres. If a number, a random set of - (distinct) rows in \code{x} is chosen as the initial centres.} - \item{...}{ Arguments passed on to \code{\link[stats:kmeans]{stats::kmeans}} \describe{ @@ -26,15 +22,8 @@ align_kmeans(centers, ..., data = NULL, active = NULL) produced. Higher values may produce more tracing information.} }} -\item{data}{The following options can be used: -\itemize{ -\item \code{NULL}: No data is set. -\item \code{\link[ggplot2:waiver]{waiver()}}: Inherits the data from the layout matrix. -\item A \code{function} (including purrr-like lambda syntax): Applied to the layout -matrix to transform the data before use. To transform the final plot -data, please use \code{\link[=scheme_data]{scheme_data()}}. -\item A \code{matrix}, \code{data.frame}, or atomic vector. -}} +\item{data}{A numeric matrix to be used by k-means. By default, it will +inherit from the layout matrix.} \item{active}{A \code{\link[=active]{active()}} object that defines the context settings when added to a layout.} diff --git a/man/fortify_data_frame.dendrogram.Rd b/man/fortify_data_frame.dendrogram.Rd index 6c7cef46..c0c521e0 100644 --- a/man/fortify_data_frame.dendrogram.Rd +++ b/man/fortify_data_frame.dendrogram.Rd @@ -27,11 +27,11 @@ \item{...}{Additional arguments passed to \code{dendrogram} method.} -\item{priority}{A string of "left" or "right". if we draw from right to left, -the left will override the right, so we take the \code{"left"} as the priority. If -we draw from \code{left} to \code{right}, the right will override the left, so we take -the \code{"right"} as priority. This is used by \code{\link[=align_dendro]{align_dendro()}} to provide -support of facet operation in ggplot2.} +\item{priority}{A string of "left" or "right". if we draw from \code{right} to +\code{left}, the left will override the right, so we take the \code{"left"} as the +priority. If we draw from \code{left} to \code{right}, the right will override the +left, so we take the \code{"right"} as priority. This is used by +\code{\link[=align_dendro]{align_dendro()}} to provide support of facet operation in ggplot2.} \item{center}{A boolean value. if \code{TRUE}, nodes are plotted centered with respect to all leaves/tips in the branch. Otherwise (default), plot them in @@ -56,7 +56,8 @@ branches.} \item{double}{A single logical value indicating whether horizontal lines should be doubled when segments span multiple branches. If \code{TRUE}, the horizontal lines will be repeated for each branch that the segment spans. If -\code{FALSE}, only one horizontal line will be drawn.} +\code{FALSE}, only one horizontal line will be drawn. This is used by +\code{\link[=align_dendro]{align_dendro()}} to provide support of facet operation in ggplot2.} } \value{ A \verb{data frame} with the node coordinates: diff --git a/tests/testthat/_snaps/plot-align.md b/tests/testthat/_snaps/plot-align.md index f7aeb9dd..18b7dfd5 100644 --- a/tests/testthat/_snaps/plot-align.md +++ b/tests/testthat/_snaps/plot-align.md @@ -6,19 +6,19 @@ --- - `group` of `align_group()` must be an atomic vector the same length of layout x-axis (8) + `group` (nobs: 4) of `align_group()` is not compatible with the the top annotation `stack_discrete()` (nobs: 8) --- - `group` of `align_group()` must be an atomic vector the same length of layout x-axis (8) + `group` (nobs: 9) of `align_group()` is not compatible with the the top annotation `stack_discrete()` (nobs: 8) --- - `group` of `align_group()` must be an atomic vector the same length of layout x-axis (8) + `group` (nobs: 4) of `align_group()` is not compatible with the the top annotation `stack_discrete()` (nobs: 8) --- - `group` of `align_group()` must be an atomic vector the same length of layout x-axis (8) + `group` (nobs: 9) of `align_group()` is not compatible with the the top annotation `stack_discrete()` (nobs: 8) # `align_order` works well