Merge pull request #677 from ropensci/render

Refactor summary() to separate compute and print
ropensci · Jan 17, 2022 · 279fe93 · 279fe93
2 parents eca8edf + be19a6a
commit 279fe93
Show file tree

Hide file tree

Showing 4 changed files with 82 additions and 50 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 S3method(ctl_new_pillar,one_skim_df)
+S3method(format,summary_skim_df)
 S3method(get_skimmers,AsIs)
 S3method(get_skimmers,Date)
 S3method(get_skimmers,POSIXct)

diff --git a/R/skim_print.R b/R/skim_print.R
@@ -101,11 +101,11 @@ print.skim_list <- function(x,
 #' @param .summary_rule_width the width for the main rule above the summary.
 #' @export
 print.summary_skim_df <- function(x, .summary_rule_width = 40, ...) {
-  cat(paste0(cli::rule(
-    line = 1, left = "Data Summary",
-    width = .summary_rule_width
-  ), "\n"))
-  print.table(x)
+  with_title <- c(
+    cli::rule(line = 1, left = "Data Summary", width = .summary_rule_width),
+    format(x)
+  )
+  writeLines(with_title)
 }
 
 #' Provide a default printing method for knitr.
@@ -139,14 +139,7 @@ knit_print.skim_df <- function(x, options = NULL, ...) {
   if (is_skim_df(x) && nrow(x) > 0) {
     if (options$skimr_include_summary %||% TRUE) {
       summary_stats <- summary(x)
-
-      kabled <- knitr::kable(
-        summary_stats,
-        table.attr = "style='width: auto;'
-        class='table table-condensed'",
-        col.names = c(" "),
-        caption = "Data summary"
-      )
+      kabled <- knit_print(summary_stats)
     } else {
       kabled <- c()
     }
@@ -193,11 +186,15 @@ knit_print.one_skim_df <- function(x, options = NULL, ...) {
 #' @describeIn knit_print Default `knitr` print for `skim_df` summaries.
 #' @export
 knit_print.summary_skim_df <- function(x, options = NULL, ...) {
+  summary_mat <- cbind(
+    get_summary_dnames(x),
+    get_summary_values(x),
+    deparse.level = 0
+  )
   kabled <- knitr::kable(
-    x,
+    summary_mat,
     table.attr = "style='width: auto;'
       class='table table-condensed'",
-    col.names = c(" "),
     caption = "Data summary"
   )
 

diff --git a/R/summary.R b/R/summary.R
@@ -13,47 +13,77 @@ summary.skim_df <- function(object, ...) {
   if (is.null(object)) {
     stop("dataframe is null.")
   }
-  data_name <- df_name(object)
-  data_name <- ifelse(data_name %in% c("`.`", ".data"), "Piped data", data_name)
-  data_name <- gsub("`", "", data_name)
-  data_name <- ifelse(nchar(data_name) > 25,
-    paste0(substring(data_name, 1, 25), "..."),
-    data_name
-  )
-
   duplicated <- duplicated(object$skim_variable)
   counts <- table(type = object$skim_type[!duplicated])
   types <- dimnames(counts)[[1]]
-  types <- paste0("  ", types)
+
   possible_names <- group_names(object)
   possible_groups <- if (length(possible_names) > 0) {
     paste(possible_names, collapse = ", ")
   } else {
     "None"
   }
 
-  summary_object <- c(
-    data_name,
-    data_rows(object),
-    data_cols(object),
-    if (!is.na(dt_key(object))) dt_key(object),
-    " ",
-    " ",
-    unname(counts),
-    " ",
-    possible_groups
+  structure(
+    list(
+      data_name = process_data_name(object),
+      counts = counts,
+      types = types,
+      possible_groups = possible_groups,
+      dt_key = dt_key(object),
+      data_rows = data_rows(object),
+      data_cols = data_cols(object)
+    ),
+    class = "summary_skim_df"
+  )
+}
+
+process_data_name <- function(object) {
+  raw_name <- df_name(object)
+  no_ticks <- gsub("`", "", raw_name)
+  if (no_ticks %in% c(".", ".data")) {
+    "Piped data"
+  } else if (nchar(no_ticks) > 25) {
+    paste0(substring(no_ticks, 1, 25), "...")
+  } else {
+    no_ticks
+  }
+}
+
+#' @export
+format.summary_skim_df <- function(x, ...) {
+  dnames <- c("", get_summary_dnames(x))
+  summary_values <- c("Values", get_summary_values(x))
+  paste(
+    format(dnames),
+    format(summary_values)
   )
+}
 
-  summary_object <- array(summary_object, dim = c(length(summary_object), 1))
-  dnames <- c(
-    "Name", "Number of rows ", "Number of columns ",
-    if (!is.na(dt_key(object))) "Key", "_______________________ ",
-    "Column type frequency: ", types, "________________________  ",
+get_summary_dnames <- function(summary_object) {
+  c(
+    "Name",
+    "Number of rows ",
+    "Number of columns ",
+    if (!is.na(summary_object$dt_key)) "Key",
+    "_______________________ ",
+    "Column type frequency: ",
+    paste0("  ", summary_object$types),
+    "________________________  ",
     "Group variables"
   )
+}
 
-  summary_object <- as.table(summary_object)
-  dimnames(summary_object) <- list(dnames, c("Values"))
-  class(summary_object) <- c("summary_skim_df", "table")
-  summary_object
+get_summary_values <- function(summary_object) {
+  c(
+    summary_object$data_name,
+    summary_object$data_rows,
+    summary_object$data_cols,
+    if (!is.na(summary_object$dt_key)) summary_object$dt_key,
+    " ",
+    " ",
+    unname(summary_object$counts),
+    " ",
+    summary_object$possible_groups
+  )
 }
diff --git a/tests/testthat/test-summary.R b/tests/testthat/test-summary.R
@@ -3,13 +3,17 @@ test_that("Summary creates the correct summary object", {
   # Test it creates the correct 4 parts
   skim_input <- skim(iris)
   summary_input <- summary(skim_input)
-  expect_equal(rownames(summary_input), c(
-    "Name", "Number of rows ", "Number of columns ",
-    "_______________________ ",
-    "Column type frequency: ", "  factor", "  numeric",
-    "________________________  ",
-    "Group variables"
-  ))
+  expect_named(
+    summary_input,
+    c(
+      "data_name", "counts", "types", "possible_groups",
+      "dt_key", "data_rows", "data_cols"
+    )
+  )
+  expect_identical(summary_input$data_name, "iris")
+  expect_identical(summary_input$types, c("factor", "numeric"))
+  expect_identical(summary_input$data_rows, 150L)
+  expect_identical(summary_input$data_cols, 5L)
 })
 
 test_that("The summary print method prints the correct object", {