Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add layout_dedupe_ph_labels() to hanble duplicate placholder labels (close #589) #594

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: officer
Title: Manipulation of Microsoft Word and PowerPoint Documents
Version: 0.6.7.005
Version: 0.6.7.006
Authors@R: c(
person("David", "Gohel", , "[email protected]", role = c("aut", "cre")),
person("Stefan", "Moog", , "[email protected]", role = "aut"),
Expand Down Expand Up @@ -48,7 +48,8 @@ Imports:
utils,
uuid,
xml2 (>= 1.1.0),
zip (>= 2.1.0)
zip (>= 2.1.0),
cli
Suggests:
devEMF,
doconv (>= 0.3.0),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ export(headers_replace_img_at_bkm)
export(headers_replace_text_at_bkm)
export(hyperlink_ftext)
export(image_to_base64)
export(layout_dedupe_ph_labels)
export(layout_properties)
export(layout_summary)
export(media_extract)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ that can not contain ' ' and trigger an error if it contains a ' '.

## Features

- add `layout_dedupe_ph_labels()` to handle duplicate placholder labels (#589).
By default, it will only detect duplicate labels, but apply no changes. With
`action = "rename"`, it auto-renames duplicate labels and `action = "delete"`
deletes duplicates, only keeping their first occurence.
- new convenience functions `body_replace_gg_at_bkm()` and `body_replace_plot_at_bkm()`
to replace text content enclosed in a bookmark with a ggplot or a base plot.
- add `unit` (in, cm, mm) argument in function `page_size()`.
Expand Down
2 changes: 1 addition & 1 deletion R/ph_location.R
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ fortify_location.location_label <- function( x, doc, ...){

if( nrow(props) > 1) {
stop("Placeholder ", shQuote(x$ph_label),
" in the slide layout is duplicated. It needs to be unique.")
" in the slide layout is duplicated. It needs to be unique. Hint: layout_dedupe_ph_labels() helps handling duplicates.")
}

props <- props[, c("offx", "offy", "cx", "cy", "ph_label", "ph", "type", "rotation", "fld_id", "fld_type")]
Expand Down
140 changes: 140 additions & 0 deletions R/ppt_ph_dedupe_layout.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#' Detect and handle duplicate placeholder labels
#'
#' PowerPoint does not enforce unique placeholder labels in a layout.
#' Selecting a placeholder via its label using [ph_location_label] will throw
#' an error, if the label is not unique. [layout_dedupe_ph_labels] helps to detect,
#' rename, or delete duplicate placholder labels.
#'
#' @param x An `rpptx` object.
#' @param action Action to perform on duplicate placeholder labels. One of:
#' * `detect` (default) = show info on dupes only, make no changes
#' * `rename` = create unique labels. Labels are renamed by appending a sequential number
#' separated by dot to duplicate labels. For example, `c("title", "title")` becomes `c("title.1", "title.2")`.
#' * `delete` = only keep one of the placeholders with a duplicate label
#' @param print_info Print action information (e.g. renamed placeholders) to console?
#' Default is `FALSE`. Always `TRUE` for action `detect`.
#' @return A `rpptx` object (with modified placeholder labels).
#' @export
#' @examples
#' x <- read_pptx()
#' layout_dedupe_ph_labels(x)
#'
#' file <- system.file("doc_examples", "ph_dupes.pptx", package = "officer")
#' x <- read_pptx(file)
#' layout_dedupe_ph_labels(x)
#' layout_dedupe_ph_labels(x, "rename", print_info = TRUE)
#'
layout_dedupe_ph_labels <- function(x, action = "detect", print_info = FALSE) {
if (!inherits(x, "rpptx")) {
stop("'x' must be an 'rpptx' object", call. = FALSE)
}
action <- match.arg(action, c("detect", "rename", "delete"))
layout_names <- x$slideLayouts$get_metadata()$filename
xfrm_list <- lapply(layout_names, .dedupe_phs_in_layout, x = x, action = action)
x <- reload_slidelayouts(x) # reinit slideLayouts to get processed ph labels [e.g. when calling x$slideLayouts$get_xfrm_data()]
if (print_info | action == "detect") {
.print_dedupe_info(x = x, xfrm_list = xfrm_list, action = action)
}
invisible(x)
}


# handle placeholder labels in a single layout
#
# layout_file: layout filename (e.g. "slideLayout1.xml").
# x: An `rpptx` object
#
# returns: Dataframe with placeholder info. Only needed for .print_dedupe_info()
.dedupe_phs_in_layout <- function(layout_file, x, action = "rename") {
ph_label <- NULL
if (!grepl("\\.xml$", layout_file, ignore.case = TRUE)) {
stop("'layout_file' must be an .xml file", call. = FALSE)
}
action <- match.arg(action, c("detect", "rename", "delete"))
layout <- x$slideLayouts$collection_get(layout_file)
xfrm <- layout$xfrm()
xfrm <- subset(xfrm, duplicated(ph_label) | duplicated(ph_label, fromLast = TRUE))
if (nrow(xfrm) == 0) {
return()
}
xfrm <- transform(xfrm, ph_label_new = make_strings_unique(ph_label), delete_flag = duplicated(ph_label)) # prepare once for all action types
if (action == "detect") {
return(xfrm) # no further action required
} else if (action == "rename") {
xfrm$delete_flag <- FALSE
} else if (action == "delete") {
xfrm$ph_label_new <- xfrm$ph_label
}

# rename label or delete ph shape
layout_xml <- layout$get()
for (i in 1L:nrow(xfrm)) {
shape <- xml2::xml_find_first(layout_xml, sprintf("p:cSld/p:spTree/*[p:nvSpPr/p:cNvPr[@id='%s']]", xfrm$id[i]))
if (xfrm$delete_flag[i]) {
xml2::xml_remove(shape)
} else {
xml2::xml_find_first(shape, ".//p:cNvPr") |> xml2::xml_set_attr("name", xfrm$ph_label_new[i])
}
}
layout$save() # persist changes in slideout xml file
xfrm
}


# reload slideLayouts (if layout XML in package_dir has changed)
reload_slidelayouts <- function(x) {
x$slideLayouts$initialize(x$package_dir,
master_metadata = x$masterLayouts$get_metadata(),
master_xfrm = x$masterLayouts$xfrm()
)
x
}


# Create unique string by appending a sepatator and a number
# make_strings_unique(c("A", "B", "B", "C", "A"))
make_strings_unique <- function(x, sep = ".") {
ii <- stats::ave(x, x, FUN = seq_along)
paste0(x, sep, ii)
}


# helper mostly for testing
has_ph_dupes <- function(x) {
if (!inherits(x, "rpptx")) {
stop("'x' must be an 'rpptx' object", call. = FALSE)
}
xfrm <- x$slideLayouts$get_xfrm_data()
dupes <- stats::aggregate(ph_label ~ master_name + name, data = xfrm, FUN = function(x) sum(duplicated(x)) > 0)
any(dupes$ph_label)
}


# print info on what was done (if print_info = TRUE)
.print_dedupe_info <- function(x, xfrm_list, action) {
.df_1 <- do.call(rbind, xfrm_list)
if (is.null(.df_1)) {
cat("No duplicate placeholder labels detected.")
return(invisible(NULL))
}
.df_2 <- x$slideLayouts$get_xfrm_data()
.df_2 <- .df_2[, c("master_file", "master_name"), drop = FALSE] |> unique()
df <- merge(.df_1, .df_2, sort = FALSE)
rownames(df) <- NULL
df <- df[, c("master_name", "name", "ph_label", "ph_label_new", "delete_flag"), drop = FALSE]
colnames(df)[2] <- "layout_name"
if (action == "detect") {
cat("Placeholders with duplicate labels:\n")
cat(cli::col_grey("* 'ph_label_new' = new placeholder label for action = 'rename'\n"))
cat(cli::col_grey("* 'delete_flag' = deleted placeholders for action = 'delete'\n"))
} else if (action == "rename") {
df$delete_flag <- NULL
cat("Renamed duplicate placeholder labels:\n")
cat(cli::col_grey("* 'ph_label_new' = new placeholder label\n"))
} else if (action == "delete") {
df <- df[df$delete_flag, , drop = FALSE]
df$ph_label_new <- NULL
cat("Removed placeholders with duplicate labels:\n")
}
print(df)
}
1 change: 1 addition & 0 deletions R/pptx_informations.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ plot_layout_properties <- function (x, layout = NULL, master = NULL, labels = TR
#' \code{ph_location*} calls. The parameters are printed in their corresponding shapes.
#'
#' Note that if there are duplicated \code{ph_label}, you should not use \code{ph_location_label}.
#' Hint: You can dedupe labels using \code{\link{layout_dedupe_ph_labels}}.
#'
#' @param path path to the pptx file to use as base document or NULL to use the officer default
#' @param output_file filename to store the annotated powerpoint file or NULL to suppress generation
Expand Down
Binary file added inst/doc_examples/ph_dupes.pptx
Binary file not shown.
1 change: 1 addition & 0 deletions man/annotate_base.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions man/layout_dedupe_ph_labels.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added tests/testthat/docs_dir/test-pptx-dedupe-ph.pptx
Binary file not shown.
74 changes: 74 additions & 0 deletions tests/testthat/test-pptx-dedupe-ph-labels.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@



test_that("incorrect input formats are detected", {
expect_error(layout_dedupe_ph_labels("file.xxx"), regexp = "'x' must be an 'rpptx' object")
expect_error(has_ph_dupes("file.xxx"), regexp = "'x' must be an 'rpptx' object")
expect_error(.dedupe_phs_in_layout("file.xxx"), regexp = "'layout_file' must be an .xml file")
})



test_that("handling ph dupes function works when there are none", {
x <- read_pptx() # sample PPTX has no dupes
expect_false(has_ph_dupes(x))
. <- capture.output(expect_no_error({
layout_dedupe_ph_labels(x, print_info = TRUE)
layout_dedupe_ph_labels(x, action = "rename", print_info = TRUE)
layout_dedupe_ph_labels(x, action = "delete", print_info = TRUE)
}))
})


test_that("handling ph dupes works", {
file_in <- test_path("docs_dir/test-pptx-dedupe-ph.pptx")

# referencing a duplicate placeholder via its label should throw an error.
# if this should change for some reason, the test fails as we would need to
# check if deduplication is still relevant
x <- read_pptx(file_in)
x <- add_slide(x, layout = "2x2-dupes", master = "Master1")
expect_no_error(ph_with(x, "abc", ph_location_label(ph_label = "Title 1")))
expect_error(ph_with(x, "abc", ph_location_label(ph_label = "Content")))

# action = detect
x_det <- read_pptx(file_in)
expect_true(has_ph_dupes(x_det))
out <- capture.output({
x_det <- layout_dedupe_ph_labels(x_det)
})
expect_true(any(grepl("Content 7.1", out)))
expect_true(has_ph_dupes(x_det))

# action = rename
x_rename <- read_pptx(file_in)
before <- x_rename$slideLayouts$get_xfrm_data()$ph_label
out <- capture.output({
x_rename <- layout_dedupe_ph_labels(x_rename, action = "rename", print_info = TRUE)
})
expect_true(any(grepl("Content 7", out)))
expect_true(any(grepl("Content 7.1", out)))
after <- x_rename$slideLayouts$get_xfrm_data()$ph_label
expect_false(has_ph_dupes(x_rename))
expect_true(any(before != after))
expect_equal(length(before), length(after))

# action = delete
x_delete <- read_pptx(file_in)
before <- x_delete$slideLayouts$get_xfrm_data()$ph_label
out <- capture.output({
x_delete <- layout_dedupe_ph_labels(x_delete, action = "delete", print_info = TRUE)
})
expect_true(any(grepl("Content 7", out)))
after <- x_delete$slideLayouts$get_xfrm_data()$ph_label
expect_false(has_ph_dupes(x_delete))
expect_gt(length(before), length(after))

# annotate base should work with deduped phs
file <- tempfile(fileext = ".pptx")
output_file <- tempfile(fileext = ".pptx")
print(x_rename, target = file)
expect_no_error(annotate_base(file, output_file = output_file))
print(x_delete, target = file)
expect_no_error(annotate_base(file, output_file = output_file))
})
Loading