Skip to content

Commit d958976

Browse files
committed
Merge pull request #200 from ropensci/carson-bars
Let ggplot handle histogran binning. Fix #198
2 parents a9daa28 + 3cd43f8 commit d958976

File tree

6 files changed

+199
-166
lines changed

6 files changed

+199
-166
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: plotly
22
Type: Package
33
Title: Interactive, publication-quality graphs online.
4-
Version: 0.5.27
4+
Version: 0.5.28
55
Authors@R: c(person("Chris", "Parmer", role = c("aut", "cre"),
66
email = "[email protected]"),
77
person("Scott", "Chamberlain", role = "aut",

NEWS

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
0.5.27 -- 15 April 2015
2+
3+
Let ggplot handle histogram binning. Fix #198
4+
15
0.5.27 -- 19 Mar 2015
26

37
Reimplement geom_ribbon as a basic polygon. Fix #191. Fix #192.

R/ggplotly.R

+35
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,41 @@ gg2list <- function(p){
242242
if (!all(barmodes == barmodes[1]))
243243
warning(paste0("You have multiple barcharts or histograms with different positions; ",
244244
"Plotly's layout barmode will be '", layout$barmode, "'."))
245+
# for stacked bar charts, plotly cumulates bar heights, but ggplot doesn't
246+
if (layout$barmode == "stack") {
247+
# could speed up this function with environments or C/C++
248+
unStack <- function(vec) {
249+
n <- length(vec)
250+
if (n == 1) return(vec)
251+
seq.n <- seq_len(n)
252+
names(vec) <- seq.n
253+
vec <- sort(vec)
254+
for (k in seq(2, n)) {
255+
vec[k] <- vec[k] - sum(vec[seq(1, k-1)])
256+
}
257+
as.numeric(vec[as.character(seq.n)])
258+
}
259+
ys <- lapply(trace.list, "[[", "y")
260+
xs <- lapply(trace.list, "[[", "x")
261+
x.vals <- unique(unlist(xs))
262+
# if there are two or more y-values (for a particular x value),
263+
# then modify those y-values so they *add up* to the correct value(s)
264+
for (val in x.vals) {
265+
zs <- lapply(xs, function(x) which(x == val))
266+
ys.given.x <- Map(function(x, y) y[x], zs, ys)
267+
if (length(unlist(ys.given.x)) < 2) next
268+
st <- unStack(unlist(ys.given.x))
269+
lens <- sapply(ys.given.x, length)
270+
trace.seq <- seq_along(trace.list)
271+
ws <- split(st, rep(trace.seq, lens))
272+
for (tr in seq_along(ws)) {
273+
idx <- zs[[tr]]
274+
replacement <- ws[[tr]]
275+
if (length(idx) > 0 && length(replacement) > 0)
276+
trace.list[[tr]]$y[idx] <- replacement
277+
}
278+
}
279+
}
245280
}
246281

247282
# Bar Gap for histograms should be 0

R/trace_generation.R

+32-67
Original file line numberDiff line numberDiff line change
@@ -41,25 +41,9 @@ layer2traces <- function(l, d, misc) {
4141
g$geom <- "smoothLine"
4242
}
4343
}
44-
# Barmode and bargap
45-
barmode <- "group"
46-
if (g$geom == "bar" || g$geom == "histogram") {
47-
if (l$stat$objname == "bin") {
48-
if (g$geom != "histogram") {
49-
warning("You may want to use geom_histogram.")
50-
}
51-
} else {
52-
bargap <- "default"
53-
}
54-
g$geom <- "bar" # histogram is just an alias for geom_bar + stat_bin
55-
pos <- l$position$.super$objname
56-
if (pos == "identity") {
57-
barmode <- "overlay"
58-
} else if (pos == "stack") {
59-
barmode <- "stack"
60-
}
61-
}
62-
if (g$geom == "density") {
44+
# histogram is essentially a bar chart with no gaps (after stats are computed)
45+
if (g$geom == "histogram") {
46+
g$geom <- "bar"
6347
bargap <- 0
6448
}
6549

@@ -182,12 +166,11 @@ layer2traces <- function(l, d, misc) {
182166
lapply(df.list, function(df){
183167
params <- basic$params
184168
params[invariable.names] <- if (ncol(x <- df[1, invariable.names]) > 0) x else NULL
185-
list(data=df[other.names],
169+
list(data=df[other.names],
186170
params=params)
187171
})
188172
}
189173
}
190-
191174
# Split hline and vline when multiple panels or intercepts:
192175
# Need multiple traces accordingly.
193176
if (g$geom == "hline" || g$geom == "vline") {
@@ -216,7 +199,6 @@ layer2traces <- function(l, d, misc) {
216199
}
217200
traces <- NULL
218201
names.in.legend <- NULL
219-
220202
for (data.i in seq_along(data.list)) {
221203
data.params <- data.list[[data.i]]
222204
data.params$params$stat.type <- l$stat$objname
@@ -260,18 +242,23 @@ layer2traces <- function(l, d, misc) {
260242
if (is.null(tr$name) || tr$name %in% names.in.legend)
261243
tr$showlegend <- FALSE
262244
names.in.legend <- c(names.in.legend, tr$name)
263-
264-
if (g$geom == "bar")
265-
tr$barmode <- barmode
266-
267-
# Bar Gap
268-
if (exists("bargap")) {
269-
tr$bargap <- bargap
245+
246+
# special handling for bars
247+
if (g$geom == "bar") {
248+
tr$bargap <- if (exists("bargap")) bargap else "default"
249+
pos <- l$position$.super$objname
250+
tr$barmode <- if (pos %in% c("identity", "stack", "fill")) {
251+
"stack"
252+
} else "group"
270253
}
254+
# TODO: remove this once we reimplement density as area
255+
if (g$geom == "density") {
256+
tr$bargap <- 0
257+
}
258+
271259
traces <- c(traces, list(tr))
272260
}
273261

274-
275262
sort.val <- sapply(traces, function(tr){
276263
rank.val <- unlist(tr$sort)
277264
if(is.null(rank.val)){
@@ -357,16 +344,9 @@ toBasic <- list(
357344
g$data <- g$prestats.data
358345
g
359346
},
360-
bar=function(g) {
361-
if (any(is.na(g$prestats.data$x)))
362-
g$prestats.data$x <- g$prestats.data$x.name
363-
for(a in c("fill", "colour")){
364-
g$prestats.data[[a]] <-
365-
g$data[[a]][match(g$prestats.data$group, g$data$group)]
366-
}
367-
g$params$xstart <- min(g$data$xmin)
368-
g$params$xend <- max(g$data$xmax)
369-
g$data <- g$prestats.data
347+
bar=function(g){
348+
g <- group2NA(g, "bar")
349+
g$data <- g$data[!is.na(g$data$y), ]
370350
g
371351
},
372352
contour=function(g) {
@@ -591,40 +571,25 @@ geom2trace <- list(
591571
L
592572
},
593573
bar=function(data, params) {
594-
L <- list(x=data$x,
574+
x <- if ("x.name" %in% names(data)) data$x.name else data$x
575+
if (inherits(x, "POSIXt")) {
576+
# Convert seconds into milliseconds
577+
x <- as.numeric(x) * 1000
578+
} else if (inherits(x, "Date")) {
579+
# Convert days into milliseconds
580+
x <- as.numeric(x) * 24 * 60 * 60 * 1000
581+
}
582+
L <- list(x=x,
583+
y=data$y,
584+
type="bar",
595585
name=params$name,
596586
text=data$text,
597587
marker=list(color=toRGB(params$fill)))
598-
599588
if (!is.null(params$colour)) {
600589
L$marker$line <- list(color=toRGB(params$colour))
601590
L$marker$line$width <- if (is.null(params$size)) 1 else params$size
602591
}
603-
604-
if (!is.null(params$alpha))
605-
L$opacity <- params$alpha
606-
607-
if (params$stat.type == "bin") {
608-
L$type <- "histogram"
609-
if (is.null(params$binwidth)) {
610-
L$autobinx <- TRUE
611-
} else {
612-
L$autobinx <- FALSE
613-
L$xbins=list(start=params$xstart,
614-
end=params$xend,
615-
size=params$binwidth)
616-
if (inherits(data$x.name, "POSIXt")) {
617-
# Convert seconds into milliseconds
618-
L$xbins <- lapply(L$xbins, function(x) x * 1000)
619-
} else if (inherits(data$x.name, "Date")) {
620-
# Convert days into milliseconds
621-
L$xbins <- lapply(L$xbins, function(x) x * 24 * 60 * 60 * 1000)
622-
}
623-
}
624-
} else {
625-
L$y <- data$y
626-
L$type <- "bar"
627-
}
592+
if (!is.null(params$alpha)) L$opacity <- params$alpha
628593
L
629594
},
630595
step=function(data, params) {

tests/testthat/test-ggplot-bar.R

+58-49
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,20 @@
11
context("bar")
22

3+
expect_traces <- function(gg, n.traces, name){
4+
stopifnot(is.ggplot(gg))
5+
stopifnot(is.numeric(n.traces))
6+
save_outputs(gg, paste0("bar-", name))
7+
L <- gg2list(gg)
8+
is.trace <- names(L) == ""
9+
all.traces <- L[is.trace]
10+
no.data <- sapply(all.traces, function(tr) {
11+
is.null(tr[["x"]]) && is.null(tr[["y"]])
12+
})
13+
has.data <- all.traces[!no.data]
14+
expect_equal(length(has.data), n.traces)
15+
list(traces=has.data, kwargs=L$kwargs)
16+
}
17+
318
researchers <-
419
data.frame(country=c("Canada", "Canada", "Germany", "USA"),
520
name=c("Warren", "Andreanne", "Stefan", "Toby"),
@@ -10,75 +25,47 @@ gg <- ggplot(researchers, aes(country, papers, fill=field))
1025

1126
test_that("position_dodge is translated to barmode=group", {
1227
gg.dodge <- gg + geom_bar(stat="identity", position="dodge")
13-
L <- gg2list(gg.dodge)
14-
expect_equal(length(L), 3)
15-
trace.names <- sapply(L[1:2], "[[", "name")
28+
info <- expect_traces(gg.dodge, 2, "dodge")
29+
trs <- info$traces
30+
trace.names <- sapply(trs[1:2], "[[", "name")
1631
expect_true(all(c("Math", "Bio") %in% trace.names))
17-
expect_identical(L$kwargs$layout$barmode, "group")
32+
expect_identical(info$kwargs$layout$barmode, "group")
1833
# Check x values
19-
expect_identical(as.character(L[[1]]$x[1]), "Canada")
20-
expect_identical(as.character(L[[1]]$x[2]), "Germany")
21-
expect_identical(as.character(L[[2]]$x[1]), "Canada")
22-
expect_identical(as.character(L[[2]]$x[2]), "USA")
23-
24-
save_outputs(gg.dodge, "bar-dodge")
34+
expect_identical(as.character(trs[[1]]$x), c("Canada", "Germany"))
35+
expect_identical(as.character(trs[[2]]$x), c("Canada", "USA"))
2536
})
2637

2738
test_that("position_stack is translated to barmode=stack", {
2839
gg.stack <- gg + geom_bar(stat="identity", position="stack")
29-
L <- gg2list(gg.stack)
30-
expect_equal(length(L), 3)
31-
trace.names <- sapply(L[1:2], "[[", "name")
40+
info <- expect_traces(gg.stack, 2, "stack")
41+
trs <- info$traces
42+
trace.names <- sapply(trs[1:2], "[[", "name")
3243
expect_true(all(c("Math", "Bio") %in% trace.names))
33-
expect_identical(L$kwargs$layout$barmode, "stack")
34-
35-
save_outputs(gg.stack, "bar-stack")
44+
expect_identical(info$kwargs$layout$barmode, "stack")
3645
})
3746

38-
test_that("position_identity is translated to barmode=overlay", {
47+
test_that("position_identity is translated to barmode=stack", {
3948
gg.identity <- gg + geom_bar(stat="identity", position="identity")
40-
L <- gg2list(gg.identity)
41-
expect_equal(length(L), 3)
42-
trace.names <- sapply(L[1:2], "[[", "name")
49+
info <- expect_traces(gg.identity, 2, "identity")
50+
trs <- info$traces
51+
trace.names <- sapply(trs[1:2], "[[", "name")
4352
expect_true(all(c("Math", "Bio") %in% trace.names))
44-
expect_identical(L$kwargs$layout$barmode, "overlay")
45-
46-
save_outputs(gg.identity, "bar-identity")
53+
expect_identical(info$kwargs$layout$barmode, "stack")
4754
})
4855

4956
test_that("dates work well with bar charts", {
50-
5157
researchers$month <- c("2012-01-01", "2012-01-01", "2012-02-01", "2012-02-01")
5258
researchers$month <- as.Date(researchers$month)
53-
5459
gd <- ggplot(researchers, aes(month, papers, fill=field)) +
5560
geom_bar(stat="identity")
56-
57-
L <- gg2list(gd)
58-
59-
expect_equal(length(L), 3) # 2 traces + layout
60-
expect_identical(L$kwargs$layout$xaxis$type, "date")
61-
expect_identical(L[[1]]$x[1], "2012-01-01 00:00:00")
62-
expect_identical(L[[1]]$x[2], "2012-02-01 00:00:00")
63-
64-
save_outputs(gd, "bar-dates")
61+
info <- expect_traces(gd, 2, "dates")
62+
trs <- info$traces
63+
expect_identical(info$kwargs$layout$xaxis$type, "date")
64+
# plotly likes time in milliseconds
65+
t <- as.numeric(unique(researchers$month)) * 24 * 60 * 60 * 1000
66+
expect_identical(trs[[1]]$x, t)
6567
})
6668

67-
expect_traces <- function(gg, n.traces, name){
68-
stopifnot(is.ggplot(gg))
69-
stopifnot(is.numeric(n.traces))
70-
save_outputs(gg, paste0("bar-", name))
71-
L <- gg2list(gg)
72-
is.trace <- names(L) == ""
73-
all.traces <- L[is.trace]
74-
no.data <- sapply(all.traces, function(tr) {
75-
is.null(tr[["x"]]) && is.null(tr[["y"]])
76-
})
77-
has.data <- all.traces[!no.data]
78-
expect_equal(length(has.data), n.traces)
79-
list(traces=has.data, kwargs=L$kwargs)
80-
}
81-
8269
## http://www.cookbook-r.com/Graphs/Bar_and_line_graphs_%28ggplot2%29/
8370
df <- data.frame(time = factor(c("Lunch","Dinner"), levels=c("Lunch","Dinner")),
8471
total_bill = c(14.89, 17.23))
@@ -182,3 +169,25 @@ test_that("guides(fill=FALSE) does not affect colour legend", {
182169
expect_true(info$kwargs$layout$showlegend)
183170
})
184171

172+
173+
base <- ggplot(mtcars, aes(factor(vs), fill=factor(cyl)))
174+
175+
test_that("geom_bar() stacks counts", {
176+
info <- expect_traces(base + geom_bar(), 3, "position-stack")
177+
expect_identical(info$kwargs$layout$barmode, "stack")
178+
trs <- info$traces
179+
# sum of y values for each trace
180+
test <- as.numeric(sort(sapply(trs, function(x) sum(x$y))))
181+
true <- as.numeric(sort(table(mtcars$cyl)))
182+
expect_identical(test, true)
183+
})
184+
185+
test_that("geom_bar(position = 'fill') stacks proportions", {
186+
info <- expect_traces(base + geom_bar(position = "fill"), 3, "position-fill")
187+
expect_identical(info$kwargs$layout$barmode, "stack")
188+
trs <- info$traces
189+
# sum of y-values *conditioned* on a x-value
190+
prop <- sum(sapply(sapply(trs, "[[", "y"), "[", 1))
191+
expect_identical(prop, 1)
192+
})
193+

0 commit comments

Comments
 (0)