This question is related to Create custom geom to compute summary statistics and display them *outside* the plotting region (NOTE: All functions have been simplified; no error
My solution might be a little simple but it works well.
Given an example with faceting by am I start by creating labels
using paste
and \n
.
mtcars2 <- mtcars %>%
group_by(cyl, am) %>% mutate(n = n()) %>%
mutate(label = paste0(cyl,'\nN = ',n))
I then use these labels instead of cyl in the ggplot code
ggplot(mtcars2,
aes(x = factor(label), y = mpg, color = factor(label))) +
geom_point() +
xlab('cyl') +
facet_wrap(~am, scales = 'free_x') +
theme(legend.position = "none")
To produce something like the figure below.
I have updated the EnvStats
package to include a stat
called stat_n_text
which will add the sample size (the number of unique y-values) below each unique x-value. See the help file for stat_n_text
for more information and a list of examples. Below is a simple example:
library(ggplot2)
library(EnvStats)
p <- ggplot(mtcars,
aes(x = factor(cyl), y = mpg, color = factor(cyl))) +
theme(legend.position = "none")
p + geom_point() +
stat_n_text() +
labs(x = "Number of Cylinders", y = "Miles per Gallon")
You can print the counts below the x-axis labels using geom_text
if you turn off clipping, but you'll probably have to tweak the placement. I've included a "nudge" parameter for that in the code below. Also, the method below is intended for cases where all the facets (if any) are column facets.
I realize you ultimately want code that will work inside a new geom, but perhaps the examples below can be adapted for use in a geom.
library(ggplot2)
library(dplyr)
pgg = function(dat, x, y, facet=NULL, nudge=0.17) {
# Convert x-variable to a factor
dat[,x] = as.factor(dat[,x])
# Plot points
p = ggplot(dat, aes_string(x, y)) +
geom_point(position=position_jitter(w=0.3, h=0)) + theme_bw()
# Summarise data to get counts by x-variable and (if present) facet variables
dots = lapply(c(facet, x), as.symbol)
nn = dat %>% group_by_(.dots=dots) %>% tally
# If there are facets, add them to the plot
if (!is.null(facet)) {
p = p + facet_grid(paste("~", paste(facet, collapse="+")))
}
# Add counts as text labels
p = p + geom_text(data=nn, aes(label=paste0("N = ", nn$n)),
y=min(dat[,y]) - nudge*1.05*diff(range(dat[,y])),
colour="grey20", size=3.5) +
theme(axis.title.x=element_text(margin=unit(c(1.5,0,0,0),"lines")))
# Turn off clipping and return plot
p <- ggplot_gtable(ggplot_build(p))
p$layout$clip[p$layout$name=="panel"] <- "off"
grid.draw(p)
}
pgg(mtcars, "cyl", "mpg")
pgg(mtcars, "cyl", "mpg", facet=c("am","vs"))
Another, potentially more flexible, option is to add the counts to the bottom of the plot panel. For example:
pgg = function(dat, x, y, facet_r=NULL, facet_c=NULL) {
# Convert x-variable to a factor
dat[,x] = as.factor(dat[,x])
# Plot points
p = ggplot(dat, aes_string(x, y)) +
geom_point(position=position_jitter(w=0.3, h=0)) + theme_bw()
# Summarise data to get counts by x-variable and (if present) facet variables
dots = lapply(c(facet_r, facet_c, x), as.symbol)
nn = dat %>% group_by_(.dots=dots) %>% tally
# If there are facets, add them to the plot
if (!is.null(facet_r) | !is.null(facet_c)) {
facets = paste(ifelse(is.null(facet_r),".",facet_r), " ~ " ,
ifelse(is.null(facet_c),".",facet_c))
p = p + facet_grid(facets)
}
# Add counts as text labels
p + geom_text(data=nn, aes(label=paste0("N = ", nn$n)),
y=min(dat[,y]) - 0.15*min(dat[,y]), colour="grey20", size=3) +
scale_y_continuous(limits=range(dat[,y]) + c(-0.1*min(dat[,y]), 0.01*max(dat[,y])))
}
pgg(mtcars, "cyl", "mpg")
pgg(mtcars, "cyl", "mpg", facet_c="am")
pgg(mtcars, "cyl", "mpg", facet_c="am", facet_r="vs")