问题
I'm trying to create a boxplot that shows only the significant p-values for comparisons within each group on the x-axis. For example, here it would compare I1 and SI2 within "fair", within "good", within "very good", etc.
I've tried using the following code to achieve the above plot:
library(ggplot2)
library(ggpubr) # provides stat_compare_means(), which is used below
library(dplyr)

data("diamonds")

# One label per cut/clarity cell: the row count on the first line and the
# number of distinct colors on the second.
labeldat <- diamonds %>%
  group_by(cut, clarity) %>%
  dplyr::summarise(labels = paste(n(), n_distinct(color), sep = "\n"))

# Every unordered pair of clarity levels, generated in factor-level order
# (I1-SI2, I1-SI1, ..., VVS1-IF) instead of being typed out by hand.
Comparisons <- combn(levels(diamonds$clarity), 2, simplify = FALSE)

ggplot(diamonds, aes(x = cut, y = price)) +
  geom_boxplot(aes(fill = clarity), position = position_dodge2(width = 0.75)) +
  theme_bw() +
  geom_text(
    data = labeldat,
    aes(x = cut, y = -250, label = labels),
    hjust = 0.5,
    position = position_dodge2(width = .75)
  ) +
  # NOTE: this is the layer the question is about; passing `comparisons`
  # here is what produces the "Computation failed in stat_signif()" warning.
  stat_compare_means(
    aes(group = clarity),
    label = "p.signif",
    method = "t.test",
    comparisons = Comparisons
  )
Unfortunately, using the comparisons argument seems to throw a computation error which I can't work out how to solve:
Warning message:
Computation failed in `stat_signif()`:
missing value where TRUE/FALSE needed
I have tried running this without the comparisons, but it seems to just give me an overall score
回答1:
I'll preface this by saying that in this example there are too many comparisons being made, so the result is cluttered: to fit the extra annotations the y-axis is greatly expanded and the boxplots are squashed. But for the sake of providing an answer, and imagining you might have a dataset with fewer comparisons, the issue is that `stat_compare_means()` compares groups on the x-axis. To compare by `clarity`, you need to put it on the x-axis and then facet by `cut`.
library(ggplot2)
library(ggpubr)
library(dplyr)
# One label per cut/clarity cell: the row count on the first line and the
# number of distinct colors on the second. ungroup() drops the grouping that
# summarise() would otherwise leave on the result.
labeldat <- diamonds %>%
  group_by(cut, clarity) %>%
  dplyr::summarise(labels = paste(n(), n_distinct(color), sep = "\n")) %>%
  ungroup()

# clarity is on the x-axis and cut is the facet, so the pairwise comparisons
# are made between clarity levels inside each cut panel.
ggplot(diamonds, aes(x = clarity, y = price)) +
  geom_boxplot(aes(fill = clarity), position = position_dodge2(width = 0.75)) +
  stat_compare_means(
    aes(group = clarity),
    label = "p.signif",
    method = "t.test",
    # All pairs of x-axis positions, sized from the factor itself rather
    # than a hard-coded 8.
    comparisons = combn(
      seq_len(nlevels(diamonds$clarity)),
      2,
      simplify = FALSE
    )
  ) +
  facet_grid(cols = vars(cut)) +
  theme_bw() +
  geom_text(
    data = labeldat,
    aes(x = clarity, y = -2000, label = labels),
    hjust = 0.5,
    position = position_dodge2(width = .75)
  ) +
  # The fill legend already identifies each clarity level.
  theme(axis.text.x = element_blank())
回答2:
You could use ggsignif for that. It allows for manual annotation, so you could calculate p-values separately, and create an annotation data.frame with filtered comparisons. Example:
library(ggplot2)
library(ggsignif)
library(dplyr)
library(data.table)
# One data frame per cut level, so the tests can be run within each cut.
dm <- split(diamonds, f = diamonds[["cut"]])
# Run unadjusted, unpaired pairwise Wilcoxon tests of price between clarity
# levels and keep only the significant pairs.
#
# y:    data frame with `price` and `clarity` columns.
# pval: significance threshold; pairs with a p-value below it are kept.
#
# Returns a data.table in long format with one row per retained pair; the
# p-value is in column `N`.
getp <- function(y, pval = .05) {
  tests <- stats::pairwise.wilcox.test(
    x = y$price,
    g = y$clarity,
    p.adjust.method = "none",
    paired = FALSE
  )
  # Reshape the p-value matrix to long form, then drop the NA cells and the
  # pairs that do not reach the threshold.
  long <- as.data.table(as.table(tests$p.value))
  long[!is.na(N) & N < pval]
}
# Stack the per-cut results into one table; the list names become `cut`.
dmp <- data.table::rbindlist(lapply(dm, getp), idcol = "cut")
data.table::setnames(dmp, c("cut", "start", "end", "label"))

# Turn the p-values into 3-significant-digit text for the annotations.
dmp[, label := formatC(
  signif(label, digits = 3),
  digits = 3,
  format = "g",
  flag = "#"
)]

# Spread the brackets of each cut evenly from 2e4 upwards so they do not
# overlap.
dmp[, y := (seq_len(.N) - 1L) * (2E4 / .N) + 2e4, by = cut]

# Convert to a plain data.frame (by reference) before handing it to ggplot.
data.table::setDF(dmp)
# Boxplots of price by clarity, one panel per cut, with the precomputed
# significance brackets from `dmp` drawn on top.
ggplot(diamonds, aes(x = clarity, y = price)) +
  geom_boxplot(
    aes(fill = clarity),
    position = position_dodge2(width = 0.75)
  ) +
  facet_wrap(vars(cut)) +
  # manual = TRUE makes geom_signif read the brackets from `dmp`.
  ggsignif::geom_signif(
    data = dmp,
    aes(xmin = start, xmax = end, annotations = label, y_position = y),
    textsize = 2,
    vjust = -0.2,
    manual = TRUE
  ) +
  # Raise the upper limit so the brackets placed above 2e4 stay visible.
  scale_y_continuous(limits = c(NA, 4E4)) +
  theme_bw() +
  theme(axis.text.x = element_blank())
来源:https://stackoverflow.com/questions/60198635/add-p-values-to-comparisons-within-groups-boxplot