问题
I have followed the answer from this question: Tukey test results on geom_boxplot with facet_grid
It works well, but I would also like to compare the groups across facets. In other words, I want the letters to be assigned over all of the results at once and only then split into facets (I have both horizontal and vertical facets). How can I do this? Also, how can I order the letters so that they start from "a" for the first variable in the first facet, then "b" for the second variable, and so on? I tried the following, but it did not order the letters the way I wanted.
# Attempted fix from the question. Per the TukeyHSD() documentation,
# `ordered = TRUE` orders the factor levels by increasing sample mean before
# the pairwise differences are taken; `ANOVA` is a fitted aov object defined
# elsewhere.
TUKEY <- TukeyHSD(ANOVA, ordered = TRUE)
Here is a reproducible example (the plotting code was taken from the link above, and the data-generation code is taken from http://sape.inf.usi.ch/quick-reference/ggplot2/facet):
# Simulated benchmark timings ----
# Fix the RNG seed so the random timings (and therefore the Tukey letters
# derived from them) are the same on every run of this example.
set.seed(42)

# One row per observation for every combination of benchmark, garbage
# collector (gc), optimisation setting (opt) and heap size.
d <- expand.grid(
  obs = 0:10,
  benchmark = c("antlr", "bloat", "chart"),
  gc = c("CopyMS", "GenCopy", "GenImmix"),
  opt = c("on", "off", "valid"),
  heapSize = seq(from = 1.5, to = 4, by = 0.5)
)

# Base time: exponential noise on top of 1000, plus a penalty that grows with
# the distance of the heap size from 3.
d$time <- rexp(nrow(d), rate = 0.01) + 1000
d$time <- d$time + abs(d$heapSize - 3) * 100

# Rows with opt == "on" are 200 faster, except for the "bloat" benchmark,
# where 190 of that gain is added back.
is_on <- d$opt == "on"
is_on_bloat <- is_on & d$benchmark == "bloat"
d$time[is_on] <- d$time[is_on] - 200
d$time[is_on_bloat] <- d$time[is_on_bloat] + 190
#' Build compact-letter-display labels from one term of a Tukey HSD result.
#'
#' @param TUKEY Unused. Kept so that existing two-argument calls keep working.
#' @param variable Comparison matrix for one model term, as found in a
#'   `TukeyHSD()` result: one row per pairwise comparison, row names of the
#'   form "levelB-levelA", and the adjusted p-values in the fourth column.
#' @return A data frame with the columns `Letters` (the group letters returned
#'   by `multcompLetters()`) and `treatment` (the level names), sorted by
#'   `treatment`.
generate_label_df <- function(TUKEY, variable) {
  # Re-attach the comparison names explicitly: with a single comparison row
  # (only two groups) `variable[, 4]` collapses to an unnamed scalar, and
  # multcompLetters() needs the names to recover the group labels.
  p_adj <- stats::setNames(variable[, 4], rownames(variable))
  tukey_labels <- data.frame(multcompLetters(p_adj)["Letters"])

  # Sort by treatment name so the labels follow the same order as the
  # boxplot's x axis.
  tukey_labels$treatment <- rownames(tukey_labels)
  tukey_labels[order(tukey_labels$treatment), ]
}
#' Faceted boxplots of `time` by `opt` with per-facet Tukey HSD letters.
#'
#' Draws one panel per `gc` (rows) x `benchmark` (columns) combination. A
#' separate one-way ANOVA `time ~ opt` is fitted on each panel's rows, so the
#' letters are comparable within a facet but not across facets. Each Tukey
#' result is printed as it is computed.
#'
#' @param df Data frame with the columns `time`, `opt`, `gc` and `benchmark`.
#' @return A ggplot object: the boxplots plus one text layer holding the
#'   letters for all facets.
TUKEYplot <- function(df) {
  p <- ggplot(data = df) +
    aes(x = opt, y = time, colour = opt) +
    geom_boxplot() +
    facet_grid(gc ~ benchmark) +
    theme_linedraw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      strip.background = element_rect(colour = "black", fill = "white"),
      strip.text = element_text(colour = "black", size = 12),
      axis.text = element_text(size = 12),
      legend.text = element_text(size = 12),
      legend.title = element_text(size = 12, face = "bold"),
      axis.title = element_text(size = 14, face = "bold")
    ) +
    ylim(min(df$time), max(df$time) + 0.05) +
    labs(x = "type", y = "time", color = "state") +
    scale_color_npg()

  # Letters and label positions for a single facet, or NULL when the facet
  # cannot be tested.
  facet_labels <- function(gc_level, benchmark_level) {
    in_facet <- which(df$benchmark == benchmark_level & df$gc == gc_level)
    subdf <- df[in_facet, , drop = FALSE]
    # A pairwise test needs at least two `opt` groups; skip the facet instead
    # of letting the model fit fail.
    if (length(unique(subdf$opt)) < 2L) {
      return(NULL)
    }

    # Tukey test to study each pair of treatments within this facet.
    fit <- aov(time ~ opt, data = subdf)
    tukey <- TukeyHSD(fit)
    print(tukey)

    labels <- generate_label_df(tukey, tukey$opt)
    names(labels) <- c("Letters", "opt")

    # Anchor each letter at the 75th percentile of its box. Only `time` is
    # aggregated; the other columns are not needed for the labels.
    y_pos <- aggregate(time ~ opt, data = subdf, FUN = quantile, probs = 0.75)
    out <- merge(labels, y_pos, by = "opt")
    out$benchmark <- benchmark_level
    out$gc <- gc_level
    out
  }

  # Visit the facets in the original order: gc outer, benchmark inner.
  facets <- expand.grid(
    benchmark = as.character(unique(df$benchmark)),
    gc = as.character(unique(df$gc)),
    stringsAsFactors = FALSE
  )
  label_df <- do.call(
    rbind,
    unname(Map(facet_labels, facets$gc, facets$benchmark))
  )

  # One text layer for all facets instead of one layer per facet.
  if (!is.null(label_df) && nrow(label_df) > 0L) {
    p <- p + geom_text(
      data = label_df, aes(x = opt, y = time, label = Letters),
      vjust = -1.2, hjust = -0.5, show.legend = FALSE, size = 5
    )
  }
  p
}
# Build the annotated, faceted boxplot from the simulated data and display it.
p1 <- TUKEYplot(df = d)
p1
Update: Visual aid of what I would like to do:
Original plot:
Desired plot (partial):
回答1:
I finally figured out how to do it, so I am posting the answer! Basically, I moved the Tukey calculation out of the loop: fitting a single ANOVA that includes the gc:benchmark:opt interaction and then applying Tukey's test to that interaction gave me what I wanted. The combined treatment labels are then separated into one column per factor (make sure your factor levels do not contain ":"; you can use revalue to rename them if they do), and the code then iterates over the facet levels of the data to add the labels.
#' Faceted boxplots of `time` by `opt` with Tukey letters shared across facets.
#'
#' Draws one panel per `gc` (rows) x `benchmark` (columns) combination. A
#' single ANOVA `time ~ gc * benchmark * opt` is fitted on the whole data set,
#' and Tukey's test is applied to the `gc:benchmark:opt` interaction, so the
#' letters are comparable across all facets.
#'
#' The interaction labels are split on ":", so the levels of `gc`, `benchmark`
#' and `opt` must not contain ":".
#'
#' @param df Data frame with the columns `time`, `opt`, `gc` and `benchmark`.
#' @return A ggplot object: the boxplots plus one text layer holding the
#'   letters for all facets.
TUKEYplot <- function(df) {
  p <- ggplot(data = df) +
    aes(x = opt, y = time, colour = opt) +
    geom_boxplot() +
    facet_grid(gc ~ benchmark) +
    theme_linedraw() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      strip.background = element_rect(colour = "black", fill = "white"),
      strip.text = element_text(colour = "black", size = 12),
      axis.text = element_text(size = 12),
      legend.text = element_text(size = 12),
      legend.title = element_text(size = 12, face = "bold"),
      axis.title = element_text(size = 14, face = "bold")
    ) +
    ylim(min(df$time), max(df$time) + 0.05) +
    labs(x = "type", y = "time", color = "state") +
    scale_color_npg()

  # Tukey test on the three-way interaction only; the comparisons for the
  # other model terms are never used, so they are not computed.
  interaction_term <- "gc:benchmark:opt"
  fit <- aov(time ~ gc * benchmark * opt, data = df)
  tukey <- TukeyHSD(fit, which = interaction_term)
  all_labels <- generate_label_df(tukey, tukey[[interaction_term]])

  # "gc:benchmark:opt" treatment names -> one column per factor.
  sep_labels <- all_labels %>%
    separate(col = treatment, into = c("gc", "benchmark", "opt"), sep = ":")

  # Letters and label positions for a single facet, or NULL when the facet
  # has no data or no letters.
  facet_labels <- function(gc_level, benchmark_level) {
    in_facet <- which(df$benchmark == benchmark_level & df$gc == gc_level)
    has_label <- which(
      sep_labels$benchmark == benchmark_level & sep_labels$gc == gc_level
    )
    if (length(in_facet) == 0L || length(has_label) == 0L) {
      return(NULL)
    }
    labels <- sep_labels[has_label, c("Letters", "opt"), drop = FALSE]

    # Anchor each letter at the 75th percentile of its box. Only `time` is
    # aggregated; the other columns are not needed for the labels.
    y_pos <- aggregate(
      time ~ opt,
      data = df[in_facet, , drop = FALSE], FUN = quantile, probs = 0.75
    )
    out <- merge(labels, y_pos, by = "opt")
    out$benchmark <- benchmark_level
    out$gc <- gc_level
    out
  }

  # Visit the facets in the original order: gc outer, benchmark inner.
  facets <- expand.grid(
    benchmark = as.character(unique(df$benchmark)),
    gc = as.character(unique(df$gc)),
    stringsAsFactors = FALSE
  )
  label_df <- do.call(
    rbind,
    unname(Map(facet_labels, facets$gc, facets$benchmark))
  )

  # One text layer for all facets instead of one layer per facet.
  if (!is.null(label_df) && nrow(label_df) > 0L) {
    p <- p + geom_text(
      data = label_df, aes(x = opt, y = time, label = Letters),
      vjust = -1.2, hjust = -0.5, show.legend = FALSE, size = 5
    )
  }
  p
}
Resulting image (I could not embed it because I do not have enough reputation):
Result
来源:https://stackoverflow.com/questions/56136421/consistent-lettering-across-facets-for-tukey-letter-plot-on-ggplot