Label ggplot with group names and their equation, possibly with ggpmisc?

情到浓时终转凉″ 提交于 2020-06-28 05:43:11

问题


I would like to label my plot, possibly using the equation method from ggpmisc to give an informative label that links to the colour and equation (then I can remove the legend altogether). For example, in the plot below, I would ideally have the factor levels of 4, 6 and 8 in the equation LHS.

library(tidyverse)
library(ggpmisc)

df_mtcars <- mtcars %>% mutate(factor_cyl = as.factor(cyl))

p <- ggplot(df_mtcars, aes(x = wt, y = mpg, group = factor_cyl, colour= factor_cyl))+
  geom_smooth(method="lm")+
  geom_point()+
  stat_poly_eq(formula = my_formula,
               label.x = "centre",
               #eq.with.lhs = paste0(expression(y), "~`=`~"),
               eq.with.lhs = paste0("Group~factor~level~here", "~Cylinders:", "~italic(hat(y))~`=`~"),
               aes(label = paste(..eq.label.., sep = "~~~")), 
               parse = TRUE)
p

There is a workaround by modifying the plot afterwards using the technique described here, but surely there is something simpler?

p <- ggplot(df_mtcars, aes(x = wt, y = mpg, group = factor_cyl, colour= factor_cyl))+
  geom_smooth(method="lm")+
  geom_point()+
  stat_poly_eq(formula = my_formula,
               label.x = "centre",
               eq.with.lhs = paste0(expression(y), "~`=`~"),
               #eq.with.lhs = paste0("Group~factor~level~here", "~Cylinders:", "~italic(hat(y))~`=`~"),
               aes(label = paste(..eq.label.., sep = "~~~")), 
               parse = TRUE)
p

# Modification of equation LHS technique from:
# https://stackoverflow.com/questions/56376072/convert-gtable-into-ggplot-in-r-ggplot2
temp <- ggplot_build(p)
temp$data[[3]]$label <- temp$data[[3]]$label %>% 
  fct_relabel(~ str_replace(.x, "y", paste0(c("8","6","4"),"~cylinder:", "~~italic(hat(y))" )))
class(temp)

#convert back to ggplot object
#https://stackoverflow.com/questions/56376072/convert-gtable-into-ggplot-in-r-ggplot2
#install.packages("ggplotify")
library("ggplotify")
q <- as.ggplot(ggplot_gtable(temp))
class(q)
q


回答1:


This first example puts the label to the right of the equation, and is partly manual. On the other hand it is very simple to code. Why this works is because group is always present in the data as seen by layer functions (statistics and geoms).

library(tidyverse)
library(ggpmisc)

df_mtcars <- mtcars %>% mutate(factor_cyl = as.factor(cyl))

my_formula <- y ~ x

p <- ggplot(df_mtcars, aes(x = wt, y = mpg, group = factor_cyl, colour = factor_cyl)) +
  geom_smooth(method="lm")+
  geom_point()+
  stat_poly_eq(formula = my_formula,
               label.x = "centre",
               eq.with.lhs = "italic(hat(y))~`=`~",
               aes(label = paste(stat(eq.label), "*\", \"*", 
                                 c("4", "6", "8")[stat(group)], 
                                 "~cylinders.",  sep = "")),
               label.x.npc = "right",
               parse = TRUE) +
  scale_colour_discrete(guide = FALSE)
p

In fact with a little bit of additional juggling one can achieve almost an answer to the question. We need to add the lhs by pasting it explicitly in aes() so that we can add also paste text to its left based on a computed variable.

library(tidyverse)
library(ggpmisc)

df_mtcars <- mtcars %>% mutate(factor_cyl = as.factor(cyl))

my_formula <- y ~ x

p <- ggplot(df_mtcars, aes(x = wt, y = mpg, group = factor_cyl, colour = factor_cyl)) +
  geom_smooth(method="lm")+
  geom_point()+
  stat_poly_eq(formula = my_formula,
               label.x = "centre",
               eq.with.lhs = "",
               aes(label = paste("bold(\"", c("4", "6", "8")[stat(group)], 
                                 " cylinders:  \")*",
                                 "italic(hat(y))~`=`~",
                                 stat(eq.label),
                                 sep = "")),
               label.x.npc = "right",
               parse = TRUE) +
  scale_colour_discrete(guide = FALSE)
p 




回答2:


What about a manual solution where you can add your equation as geom_text ?

Pros: Highly customization / Cons: Need to be manually edited based on your equation

Here, using your example and the linear regression:

library(tidyverse)

df_label <- df_mtcars %>% group_by(factor_cyl) %>%
  summarise(Inter = lm(mpg~wt)$coefficients[1],
            Coeff = lm(mpg~wt)$coefficients[2]) %>% ungroup() %>%
  mutate(ypos = max(df_mtcars$mpg)*(1-0.05*row_number())) %>%
  mutate(Label2 = paste(factor_cyl,"~Cylinders:~", "italic(y)==",round(Inter,2),ifelse(Coeff <0,"-","+"),round(abs(Coeff),2),"~italic(x)",sep =""))

# A tibble: 3 x 5
  factor_cyl Inter Coeff  ypos Label2                                      
  <fct>      <dbl> <dbl> <dbl> <chr>                                       
1 4           39.6 -5.65  32.2 4~Cylinders:~italic(y)==39.57-5.65~italic(x)
2 6           28.4 -2.78  30.5 6~Cylinders:~italic(y)==28.41-2.78~italic(x)
3 8           23.9 -2.19  28.8 8~Cylinders:~italic(y)==23.87-2.19~italic(x)

Now, you can pass it in ggplot2:

ggplot(df_mtcars,aes(x = wt, y = mpg, group = factor_cyl, colour= factor_cyl))+
  geom_smooth(method="lm")+
  geom_point()+
  geom_text(data = df_label,
            aes(x = 2.5, y = ypos, 
                label = Label2, color = factor_cyl), 
            hjust = 0, show.legend = FALSE, parse = TRUE)




回答3:


An alternative to labelling with the equation is to label with the fitted line. Here is an approach adapted from an answer on a related question here

#example of loess for multiple models
#https://stackoverflow.com/a/55127487/4927395
library(tidyverse)
library(ggpmisc)

df_mtcars <- mtcars %>% mutate(cyl = as.factor(cyl))

models <- df_mtcars %>%
  tidyr::nest(-cyl) %>%
  dplyr::mutate(
    # Perform loess calculation on each CpG group
    m = purrr::map(data, lm,
                   formula = mpg ~ wt),
    # Retrieve the fitted values from each model
    fitted = purrr::map(m, `[[`, "fitted.values")
  )

# Apply fitted y's as a new column
results <- models %>%
  dplyr::select(-m) %>%
  tidyr::unnest()

#find final x values for each group
my_last_points <- results %>% group_by(cyl) %>% summarise(wt = max(wt, na.rm=TRUE))

#Join dataframe of predictions to group labels
my_last_points$pred_y <- left_join(my_last_points, results)

# Plot with loess line for each group
ggplot(results, aes(x = wt, y = mpg, group = cyl, colour = cyl)) +
  geom_point(size=1) +
  geom_smooth(method="lm",se=FALSE)+
  geom_text(data = my_last_points, aes(x=wt+0.4, y=pred_y$fitted, label = paste0(cyl," Cylinders")))+
  theme(legend.position = "none")+  
  stat_poly_eq(formula = "y~x",
             label.x = "centre",
             eq.with.lhs = paste0(expression(y), "~`=`~"),
             aes(label = paste(..eq.label.., sep = "~~~")), 
             parse = TRUE)



来源:https://stackoverflow.com/questions/61357383/label-ggplot-with-group-names-and-their-equation-possibly-with-ggpmisc

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!