How to show whiskers and points on violin plots?

前端 未结 1 868
灰色年华
灰色年华 2020-12-21 04:59

I have a dataframe df with the following data. I want to plot the logCPM expression of the gene between two groups A and B.

Samples         


        
相关标签:
1条回答
  • 2020-12-21 05:21

    May I suggest using elephant/raincloud plots or hybrid boxplots instead?

    From the blog post linked above:

    Violin plots mirror the data density in a totally uninteresting/uninformative way, simply repeating the same exact information for the sake of visual aesthetic.

    In a raincloud plot, we get basically everything we need: eyeballed statistical inference, assessment of data distributions (useful to check assumptions), and the raw data itself showing outliers and underlying patterns.

    library(tidyverse)
    library(ggrepel)
    
    # Parse the sample table from `txt` (`txt` is not defined in the visible
    # code; presumably the raw data from the question -- confirm).
    df <- read_table2(txt)

    # Tag `Sample10` so it can be coloured and labelled on its own later:
    #   colSel -> colour key: '#10' for Sample10, 'dummy' for every other row
    #   labSel -> text label: '#10' for Sample10, empty string otherwise
    df <- mutate(
      df,
      colSel = ifelse(Samples == 'Sample10', '#10', 'dummy'),
      labSel = ifelse(Samples == 'Sample10', '#10', '')
    )
    
    # Per-group (Type) summary statistics of GeneA.
    # Columns produced:
    #   mean, median, sd  - computed with missing values removed
    #   N                 - number of non-missing GeneA values in the group
    #   ci                - half-width of a normal-approximation 95% interval,
    #                       1.96 * sd / sqrt(N)
    #   lower95, upper95  - mean -/+ ci
    #   lower, upper      - mean -/+ one sd
    sumld <- df %>%
      group_by(Type) %>%
      summarise(
        mean     = mean(GeneA, na.rm = TRUE),
        median   = median(GeneA, na.rm = TRUE),
        sd       = sd(GeneA, na.rm = TRUE),
        # Count only non-missing values: mean/sd above drop NAs, so n() would
        # overstate the sample size and make the interval too narrow.
        N        = sum(!is.na(GeneA)),
        ci       = 1.96 * sd / sqrt(N),
        lower95  = mean - ci,
        upper95  = mean + ci,
        lower    = mean - sd,
        upper    = mean + sd) %>% 
      ungroup()
    sumld
    #> # A tibble: 2 x 10
    #>   Type   mean median    sd     N    ci lower95 upper95 lower upper
    #>   <chr> <dbl>  <dbl> <dbl> <int> <dbl>   <dbl>   <dbl> <dbl> <dbl>
    #> 1 A      14.7   14.5  3.54    17  1.68    13.0    16.3 11.1   18.2
    #> 2 B      12.4   12.9  2.85    21  1.22    11.2    13.6  9.54  15.2
    

    raincloud plot

    ## get geom_flat_violin function
    ## https://gist.github.com/benmarwick/b7dc863d53e0eabc272f4aad909773d2
    ## mirror: https://pastebin.com/J9AzSxtF 
    ## NOTE(review): the gist id sourced below differs from the one in the URL
    ## above -- confirm which copy of geom_flat_violin.R is intended.
    devtools::source_gist("2a1bb0133ff568cbe28d", filename = "geom_flat_violin.R")
    
    # Shared jitter position; the fixed seed lets every layer that uses `pos`
    # reproduce the same horizontal offsets.
    pos <- position_jitter(width = 0.15, seed = 1)
    
    # Base plot reused by the variants below: a half violin per Type, nudged to
    # the right by 0.2, with legends hidden and the Dark2 palette applied.
    p0 <- ggplot(data = df, aes(x = Type, y = GeneA, fill = Type)) +
      geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .8) +
      # "none" is the supported way to drop a legend; FALSE is deprecated
      guides(fill = "none", color = "none") +
      scale_color_brewer(palette = "Dark2") +
      scale_fill_brewer(palette = "Dark2") +
      theme_classic()
    
    # raincloud plot: base half-violin plus jittered raw points coloured by
    # Type, with a narrow box plot drawn on top (its outliers are hidden)
    p1 <- p0 +
      geom_point(
        aes(color = Type),
        size = 3,
        alpha = 0.8,
        position = pos
      ) +
      geom_boxplot(
        width = .1,
        alpha = 0.5,
        outlier.shape = NA,
        show.legend = FALSE
      )
    p1
    

    # Highlight Sample10: points are coloured by `colSel`, and the `labSel`
    # text is drawn next to each point. Layers are added in the listed order.
    p0 +
      list(
        geom_point(aes(color = colSel), position = pos, size = 3, alpha = 0.8),
        # labels use the same seeded jitter object as the points
        geom_text_repel(
          aes(label = labSel),
          position = pos,
          direction = 'y',
          point.padding = 0.25
        ),
        geom_boxplot(
          width = .1,
          alpha = 0.5,
          outlier.shape = NA,
          show.legend = FALSE
        ),
        scale_color_manual(values = c('dummy' = 'grey50', '#10' = 'red'))
      )
    

    # errorbar instead of boxplot: per-group mean and 95% interval taken from
    # `sumld`, both nudged by 0.3 on the x axis.
    # p0 already carries the legend guides, scale_fill_brewer() and
    # theme_classic(), so only the colour scale needs to be added here.
    p0 + 
      geom_point(aes(color = colSel), 
                 position = pos, size = 3, alpha = 0.8) +
      geom_point(data = sumld, aes(x = Type, y = mean), 
                 position = position_nudge(x = 0.3), size = 3.5) +
      geom_text_repel(aes(label = labSel),
                      point.padding = 0.25,
                      direction = 'y',
                      position = pos) +
      # y is remapped to `mean` because the inherited y = GeneA is not a
      # column of `sumld`
      geom_errorbar(data = sumld, aes(ymin = lower95, ymax = upper95, y = mean), 
                    position = position_nudge(x = 0.3), width = 0) +
      scale_color_manual(values = c('dummy' = 'grey50', '#10' = 'red'))
    

    hybrid boxplot using geom_boxjitter() from the ggpol package

    ## https://stackoverflow.com/a/49338481/ 
    library(ggpol)
    
    # Hybrid plot built with geom_boxjitter(); jitter is horizontal only
    # (jitter.height = 0), error bars are drawn, and outlier colour is NA.
    half_box <- ggplot(df) +
      geom_boxjitter(
        aes(x = Type, y = GeneA, fill = Type, color = Type),
        jitter.shape = 21,
        jitter.color = NA,
        jitter.height = 0,
        jitter.width = 0.04,
        outlier.color = NA,
        errorbar.draw = TRUE
      ) +
      scale_color_brewer(palette = "Dark2") +
      scale_fill_brewer(palette = "Dark2") +
      theme_classic()
    half_box
    

    Bonus: you can also replace geom_point() with geom_quasirandom() from the ggbeeswarm package. Here is one example.

    .
    .
    .
    Created on 2018-10-03 by the reprex package (v0.2.1.9000)

    0 讨论(0)
提交回复
热议问题