Summary count by multiple groups with condition in dplyr

前端 未结 2 1729
耶瑟儿~
耶瑟儿~ 2021-01-22 14:42

I have a dataframe that looks like this:

data <- data.frame(a=c(1,1,0,0,0,0,1,1,1, 0), 
               b=c("x","x","x","x","x","y","y","y","z","z"),
               c=c(2, 1, 2, 3, 4, NA, 4, 2, 1, 1),
               d=c("s", "m", "l", "l", "l", "m", "m", "s", "s", "m"))


        
2条回答
  •  一个人的身影
    2021-01-22 15:10

    library(dplyr)
    
    # Example data: `a` is a 0/1 flag, `b` and `d` are the grouping labels, and
    # `c` holds the values to be summed (one of them is NA).
    # tibble() is used because data_frame() is deprecated; dplyr re-exports it.
    data <- tibble(
      a = c(1, 1, 0, 0, 0, 0, 1, 1, 1, 0),
      b = c("x", "x", "x", "x", "x", "y", "y", "y", "z", "z"),
      c = c(2, 1, 2, 3, 4, NA, 4, 2, 1, 1),
      d = c("s", "m", "l", "l", "l", "m", "m", "s", "s", "m")
    )
    
    data
    #> # A tibble: 10 x 4
    #>        a     b     c     d
    #>    <dbl> <chr> <dbl> <chr>
    #>  1     1     x     2     s
    #>  2     1     x     1     m
    #>  3     0     x     2     l
    #>  4     0     x     3     l
    #>  5     0     x     4     l
    #>  6     0     y    NA     m
    #>  7     1     y     4     m
    #>  8     1     y     2     s
    #>  9     1     z     1     s
    #> 10     0     z     1     m
    
    # For each (d, b) pair, add up `c` over the rows where a == 1.
    # Rows where a is 0 contribute 0; na.rm = TRUE skips any NA left in `e`.
    data %>%
      mutate(e = if_else(a == 1, c, 0)) %>%
      group_by(d, b) %>%
      summarise(e = sum(e, na.rm = TRUE))
    
    #> Source: local data frame [7 x 3]
    #> Groups: d [?]
    #> 
    #> # A tibble: 7 x 3
    #>       d     b     e
    #>   <chr> <chr> <dbl>
    #> 1     l     x     0
    #> 2     m     x     1
    #> 3     m     y     4
    #> 4     m     z     0
    #> 5     s     x     2
    #> 6     s     y     2
    #> 7     s     z     1
    

    If you like, you can also just do this all in the summarise call:

    # Conditional sum computed directly, without a separate mutate() step.
    summarise(e = sum(if_else(a == 1, c, 0), na.rm = TRUE))
    

提交回复
热议问题