How to get the cumulative sum by group in R?

前端未结

关注

 2  363

Suppose I have a dataframe such that:

# Example data: 8 rows with a 0/1 `group` flag, a replicate label `rep`
# ("d1" for rows 1-4, "d2" for rows 5-8) and a random 0/1 `value`.
# No seed is set here, so `value` differs from run to run.
df <- data.frame(
  id = 1:8,
  group = c(1, 0, 0, 1, 1, 0, 1, 0),
  rep = c(rep("d1", 4), rep("d2", 4)),
  value = rbinom(8, 1, 0.6)
)
df
  id gro


                      
              相关标签:


      
      
        
          2条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  心在旅途        
                
              
                            
                2020-12-03 05:48
              
            
            
                                                                       
I'd recommend working with the
tidy form of the data.
Here's an approach with dplyr, but it would be trivial to translate to
data.table or base R.

First I'll create the dataset, setting the random seed to make the
example reproducible:

# Pin the RNG so the random `value` column is identical on every run.
set.seed(1014)

# Eight rows: a 0/1 `group` flag, a replicate label (four "d1" rows followed
# by four "d2" rows) and one Bernoulli(0.6) draw per row.
df <- data.frame(
  id = seq_len(8),
  group = c(1, 0, 0, 1, 1, 0, 1, 0),
  rep = rep(c("d1", "d2"), each = 4),
  value = rbinom(n = 8, size = 1, prob = 0.6)
)
df

%>   id group rep value
%> 1  1     1  d1     1
%> 2  2     0  d1     0
%> 3  3     0  d1     0
%> 4  4     1  d1     1
%> 5  5     1  d2     1
%> 6  6     0  d2     1
%> 7  7     1  d2     1
%> 8  8     0  d2     1


Next, using dplyr, I'll first collapse to individual rows by group, and
then compute the cumulative sum:

library(dplyr)

# Step 1: collapse to one row per (group, rep) cell holding the total of
# `value`.
df <- summarise(group_by(df, group, rep), value = sum(value))

# Step 2: the summarised result is still grouped by `group` (see the
# "Groups: group" line in the printed output), so cumsum() restarts for each
# group and accumulates across its rep rows.
df <- mutate(df, csum = cumsum(value))
df

%> Source: local data frame [4 x 4]
%> Groups: group
%> 
%>   group rep value csum
%> 1     0  d1     0    0
%> 2     0  d2     2    2
%> 3     1  d1     2    2
%> 4     1  d2     2    4


For most cases, you're best off leaving the data in this form (it will be
easier to work with), but you can reshape if you need to:

library(reshape2)

# Spread to wide form: one row per group, one column per rep, with the
# cumulative sum as the cell value.
dcast(
  data = df,
  formula = group ~ rep,
  value.var = "csum"
)

%>   group d1 d2
%> 1     0  0  2
%> 2     1  2  4

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  一向        
                
              
                            
                2020-12-03 05:49
              
            
            
                                                                       
library(data.table)

# Turn df into a data.table in place.
setDT(df)

# Wide layout: one row per group, one column per rep, each cell holding the
# sum of `value` for that group/rep combination.
dt.cast <- dcast.data.table(
  df,
  group ~ rep,
  fun.aggregate = sum,
  value.var = 'value'
)
dt.cast
#   group d1 d2
#1:     0  0  1
#2:     1  1  2

# Within each group, flatten the rep columns into a vector, accumulate from
# left to right, and turn the result back into columns.
dt.cast[, as.list(cumsum(unlist(.SD))), by = group]
#   group d1 d2
#1:     0  0  1
#2:     1  1  3

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复