Group by ID, each element of the new table is a vector

前端未结

关注

 3  1856

I have a table like this

data.table(ID = c(1,2,3,4,5,6), 
         R = c(\"s\",\"s\",\"n\",\"n\",\"s\",\"s\"), 
         S = c(\"a\",\"a\",\"a\",\"b\",\"b\",


                      
              相关标签:


      
      
        
          3条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  深忆病人        
                
              
                            
                2020-12-21 16:36
              
            
            
                                                                       
You could try:

# dplyr supplies group_by(), summarise() and the %>% pipe; tidyr supplies
# spread().
library(dplyr)
library(tidyr)

# NOTE(review): df is presumably the table shown in the question -- confirm.
# For every (R, S) combination, collapse the IDs into one string via
# toString() (elements joined with ", "), then turn each value of S into its
# own column so the result has one row per R.
df %>% 
  group_by(R, S) %>% 
  summarise(i = toString(ID)) %>% 
  spread(S, i) 


Which gives:

#Source: local data table [2 x 3]
#Groups: 
#
#  R    a    b
#1 n    3    4
#2 s 1, 2 5, 6


Note: This stores each result as a single string. If you want a more convenient format for accessing the individual elements, you could store them in a list instead:

# Group by R and S, keep each group's IDs as a vector wrapped in a list
# (rather than pasted into a string), then spread the values of S into
# columns. The result is saved as df2 so its cells can be indexed later.
df2 <- df %>% 
  group_by(R, S) %>% 
  summarise(i = list(ID)) %>% 
  spread(S, i)  


Which gives:

#Source: local data table [2 x 3]
#Groups: 
#
#  R        a        b
#1 n <dbl[1]> <dbl[1]>
#2 s <dbl[2]> <dbl[2]>


You can then access the elements by doing:

> df2$a[[2]][2]
#[1] 2

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  心在旅途        
                
              
                            
                2020-12-21 16:38
              
            
            
                                                                       
You can use dcast from reshape2 with the appropriate aggregating function:

# functional supplies Curry(); reshape2 supplies dcast().
library(functional)
library(reshape2)

# Cast to wide form: one row per R, one column per value of S, cells filled
# from ID. Curry() pre-fills collapse=',' so that paste0 joins all IDs that
# land in the same cell into a single comma-separated string.
dcast(df, R~S, value.var='ID', fun.aggregate=Curry(paste0, collapse=','))
#  R   a   b
#1 n   3   4
#2 s 1,2 5,6


Or, even shorter, as @akrun pointed out:

# toString is passed unnamed and acts as the aggregating function
# (NOTE(review): this relies on positional matching to dcast's
# fun.aggregate argument -- confirm against the reshape2 docs).
# toString() joins with ", " (comma + space), so cells read "1, 2".
dcast(df, R~S, value.var='ID', toString)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  小蘑菇        
                
              
                            
                2020-12-21 17:00
              
            
            
                                                                       
Here's an alternative that uses plain old data.table syntax:

# NOTE(review): DT is presumably the question's data.table (called df in the
# other answers) -- confirm.
# Within each R group, split ID by S and wrap every piece in a list, so each
# value of S becomes a list column holding that group's IDs.
DT[,lapply(split(ID,S),list),by=R]

# or...

# Same split, but collapse every piece into one string with toString().
DT[,lapply(split(ID,S),toString),by=R]

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复