select maximum row value by group

前端未结

关注

 5  2093

I've been trying to do this with my data by looking at other posts, but I keep getting an error. My data frame `new` looks like this:

id  year    name


                      
              相关标签:


      
      
        
          5条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  执念已碎        
                
              
                            
                2021-01-20 07:56
              
            
            
                                                                       
You can do this with duplicated

# Sample data: one row per person (id) and year.
df <- read.table(header = TRUE, text = "id  year    name    gdp
1   1980    Jamie   45
1   1981    Jamie   60
1   1982    Jamie   70
2   1990    Kate    40
2   1991    Kate    25
2   1992    Kate    67
3   1994    Joe     35
3   1995    Joe     78
3   1996    Joe     90")

# Arrange rows by id, then by year, so each id's most recent year comes last.
df <- df[with(df, order(id, year)), ]

# Scanning from the bottom, the first occurrence of each id is its latest
# year; keep exactly those rows.
df <- df[with(df, !duplicated(id, fromLast = TRUE)), ]

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  死守一世寂寞        
                
              
                            
                2021-01-20 08:00
              
            
            
                                                                       
Your ddply effort looks good to me, but you referenced the original dataset in the callback function.

# Original attempt: inside the callback, which.max() is applied to new$year
# (the full data set), so the resulting row index is not relative to the
# per-id subset x that is being indexed.
ddply(new,~id,function(x){x[which.max(new$year),]})
# Corrected: index each per-id subset x by the position of its own maximum
# year (x$year).
ddply(new,.(id),function(x){x[which.max(x$year),]})

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  终归单人心        
                
              
                            
                2021-01-20 08:01
              
            
            
                                                                       
Another option that scales well for large tables is using data.table.

# Example data: yearly GDP observations for three people.
DT <- read.table(text = "id  year    name    gdp
                          1   1980    Jamie   45
                          1   1981    Jamie   60
                          1   1982    Jamie   70
                          2   1990    Kate    40
                          2   1991    Kate    25
                          2   1992    Kate    67
                          3   1994    Joe     35
                          3   1995    Joe     78
                          3   1996    Joe     90",
                 header = TRUE)

# library() stops with an error if data.table is not installed; require()
# would only return FALSE and let the script fail later on.
library(data.table)
DT <- as.data.table(DT)

# For every id, look up the latest year. The lookup is on `year`, not `gdp`:
# the goal is the most recent row per id, which merely happens to coincide
# with the highest-gdp row in this sample data.
setkey(DT, id, year)
res <- DT[, list(year = year[which.max(year)]), by = id]
res

# Keyed join back onto DT to recover the complete row for each (id, year).
setkey(res, id, year)
DT[res]
# id year  name gdp
# 1:  1 1982 Jamie  70
# 2:  2 1992  Kate  67
# 3:  3 1996   Joe  90

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  醉梦人生        
                
              
                            
                2021-01-20 08:08
              
            
            
                                                                       
Just use split:

# Split df into one data frame per id, keep the row with the largest year in
# each piece, and stack the pieces back together.
# which.max() already returns at most one index, so no `[1]` is needed; adding
# it would turn the empty result for an empty group (e.g. an unused factor
# level of id) into NA and produce a spurious all-NA row.
df <- do.call(
  rbind,
  lapply(split(df, df$id), function(subdf) subdf[which.max(subdf$year), ])
)


For example,

# Simulated data: 10 ids with 3 random (year, gdp) observations each.
# No seed is set, so the printed values below are from one sample run.
df <- data.frame(
  id = rep(1:10, each = 3),
  year = round(runif(30, 0, 10)) + 1980,
  gdp = round(runif(30, 40, 70))
)
print(head(df))
#   id year gdp
# 1  1 1990  49
# 2  1 1981  47
# 3  1 1987  69
# 4  2 1985  57
# 5  2 1989  41
# 6  2 1988  54

# Keep the row with the largest year for each id. which.max() returns at most
# one index (the first maximum on ties), so no `[1]` is needed; adding it
# would turn the empty result for an empty group into NA and yield an all-NA
# row.
df <- do.call(
  rbind,
  lapply(split(df, df$id), function(subdf) subdf[which.max(subdf$year), ])
)
print(head(df))
#    id year gdp
# 1   1 1990  49
# 2   2 1989  41
# 3   3 1989  55
# 4   4 1988  62
# 5   5 1989  48
# 6   6 1990  41

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  后悔当初        
                
              
                            
                2021-01-20 08:21
              
            
            
                                                                       
`ave` works here yet again, and it also handles the case where several rows share the maximum year: all tied rows are returned.

# ave() broadcasts each id's maximum year back onto every row of that id;
# rows whose own year equals that group maximum are kept (ties included).
new[new$year == ave(new$year, new$id, FUN = max), ]

#  id year  name gdp
#3  1 1982 Jamie  70
#6  2 1992  Kate  67
#9  3 1996   Joe  90

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复