Get Value of last non-empty column for each row

前端未结

关注

 3  1826

Take this sample data:

data.frame(a_1=c(\"Apple\",\"Grapes\",\"Melon\",\"Peach\"),a_2=c(\"Nuts\",\"Kiwi\",\"Lime\",\"Honey\"),a_3=c(\"Plum\",\"Apple\",NA,NA)


                      
              相关标签:


      
      
        
          3条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  时光说笑        
                
              
                            
                2021-01-05 22:03
              
            
            
                                                                       
There's no need for regex here. Just use apply + tail + na.omit:

> apply(mydf, 1, function(x) tail(na.omit(x), 1))
[1] "Cucumber" "Apple"    "Lime"     "Honey" 




I don't know how this compares in terms of speed, but you You can also use a combination of "data.table" and "reshape2", like this:

library(data.table)
library(reshape2)
na.omit(melt(as.data.table(mydf, keep.rownames = TRUE), 
             id.vars = "rn"))[, value[.N], by = rn]
#    rn       V1
# 1:  1 Cucumber
# 2:  2    Apple
# 3:  3     Lime
# 4:  4    Honey


Or, even better:

melt(as.data.table(df, keep.rownames = TRUE), 
     id.vars = "rn", na.rm = TRUE)[, value[.N], by = rn]
#    rn       V1
# 1:  1 Cucumber
# 2:  2    Apple
# 3:  3     Lime
# 4:  4    Honey


This would be much faster. On an 800k-row dataset, apply took ~ 50 seconds while the data.table approach took about 2.5 seconds.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  感动是毒        
                
              
                            
                2021-01-05 22:04
              
            
            
                                                                       
Another alternative that might be pretty fast:

DF[cbind(seq_len(nrow(DF)), max.col(!is.na(DF), "last"))]
#[1] "Cucumber" "Apple"    "Lime"     "Honey"


Where "DF":

DF = structure(list(a_1 = structure(1:4, .Label = c("Apple", "Grapes", 
"Melon", "Peach"), class = "factor"), a_2 = structure(c(4L, 2L, 
3L, 1L), .Label = c("Honey", "Kiwi", "Lime", "Nuts"), class = "factor"), 
    a_3 = structure(c(2L, 1L, NA, NA), .Label = c("Apple", "Plum"
    ), class = "factor"), a_4 = structure(c(1L, NA, NA, NA), .Label = "Cucumber", class = "factor")), .Names = c("a_1", 
"a_2", "a_3", "a_4"), row.names = c(NA, -4L), class = "data.frame")

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  温柔的废话        
                
              
                            
                2021-01-05 22:07
              
            
            
                                                                       
You could also try: (df1 is the dataset)

 indx <- which(!is.na(df1), arr.ind=TRUE)
 df1[cbind(1:nrow(df1),tapply(indx[,2], indx[,1], FUN=max))]
 #[1] "Cucumber" "Apple"    "Lime"     "Honey"  

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复