Add missing value in column with value from row above

后端未结

关注

 5  539

Every week I receive an incomplete dataset for an analysis. It looks like this:

df1 <- data.frame(var1 = c(\"a\",\"\",\"\",\"b\",\"\"), 
             var2 = c(\"x\",\"


                      
              相关标签:


      
      
        
          5条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  闹比i        
                
              
                            
                2020-12-30 02:20
              
            
            
                                                                       
Below is my unfill function. I encountered the same problem, so I hope it helps.

# Reverse of a fill: inside the selected columns, replace every value that
# equals the value directly above it with NA.
#
# df   : a data frame.
# cols : character vector with the names of the columns to process.
#
# Columns not listed in `cols` are passed through untouched, and the result
# has its columns in the same order as `df`.
# NOTE(review): relies on map_df(), lag(), bind_cols(), select() and one_of()
# being attached (presumably via dplyr/purrr) -- confirm against the caller.
unfill <- function(df, cols) {
  original_order <- names(df)
  is_target <- names(df) %in% cols
  blanked <- map_df(df[is_target], function(values) {
    # Compare each element with its predecessor and blank out the repeats.
    same_as_above <- values == lag(values)
    values[same_as_above] <- NA
    values
  })
  untouched <- df[!is_target]
  # bind_cols() puts the processed columns last, so restore the input order.
  select(bind_cols(untouched, blanked), one_of(original_order))
}


                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  挽巷        
                
              
                            
                2020-12-30 02:24
              
            
            
                                                                       
Here is a simpler way:

library(zoo)

# Mark empty strings as missing so they can be carried forward.
df1$var1[df1$var1 == ""] <- NA
# na.rm = FALSE keeps leading NAs in place. With the default (na.rm = TRUE)
# they are dropped, the result is shorter than the column, and assigning it
# back into the data frame fails.
df1$var1 <- na.locf(df1$var1, na.rm = FALSE)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  一生所求        
                
              
                            
                2020-12-30 02:27
              
            
            
                                                                       
Here is one way of doing it by making use of run-length encoding (rle) and its inverse, inverse.rle:

# Fill blank entries of a vector with the closest non-blank value above them,
# using run-length encoding.
#
# x       : vector to fill; it is coerced to character.
# missing : the value that marks a blank entry (default: empty string).
#
# Returns a character vector of the same length as `x`. Blanks at the very
# start have nothing above them to copy from and are left unchanged.
fillTheBlanks <- function(x, missing = "") {
  runs <- rle(as.character(x))
  blank_runs <- which(runs$values == missing)
  # Skip a blank first run: indexing with `blank_runs - 1` would produce a 0,
  # which is silently dropped and misaligns the replacement values.
  blank_runs <- blank_runs[blank_runs > 1L]
  # Adjacent runs always differ, so the run before a blank run is non-blank.
  runs$values[blank_runs] <- runs$values[blank_runs - 1L]
  inverse.rle(runs)
}

df1$var1 <- fillTheBlanks(df1$var1)


The results:

df1

  var1 var2
1    a    x
2    a    y
3    a    z
4    b    x
5    b    z

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  忘掉有多难        
                
              
                            
                2020-12-30 02:31
              
            
            
                                                                       
The tidyr package has the fill() function, which does the trick.

library(tidyr)  # provides fill() and re-exports the %>% pipe

# fill() only replaces NA, so the blanks are written as NA here.
df1 <- data.frame(var1 = c("a", NA, NA, "b", NA), stringsAsFactors = FALSE)
# The default direction is "down": each NA takes the last non-missing value
# above it.
df1 %>% fill(var1)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  日久生厌        
                
              
                            
                2020-12-30 02:31
              
            
            
                                                                       
Here is another way which is slightly shorter and doesn't coerce to character:

# Fill blank entries of a vector with the closest non-blank value above them,
# without coercing to character (factors stay factors).
#
# x       : vector (character, factor, numeric, ...) to fill.
# missing : the value that marks a blank entry (default: empty string).
#
# Returns a vector of the same type and length as `x`. Blanks at the very
# start have nothing above them to copy from and are left unchanged. NA is
# treated as an ordinary value, not as a blank.
Fill <- function(x, missing = "") {
  # Without the is.na() guard, an NA would turn the mask and its cumsum into NA.
  is_value <- is.na(x) | x != missing
  # For each position, the index of the last non-blank value seen so far.
  source_idx <- cumsum(is_value)
  # Index 0 means "no value yet"; used as a subscript it would be dropped and
  # shorten the result, so those positions are skipped.
  has_source <- source_idx > 0L
  filled <- x
  filled[has_source] <- x[is_value][source_idx[has_source]]
  filled
}


Results:

# For factor:
Fill(df1$var1)
[1] a a a b b
Levels:  a b

# For character:
Fill(as.character(df1$var1))
[1] "a" "a" "a" "b" "b"

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复