Replace specific values based on another dataframe

后端未结

关注

 3  1740

First, let\'s start with DataFrame 1 (DF1) :

DF1 <- data.frame(c(\"06/19/2016\", \"06/20/2016\", \"06/21/2016\", \"06/22/2016\", 
                    \"06


                      
              相关标签:


      
      
        
          3条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  迷失自我        
                
              
                            
                2020-12-03 06:36
              
            
            
                                                                       
df <- merge(DF1, DF2, by = c("date", "id"))
df$newcolumn <- ifelse(is.na(df$column.y), df$column.x, df$column.y, all.x = TRUE)


Replace column with your variable.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  心在旅途        
                
              
                            
                2020-12-03 06:41
              
            
            
                                                                       
You could use the join functionality of the data.table-package for this:

library(data.table)
setDT(DF1)
setDT(DF2)

DF1[DF2, on = .(date, id), `:=` (city = i.city, sales = i.sales)]


which gives:


> DF1
          date id sales cost city
 1: 06/19/2016  1  9999  101  LON
 2: 06/20/2016  1   150  102  MTL
 3: 06/21/2016  1   151  104  MTL
 4: 06/22/2016  1   152  107  MTL
 5: 06/23/2016  1   155   99  MTL
 6: 06/19/2016  2    84   55   NY
 7: 06/20/2016  2    83   55   NY
 8: 06/21/2016  2    80   56   NY
 9: 06/22/2016  2   777   57   QC
10: 06/23/2016  2   555   58   QC





When you have many columns in both datasets, it is easier to use mget instead off typing all the column names. For the used data in the question it would look like:

DF1[DF2, on = .(date, id), names(DF2)[3:4] := mget(paste0("i.", names(DF2)[3:4]))]


When you want to construct a vector of columnnames that need to be added beforehand, you could do this as follows:

cols <- names(DF2)[3:4]
DF1[DF2, on = .(date, id), (cols) := mget(paste0("i.", cols))]

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  梦毁少年i        
                
              
                            
                2020-12-03 06:55
              
            
            
                                                                       
df <- merge(DF1, DF2, by = c("date", "id"), all.x=TRUE)

tmp1 <- df[is.na(df$sales.y) & is.na(df$city.y),]
tmp1$sales.y <- NULL
tmp1$city.y <- NULL
names(tmp1)[names(tmp1) == "sales.x"] <- "sales"
names(tmp1)[names(tmp1) == "city.x"] <- "city"

tmp2 <- df[!is.na(df$sales.y) & !is.na(df$city.y),]
tmp2$sales.x <- NULL
tmp2$city.x <- NULL
names(tmp2)[names(tmp2) == "sales.y"] <- "sales"
names(tmp2)[names(tmp2) == "city.y"] <- "city"

results <- rbindlist(list(tmp1,tmp2), use.names= TRUE, fill = TRUE)



                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复