Replacing values in a column where there is a match

后端未结

关注

 5  1593

I\'m new to R programming and I\'m stuck on the example below.

Basically I have two data sets:

dataset1:

ID       Category        
1        CatZ


                      
              相关标签:


      
      
        
          5条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  你的背包        
                
              
                            
                2021-01-22 07:09
              
            
            
                                                                       
In tidyverse you can do:

df1 %>%
 left_join(df2, by = c("ID" = "ID")) %>% #Merging the two dfs on ID
 mutate(Category = if_else(!is.na(Category.y), Category.y, Category.x)) %>% #If there is a match, taking the value from df2, otherwise from df1
 select(ID, Category) #Deleting the redundant variables

  ID Category
1  1   Cat600
2  2    CatVV
3  3   Cat611
4  4    CatQQ


Or:

df1 %>%
 left_join(df2, by = c("ID" = "ID")) %>% #Merging the two dfs on ID
 gather(var, val, -ID) %>% #Transforming the data from wide to long format
 arrange(ID) %>% #Arranging by ID
 group_by(ID) %>% #Grouping by ID
 mutate(Category = if_else(!is.na(nth(val, 2)), nth(val, 2), first(val))) %>% #If non-NA, taking the value from df2, otherwise from df1
 spread(var, val) %>% #Returning the data to wide format
 select(ID, Category) #Removing the redundant variables 

     ID Category
  <int> <chr>   
1     1 Cat600  
2     2 CatVV   
3     3 Cat611  
4     4 CatQQ


Sample data:

df1 <- read.table(text = "ID       Category        
1        CatZZ         
2        CatVV         
3        CatAA  
4        CatQQ", header = TRUE, stringsAsFactors = FALSE)

df2 <- read.table(text = "ID  Category  
1   Cat600  
                  3   Cat611", header = TRUE, stringsAsFactors = FALSE)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  Happy的楠姐        
                
              
                            
                2021-01-22 07:15
              
            
            
                                                                       
A base R approach that uses match 

df1$Category[match(df2$ID, df1$ID)] <- df2$Category
df1
#  ID Category
#1  1   Cat600
#2  2    CatVV
#3  3   Cat611
#4  4    CatQQ


data

df1 <- structure(list(ID = 1:4, Category = c("CatZZ", "CatVV", "CatAA", 
"CatQQ")), .Names = c("ID", "Category"), class = "data.frame", row.names = c(NA, 
-4L))

df2 <- structure(list(ID = c(1L, 3L), Category = c("Cat600", "Cat611"
)), .Names = c("ID", "Category"), class = "data.frame", row.names = c(NA, 
-2L))

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  佛祖请我去吃肉        
                
              
                            
                2021-01-22 07:27
              
            
            
                                                                       
You can stack df2 on top of df1 and keep the first instance for each ID.

With tidyverse that would be :

library(tidyverse)
bind_rows(df2,df1) %>%
  group_by(ID) %>%
  slice(1) %>%
  ungroup()

# # A tibble: 4 x 2
#      ID Category
#   <int>    <chr>
# 1     1   Cat600
# 2     2    CatVV
# 3     3   Cat611
# 4     4    CatQQ


Or a base version (which reorders the rows) :

subset(rbind(df2,df1), !duplicated(ID))
#   ID Category
# 1  1   Cat600
# 2  3   Cat611
# 4  2    CatVV
# 6  4    CatQQ

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  慢半拍i        
                
              
                            
                2021-01-22 07:31
              
            
            
                                                                       
Using a combination of base R match function and data.table's set function:

matchinds = na.omit(match(dataset1$ID,dataset2$ID)) # this will give index of dataset2$ID where values of dataset1$ID were equal to values of dataset2$ID

set(x=dataset1,i=matchinds,j="Category",value=dataset2$category[matchinds])  #this will set values at matching indexes in dataset1 Category column equal to Category column matching index values in dataset2

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  北荒        
                
              
                            
                2021-01-22 07:32
              
            
            
                                                                       
Another option is to use data.table package.

Using the same setup as @tmfmnk in his answer:

Construct the sample data set:

df1 <- read.table(text = "ID       Category        
1        CatZZ         
2        CatVV         
3        CatAA  
4        CatQQ", header = TRUE, stringsAsFactors = FALSE)

df2 <- read.table(text = "ID  Category  
1   Cat600  
                  3   Cat611", header = TRUE, stringsAsFactors = FALSE)


Load the data.table package and convert dataframes to data tables:

library(data.table)
df1 <- data.table(df1)
df2 <- data.table(df2)


Perform a left join

(take all values from df1, where ID matches with df2, and add there the category from df2, then create a new column combining info from df1 and df2)

a <- df2[df1, on = "ID"][, a := ifelse(is.na(Category), i.Category, Category)]


There is a nice question and answer on data.table joins here: Left join using data.table

Also, to get exactly the result you asked for, you can do:

a <- df2[df1, on = "ID"][, list(ID, Category = ifelse(is.na(Category), i.Category, Category))]

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复