Remove string from a vector in R

后端未结

关注

 3  472

I have a vector that looks like

> inecodes
   [1] \"01001\" \"01002\" \"01049\" \"01003\" \"01006\" \"01037\" \"01008\" \"01004\" \"01009\" \"01010\" \"01


                      
              相关标签:


      
      
        
          3条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  盖世英雄少女心        
                
              
                            
                2020-12-22 02:58
              
            
            
                                                                       
Try this. Match should be much faster

# Strip the leading "<code>-" prefix from every element of `pob` whose code is
# listed in `inecodes`; all other elements are left untouched.

# Leading digit run of each element (elements that do not look like
# "<digits>-<rest>" are returned unchanged by sub() and simply fail to match).
codes <- sub("^([0-9]+)-.*$", "\\1", pob)

# match() returns positions within `inecodes`, so test its result for NA
# directly. Using those positions to index `pob` would wrongly report NA
# whenever a position exceeds length(pob).
pos <- which(!is.na(match(codes, inecodes)))

pob[pos] <- sub("^[0-9]+-(.*)$", "\\1", pob[pos])


Please post the timings if you get this working. `match` usually resolves most performance problems with lookups on large data sets, and I would be interested to see whether there are any cases where it does not.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  走了就别回头了        
                
              
                            
                2020-12-22 03:13
              
            
            
                                                                       
Not sure why you need inecodes at all, since you can use sub to remove the leading digits and the hyphen that follows them:

# Drop the leading run of digits and the hyphen that follows it.
sub(pattern = "^\\d+-", replacement = "", x = pob)


Result:

 [1] "Alegría-Dulantzi"           "Amurrio"                    "Añana"                     
 [4] "Aramaio"                    "Armiñón"                    "Arraia-Maeztu"             
 [7] "Arratzua-Ubarrundia"        "Artziniega"                 "Asparrena"                 
[10] "Ayala/Aiara"                "Baños de Ebro/Mañueta"      "Barrundia"                 
[13] "Berantevilla"               "Bernedo"                    "Campezo/Kanpezu"           
[16] "Elburgo/Burgelu"            "Elciego"                    "Elvillar/Bilar"            
[19] "Erriberagoitia/Ribera Alta"


One reason that you might need inecodes is that you have codes in pob that don't exist in inecodes, but that doesn't seem like the case here. If you insist on using inecodes to remove numbers from pob, you can use str_replace_all from stringr:

library(stringr)

# Remove a known code only when it is the leading "<code>-" prefix of an
# element. All codes are combined into one anchored alternation, e.g.
# "^(01001|01002|...)-", so a code that happens to appear later in a name is
# left alone and the vector is scanned once instead of once per code.
# NOTE: assumes the codes contain no regex metacharacters (they are all
# digits in the data shown here).
str_replace_all(pob, paste0("^(", paste(inecodes, collapse = "|"), ")-"), "")


This gives you the exact same result:

 [1] "Alegría-Dulantzi"           "Amurrio"                    "Añana"                     
 [4] "Aramaio"                    "Armiñón"                    "Arraia-Maeztu"             
 [7] "Arratzua-Ubarrundia"        "Artziniega"                 "Asparrena"                 
[10] "Ayala/Aiara"                "Baños de Ebro/Mañueta"      "Barrundia"                 
[13] "Berantevilla"               "Bernedo"                    "Campezo/Kanpezu"           
[16] "Elburgo/Burgelu"            "Elciego"                    "Elvillar/Bilar"            
[19] "Erriberagoitia/Ribera Alta"


Data:

# Municipality codes used as the lookup set.
inecodes <- c(
  "01001", "01002", "01049", "01003", "01006", "01037", "01008", "01004",
  "01009", "01010", "01011", "01013", "01014", "01016", "01017", "01021",
  "01022", "01023", "01046", "01056", "01901", "01027", "01019", "01020",
  "01028", "01030", "01031", "01032", "01902", "01033", "01036", "01058",
  "01034", "01039", "01041", "01042", "01043", "01044", "01047", "01051",
  "01052", "01053", "01054", "01055"
)

# Labels of the form "<code>-<name>"; the name itself may contain hyphens.
pob <- c(
  "01001-Alegría-Dulantzi",
  "01002-Amurrio",
  "01049-Añana",
  "01003-Aramaio",
  "01006-Armiñón",
  "01037-Arraia-Maeztu",
  "01008-Arratzua-Ubarrundia",
  "01004-Artziniega",
  "01009-Asparrena",
  "01010-Ayala/Aiara",
  "01011-Baños de Ebro/Mañueta",
  "01013-Barrundia",
  "01014-Berantevilla",
  "01016-Bernedo",
  "01017-Campezo/Kanpezu",
  "01021-Elburgo/Burgelu",
  "01022-Elciego",
  "01023-Elvillar/Bilar",
  "01046-Erriberagoitia/Ribera Alta"
)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  时光说笑        
                
              
                            
                2020-12-22 03:22
              
            
            
                                                                       
library(stringr)

# For each known code, strip the leading "<code>-" prefix from the elements of
# `pob` that start with it; elements with an unlisted code are left unchanged.
for (code in inecodes) {
  prefix <- paste0(code, "-")
  # Match the code only as a literal prefix, not anywhere inside the string.
  ix <- which(startsWith(pob, prefix))
  # Remove the prefix from each matched element individually. Splitting the
  # whole vector and taking element [2] would assign the first element's name
  # to every match.
  pob[ix] <- str_sub(pob[ix], str_length(prefix) + 1L)
}

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复