match index notation of file1 to the index of file2 and pull out matching rows

后端未结

关注

 5  551

file1 contains multiple alphabetic sequences:

AETYUIOOILAKSJ
EAYEURIOPOSIDK
RYXURIAJSKDMAO
URITORIEJAHSJD
YWQIAKSJDHFKCM
HAJSUDIDSJSIAJ
AJDHDPFDIXSIBJ
JAQIAUXCNCVUFO


                      
              相关标签:


      
      
        
          5条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  悲&欢浪女        
                
              
                            
                2021-01-29 04:57
              
            
            
                                                                       
$ cat tst.awk
# Usage: awk -f tst.awk file2 file1
#   file2: one index per line, a 1-based position followed by a single
#          character (e.g. "3T" or "10K")
#   file1: the sequences to filter
# Prints every line of file1 that carries the requested character at the
# requested position for at least one index.
NR==FNR {
    lgth = length($0)
    # A usable index needs a position and a character. This also skips blank
    # lines, which would otherwise be stored as "" -> "" and, in awks where
    # substr() at position 0 yields "", make every sequence match.
    if ( lgth > 1 ) {
        # Key on the whole index so that two indices sharing a position
        # (e.g. "3T" and "3K") are both kept instead of the later one
        # overwriting the earlier one.
        idx2pos[$0]  = substr($0,1,lgth-1)
        idx2char[$0] = substr($0,lgth,1)
    }
    next
}
{
    for (idx in idx2pos) {
        if ( substr($0,idx2pos[idx],1) == idx2char[idx] ) {
            print
            next    # one matching index is enough; print each line once
        }
    }
}

$ awk -f tst.awk file2 file1
AETYUIOOILAKSJ
RYXURIAJSKDMAO
URITORIEJAHSJD
JAQIAUXCNCVUFO

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  迷失自我        
                
              
                            
                2021-01-29 05:07
              
            
            
                                                                       
With GNU awk and grep:

awk -v FPAT='[0-9]+|[A-Z]+' '
  {
    skip = $1 - 1              # characters in front of the wanted position
    print "^.{" skip "}" $2    # e.g. 3T becomes ^.{2}T
  }
' file1 | grep -E -f - file2


Output:

AETYUIOOILAKSJ
RYXURIAJSKDMAO
URITORIEJAHSJD
JAQIAUXCNCVUFO

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  一整个雨季        
                
              
                            
                2021-01-29 05:10
              
            
            
                                                                       
Here you go:

# file1 = sequences, file2 = indices such as "3T" (position, then letter).
# Each index is parsed once, index lines without a leading position are
# skipped, and a sequence is dropped from b after printing so it appears once.
awk 'NR==FNR {b[$0]++;next} {a=match($0,/[A-Z]/);if (a<2) next;n=substr($0,1,a-1);s=substr($0,a);for (i in b) if (substr(i,n,1)==s) {print i;delete b[i]}}' file1 file2
AETYUIOOILAKSJ
RYXURIAJSKDMAO
URITORIEJAHSJD
JAQIAUXCNCVUFO


A more readable version:

# file1 = sequences, file2 = indices such as "3T" (position, then letter).
awk '
NR==FNR {
  b[$0]++
  next
}
{
  # Parse the index once per file2 line, not once per stored sequence.
  a = match($0, /[A-Z]/)
  if (a < 2)        # blank line or no position before the letter: skip it
    next
  n = substr($0, 1, a-1)
  s = substr($0, a)
  for (i in b) {
    if (substr(i, n, 1) == s) {
      print i
      delete b[i]   # already printed: do not print it again for another index
    }
  }
}
' file1 file2


With comments:

awk '
NR==FNR {                   # For the first file (file1, the sequences)
  b[$0]++                   # Store each line of file1 as a key of array b
  next
}
{                           # For every line of file2 (one index per line)
  a = match($0, /[A-Z]/)    # Find where the letter part starts
  if (a < 2)                # No number in front of a letter:
    next                    #   skip blank or malformed index lines
  n = substr($0, 1, a-1)    # Store the number part in n (parsed once, outside the loop)
  s = substr($0, a)         # Store the letter part in s
  for (i in b) {            # Loop through the sequences stored in array b
    if (substr(i, n, 1) == s) {   # Test if the character at position n equals s
      print i               # If yes, print the sequence
      delete b[i]           # and drop it so it is never printed twice
    }
  }
}
' file1 file2

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  天命终不由人        
                
              
                            
                2021-01-29 05:11
              
            
            
                                                                       
awk '(NR==FNR){a[$0]=substr($0,length);next}
     {
       # a[key] is the wanted character; key+0 is the numeric position
       for (key in a) {
         if (substr($0,key+0,1) == a[key]) {
           print
           next   # first hit is enough, continue with the next line of file1
         }
       }
     }' file2 file1


Here, a is an associative array with the following key-value pairs:

key:   value
3T     T
10K    K
...    ...


While reading file2, the rule (NR==FNR){a[$0]=substr($0,length);next} stores each index line as a key and its last character as the value, so the character does not have to be extracted again for every line of file1. The position is recovered with a simple arithmetic conversion: in awk, "10K"+0 evaluates to 10.

Processing file1 is done with the next line. Here we just check if the character matches for any of the entries in the associative array.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  执笔经年        
                
              
                            
                2021-01-29 05:16
              
            
            
                                                                       
With awk + grep pipeline:

# file2 holds indices such as "3T"; each becomes an anchored regex such as
# "^..T", and grep keeps the lines of file1 that match any of them.
# Only lines shaped like an index (digits, then one non-digit) with a
# position of at least 1 are converted: a blank line would otherwise produce
# the pattern "^.", which matches every non-empty line of file1.
awk '/^[0-9]+[^0-9]$/ && int($0) > 0 {
       # int($0)-1 dots skip the characters in front of the wanted position
       pat = sprintf("%*s", int($0)-1, ""); gsub(" ", ".", pat)
       printf "^%s%s\n", pat, substr($0, length)
     }' file2 | grep -f - file1


The output:

AETYUIOOILAKSJ
RYXURIAJSKDMAO
URITORIEJAHSJD
JAQIAUXCNCVUFO

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复