match index notation of file1 to the index of file2 and pull out matching rows

后端 未结 5 549
夕颜
夕颜 2021-01-29 04:47

file1 contains multiple alphabetic sequences:

AETYUIOOILAKSJ
EAYEURIOPOSIDK
RYXURIAJSKDMAO
URITORIEJAHSJD
YWQIAKSJDHFKCM
HAJSUDIDSJSIAJ
AJDHDPFDIXSIBJ
JAQIAUXCNCVUFO


        
5条回答
  •  一整个雨季
    2021-01-29 05:10

    Here you go:

    # For each file2 entry of the form <number><capital letters>, print every distinct file1 row
    # (in file1 order) whose character at position <number> equals the letter part.
    # Entries with no capital letter or no leading number (e.g. blank lines) are skipped.
    awk 'NR==FNR {if (!seen[$0]++) rows[++cnt]=$0; next} {a=match($0,/[A-Z]/); if (a<2) next; n=substr($0,1,a-1); s=substr($0,a); for (k=1;k<=cnt;k++) if (substr(rows[k],n,1)==s) print rows[k]}' file1 file2
    AETYUIOOILAKSJ
    RYXURIAJSKDMAO
    URITORIEJAHSJD
    JAQIAUXCNCVUFO
    

    A more readable version:

    # For each file2 entry of the form <number><capital letters>, print every
    # distinct file1 row (in file1 order) whose character at position <number>
    # equals the letter part. Entries without a leading number followed by a
    # capital letter (e.g. blank lines) are skipped.
    awk '
    NR==FNR {
      if (!seen[$0]++)
        rows[++cnt]=$0
      next
    }
    {
      a=match($0,/[A-Z]/)
      if (a<2)
        next
      n=substr($0,1,a-1)
      s=substr($0,a)
      for (k=1;k<=cnt;k++)
        if (substr(rows[k],n,1)==s)
          print rows[k]
    }
    ' file1 file2
    

    With comments:

    # For each file2 entry of the form <number><capital letters>, print every
    # distinct file1 row (in file1 order) whose character at position <number>
    # equals the letter part.
    awk '
    NR==FNR {                     # While reading the first file (file1)
      if (!seen[$0]++)            # Keep each distinct row once ...
        rows[++cnt]=$0            # ... in an indexed array, so file1 order is preserved
      next
    }
    {                             # For every line of file2
      a=match($0,/[A-Z]/)         # Position of the first capital letter (0 if none)
      if (a<2)                    # No letter, or no number before it (e.g. a blank line):
        next                      #   skip, otherwise an empty line would match every row
      n=substr($0,1,a-1)          # Number part of the file2 entry
      s=substr($0,a)              # Letter part of the file2 entry
      for (k=1;k<=cnt;k++)        # Loop through the stored file1 rows in order
        if (substr(rows[k],n,1)==s)  # Character at position n equals the letter part?
          print rows[k]           # If yes, print the row
    }
    ' file1 file2
    

提交回复
热议问题