file1 contains multiple alphabetic sequences:
AETYUIOOILAKSJ
EAYEURIOPOSIDK
RYXURIAJSKDMAO
URITORIEJAHSJD
YWQIAKSJDHFKCM
HAJSUDIDSJSIAJ
AJDHDPFDIXSIBJ
JAQIAUXCNCVUFO
$ cat tst.awk
# Usage: awk -f tst.awk file2 file1
#
# file2 holds one entry per line: a position followed by a single character
# (e.g. 3T or 10K). Every line of file1 that has one of the listed
# characters at the matching 1-based position is printed once.

# First file (file2): collect the wanted characters for each position.
NR==FNR {
    lgth = length($0)
    # An entry needs a position plus a character. Shorter lines (notably
    # blank ones) would otherwise create an empty key/value pair that
    # compares equal to substr() at position 0 and matches every line.
    if (lgth > 1) {
        pos = substr($0,1,lgth-1)
        # Append instead of assign so that several entries sharing one
        # position (3T and 3Q) are all kept.
        pos2chars[pos] = pos2chars[pos] substr($0,lgth,1)
    }
    next
}

# Second file (file1): print the line on the first position that matches.
{
    for (pos in pos2chars) {
        char = substr($0,pos,1)
        # char is empty when the line is shorter than pos; never a match.
        if ( char != "" && index(pos2chars[pos], char) ) {
            print
            next
        }
    }
}
$ awk -f tst.awk file2 file1
AETYUIOOILAKSJ
RYXURIAJSKDMAO
URITORIEJAHSJD
JAQIAUXCNCVUFO
With GNU awk and grep:
# Turn every "<position><LETTER>" entry of file1 into an anchored ERE such as
# ^.{2}T and let grep print the file2 lines that match any of them.
# FPAT (GNU awk) splits each entry into a digit run ($1) and a letter run ($2).
# The guard skips blank, letter-first and position-0 entries, which would
# otherwise produce an invalid "{-1}" bound or a pattern without a letter.
awk -v FPAT='[0-9]+|[A-Z]+' 'NF >= 2 && $1+0 >= 1 { print "^.{" ($1 - 1) "}" $2 }' file1 |
  grep -Ef - file2
Output:
AETYUIOOILAKSJ
RYXURIAJSKDMAO
URITORIEJAHSJD
JAQIAUXCNCVUFO
Here you go:
# file1: sequences, remembered in b. file2: "<position><LETTER>" entries.
# Each entry is parsed once (not once per stored sequence) and skipped when the
# letter starts before column 2, so a blank entry no longer prints everything.
# A sequence is printed once for every entry it matches.
awk 'NR==FNR {b[$0]++;next} {a=match($0,/[A-Z]/);if (a<2) next;n=substr($0,1,a-1);s=substr($0,a);for (i in b) if (substr(i,n,1)==s) print i}' file1 file2
AETYUIOOILAKSJ
RYXURIAJSKDMAO
URITORIEJAHSJD
JAQIAUXCNCVUFO
A more readable version:
# file1: sequences, remembered in b. file2: "<position><LETTER>" entries.
# Each entry is parsed once, outside the loop over b, and entries without a
# position prefix (including blank lines) are skipped instead of matching
# every stored sequence.
awk '
NR==FNR {
    b[$0]++
    next
}
{
    a = match($0, /[A-Z]/)
    if (a < 2)
        next
    n = substr($0, 1, a - 1)
    s = substr($0, a)
    for (i in b)
        if (substr(i, n, 1) == s)
            print i
}
' file1 file2
With comments:
# Print every file1 sequence that has the letter named by a file2 entry
# ("<position><LETTER>", e.g. 3T) at that 1-based position.
awk '
NR==FNR {                       # For the first file (file1)
    b[$0]++                     # Store each line of file1 as a key of array b
    next
}
{                               # For the second file (file2)
    a = match($0, /[A-Z]/)      # Find where the letter part starts
    if (a < 2)                  # Blank line or no position in front of the
        next                    #   letter: skip, it would match every line
    n = substr($0, 1, a - 1)    # Store the number part of the entry in n
    s = substr($0, a)           # Store the letter part of the entry in s
    for (i in b)                # Loop through the stored file1 lines
        if (substr(i, n, 1) == s)   # Is the character at position n equal to s?
            print i             # If yes, print the line
}
' file1 file2
# file2 entries ("<position><LETTER>") become keys of a, with the trailing
# letter stored as the value; key+0 later yields the numeric position.
# Each file1 line is printed once, on the first entry that matches.
# The a[key] != "" test ignores a blank file2 line, whose empty value would
# otherwise equal substr($0,0,1) and print every file1 line.
awk '(NR==FNR){a[$0]=substr($0,length);next}
{ for(key in a) if (a[key] != "" && a[key] == substr($0,key+0,1)) { print; break }
}' file2 file1
Here, a is an associative array with the following key-value pairs:
key: value
3T T
10K K
... ...
While processing file2 with the rule (NR==FNR){a[$0]=substr($0,length);next}, we extract the value (the trailing letter) beforehand so we don't have to do it later on. The index is easily extracted with an arithmetic operation: e.g. "10K"+0 evaluates to 10 in awk.
file1 is processed by the second rule. There we just check, for each entry in the associative array, whether the character at the stored position matches.
With an awk + grep pipeline:
# Convert every "<position><LETTER>" entry of file2 into a basic regex made of
# (position - 1) dots followed by the entry's last character, anchored at the
# start of the line (3T -> ^..T), then let grep select matching file1 lines.
# Entries whose position is below 1 (blank lines, "0K") are skipped: a negative
# width would pad to one character and yield a pattern for the wrong column.
awk 'int($0) >= 1 {
       pat = sprintf("%*s", int($0) - 1, "")
       gsub(" ", ".", pat)
       printf "^%s%s\n", pat, substr($0, length)
     }' file2 | grep -f- file1
The output:
AETYUIOOILAKSJ
RYXURIAJSKDMAO
URITORIEJAHSJD
JAQIAUXCNCVUFO