问题
I have combined two different files with Same Key by AWK command. And In case there is no key match compare to File1 and File2 then just put "\t\t\t" instead.
I have below AWK command.
awk -F"\t" '
{key = $1}
NR == 1 {header = key}
!(key in result) {result[key] = $0 ; next}
{ for (i=2; i <= NF; i++) result[key] = result[key] FS $i }
END {
print result[header],"\tValue2","\tValue3","\tValue4"
delete result[header]
PROCINFO["sorted_in"] = "@ind_str_asc" # if using GNU awk
for (key in result) print result[key]
}
' $1 $2 > $3
Example Combine File1
Key Value1
A 10000
B 20000
C 30000
D 40000
File2
B 50000 20000 10000
C 20000 10000 50000
Then Expected result
Key Value1 Value2 Value3 Value4
A 10000 - - -
B 20000 50000 20000 10000
C 30000 20000 10000 50000
D 40000 - - -
My AWK Command Show
Key Value1 Value2 Value3 Value4
A 10000
B 20000 50000 20000 10000
C 30000 20000 10000 50000
D 40000
I already tried to do few way like below
!(key in result) {result[key] = $0"\t-\t-\t-" ; next}
But looks like this is does not cover all cases. Anyone has better idea to do this? Thank you!
回答1:
This solution doesn't hardcode that there are 3 extra fields in File2
awk '
BEGIN { FS = OVS = "\t" }
NR == FNR {
key = $1
$1 = ""
store[key] = $0
num_extra_fields = NF-1
next
}
FNR == 1 {
printf "%s", $0
for (i=1; i <= num_extra_fields; i++)
printf "%sValue%d", OFS, i+(NF-1)
print ""
next
}
$1 in store {
print $0 store[key]
next
}
{
for (i=1; i <= num_extra_fields; i++)
$(++NF)="-"
print
}
' file2 file1
The output looks a bit odd due to how stackoverflow displays tabs
Key Value1 Value2 Value3 Value4
A 10000 - - -
B 20000 20000 10000 50000
C 30000 20000 10000 50000
D 40000 - - -
To fix your code, you need to keep track of the keys in file2 that update the results. Change
{ for (i=2; i <= NF; i++) result[key] = result[key] FS $i }
to
{ updated[key]=1; for (i=2; i <= NF; i++) result[key] = result[key] FS $i }
and, in the END block, change
for (key in result) print result[key]
to
for (key in result) {
if (!(key in updated)) result[key] = result[key] FS "-" FS "-" FS "-"
print result[key]
}
回答2:
Using awk you can do:
awk -v OFS='\t' 'NR==FNR {if (!n) {n=NF-1; s="-"; for(i=2; i<=n; i++) s=s OFS "-"}
a[$1]=$0;next} $1 in a{sub($1, "&" OFS $2, a[$1]);
print a[$1]; next} {print $0, s}' file2 file1
A 10000 - - -
B 20000 50000 20000 10000
C 30000 20000 10000 50000
D 40000 - - -
来源:https://stackoverflow.com/questions/29476542/awk-asking-combine-two-files