Shell script (bash): I have two large files, each with around 1.2 GB of data made up of keys and values. I need to compare the two files by key and store the differences in the values in a third file.
Could you please try the following? It was written and tested with the samples shown, and it assumes that the lines of your Input_file(s) do not start with spaces.
# Print every record of Input_file1, keeping for each key only the values that
# Input_file2 does not list under the same key. Records look like "key v1;v2;...".
# Keys that occur only in Input_file1 are printed unchanged.
awk '
BEGIN{
  OFS=";"
}
{
  # Isolate the value list: everything after the first space, with the
  # separating spaces stripped. Assumes records do not begin with a space.
  # A record without any space yields an empty value list.
  match($0,/ .*/)
  line=substr($0,RSTART,RLENGTH)
  sub(/^ +/,"",line)
}
# First operand (Input_file2): remember the values seen for each key.
# FILENAME is compared instead of FNR==NR so that an empty Input_file2 does
# not cause the records of Input_file1 to be mistaken for first-file data.
FILENAME==ARGV[1]{
  num=split(line,array,";")
  for(i=1;i<=num;i++){
    arrayfromFile2[$1]=(arrayfromFile2[$1]?arrayfromFile2[$1] OFS:"")array[i]
  }
  next
}
# Second operand (Input_file1), key also present in Input_file2.
($1 in arrayfromFile2){
  # Start from an empty lookup set for every record; otherwise values that
  # belong to previously processed keys would count as present for this key.
  delete arrayChkFile2
  num=split(arrayfromFile2[$1],temparrayChkFile2,";")
  for(i=1;i<=num;i++){
    arrayChkFile2[temparrayChkFile2[i]]
  }
  # Collect, in original order, the values missing from Input_file2.
  val=""
  num=split(line,array,";")
  for(i=1;i<=num;i++){
    if(!(array[i] in arrayChkFile2)){
      val=(val?val OFS:"")array[i]
    }
  }
  print $1" "val
  next
}
# Key absent from Input_file2: pass the record through unchanged.
1
' Input_file2 Input_file1