I am attempting to merge two CSV files based on a specific field in each file.
file1.csv
id,attr1,attr2,attr3
1,True,7,"Purple"
2,Fal
Use a dict of dicts (one inner dict of attributes per id), then update it with the rows from the second file, like this:
import csv
from collections import OrderedDict
# Merge file1.csv and file2.csv on their first column (the record id) and
# write the combined rows to merged.csv.
#
# Every output row carries the attribute columns of file1 followed by those
# of file2; an attribute with no value for a given id is written as "?".
# Written for Python 3 (dict.items(), text-mode csv files).

# newline='' is what the csv module expects so it can handle line endings
# (including newlines embedded in quoted fields) itself.
with open('file2.csv', 'r', newline='') as f2:
    reader = csv.reader(f2)
    lines2 = list(reader)
with open('file1.csv', 'r', newline='') as f1:
    reader = csv.reader(f1)
    lines1 = list(reader)

# Map each id to {column name: value}, taking column names from the header
# row of the respective file. Blank lines parse to [] and are skipped.
dict1 = {row[0]: dict(zip(lines1[0][1:], row[1:])) for row in lines1[1:] if row}
dict2 = {row[0]: dict(zip(lines2[0][1:], row[1:])) for row in lines2[1:] if row}

# merge
updatedDict = OrderedDict()
# Template row: every attribute of both files, in header order, preset to "?".
mergedAttrs = OrderedDict.fromkeys(lines1[0][1:] + lines2[0][1:], "?")
for row_id, attrs in dict1.items():
    d = mergedAttrs.copy()
    d.update(attrs)
    updatedDict[row_id] = d
for row_id, attrs in dict2.items():
    # An id that exists only in file2 has no entry yet; start it from the
    # "?" template instead of failing with KeyError.
    if row_id not in updatedDict:
        updatedDict[row_id] = mergedAttrs.copy()
    updatedDict[row_id].update(attrs)

# out
# Only data rows are written (no header row), ordered by id compared as a
# string.
with open('merged.csv', 'w', newline='') as f:
    w = csv.writer(f)
    for row_id, rest in sorted(updatedDict.items()):
        w.writerow([row_id] + list(rest.values()))