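I want to merge two dictionaries A and B such that the result contains: every pair from A whose key appears only in A, every pair from B whose key appears only in B, and f(A[k], B[k]) for every key k that appears in both.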
dict(list(A.items()) + list(B.items()) + [(k,f(A[k],B[k])) for k in A.keys() & B.keys()])
is in my opinion the shortest and most readable code in Python 3. I derived it from DhruvPathak's answer and realised that optimising it leads to kampu's answer specialised for Python 3:
dict(itertools.chain(A.items(), B.items(), ((k,f(A[k],B[k])) for k in A.keys() & B.keys())))
I compared all of the answers here for performance, and got this ranking:
mergeLZ: 34.0ms (Lei Zhao, quite bulky one-liner)
mergeJK: 11.6ms (jamylak)
mergeMP: 11.5ms (Martijn Pieters, almost a one-liner)
mergeDP: 6.9ms (DhruvPathak)
mergeDS: 6.8ms (1st one-liner above)
mergeK3: 5.2ms (kampu = 2nd one-liner above)
mergeS3: 3.5ms (imperative, not a one-liner)
where the last one, mergeS3, is naive, imperative, multi-line code. I'm disappointed that the old ways prevail when it comes to performance. This test is for simple integer keys and values, but the ranking is quite similar for big string keys and values. Obviously, mileage may vary with dictionary size and the amount of key overlap (1/3 in my test). By the way, Lei Zhao's second implementation, which I haven't tried to understand, seems to have abysmal performance, ~1000 times slower.
The code:
import functools
import itertools
import operator
import timeit
def t(x):
    """Transform a key or value before it goes into the test dictionaries.

    Currently the identity, so the benchmark runs on plain integers.
    To benchmark with big string keys and values instead, return
    ``str(x) * 8`` here.
    """
    return x
def f(x, y):
    """Merge the two values stored under a key present in both dictionaries.

    Returns ``x + y``, where ``x`` is the value from the first dictionary
    and ``y`` the value from the second.
    """
    return x + y
# Number of entries in each input dictionary.
N = 10000
# Before t() is applied, A's keys are 0, 2, 4, ... and B's keys are
# 0, 3, 6, ..., so the multiples of 6 below 2*N occur in both dictionaries.
A = {t(2 * i): t(22 * i) for i in range(N)}
B = {t(3 * i): t(33 * i) for i in range(N)}
def check(AB):
    """Sanity-check a merged dictionary against the known layout of A and B.

    Verifies that the inputs were not mutated (still N entries each), that
    the merge has the expected number of keys, and spot-checks keys 0..9:
    keys found only in A or only in B keep their value, keys found in both
    hold ``f(A[k], B[k])``, and keys found in neither are absent.

    Raises AssertionError on the first violated expectation. The spot
    checks need N >= 5 so that keys 8 and 9 exist.
    """
    assert len(A) == N
    assert len(B) == N
    # Before t() is applied, A's keys are the even numbers below 2*N and
    # B's are the multiples of 3 below 3*N, so the shared keys are the
    # multiples of 6 below 2*N: (N - 1) // 3 + 1 of them. This assumes t()
    # maps distinct inputs to distinct keys. For N = 10000 the expected
    # size is 16666.
    shared = (N - 1) // 3 + 1
    assert len(AB) == 2 * N - shared
    assert AB[t(0)] == f(t(0), t(0))
    assert t(1) not in AB
    assert AB[t(2)] == t(1 * 22)
    assert AB[t(3)] == t(1 * 33)
    assert AB[t(4)] == t(2 * 22)
    assert t(5) not in AB
    assert AB[t(6)] == f(t(3 * 22), t(2 * 33))
    assert t(7) not in AB
    assert AB[t(8)] == t(4 * 22)
    assert AB[t(9)] == t(3 * 33)
def mergeLZ():  # Lei Zhao
    """Merge A and B with Lei Zhao's one-liner and validate the result.

    For every key in the union of both key sets, the values found under
    that key are collected into a list. ``f`` combines them when both
    dictionaries contribute a value; otherwise the single value is used
    unchanged.

    The expression is the subject of the benchmark, so it is kept
    construct-for-construct; only the layout differs.
    """
    merged = {
        k: (lambda x: f(*x) if len(x) > 1 else x[0])(
            [d[k] for d in [A, B] if k in d]
        )
        for k in functools.reduce(
            operator.or_, map(lambda x: x.keys(), [A, B]), set()
        )
    }
    check(merged)
def mergeJK():  # jamylak
    """Merge A and B with jamylak's dict comprehension and validate it.

    Iterates over the union of both key sets. A key present in both
    dictionaries gets ``f(A[k], B[k])``; any other key gets whichever
    value exists, looking in A first and falling back to B.

    The expression is the subject of the benchmark, so it is kept
    construct-for-construct; only the layout differs.
    """
    merged = {
        k: f(A[k], B[k]) if k in A and k in B else A.get(k, B.get(k))
        for k in A.keys() | B.keys()
    }
    check(merged)
def mergeMP():  # Martijn Pieters
    """Merge A and B with Martijn Pieters' two-step approach and validate it.

    First builds the entries whose key is in exactly one dictionary (the
    symmetric difference of the key sets), then adds ``f(A[k], B[k])`` for
    every key in the intersection.

    The expressions are the subject of the benchmark, so they are kept
    construct-for-construct.
    """
    merged = {k: A.get(k, B.get(k)) for k in A.keys() ^ B.keys()}
    merged.update({k: f(A[k], B[k]) for k in A.keys() & B.keys()})
    check(merged)
def mergeDP():  # DhruvPathak
    """Merge A and B with DhruvPathak's list-concatenation approach.

    Builds one list of all pairs from A followed by one pair per entry of
    B, where a B key that also exists in A carries ``f(A[k], B[k])``.
    ``dict()`` keeps the last pair seen for a key, so those merged pairs
    replace A's originals. The result is then validated.

    The expression is the subject of the benchmark, so it is kept
    construct-for-construct; only the layout differs.
    """
    merged = dict(
        [(k, v) for k, v in A.items()]
        + [(k, v) if k not in A else (k, f(A[k], B[k])) for k, v in B.items()]
    )
    check(merged)
def mergeDS():  # more elegant (IMO) variation on DhruvPathak
    """Merge A and B by concatenating three pair lists and validate it.

    The lists are: all pairs of A, all pairs of B, and ``f(A[k], B[k])``
    for every shared key. ``dict()`` keeps the last pair seen for a key,
    so the merged pairs, which come last, win for shared keys.

    The expression is the subject of the benchmark, so it is kept
    construct-for-construct; only the layout differs.
    """
    merged = dict(
        list(A.items())
        + list(B.items())
        + [(k, f(A[k], B[k])) for k in A.keys() & B.keys()]
    )
    check(merged)
def mergeK3():  # kampu adapted to Python 3
    """Merge A and B by chaining three pair iterables and validate it.

    Same idea as the list-concatenation variant, but ``itertools.chain``
    feeds the pairs to ``dict()`` without building intermediate lists.
    The generator of ``f(A[k], B[k])`` pairs comes last, so it overrides
    the plain values for shared keys.

    The expression is the subject of the benchmark, so it is kept
    construct-for-construct; only the layout differs.
    """
    merged = dict(
        itertools.chain(
            A.items(),
            B.items(),
            ((k, f(A[k], B[k])) for k in A.keys() & B.keys()),
        )
    )
    check(merged)
def mergeS3():  # "naive" imperative way
    """Merge A and B with a plain loop and validate the result.

    Starts from a shallow copy of A, then walks B: a key already in A is
    overwritten with ``f(A[k], v)``, any other key is simply added.

    The statements are the subject of the benchmark, so they are kept
    as-is.
    """
    merged = A.copy()
    for k, v in B.items():
        if k in A:
            merged[k] = f(A[k], v)
        else:
            merged[k] = v
    check(merged)
# timeit.timeit returns the total number of seconds taken by `number` calls.
# With number=1000 that total equals the mean time per call in milliseconds,
# which is why it is printed with an "ms" suffix. Each merge function also
# calls check(), so the validation time is included in every figure.
for m in [mergeLZ, mergeJK, mergeMP, mergeDP, mergeDS, mergeK3, mergeS3]:
    print("{}: {:4.1f}ms".format(m.__name__, timeit.timeit(m, number=1000)))