Python merge dictionaries with custom merge function

问题

I want to merge two dictionaries A and B such that the result contains:

All pairs from A where key is unique to A
All pairs from B where key is unique to B
f(valueA, valueB) where the same key exists in both A and B

For example:

def f(x, y):
    """Combine the two values found under a key that exists in both dicts."""
    return x * y

A = {1:1, 2:3}
B = {7:3, 2:2}

C = merge(A, B)

Output:

{1:1, 7:3, 2:6}

It feels like there should be a nice one-liner to do this.

回答1:

Use dictionary views to achieve this; the dict.viewkeys() result acts like a set and lets you compute intersections and symmetric differences:

def merge(A, B, f):
    """Merge dictionaries ``A`` and ``B``, resolving shared keys with ``f``.

    Keys found in only one of the dictionaries keep their value; for keys
    found in both, the result holds ``f(A[key], B[key])``.  A new dictionary
    is returned.  Relies on the Python 2 ``viewkeys()`` set-like views.
    """
    only_one_side = A.viewkeys() ^ B.viewkeys()
    on_both_sides = A.viewkeys() & B.viewkeys()

    merged = {}
    # Keys exclusive to one dictionary: copy the value from whichever has it.
    for key in only_one_side:
        merged[key] = A[key] if key in A else B[key]
    # Shared keys: combine the two values through f().
    for key in on_both_sides:
        merged[key] = f(A[key], B[key])
    return merged

In Python 3, the .viewkeys() method has been renamed to .keys(), replacing the old .keys() functionality (which in Python 2 returns a list).

The above merge() method is the generic solution which works for any given f().

Demo:

>>> def f(x, y):
...     return x * y
... 
>>> A = {1:1, 2:3}
>>> B = {7:3, 2:2}
>>> merge(A, B, f)
{1: 1, 2: 6, 7: 3}
>>> merge(A, B, lambda a, b: '{} merged with {}'.format(a, b))
{1: 1, 2: '3 merged with 2', 7: 3}

回答2:

Stealing the A.get(k, B.get(k)) snippet from @MartijnPieters:

>>> def f(x, y):
        return x * y

>>> A = {1:1, 2:3}
>>> B = {7:3, 2:2}
>>> {k: f(A[k], B[k]) if k in A and k in B else A.get(k, B.get(k))
     for k in A.viewkeys() | B.viewkeys()}
{1: 1, 2: 6, 7: 3}

回答3:

Here's my solution code in Python 3 for the general case.

I first wrote the merge function and then extended it to the more general merge_with function, which takes a function and any number of dictionaries. If those dictionaries contain duplicate keys, the supplied function is applied to the values that share a key.

The merge function can be redefined using the merge_with function, as is done for the merger function. The name merger means to merge them all and keep the rightmost value wherever there are duplicates. The mergel function does the same but keeps the leftmost value.

All the functions here — merge, merge_with, mergel, and merger — are generic in that they take an arbitrary number of dictionary arguments. Note that merge_with must be given a function that is compatible with the data to which it will be applied.

from functools import reduce
from operator import or_

def merge(*dicts):
    """Merge any number of dictionaries; the rightmost value wins on clashes.

    Returns a new dictionary containing every key found in ``dicts``.  When a
    key occurs in several dictionaries, the value from the last dictionary
    that contains it is kept.  With no arguments the result is ``{}``.
    """
    every_key = set()
    for mapping in dicts:
        every_key = every_key | mapping.keys()

    merged = {}
    for key in every_key:
        value = None
        # Walk left to right so later dictionaries overwrite earlier ones.
        for mapping in dicts:
            value = mapping.get(key, value)
        merged[key] = value
    return merged

def merge_with(f, *dicts):
    """Merge any number of dictionaries, combining clashing values with ``f``.

    For each key, the values from every dictionary that contains it are
    gathered in argument order.  A key found in a single dictionary keeps its
    value unchanged; otherwise the result holds ``f(*values)``, so ``f`` must
    accept as many positional arguments as there are dictionaries sharing
    the key.
    """
    every_key = set()
    for mapping in dicts:
        every_key = every_key | mapping.keys()

    combined = {}
    for key in every_key:
        values = [mapping[key] for mapping in dicts if key in mapping]
        if len(values) > 1:
            combined[key] = f(*values)
        else:
            combined[key] = values[0]
    return combined

def mergel(*dicts):
    """Merge ``dicts``, keeping the leftmost value for every duplicated key."""
    return merge_with(lambda *x: x[0], *dicts)


def merger(*dicts):
    """Merge ``dicts``, keeping the rightmost value for every duplicated key."""
    return merge_with(lambda *x: x[-1], *dicts)

Tests

>>> squares = { k:k*k for k in range(4) }
>>> squares
{0: 0, 1: 1, 2: 4, 3: 9}
>>> cubes = { k:k**3 for k in range(2,6) }
>>> cubes
{2: 8, 3: 27, 4: 64, 5: 125}
>>> merger(squares, cubes)
{0: 0, 1: 1, 2: 8, 3: 27, 4: 64, 5: 125}
>>> merger(cubes, squares)
{0: 0, 1: 1, 2: 4, 3: 9, 4: 64, 5: 125}
>>> mergel(squares, cubes)
{0: 0, 1: 1, 2: 4, 3: 9, 4: 64, 5: 125}
>>> mergel(cubes, squares)
{0: 0, 1: 1, 2: 8, 3: 27, 4: 64, 5: 125}
>>> merge(squares, cubes)
{0: 0, 1: 1, 2: 8, 3: 27, 4: 64, 5: 125}
>>> merge(cubes, squares)
{0: 0, 1: 1, 2: 4, 3: 9, 4: 64, 5: 125}
>>> merge_with(lambda x, y: x+y, squares, cubes)
{0: 0, 1: 1, 2: 12, 3: 36, 4: 64, 5: 125}
>>> merge_with(lambda x, y: x*y, squares, cubes)
{0: 0, 1: 1, 2: 32, 3: 243, 4: 64, 5: 125}

Update

After I wrote the above, I found there's another way to do it.

from functools import reduce

def merge(*dicts):
    """Merge any number of dictionaries; the rightmost value wins on clashes.

    Returns a new dictionary and leaves the inputs untouched.  With no
    arguments the result is ``{}``.

    The result is accumulated in place, so the cost is linear in the total
    number of items rather than rebuilding the whole dictionary for every
    single item.
    """
    merged = {}
    for d in dicts:
        # Later dictionaries overwrite earlier ones on duplicate keys.
        merged.update(d)
    return merged

def merge_with(f, *dicts):
    """Merge any number of dictionaries, folding clashing values with ``f``.

    Dictionaries are processed left to right.  The first value seen for a key
    is stored as is; every later value for the same key replaces the stored
    one with ``f(stored, new)``.  ``f`` must therefore be a binary function.

    Returns a new dictionary and leaves the inputs untouched.  The result is
    accumulated in place, so the cost is linear in the total number of items
    rather than rebuilding the whole dictionary for every single item.
    """
    merged = {}
    for d in dicts:
        for key, value in d.items():
            merged[key] = f(merged[key], value) if key in merged else value
    return merged

def mergel(*dicts):
    """Merge ``dicts``, keeping the leftmost value for every duplicated key."""
    return merge_with(lambda x, y: x, *dicts)


def merger(*dicts):
    """Merge ``dicts``, keeping the rightmost value for every duplicated key."""
    return merge_with(lambda x, y: y, *dicts)

Notice that the definitions of mergel and merger in terms of merge_with have changed: they now pass different functions as the first argument, because the f function must now be binary. The tests provided above still work. Here are some more tests to show the generality of those functions.

>>> merge() == {}
True
>>> merge(squares) == squares
True
>>> merge(cubes) == cubes
True
>>> mergel() == {}
True
>>> mergel(squares) == squares
True
>>> mergel(cubes) == cubes
True
>>> merger() == {}
True
>>> merger(squares) == squares
True
>>> merger(cubes) == cubes
True
>>> merge_with(lambda x, y: x+y, squares, cubes, squares)
{0: 0, 1: 2, 2: 16, 3: 45, 4: 64, 5: 125}
>>> merge_with(lambda x, y: x*y, squares, cubes, squares)
{0: 0, 1: 1, 2: 128, 3: 2187, 4: 64, 5: 125}

回答4:

>>> def f(x,y):
...     return x*y
... 
>>> dict([(k,v) for k,v in A.items()] + [ (k,v) if k not in A else (k,f(A[k],B[k])) for k,v in B.items()])
{1: 1, 2: 6, 7: 3}

回答5:

from itertools import chain

# Keys that appear in both dictionaries.
intersection = set(A).intersection(B)
# dict() keeps the last pair seen for a repeated key, so the f-merged pairs
# for the shared keys override the plain values contributed by A and B.
C = dict(chain(
    A.items(),
    B.items(),
    ((key, f(A[key], B[key])) for key in intersection),
))

Could technically be made into a oneliner. Works in both Py2 and Py3. If you only care about Py3, you can rewrite the 'intersection' line to:

intersection = A.keys() & B.keys()

(for Py2-only, use A.viewkeys() & B.viewkeys() instead.)

回答6:

A different approach that is (imho) more readable for users that come from a background in functional programming

def merge_with(f):
    """Return a two-dictionary merge function that combines values with ``f``.

    The returned ``merge(a, b)`` builds a new dictionary over the union of
    the keys of ``a`` and ``b``.  For each key, ``f`` is called with the
    values that are actually present: ``f(a[key], b[key])`` when both
    dictionaries have the key, or ``f(value)`` when only one does.  ``f``
    must therefore accept one or two positional arguments (for example by
    giving its second parameter a default).
    """
    def merge(a, b):
        # Test membership instead of filtering out None, so that a key whose
        # stored value is None is still passed on to f.
        return {
            key: f(*[d[key] for d in (a, b) if key in d])
            for key in a.keys() | b.keys()
        }
    return merge

Applying this to the OP's example:

A = {1:1, 2:3}
B = {7:3, 2:2}
merge_with(lambda x,y=1: x*y)(A,B)

回答7:

dict(list(A.items()) + list(B.items()) + [(k,f(A[k],B[k])) for k in A.keys() & B.keys()])

is in my opinion the shortest and most readable code in Python 3. I derived it from DhruvPathak's answer and realised that optimising it leads to kampu's answer specialised for Python 3:

dict(itertools.chain(A.items(), B.items(), ((k,f(A[k],B[k])) for k in A.keys() & B.keys())))

I compared all of the answers here for performance, and got this ranking:

mergeLZ: 34.0ms (Lei Zhao, quite bulky one-liner)
mergeJK: 11.6ms (jamylak)
mergeMP: 11.5ms (Martijn Pieters, almost a one-liner)
mergeDP: 6.9ms (DhruvPathak)
mergeDS: 6.8ms (1st one-liner above)
mergeK3: 5.2ms (kampu = 2nd one-liner above)
mergeS3: 3.5ms (imperative, not a one-liner)

where the latter mergeS3 is a naive, imperative, multi-line code. I'm disappointed that the old ways prevail when it comes to performance. This test is for simple integer keys and values, but the ranking is quite similar for big string keys and values. Obviously mileage may vary by dictionary size and amount of key overlap (1/3 in my test). By the way, Lei Zhao's second implementation, which I haven't tried to understand, seems to have abysmal performance, ~1000 times slower.

The code:

import functools 
import itertools
import operator
import timeit

def t(x): # transform keys and values
    # Identity by default; swap in the commented-out expression to run the
    # benchmark with long string keys and values instead of integers.
    return x # str(x) * 8

def f(x,y): # merge values
    """Merge function used by the benchmark: add the two clashing values."""
    return x + y

# Benchmark fixtures: two dictionaries with N entries each.
N = 10000
# A is keyed by t(0), t(2), t(4), ... and B by t(0), t(3), t(6), ..., so
# (with the identity t) they share exactly the keys that are multiples of 6.
A = {t(k*2): t(k*22) for k in range(N)}
B = {t(k*3): t(k*33) for k in range(N)}

def check(AB):
    """Sanity-check a merged dictionary ``AB`` against the A/B fixtures.

    Raises AssertionError if the inputs changed size, if the merged size is
    wrong, or if any of the spot-checked keys 0..9 has an unexpected value.
    """
    # Both inputs must still hold exactly N entries.
    assert len(A) == N
    assert len(B) == N
    # Expected size of the key union for the N = 10000 fixtures.
    assert len(AB) == 16666

    # Key 0 is in both inputs, so its value goes through f.
    assert AB[t(0)] == f(t(0), t(0))
    assert t(1) not in AB
    # Keys 2, 3 and 4 each come from a single input and are copied verbatim.
    assert AB[t(2)] == t(1*22)
    assert AB[t(3)] == t(1*33)
    assert AB[t(4)] == t(2*22)
    assert t(5) not in AB
    # Key 6 is shared again: A contributes 3*22 and B contributes 2*33.
    assert AB[t(6)] == f(t(3*22), t(2*33))
    assert t(7) not in AB
    assert AB[t(8)] == t(4*22)
    assert AB[t(9)] == t(3*33)

def mergeLZ(): # Lei Zhao
    """Benchmark case: one dict comprehension over the reduced key union.

    For every key in the union of A's and B's keys, gathers the values from
    whichever of A and B contain it, applies f when there are two values and
    keeps the single value otherwise.  The result is validated with check().
    """
    merged = {k: (lambda x: f(*x) if len(x)>1 else x[0])([ d[k] for d in [A, B]
                                                          if k in d ])
             for k in functools.reduce(operator.or_, map(lambda x: x.keys(), [A, B]), set()) }
    check(merged)
def mergeJK(): # jamylak
    """Benchmark case: dict comprehension over ``A.keys() | B.keys()``.

    Applies f when a key is in both A and B; otherwise takes the value from
    whichever dictionary has the key.  The result is validated with check().
    """
    merged = {k: f(A[k], B[k]) if k in A and k in B else A.get(k, B.get(k)) for k in A.keys() | B.keys()}
    check(merged)
def mergeMP(): # Martijn Pieters
    """Benchmark case: symmetric difference first, then the intersection.

    Copies the values of keys found in exactly one of A and B, then updates
    the result with f(A[k], B[k]) for the keys found in both.  The result is
    validated with check().
    """
    merged = {k: A.get(k, B.get(k)) for k in A.keys() ^ B.keys()}
    merged.update({k: f(A[k], B[k]) for k in A.keys() & B.keys()})
    check(merged)
def mergeDP(): # DhruvPathak
    """Benchmark case: build the dict from two concatenated lists of pairs.

    The first list holds A's pairs; the second holds B's pairs, with the
    value replaced by f(A[k], B[k]) for keys that are also in A.  dict()
    keeps the last pair for a repeated key, so the merged value wins.  The
    result is validated with check().
    """
    merged = dict([(k,v) for k,v in A.items()] + [ (k,v) if k not in A else (k,f(A[k],B[k])) for k,v in B.items()])
    check(merged)
def mergeDS(): # more elegant (IMO) variation on DhruvPathak
    """Benchmark case: concatenate A's pairs, B's pairs and the merged pairs.

    The f-merged pairs for the shared keys come last, so they override the
    plain values when dict() consumes the list.  The result is validated
    with check().
    """
    merged = dict(list(A.items()) + list(B.items()) + [(k,f(A[k],B[k])) for k in A.keys() & B.keys()])
    check(merged)
def mergeK3(): # kampu adapted to Python 3
    """Benchmark case: chain A's pairs, B's pairs and the merged pairs.

    Feeds dict() from itertools.chain instead of concatenated lists; the
    f-merged pairs for the shared keys come last and therefore override the
    plain values.  The result is validated with check().
    """
    merged = dict(itertools.chain(A.items(), B.items(), ((k,f(A[k],B[k])) for k in A.keys() & B.keys())))
    check(merged)
def mergeS3(): # "naive" imperative way
    """Benchmark case: copy A, then fold B into the copy with a plain loop.

    The result is validated with check().
    """
    merged = A.copy()
    for k,v in B.items():
        if k in A:
            # Shared key: combine A's value with B's value.
            merged[k] = f(A[k], v)
        else:
            # Key only in B: take its value as is.
            merged[k] = v
    check(merged)

# Time every candidate.  timeit.timeit() returns the total number of seconds
# for `number` executions; with number=1000 that total is numerically equal
# to the mean time per execution in milliseconds, hence the "ms" label.
for m in [mergeLZ, mergeJK, mergeMP, mergeDP, mergeDS, mergeK3, mergeS3]:
    print("{}: {:4.1f}ms".format(m.__name__, timeit.timeit(m, number=1000)))

回答8:

def merge_dict(dict1, dict2):
    """Print and return the union of ``dict1`` and ``dict2``.

    Values from ``dict2`` win when a key is present in both dictionaries.
    Neither argument is modified; the merge is done on a copy of ``dict1``.

    Returns:
        The merged dictionary (also printed as ``dict3 = {...}``).
    """
    merged = dict(dict1)
    merged.update(dict2)
    # str.format keeps the output identical on Python 2 and Python 3.
    print('dict3 = {}'.format(merged))
    return merged


merge_dict({1: 'red'}, {2: 'black', 3: 'yellow'})

Output:

dict3 = {1: 'red', 2: 'black', 3: 'yellow'}

来源：https://stackoverflow.com/questions/16560840/python-merge-dictionaries-with-custom-merge-function

标签

python

dictionary

coding-style