Suppose you have a dictionary like:
{'a': 1,
 'c': {'a': 2,
       'b': {'x': 5,
             'y': 10}},
 'd': [1, 2, 3]}
Ho
This is similar to both imran's and ralu's answer. It does not use a generator, but instead employs recursion with a closure:
def flatten_dict(d, separator='_'):
    """Flatten a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their ancestors' keys with
    ``separator``. Only ``dict`` values are descended into; every other
    value (lists included) is stored unchanged. Empty nested dictionaries
    contribute no entries.

    Args:
        d: The (possibly nested) dictionary to flatten. Keys must be
            strings, because they are combined with ``str.join``.
        separator: String placed between the key segments.

    Returns:
        A new flat dictionary; ``d`` is not modified.
    """
    final = {}

    # An immutable tuple is used for the key path so that no mutable default
    # argument is shared between calls.
    def _flatten_dict(obj, parent_keys=()):
        # ``items()`` rather than the Python 2-only ``iteritems()``.
        for k, v in obj.items():
            path = parent_keys + (k,)
            if isinstance(v, dict):
                _flatten_dict(v, path)
            else:
                final[separator.join(path)] = v

    _flatten_dict(d)
    return final
>>> print flatten_dict({'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3]})
{'a': 1, 'c_a': 2, 'c_b_x': 5, 'd': [1, 2, 3], 'c_b_y': 10}
def flatten(unflattened_dict, separator='_'):
    """Return a flat copy of ``unflattened_dict``.

    Each nested dictionary is flattened recursively and its keys are
    prefixed with the parent key followed by ``separator``. Values that are
    not dictionaries are copied over under their own key.
    """
    result = {}
    for key, value in unflattened_dict.items():
        # Leaf value: store as-is and move on.
        if not isinstance(value, dict):
            result[key] = value
            continue
        # Nested dictionary: flatten it first, then prefix its keys.
        nested = flatten(value, separator)
        result.update({
            key + separator + sub_key: sub_value
            for sub_key, sub_value in nested.items()
        })
    return result
def flatten_nested_dict(_dict, _str=''):
    """Recursively flatten a nested, JSON-like dictionary.

    Nested dictionaries contribute their keys joined to the parent key with
    ``'_'``. Lists are expanded element by element, using the element index
    as a key segment; dictionaries found directly inside a list are
    flattened as well. Lists nested directly inside lists are stored as-is,
    and empty dictionaries or lists contribute no entries.

    Args:
        _dict: The dictionary to flatten.
        _str: Prefix prepended to every produced key (used by the
            recursion). An empty prefix means "no prefix".

    Returns:
        A new flat dictionary whose keys are strings.
    """
    def _join(*parts):
        # Build the key explicitly instead of joining and then stripping
        # '_' from the result: stripping also removed underscores that were
        # part of real keys ('_id' -> 'id'), which could make distinct keys
        # collide.
        pieces = [str(part) for part in parts]
        if _str:
            pieces.insert(0, _str)
        return '_'.join(pieces)

    ret_dict = {}
    for k, v in _dict.items():
        if isinstance(v, dict):
            ret_dict.update(flatten_nested_dict(v, _str=_join(k)))
        elif isinstance(v, list):
            for index, item in enumerate(v):
                if isinstance(item, dict):
                    ret_dict.update(
                        flatten_nested_dict(item, _str=_join(k, index)))
                else:
                    ret_dict[_join(k, index)] = item
        else:
            ret_dict[_join(k)] = v
    return ret_dict
A variation of "Flatten nested dictionaries, compressing keys", with support for max_level and a custom reducer.
def flatten(d, max_level=None, reducer='tuple'):
    """Flatten a nested dictionary, combining key paths with a reducer.

    Args:
        d: The (possibly nested) dictionary to flatten.
        max_level: Maximum number of nesting levels to descend into.
            ``None`` means unlimited; ``0`` means nested dictionaries are
            kept as values and only the top-level keys are converted.
        reducer: Name of the strategy used to combine a parent key path
            with a child key. Only ``'tuple'`` is supported: each resulting
            key is the tuple of keys leading to the value.

    Returns:
        A new flat dictionary. Nested dictionaries that are descended into
        and turn out to be empty contribute no entries.

    Raises:
        ValueError: If ``reducer`` is not a known reducer name.
    """
    if reducer == 'tuple':
        reducer_seed = ()

        def reducer_func(prefix, key):
            return (*prefix, key)
    else:
        raise ValueError(f'Unknown reducer: {reducer}')

    flat = {}

    def impl(mapping, pref, level):
        # A single shared accumulator is filled in place. This avoids
        # copying the partial result for every item and removes the
        # ``cond and a or b`` construct, which treated an empty merged dict
        # as "condition failed" and so handled empty nested dicts
        # inconsistently.
        descend = max_level is None or level < max_level
        for key, value in mapping.items():
            new_key = reducer_func(pref, key)
            if descend and isinstance(value, dict):
                impl(value, new_key, level + 1)
            else:
                flat[new_key] = value

    impl(d, reducer_seed, 0)
    return flat
Utilizing recursion, keeping it simple and human readable:
def flatten_dict(dictionary, accumulator=None, parent_key=None, separator="."):
    """Flatten a nested dictionary into a single-level dictionary.

    Args:
        dictionary: The (possibly nested) dictionary to flatten. It is not
            modified.
        accumulator: Dictionary that receives the flattened items. Callers
            normally omit it; the recursion passes the same instance down
            so that every level writes into one result.
        parent_key: Key path of the dictionary being processed. A falsy
            value (``None`` on the initial call) means "no prefix".
        separator: String placed between a parent key and a child key.

    Returns:
        The accumulator holding all non-dictionary values keyed by their
        joined key path. Empty nested dictionaries contribute no entries.
    """
    if accumulator is None:
        accumulator = {}

    for k, v in dictionary.items():
        k = f"{parent_key}{separator}{k}" if parent_key else k
        if isinstance(v, dict):
            # The separator must be forwarded explicitly; otherwise nested
            # levels fall back to the default and a custom separator is
            # never applied.
            flatten_dict(
                dictionary=v,
                accumulator=accumulator,
                parent_key=k,
                separator=separator,
            )
            continue

        accumulator[k] = v

    return accumulator
Call is simple:
new_dict = flatten_dict(dictionary)
or
new_dict = flatten_dict(dictionary, separator="_")
if we want to change the default separator.
A little breakdown:
When the function is first called, it is called with only the dictionary we want to flatten. The `accumulator` parameter is there to support recursion, which we will see later. So, we initialize `accumulator` to an empty dictionary, in which we will put all of the nested values from the original dictionary.
if accumulator is None:
accumulator = {}
As we iterate over the dictionary's items, we construct a key for every value. The `parent_key` argument will be `None` for the first call, while for every nested dictionary it will contain the key pointing to it, so we prepend that key.
k = f"{parent_key}{separator}{k}" if parent_key else k
In case the value `v` that the key `k` points to is a dictionary, the function calls itself, passing the nested dictionary, the `accumulator` (which is passed by reference, so all changes made to it are made on the same instance) and the key `k`, so that we can construct the concatenated key. Notice the `continue` statement: we want to skip the next line, outside of the `if` block, so that the nested dictionary doesn't end up in the `accumulator` under key `k`.
if isinstance(v, dict):
flatten_dict(dictionary=v, accumulator=accumulator, parent_key=k)
continue
So, what do we do in case the value `v` is not a dictionary? We just put it, unchanged, inside the `accumulator`.
accumulator[k] = v
Once we're done, we just return the `accumulator`, leaving the original `dictionary` argument untouched.
NOTE
This will work only with dictionaries that have strings as keys. It will work with hashable objects implementing the __repr__
method, but will yield unwanted results.
Here is a kind of a "functional", "one-liner" implementation. It is recursive, and based on a conditional expression and a dict comprehension.
def flatten_dict(dd, separator='_', prefix=''):
    """Flatten ``dd`` into a single-level dictionary.

    A non-dictionary ``dd`` is wrapped as ``{prefix: dd}``. For a
    dictionary, every value is flattened recursively with its own key as
    the prefix, and the resulting keys are then prepended with ``prefix``
    and ``separator`` when ``prefix`` is truthy.
    """
    # Base case: a leaf value is returned under the key that led to it.
    if not isinstance(dd, dict):
        return {prefix: dd}

    flat = {}
    for child_key, child_value in dd.items():
        sub_items = flatten_dict(child_value, separator, child_key)
        for leaf_key, leaf_value in sub_items.items():
            full_key = prefix + separator + leaf_key if prefix else leaf_key
            flat[full_key] = leaf_value
    return flat
Test:
In [2]: flatten_dict({'abc':123, 'hgf':{'gh':432, 'yu':433}, 'gfd':902, 'xzxzxz':{"432":{'0b0b0b':231}, "43234":1321}}, '.')
Out[2]:
{'abc': 123,
'gfd': 902,
'hgf.gh': 432,
'hgf.yu': 433,
'xzxzxz.432.0b0b0b': 231,
'xzxzxz.43234': 1321}