Creating my dataframe:
from pandas import *
arrays = [[\'bar\', \'bar\', \'baz\', \'baz\', \'foo\', \'foo\', \'qux\', \'qux\'],
[\'one\', \'two\', \'o
Use the set_levels method (new in version 0.13.0):
# set_levels() returns a new MultiIndex (the inplace= option was removed in
# pandas 2.0), so assign the result back to the frame's index.
data.index = data.index.set_levels([[u'cat', u'dog', u'foo', u'qux'],
                                    [u'one', u'two']])
yields
c1 c2
first second
cat one -0.289649 -0.870716
two -0.062014 -0.410274
dog one 0.030171 -1.091150
two 0.505408 1.531108
foo one 1.375653 -1.377876
two -1.478615 1.351428
qux one 1.075802 0.532416
two 0.865931 -0.765292
To remap a level based on a dict, you could use a function such as this:
def map_level(df, dct, level=0):
    """Rename the labels of one MultiIndex level of ``df`` using a dict.

    Args:
        df: DataFrame whose index is a MultiIndex; its index is replaced.
        dct: Mapping of old label -> new label. Labels that are not keys
            of the mapping are kept unchanged.
        level: Integer position of the level to remap (default 0).

    Returns:
        None. ``df.index`` is rebound to the relabelled MultiIndex.
    """
    index = df.index
    new_levels = [
        [dct.get(item, item) for item in names] if i == level else names
        for i, names in enumerate(index.levels)
    ]
    # set_levels() returns a new MultiIndex; the inplace= flag was removed in
    # pandas 2.0, so assign the result back instead of mutating in place.
    df.index = index.set_levels(new_levels)
# Rename 'bar' -> 'cat' and 'baz' -> 'dog' on the first index level of `data`.
dct = dict(bar='cat', baz='dog')
map_level(data, dct, 0)
Here's a runnable example:
import numpy as np
import pandas as pd
# Build a two-level MultiIndex ('first', 'second') from parallel label arrays.
arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
          ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
# Materialize the pairs: on Python 3 zip() is a one-shot iterator.
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
data = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['c1', 'c2'])
# Keep an untouched copy for the map_level() demonstration further down.
data2 = data.copy()
# set_levels() returns a new MultiIndex (the inplace= option was removed in
# pandas 2.0), so assign the result back to the frame's index.
data.index = data.index.set_levels([[u'cat', u'dog', u'foo', u'qux'],
                                    [u'one', u'two']])
print(data)
# c1 c2
# first second
# cat one 0.939040 -0.748100
# two -0.497006 -1.185966
# dog one -0.368161 0.050339
# two -2.356879 -0.291206
# foo one -0.556261 0.474297
# two 0.647973 0.755983
# qux one -0.017722 1.364244
# two 1.007303 0.004337
def map_level(df, dct, level=0):
    """Rename the labels of one MultiIndex level of ``df`` using a dict.

    Args:
        df: DataFrame whose index is a MultiIndex; its index is replaced.
        dct: Mapping of old label -> new label. Labels that are not keys
            of the mapping are kept unchanged.
        level: Integer position of the level to remap (default 0).

    Returns:
        None. ``df.index`` is rebound to the relabelled MultiIndex.
    """
    index = df.index
    new_levels = [
        [dct.get(item, item) for item in names] if i == level else names
        for i, names in enumerate(index.levels)
    ]
    # set_levels() returns a new MultiIndex; the inplace= flag was removed in
    # pandas 2.0, so assign the result back instead of mutating in place.
    df.index = index.set_levels(new_levels)
# Rename 'bar' -> 'wolf' and 'baz' -> 'rabbit' on the first index level of
# the untouched copy, then display the relabelled frame.
dct = dict(bar='wolf', baz='rabbit')
map_level(data2, dct, 0)
print(data2)
# c1 c2
# first second
# wolf one 0.939040 -0.748100
# two -0.497006 -1.185966
# rabbit one -0.368161 0.050339
# two -2.356879 -0.291206
# foo one -0.556261 0.474297
# two 0.647973 0.755983
# qux one -0.017722 1.364244
# two 1.007303 0.004337
The set_levels method was causing my new column names to be out of order, so I found a different solution that isn't very clean but works well: print df.index (or, equivalently, df.columns), then copy and paste the output with the desired values changed. For example:
# Show the index's constructor-style repr so it can be copied and edited.
print(data.index)
MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']], labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]], names=['first', 'second'])
# Rebuild the index with renamed level values. The integer positions are
# passed as `codes=` (the argument was called `labels=` before pandas 0.24);
# each code is a position into the matching `levels` list.
data.index = MultiIndex(levels=[['new_bar', 'new_baz', 'new_foo', 'new_qux'],
                                ['new_one', 'new_two']],
                        codes=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
                        names=['first', 'second'])
We can have full control over names by editing the labels (the integer positions into each level, called codes in pandas 0.24 and later) as well. For example:
# Give every row its own second-level name by pointing each row at a distinct
# entry of the second level. The integer positions are passed as `codes=`
# (the argument was called `labels=` before pandas 0.24).
data.index = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'],
                                ['one', 'twooo', 'three', 'four',
                                 'five', 'siz', 'seven', 'eit']],
                        codes=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 3, 4, 5, 6, 7]],
                        names=['first', 'second'])
Note that this example assumes MultiIndex is already in scope, e.g. via from pandas import MultiIndex or from pandas import *.