Renaming index values in multiindex dataframe

前端 未结 2 1642
时光取名叫无心
时光取名叫无心 2021-02-02 18:21

Creating my dataframe:

from pandas import *
arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
          ['one', 'two', 'o... (preview truncated)


        
2条回答
  •  梦如初夏
    2021-02-02 18:35

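    Use the set_levels method (new in version 0.13.0). Note that its inplace argument was removed in pandas 2.0; on current versions, assign the returned index back to data.index: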

    # set_levels returns a new MultiIndex; assign it back to the frame because
    # the `inplace` argument was removed in pandas 2.0.
    data.index = data.index.set_levels([[u'cat', u'dog', u'foo', u'qux'],
                                        [u'one', u'two']])
    

    yields

                        c1        c2
    first second                    
    cat   one    -0.289649 -0.870716
          two    -0.062014 -0.410274
    dog   one     0.030171 -1.091150
          two     0.505408  1.531108
    foo   one     1.375653 -1.377876
          two    -1.478615  1.351428
    qux   one     1.075802  0.532416
          two     0.865931 -0.765292
    

    To remap a level based on a dict, you could use a function such as this:

    def map_level(df, dct, level=0):
        """Relabel one level of ``df``'s MultiIndex using a mapping.

        Args:
            df: DataFrame whose index is a MultiIndex; its ``index`` attribute
                is replaced with the relabelled index.
            dct: Mapping from old label to new label. Labels that are not keys
                of ``dct`` are kept unchanged.
            level: Integer position of the index level to relabel.

        Returns:
            None. ``df`` is modified by assigning it a new index.
        """
        index = df.index
        new_levels = [
            [dct.get(item, item) for item in names] if i == level else names
            for i, names in enumerate(index.levels)
        ]
        # set_levels returns a new MultiIndex (its `inplace` argument was
        # removed in pandas 2.0), so assign the result back to the frame.
        df.index = index.set_levels(new_levels)
    
    # Relabel 'bar' -> 'cat' and 'baz' -> 'dog' in level 0 of data's index.
    dct = {'bar':'cat', 'baz':'dog'}
    map_level(data, dct, level=0)
    

    Here's a runnable example:

    import numpy as np
    import pandas as pd
    
    # Build an 8-row frame indexed by a two-level MultiIndex (first, second).
    arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
    # Materialize the pairs: on Python 3, zip() is a one-shot iterator.
    tuples = list(zip(*arrays))
    index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
    data = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['c1', 'c2'])
    # Copy taken before relabelling, so data2 keeps the original labels.
    data2 = data.copy()

    # set_levels returns a new MultiIndex; assign it back to the frame because
    # the `inplace` argument was removed in pandas 2.0.
    data.index = data.index.set_levels([[u'cat', u'dog', u'foo', u'qux'],
                                        [u'one', u'two']])
    print(data)
    # Sample output (the numbers come from np.random.randn and will differ):
    #                     c1        c2
    # first second                    
    # cat   one     0.939040 -0.748100
    #       two    -0.497006 -1.185966
    # dog   one    -0.368161  0.050339
    #       two    -2.356879 -0.291206
    # foo   one    -0.556261  0.474297
    #       two     0.647973  0.755983
    # qux   one    -0.017722  1.364244
    #       two     1.007303  0.004337
    
    def map_level(df, dct, level=0):
        """Relabel one level of ``df``'s MultiIndex using a mapping.

        Args:
            df: DataFrame whose index is a MultiIndex; its ``index`` attribute
                is replaced with the relabelled index.
            dct: Mapping from old label to new label. Labels that are not keys
                of ``dct`` are kept unchanged.
            level: Integer position of the index level to relabel.

        Returns:
            None. ``df`` is modified by assigning it a new index.
        """
        index = df.index
        new_levels = [
            [dct.get(item, item) for item in names] if i == level else names
            for i, names in enumerate(index.levels)
        ]
        # set_levels returns a new MultiIndex (its `inplace` argument was
        # removed in pandas 2.0), so assign the result back to the frame.
        df.index = index.set_levels(new_levels)
    # Relabel 'bar' -> 'wolf' and 'baz' -> 'rabbit' in level 0 of data2's index.
    dct = {'bar':'wolf', 'baz':'rabbit'}
    map_level(data2, dct, level=0)
    print(data2)
    # Sample output:
    #                      c1        c2
    # first  second                    
    # wolf   one     0.939040 -0.748100
    #        two    -0.497006 -1.185966
    # rabbit one    -0.368161  0.050339
    #        two    -2.356879 -0.291206
    # foo    one    -0.556261  0.474297
    #        two     0.647973  0.755983
    # qux    one    -0.017722  1.364244
    #        two     1.007303  0.004337
    

提交回复
热议问题