Reformat pandas DataFrame

前端 未结 1 1503
我在风中等你
我在风中等你 2021-01-17 03:07

I have a pandas.DataFrame with the following data:

 country    branch      Name           salary    mobile no     emailid
    x             


        
相关标签:
1条回答
  • You can replace the 'Null' strings with NaN, then use groupby with agg (count ignores NaN values), and finally call reset_index:

    print data_df
      country branch Name  salary   mobile no    emailid position
    0       x      a   aa  250000        Null       Null  unknown
    1       x      b   bb  350000  8976646410  xx@xx.com  unknown
    2       y      c   cc  450000  8777945411  yy@yy.com  unknown
    3       y      d   dd  589630        Null       Null  unknown
    
    data_df = data_df.replace('Null', np.nan)
    print data_df
      country branch Name  salary   mobile no    emailid position
    0       x      a   aa  250000         NaN        NaN  unknown
    1       x      b   bb  350000  8976646410  xx@xx.com  unknown
    2       y      c   cc  450000  8777945411  yy@yy.com  unknown
    3       y      d   dd  589630         NaN        NaN  unknown
    
    df = data_df.groupby(['country', 'branch']).agg({'Name': 'count',
                                                     'mobile no':'count', 
                                                     'emailid': 'count',
                                                     'position': 'count'})
    
    print df.reset_index()
      country branch  emailid  position  Name  mobile no
    0       x      a        0         1     1          0
    1       x      b        1         1     1          1
    2       y      c        1         1     1          1
    3       y      d        0         1     1          0
    

    EDIT:

    If you need to count positions by category, create a separate column for each category, then use groupby with count, drop the salary column, and finally call reset_index:

    print data_df
      country branch Name  salary   mobile no    emailid
    0       x      a   aa  250000        Null       Null
    1       x      a   aa   20000        Null       Null
    2       x      b   bb  350000  8976646410  xx@xx.com
    3       y      c   cc   45000  8777945411  yy@yy.com
    4       y      d   dd  589630        Null       Null
    
    normal = data_df['salary'] <= 20000
    experienced = (data_df['salary'] > 20000) & (data_df['salary'] <= 50000)
    unknown = data_df['salary'] > 50000
    
    data_df.loc[normal, 'position_normal'] = 'normal employee'
    data_df.loc[experienced,'position_experienced'] = 'experienced employee'
    data_df.loc[unknown,'position_unknown'] = 'unknown employee'
    print data_df
      country branch Name  salary   mobile no    emailid  position_normal  \
    0       x      a   aa  250000        Null       Null              NaN   
    1       x      a   aa   20000        Null       Null  normal employee   
    2       x      b   bb  350000  8976646410  xx@xx.com              NaN   
    3       y      c   cc   45000  8777945411  yy@yy.com              NaN   
    4       y      d   dd  589630        Null       Null              NaN   
    
       position_experienced  position_unknown  
    0                   NaN  unknown employee  
    1                   NaN               NaN  
    2                   NaN  unknown employee  
    3  experienced employee               NaN  
    4                   NaN  unknown employee 
    
    # replace 'Null' strings with NaN so that count() skips them
    data_df = data_df.replace('Null', np.nan)
    df = data_df.groupby(['country', 'branch']).count()
    #remove column salary
    df = df.drop('salary', axis=1)
    
    df = df.reset_index()
    print df
      country branch  Name  mobile no  emailid  position_normal  \
    0       x      a     2          0        0                1   
    1       x      b     1          1        1                0   
    2       y      c     1          1        1                0   
    3       y      d     1          0        0                0   
    
       position_experienced  position_unknown  
    0                     0                 1  
    1                     0                 1  
    2                     1                 0  
    3                     0                 1  
    
    0 讨论(0)
提交回复
热议问题