Replace value with a condition from 2 columns using pandas

前端未结

关注

 1  502

I have a pandas DataFrame, as shown below:

df1_new = pd.DataFrame({\'person_id\': [1, 2, 3, 4, 5],
                        \'start_date\': [\'07/23/2377\', \


                      
              相关标签:


      
      
        
          1条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  北荒        
                
              
                            
                2021-01-26 10:56
              
            
            
                                                                       
Here is an example. See the comments; I think you'll understand the basic approach.

from copy import deepcopy
from datetime import datetime
import pandas as pd
from dateutil.relativedelta import relativedelta


# Sample data: one row per person. All date columns start out as plain
# strings; a year of 2000 in the end columns marks the rows to be replaced.
df = pd.DataFrame(
    {
        'person_id': [1, 2, 3, 4, 5],
        'start_date': [
            '07/23/2377',
            '05/29/2477',
            '02/03/2177',
            '7/27/2277',
            '7/13/2077',
        ],
        'start_datetime': [
            '07/23/2377  12:00:00',
            '05/29/2477  04:00:00',
            '02/03/2177  02:00:00',
            '7/27/2277  05:00:00',
            '7/13/2077  12:00:00',
        ],
        'end_date': [
            '07/25/2377',
            '06/09/2477',
            '02/05/2177',
            '01/01/2000',
            '01/01/2000',
        ],
        'end_datetime': [
            '07/25/2377 02:00:00',
            '06/09/2477 04:00:00',
            '02/05/2177 01:00:00',
            '01/01/2000 00:00:00',
            '01/01/2000 00:00:00',
        ],
        'type': ['IP', 'IP', 'OP', 'OP', 'IP'],
    }
)


def calculate_days(x):
    """Parse a row's date fields and replace year-2000 end dates.

    The four date fields are converted from strings to ``datetime`` objects.
    If either ``end_date`` or ``end_datetime`` falls in the year 2000, both
    end fields are recomputed from the start fields according to ``type``:

    * ``'IP'``: end = start + 2 days
    * ``'OP'``: end = start

    Rows with any other ``type`` keep their parsed (year-2000) end values.

    Args:
        x: A row with the keys ``start_date``, ``start_datetime``,
            ``end_date``, ``end_datetime`` and ``type``. Typically a
            ``pandas.Series`` supplied by ``DataFrame.apply(..., axis=1)``;
            a plain ``dict`` works as well. Dates use ``%m/%d/%Y`` and
            datetimes use ``%m/%d/%Y %H:%M:%S``.

    Returns:
        A copy of ``x`` with the date fields parsed and, where applicable,
        the end fields replaced. The input itself is left unmodified.

    Raises:
        ValueError: If a date string does not match its expected format.
    """
    # Function-scope import keeps this block self-contained; a whole-day
    # offset needs nothing beyond the standard library.
    from datetime import timedelta

    # Work on a copy: pandas does not support functions passed to
    # DataFrame.apply mutating the object they receive.
    row = x.copy()

    date_format = '%m/%d/%Y'
    datetime_format = '%m/%d/%Y %H:%M:%S'
    row['end_date'] = datetime.strptime(row['end_date'], date_format)
    row['start_date'] = datetime.strptime(row['start_date'], date_format)
    row['end_datetime'] = datetime.strptime(row['end_datetime'], datetime_format)
    row['start_datetime'] = datetime.strptime(row['start_datetime'], datetime_format)

    # Only rows whose end falls in the year 2000 are rewritten. Compare the
    # .year of both fields: a datetime is never equal to the integer 2000.
    if row['end_date'].year != 2000 and row['end_datetime'].year != 2000:
        return row

    if row['type'] == 'IP':
        stay = timedelta(days=2)
        row['end_date'] = row['start_date'] + stay
        row['end_datetime'] = row['start_datetime'] + stay
    elif row['type'] == 'OP':
        # datetime objects are immutable, so sharing the reference is safe.
        row['end_date'] = row['start_date']
        row['end_datetime'] = row['start_datetime']
    return row


# Run calculate_days once per row; axis="columns" (same as axis=1) passes
# each row to the function and collects the returned rows into a new frame.
df = df.apply(calculate_days, axis="columns")
print(df.head(5))
# Expected output:
#   person_id           start_date  ...         end_datetime type
# 0          1  2377-07-23 00:00:00  ...  2377-07-25 02:00:00   IP
# 1          2  2477-05-29 00:00:00  ...  2477-06-09 04:00:00   IP
# 2          3  2177-02-03 00:00:00  ...  2177-02-05 01:00:00   OP
# 3          4  2277-07-27 00:00:00  ...  2277-07-27 05:00:00   OP
# 4          5  2077-07-13 00:00:00  ...  2077-07-15 12:00:00   IP
# [5 rows x 6 columns]


Hope this helps.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复