Pandas create new date rows and forward fill column values

前端未结

关注

 2  1095

I have a dataframe like this:

id     date       value
 1  12/01/2016      5 
 1  25/02/2016      7 
 1  10/03/2017      13 
 2  02/04/2016      0 
 2  06/07/2016      1 
 2  18/04/2017      6 


                      
              相关标签:


      
      
        
          2条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  無奈伤痛        
                
              
                            
                2021-01-14 17:43
              
            
            
                                                                       

Create an index of dates running from the earliest date in each group up to today, then reindex each group with those dates using the `ffill` fill method.




# Midnight at the start of the current day; used as the common end of every
# group's date range.  ``pd.datetime`` no longer exists in pandas 2.x, so the
# timestamp is built from ``pd.Timestamp`` directly.
today = pd.Timestamp.today().normalize()

def f(df):
    """Expand one group to daily rows up to ``today``, forward-filling values.

    Parameters
    ----------
    df : DataFrame
        Rows of a single group.  Must contain a ``date`` column with unique
        datetime values; every other column is carried forward.

    Returns
    -------
    DataFrame
        One row per calendar day from the group's earliest date through
        ``today``, with the same columns, in the same order, as ``df``.
    """
    dates = pd.date_range(df.date.min(), today, name='date')
    # ``method='ffill'`` needs a monotonic index, hence the sort before reindexing.
    d = df.set_index('date').sort_index().reindex(dates, method='ffill')
    # ``reindex_axis`` was removed from pandas; ``reindex(columns=...)`` restores
    # the caller's column order after ``date`` comes back from the index.
    return d.reset_index().reindex(columns=df.columns)

# Expand each id independently and stack the pieces in group order.  The
# groups are iterated explicitly instead of going through ``groupby.apply``
# because pandas 2.2 deprecated handing the grouping column to the applied
# function, and ``f`` needs ``id`` present in every sub-frame to carry it into
# the result.  Each piece keeps its own 0-based index, as before.
pd.concat([f(group) for _, group in df.groupby('id')])




     id       date  value
0     1 2016-01-12      5
1     1 2016-01-13      5
2     1 2016-01-14      5
3     1 2016-01-15      5
4     1 2016-01-16      5
5     1 2016-01-17      5
6     1 2016-01-18      5
7     1 2016-01-19      5
8     1 2016-01-20      5
9     1 2016-01-21      5
10    1 2016-01-22      5
11    1 2016-01-23      5
12    1 2016-01-24      5
13    1 2016-01-25      5
14    1 2016-01-26      5
15    1 2016-01-27      5
16    1 2016-01-28      5
17    1 2016-01-29      5
18    1 2016-01-30      5
19    1 2016-01-31      5
20    1 2016-02-01      5
21    1 2016-02-02      5
22    1 2016-02-03      5
23    1 2016-02-04      5
24    1 2016-02-05      5
25    1 2016-02-06      5
26    1 2016-02-07      5
27    1 2016-02-08      5
28    1 2016-02-09      5
29    1 2016-02-10      5
..   ..        ...    ...
354   2 2017-03-22      1
355   2 2017-03-23      1
356   2 2017-03-24      1
357   2 2017-03-25      1
358   2 2017-03-26      1
359   2 2017-03-27      1
360   2 2017-03-28      1
361   2 2017-03-29      1
362   2 2017-03-30      1
363   2 2017-03-31      1
364   2 2017-04-01      1
365   2 2017-04-02      1
366   2 2017-04-03      1
367   2 2017-04-04      1
368   2 2017-04-05      1
369   2 2017-04-06      1
370   2 2017-04-07      1
371   2 2017-04-08      1
372   2 2017-04-09      1
373   2 2017-04-10      1
374   2 2017-04-11      1
375   2 2017-04-12      1
376   2 2017-04-13      1
377   2 2017-04-14      1
378   2 2017-04-15      1
379   2 2017-04-16      1
380   2 2017-04-17      1
381   2 2017-04-18      6
382   2 2017-04-19      6
383   2 2017-04-20      6

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  自闭症患者        
                
              
                            
                2021-01-14 18:03
              
            
            
                                                                       
Consider a groupby and merge approach:

import pandas as pd
from io import StringIO
from datetime import date

# Sample data from the question, embedded so the example is self-contained.
txt = """
id     date       value
 1  12/01/2016      5 
 1  25/02/2016      7 
 1  10/03/2017      13 
 2  02/04/2016      0 
 2  06/07/2016      1 
 2  18/04/2017      6 
"""

# Columns are separated by runs of whitespace.  The separator is a raw string
# so the backslash reaches the regex engine untouched ("\s" in a normal string
# literal is an invalid escape sequence).  Column 1 holds day-first dates.
df = pd.read_table(StringIO(txt), sep=r"\s+", parse_dates=[1], dayfirst=True)

def expand_dates(ser):
    """Return a one-column frame of consecutive days for a single group.

    ``ser`` is whatever ``groupby.apply`` passes in; only its ``'date'``
    column is read.  The result has a single ``'date'`` column running daily
    from the smallest date in ``ser`` through the current local date.
    """
    first_day = ser['date'].min()
    last_day = date.today()
    daily = pd.date_range(first_day, last_day, freq='D')
    return pd.DataFrame({'date': daily})

# Pipeline: build the daily calendar per id, turn the group key back into an
# ``id`` column, left-join the original observations onto the calendar, keep
# the three columns of interest, then carry the last seen value forward
# across the days that had no observation.
newdf = (
    df.groupby(['id'])
      .apply(expand_dates)
      .reset_index()
      .merge(df, how='left')[['id', 'date', 'value']]
      .ffill()
)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复