How to find the longest duplicate sequence in a tibble column?

前端未结

关注

 2  2005

I updated my question because I need one more column in my output tibble.

I have the following tibble:

library(tibble)

my_tbl <- tribble(
  ~year, ~ev


                      
              相关标签:


      
      
        
          2条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  故里飘歌        
                
              
                            
                2021-01-22 14:55
              
            
            
                                                                       
One option to get the longest run of adjacent identical elements after grouping by 'event_id' is with rle. It returns a list of 'lengths' and the corresponding 'values'. By creating a logical expression that compares 'lengths' with the maximum of 'lengths', we can subset the 'values' as well as count how many runs have that length.

library(dplyr)
library(purrr)
# For each event, run-length encode the winner sequence and report the longest
# streak, how many runs reach that streak length, and which winners own them.
my_tbl %>%
  group_by(event_id) %>%
  summarise(
    # Keep the whole rle object in a list-column so the later steps can reuse it.
    rl = list(rle(winner_id)),
    # Length of the longest run of identical adjacent winner ids.
    most_wins_in_a_row = map_int(rl, function(runs) max(runs$lengths)),
    # Number of runs whose length equals that maximum.
    number_of_winners = map2_int(
      rl, most_wins_in_a_row,
      function(runs, longest) sum(runs$lengths == longest)
    ),
    # Winner ids of those maximal runs, joined with ", " by toString().
    winners = map2_chr(
      rl, most_wins_in_a_row,
      function(runs, longest) toString(runs$values[runs$lengths == longest])
    )
  ) %>%
  # The helper list-column is not part of the requested output.
  select(-rl)
# A tibble: 4 x 4
#  event_id most_wins_in_a_row number_of_winners winners                    
#  <chr>                 <int>             <int> <chr>                      
#1 A                         3                 1 4322                       
#2 B                         2                 1 7893                       
#3 C                         2                 2 5556, 2391                 
#4 D                         1                 5 4219, 7623, 8003, 2851, 418

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  一生所求        
                
              
                            
                2021-01-22 15:08
              
            
            
                                                                       
One dplyr option could be:

# Label each run of identical adjacent winner ids, count rows per
# (event, run), then summarise the longest run(s) for every event.
my_tbl %>%
  # The run id increases by one every time winner_id differs from the previous row.
  mutate(
    rleid = cumsum(winner_id != lag(winner_id, default = first(winner_id)))
  ) %>%
  # n = number of rows in each (event_id, rleid) combination, i.e. the run length.
  add_count(event_id, rleid) %>%
  group_by(event_id) %>%
  # Flag the rows that belong to a run of maximal length within the event.
  mutate(in_longest_run = n == max(n)) %>%
  summarise(
    most_wins_in_a_row = max(n),
    number_of_winners = n_distinct(winner_id[in_longest_run]),
    winners = paste0(unique(winner_id[in_longest_run]), collapse = ",")
  )

  event_id most_wins_in_a_row number_of_winners winners                
  <chr>                 <int>             <int> <chr>                  
1 A                         3                 1 4322                   
2 B                         2                 1 7893                   
3 C                         2                 2 5556,2391              
4 D                         1                 5 4219,7623,8003,2851,418

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复