How to find the first element of a group that fulfills a condition

后端未结

关注

 3  471

structure(list(group = c(17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 
19L, 19L, 20L, 20L, 20L, 21L, 21L, 22L, 23L, 24L, 25L, 25L, 25L, 
26L, 27L, 27L, 27L, 28L), var = c


                      
              相关标签:


      
      
        
          3条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  旧巷少年郎        
                
              
                            
                2021-01-20 12:38
              
            
            
                                                                       
library(dplyr)

# Remove any existing `first` column so it is recomputed below.
df[["first"]] <- NULL

df %>%
  group_by(group) %>%
  mutate(
    # Within each group, compare every row number with the smallest row
    # number whose `var` equals 1; the logical result is stored as 0/1.
    first = as.numeric(
      row_number() == min(row_number()[var == 1])
    )
  ) %>%
  ungroup()

# # A tibble: 26 x 3
#   group   var first
#   <int> <int> <dbl>
# 1    17    74     0
# 2    17    49     0
# 3    17     1     1
# 4    18    74     0
# 5    18     1     1
# 6    18    49     0
# 7    18    61     0
# 8    19    49     0
# 9    19     1     1
# 10   19     5     0
# # ... with 16 more rows


The idea is to flag the minimum row number where var = 1, within each group.

This will emit some warnings, because some groups have no var = 1 cases: there, min() is called on an empty vector and returns Inf, so every row in such a group is flagged 0.

Another option would be this:

library(dplyr)

# Remove any existing `first` column so it is recomputed below.
df$first <- NULL

# Create a row id so the flagged rows can be matched back unambiguously.
df$id <- seq_along(df$group)

# One row per group: the first row (in original order) where var == 1.
first_hits <- df %>%
  filter(var == 1) %>%                      # keep cases where var == 1
  distinct(group, .keep_all = TRUE) %>%     # keep the first such row per group
  mutate(first = 1)                         # flag it

# Left-join from the original data so its row order is preserved
# (a right_join() onto `df` does not keep `df`'s order in dplyr >= 1.0).
df %>%
  left_join(first_hits, by = c("id", "group", "var")) %>%
  mutate(first = coalesce(first, 0)) %>%    # unflagged rows: NA -> 0
  select(-id)                               # remove row id

# # A tibble: 26 x 3
#   group   var first
#   <int> <int> <dbl>
# 1    17    74     0
# 2    17    49     0
# 3    17     1     1
# 4    18    74     0
# 5    18     1     1
# 6    18    49     0
# 7    18    61     0
# 8    19    49     0
# 9    19     1     1
#10    19     5     0
# # ... with 16 more rows

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  故里飘歌        
                
              
                            
                2021-01-20 12:49
              
            
            
                                                                       
We can use the expression shown for first:

DF %>%
  group_by(group) %>%
  mutate(
    # `var == 1` marks candidate rows; `!duplicated()` keeps only the first
    # occurrence of each logical value, so the product is 1 solely at the
    # first row of the group where var equals 1.
    first = (var == 1) * (!duplicated(var == 1))
  ) %>%
  ungroup()


giving:

# A tibble: 26 x 3
   group   var first
   <int> <int> <int>
 1    17    74     0
 2    17    49     0
 3    17     1     1
 4    18    74     0
 5    18     1     1
 6    18    49     0
 7    18    61     0
 8    19    49     0
 9    19     1     1
10    19     5     0
# ... with 16 more rows

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  慢半拍i        
                
              
                            
                2021-01-20 13:03
              
            
            
                                                                       
For ungrouped data, one solution is

#' Flag the first element of a vector that equals a given value
#'
#' @param x A vector to search.
#' @param value The value to look for (compared with `==`).
#' @return A logical vector the same length as `x`, `TRUE` only at the first
#'   position where `x == value` and `FALSE` everywhere else.
first_equal_to <- function(x, value) {
  # Treat missing values as non-matches; otherwise a single NA would turn
  # the running count (and every later result) into NA.
  hit <- !is.na(x) & x == value
  hit & cumsum(hit) == 1
}


so

# Apply first_equal_to() within each group, then drop the grouping so that
# later verbs operate on the whole table rather than per group.
tbl %>%
  group_by(group) %>%
  mutate(first = first_equal_to(var, 1)) %>%
  ungroup()


(it seems appropriate to keep this as a logical vector, since that is what the column represents).

Another implementation is

#' Flag the first element of a vector that matches a given value
#'
#' @param x A vector to search.
#' @param value The value to look up in `x` via `match()`.
#' @return A logical vector the same length as `x`, `TRUE` at the position of
#'   the first match and `FALSE` elsewhere (all `FALSE` when nothing matches).
first_equal_to2 <- function(x, value) {
  # Index 0 selects nothing, so a missing match leaves the vector all FALSE.
  first_pos <- match(value, x, nomatch = 0L)
  replace(logical(length(x)), first_pos, TRUE)
}

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复