R: Create multiple new columns based upon other columns

后端未结

关注

 2  353

Let\'s say I have a data-frame that looks like this

dd <- read.table(header = TRUE, text = \"ID week1_t week1_a  week2_t week2_a
  1      12      22       17


                      
              相关标签:


      
      
        
          2条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  耶瑟儿～        
                
              
                            
                2021-01-22 11:25
              
            
            
                                                                       
From a "tidy data" perspective, your problem is that you're encoding (multiple!) pieces of data in your column names: the week number and whatever the letter stands for. I would convert to a long format where week is a column, define d = a - t, and (if necessary) convert back to wide format. But probably I'd keep it in the long format because if there are any other operations you want to do they'll probably be easier to implement on the long data (more manipulation, modeling, plotting...).

library(tidyr)
library(dplyr)

long = dd %>% 
    mutate(real_id = 1:n()) %>%
    gather(key = key, value = value, starts_with("week")) %>%
    separate(key, into = c("week", "letter")) %>% 
    spread(key = letter, value = value) %>%
    mutate(d = a - t)

head(long)
#   ID real_id  week  a  t   d
# 1  1       1 week1 22 12  10
# 2  1       1 week2  4 17 -13
# 3  1       2 week1 32 15  17
# 4  1       2 week2  5 18 -13
# 5  1       3 week1 12 24 -12
# 6  1       3 week2  6 29 -23

wide = gather(long, key = letter, value = value, a, t, d) %>%
    mutate(key = paste(week, letter, sep = "_")) %>%
    select(-week, -letter) %>%
    spread(key = key, value = value)

wide
#   ID real_id week1_a week1_d week1_t week2_a week2_d week2_t
# 1  1       1      22      10      12       4     -13      17
# 2  1       2      32      17      15       5     -13      18
# 3  1       3      12     -12      24       6     -23      29
# 4  2       4      11     -34      45       8     -11      19
# 5  2       5      33      10      23      10     -10      20

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  再見小時候        
                
              
                            
                2021-01-22 11:25
              
            
            
                                                                       
We split the 'week' columns (dd[-1]) by the names of the dataset after removing the suffix with sub into a list, get the difference between the two columns and assign the list elements to create new columns in 'dd'.

lst <-  lapply(split.default(dd[-1], 
           sub("_.*", "", names(dd)[-1])), function(x) x[2]-x[1])
dd[paste0("week_", seq_along(lst), "d")] <- lapply(lst, unlist, use.names=FALSE)
dd
#    ID week1_t week1_a week2_t week2_a week1_d week2_d
#1  1      12      22      17       4      10     -13
#2  1      15      32      18       5      17     -13
#3  1      24      12      29       6     -12     -23
#4  2      45      11      19       8     -34     -11
#5  2      23      33      20      10      10     -10




If the columns are alternating i.e. 'week1_t' followed by 'week1_a', then 'week2_t', followed by 'week2_a', etc.

Un1 <- unique(sub("_.*", "", names(dd)[-1]))
i1 <-  c(TRUE, FALSE)
dd[paste0(Un1, "_d")] <-  dd[-1][!i1]- dd[-1][i1]
dd
#  ID week1_t week1_a week2_t week2_a week1_d week2_d
#1  1      12      22      17       4      10     -13
#2  1      15      32      18       5      17     -13
#3  1      24      12      29       6     -12     -23
#4  2      45      11      19       8     -34     -11
#5  2      23      33      20      10      10     -10

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复