Tidy method to split multiple columns using tidyr::separate

后端未结

关注

 5  1009

I have a data frame like so:

df <- structure(list(A = c(\"3 of 5\", \"1 of 2\", \"1 of 3\", \"1 of 3\", 
\"3 of 4\", \"2 of 7\"), B = c(\"2 of 2\", \"2 of


                      
              相关标签:


      
      
        
          5条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  傲寒        
                
              
                            
                2021-01-14 19:42
              
            
            
                                                                       
Could try:

library(tidyverse)

# For every column of `df`, isolate that column and split it on " of " into
# "<name>_attempted" and "<name>_landed", then bind the pieces side by side.
names(df) %>%
  map(function(col_name) {
    df %>%
      # all_of() makes it explicit that `col_name` holds a column name as a
      # string, so a column literally called `col_name` can never be picked up.
      select(all_of(col_name)) %>%
      separate(
        col = 1, # the only column left after select()
        into = paste0(col_name, c("_attempted", "_landed")),
        sep = " of "
      )
  }) %>%
  bind_cols()


Output:

# A tibble: 6 x 10
  A_attempted A_landed B_attempted B_landed C_attempted C_landed D_attempted D_landed E_attempted E_landed
  <chr>       <chr>    <chr>       <chr>    <chr>       <chr>    <chr>       <chr>    <chr>       <chr>   
1 3           5        2           2        10          21       0           0        8           16      
2 1           2        2           4        3           14       0           0        3           15      
3 1           3        0           1        11          34       0           0        10          32      
4 1           3        0           0        10          35       0           0        6           28      
5 3           4        0           0        16          53       0           0        13          49      
6 2           7        0           0        17          62       0           0        9           48      


As OP suggests we can indeed avoid the last step with map_dfc:

# Same idea as mapping and then binding columns, but map_dfc() column-binds
# the per-column results itself, so no separate bind_cols() step is needed.
names(df) %>%
  map_dfc(function(col_name) {
    df %>%
      # all_of() marks `col_name` as a string holding a column name, which
      # avoids the ambiguous bare-variable selection.
      select(all_of(col_name)) %>%
      separate(
        col = 1, # the only column left after select()
        into = paste0(col_name, c("_attempted", "_landed")),
        sep = " of "
      )
  })

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  伪装坚强ぢ        
                
              
                            
                2021-01-14 19:48
              
            
            
                                                                       
Just another tidyverse way: 

# Split every column of `df` on " of " into "<name>_attempted" and
# "<name>_landed" and column-bind the results.
purrr::map_dfc(names(df), function(i) {
  df %>%
    # Work on one column at a time so the result contains only the new
    # columns; no name-based filtering is needed afterwards.
    dplyr::select(dplyr::all_of(i)) %>%
    tidyr::separate(
      col = 1, # the only column left after select()
      # Include the surrounding spaces in the separator so the pieces are
      # "3" and "5" rather than "3 " and " 5".
      sep = " of ",
      remove = TRUE,
      into = paste0(i, c("_attempted", "_landed"))
    )
})

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  终归单人心        
                
              
                            
                2021-01-14 19:49
              
            
            
                                                                       
We can use cSplit

library(splitstackshape)

# Split every column of `df` on the literal "of"; whitespace stripping is
# switched off, exactly as in the positional call.
df1 <- cSplit(
  indt = df,
  splitCols = names(df),
  sep = "of",
  stripWhite = FALSE
)
df1

#   A_1 A_2 B_1 B_2 C_1 C_2 D_1 D_2 E_1 E_2
#1:   3   5   2   2  10  21   0   0   8  16
#2:   1   2   2   4   3  14   0   0   3  15
#3:   1   3   0   1  11  34   0   0  10  32
#4:   1   3   0   0  10  35   0   0   6  28
#5:   3   4   0   0  16  53   0   0  13  49
#6:   2   7   0   0  17  62   0   0   9  48


We can then rename the resulting columns with:

# Build the names in the same order as the split columns (A_1, A_2, B_1, ...):
# each original name is repeated twice and paired with the recycled suffixes,
# giving A_attempted, A_landed, B_attempted, B_landed, ...
# Flattening outer() directly would go column-major (all "_attempted" names
# first) and mislabel the columns.
names(df1) <- paste(
  rep(names(df), each = 2),
  c("attempted", "landed"),
  sep = "_"
)




And we can always do things in base R

# Base R: split each column on " of ", stack the pieces row-wise into a
# matrix per column, then bind all matrices into one data frame.
do.call(
  cbind.data.frame,
  lapply(df, function(column) {
    pieces <- strsplit(column, " of ")
    do.call(rbind, pieces)
  })
)


#  A.1 A.2 B.1 B.2 C.1 C.2 D.1 D.2 E.1 E.2
#1   3   5   2   2  10  21   0   0   8  16
#2   1   2   2   4   3  14   0   0   3  15
#3   1   3   0   1  11  34   0   0  10  32
#4   1   3   0   0  10  35   0   0   6  28
#5   3   4   0   0  16  53   0   0  13  49
#6   2   7   0   0  17  62   0   0   9  48


We can rename the columns in a similar fashion, as shown above.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  悲哀的现实        
                
              
                            
                2021-01-14 19:50
              
            
            
                                                                       
Yet another tidyverse possibility

# Walk over the columns together with their names: wrap each column in a
# one-column tibble, split it on " of " and let convert = TRUE turn the
# pieces into typed columns; imap_dfc() binds the results column-wise.
imap_dfc(df, function(values, col_name) {
  separate(
    tibble(values),
    col = 1,
    into = paste0(col_name, c("_attempted", "_landed")),
    sep = " of ",
    convert = TRUE
  )
})

# # A tibble: 6 x 10
#   A_attempted A_landed B_attempted B_landed C_attempted C_landed D_attempted D_landed E_attempted E_landed
#         <int>    <int>       <int>    <int>       <int>    <int>       <int>    <int>       <int>    <int>
# 1           3        5           2        2          10       21           0        0           8       16
# 2           1        2           2        4           3       14           0        0           3       15
# 3           1        3           0        1          11       34           0        0          10       32
# 4           1        3           0        0          10       35           0        0           6       28
# 5           3        4           0        0          16       53           0        0          13       49
# 6           2        7           0        0          17       62           0        0           9       48

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  伪装坚强ぢ        
                
              
                            
                2021-01-14 19:51
              
            
            
                                                                       
One approach:

library(tidyverse)

# Long-format approach: reshape to one row per (id, group), split the value,
# reshape the two pieces to long form again, build the final column names and
# spread back to wide. Uses pivot_longer()/pivot_wider() in place of the
# superseded gather()/spread().
df %>%
  rownames_to_column("id") %>%
  pivot_longer(-id, names_to = "group", values_to = "value") %>%
  separate(value, into = c("attempted", "landed"), sep = " of ") %>%
  pivot_longer(c(attempted, landed), names_to = "key", values_to = "value") %>%
  unite(new, group, key, sep = "_") %>%
  # Columns come out in order of first appearance:
  # A_attempted, A_landed, B_attempted, ...
  pivot_wider(names_from = new, values_from = value)

# A tibble: 6 x 11
  id    A_attempted A_landed B_attempted B_landed C_attempted C_landed D_attempted D_landed E_attempted E_landed
  <chr> <chr>       <chr>    <chr>       <chr>    <chr>       <chr>    <chr>       <chr>    <chr>       <chr>   
1 1     3           5        2           2        10          21       0           0        8           16      
2 2     1           2        2           4        3           14       0           0        3           15      
3 3     1           3        0           1        11          34       0           0        10          32      
4 4     1           3        0           0        10          35       0           0        6           28      
5 5     3           4        0           0        16          53       0           0        13          49      
6 6     2           7        0           0        17          62       0           0        9           48  

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复