Tidy method to split multiple columns using tidyr::separate

后端 未结 5 998
别跟我提以往
别跟我提以往 2021-01-14 19:08

I have a data frame like so:

df <- structure(list(A = c(\"3 of 5\", \"1 of 2\", \"1 of 3\", \"1 of 3\", 
\"3 of 4\", \"2 of 7\"), B = c(\"2 of 2\", \"2 of         


        
相关标签:
5条回答
  • 2021-01-14 19:42

    Could try:

    library(tidyverse)

    # Split each column on its own, then bind the resulting two-column
    # tibbles side by side.
    names(df) %>%
      map(
        function(x)
          df %>%
          # `x` holds a column name as a string, so wrap it in all_of()
          # instead of passing it as a bare (ambiguous) selection.
          select(all_of(x)) %>%
          # After the select() there is exactly one column; address it by
          # position.
          separate(col = 1,
                   into = paste0(x, c("_attempted", "_landed")),
                   sep = " of ")
        ) %>%
      bind_cols()
    

    Output:

    # A tibble: 6 x 10
      A_attempted A_landed B_attempted B_landed C_attempted C_landed D_attempted D_landed E_attempted E_landed
      <chr>       <chr>    <chr>       <chr>    <chr>       <chr>    <chr>       <chr>    <chr>       <chr>   
    1 3           5        2           2        10          21       0           0        8           16      
    2 1           2        2           4        3           14       0           0        3           15      
    3 1           3        0           1        11          34       0           0        10          32      
    4 1           3        0           0        10          35       0           0        6           28      
    5 3           4        0           0        16          53       0           0        13          49      
    6 2           7        0           0        17          62       0           0        9           48      
    

    As OP suggests we can indeed avoid the last step with map_dfc:

    # Same idea as above, but map_dfc() column-binds the pieces directly.
    # all_of() marks `.x` as a string holding a column name rather than a
    # bare column reference.
    names(df) %>%
      map_dfc(~ df %>%
                 select(all_of(.x)) %>%
                 separate(col = 1,
                          into = paste0(.x, c("_attempted", "_landed")),
                          sep = " of ")
               )
    
    0 讨论(0)
  • 2021-01-14 19:48

    Just another tidyverse way:

    purrr::map_dfc(names(df), function(i) {
      # Work on one column per iteration so the bound result contains only
      # the split columns and needs no name-based filtering afterwards.
      df %>%
        dplyr::select(dplyr::all_of(i)) %>%
        # Split on " of " (with the spaces) so the pieces carry no stray
        # whitespace such as "3 " or " 5".
        tidyr::separate(col = 1,
                        into = paste0(i, c("_attempted", "_landed")),
                        sep = " of ",
                        remove = TRUE)
    })
    
    0 讨论(0)
  • 2021-01-14 19:49

    We can use cSplit

    library(splitstackshape)

    # Split every column of df on "of"; the pieces are returned as
    # <name>_1 and <name>_2.
    df1 <- cSplit(indt = df, splitCols = names(df), sep = "of",
                  stripWhite = FALSE)
    df1
    
    #   A_1 A_2 B_1 B_2 C_1 C_2 D_1 D_2 E_1 E_2
    #1:   3   5   2   2  10  21   0   0   8  16
    #2:   1   2   2   4   3  14   0   0   3  15
    #3:   1   3   0   1  11  34   0   0  10  32
    #4:   1   3   0   0  10  35   0   0   6  28
    #5:   3   4   0   0  16  53   0   0  13  49
    #6:   2   7   0   0  17  62   0   0   9  48
    

    We can rename it by

    # df1's columns are ordered A_1, A_2, B_1, B_2, ..., so each original name
    # must be repeated twice and paired with the recycled suffixes. Flattening
    # outer() would instead yield A_attempted, B_attempted, ..., A_landed, ...
    names(df1) <- paste(rep(names(df), each = 2), c("attempted", "landed"),
                        sep = "_")
    

    And we can always do things in base R

    # For each column: split every string on " of ", stack the pieces into a
    # matrix (one row per input string), then column-bind all the matrices
    # into a single data frame.
    do.call(
      what = cbind.data.frame,
      args = lapply(df, function(column) {
        do.call(rbind, strsplit(column, split = " of "))
      })
    )
    
    
    #  A.1 A.2 B.1 B.2 C.1 C.2 D.1 D.2 E.1 E.2
    #1   3   5   2   2  10  21   0   0   8  16
    #2   1   2   2   4   3  14   0   0   3  15
    #3   1   3   0   1  11  34   0   0  10  32
    #4   1   3   0   0  10  35   0   0   6  28
    #5   3   4   0   0  16  53   0   0  13  49
    #6   2   7   0   0  17  62   0   0   9  48
    

    We can rename the columns in similar fashion as shown above.

    0 讨论(0)
  • 2021-01-14 19:50

    Yet another tidyverse possibility

    # imap_dfc() passes each column's values and its name; the values are
    # wrapped in a one-column tibble, split, and the pieces are column-bound.
    imap_dfc(df, function(values, col_name) {
      separate(
        tibble(values),
        col = 1,
        into = paste0(col_name, c("_attempted", "_landed")),
        sep = " of ",
        convert = TRUE
      )
    })
    
    # # A tibble: 6 x 10
    #   A_attempted A_landed B_attempted B_landed C_attempted C_landed D_attempted D_landed E_attempted E_landed
    #         <int>    <int>       <int>    <int>       <int>    <int>       <int>    <int>       <int>    <int>
    # 1           3        5           2        2          10       21           0        0           8       16
    # 2           1        2           2        4           3       14           0        0           3       15
    # 3           1        3           0        1          11       34           0        0          10       32
    # 4           1        3           0        0          10       35           0        0           6       28
    # 5           3        4           0        0          16       53           0        0          13       49
    # 6           2        7           0        0          17       62           0        0           9       48
    
    0 讨论(0)
  • 2021-01-14 19:51

    One approach:

    library(tidyverse)

    # gather()/spread() are superseded; pivot_longer()/pivot_wider() are the
    # current equivalents.
    df %>%
      rownames_to_column("id") %>%
      # Long format: one row per (id, original column).
      pivot_longer(-id, names_to = "group", values_to = "value") %>%
      separate(value, into = c("attempted", "landed"), sep = " of ") %>%
      # Long again: one row per (id, original column, attempted/landed).
      pivot_longer(c(attempted, landed),
                   names_to = "key", values_to = "value") %>%
      unite(new, group, key, sep = "_") %>%
      # Back to wide: one column per <group>_<key> combination.
      pivot_wider(names_from = new, values_from = value)
    
    # A tibble: 6 x 11
      id    A_attempted A_landed B_attempted B_landed C_attempted C_landed D_attempted D_landed E_attempted E_landed
      <chr> <chr>       <chr>    <chr>       <chr>    <chr>       <chr>    <chr>       <chr>    <chr>       <chr>   
    1 1     3           5        2           2        10          21       0           0        8           16      
    2 2     1           2        2           4        3           14       0           0        3           15      
    3 3     1           3        0           1        11          34       0           0        10          32      
    4 4     1           3        0           0        10          35       0           0        6           28      
    5 5     3           4        0           0        16          53       0           0        13          49      
    6 6     2           7        0           0        17          62       0           0        9           48  
    
    0 讨论(0)
提交回复
热议问题