Split columns in dataframe with NA

前端 未结 5 2524
心在旅途
心在旅途 2021-02-20 05:16

I have a df like this:

# Example data: one column of pipe-delimited tokens.
df <- data.frame(FOO = c('A|B|C', 'A|B', 'B|C', 'A', 'C'))

> df
    FOO
1 A|B|C
2   A|B
3   B|C
4     A
5     C


        
相关标签:
5条回答
  • 2021-02-20 05:21

    Use unique and strsplit to find all unique values (A, B and C in this case). Use grep to search for the unique values, and return the values when there's a match or character(0) otherwise. cbind the resulting characters. Use apply and ifelse to replace character(0) with NA.

    # Every distinct token that occurs in the pipe-delimited column (here A, B, C).
    # as.character() guards against FOO being a factor, which strsplit() rejects.
    foo <- as.character(df$FOO)
    vals <- unique(unlist(strsplit(foo, "|", fixed = TRUE)))
    
    # One column per token. Each cell's own "A|B|C" string doubles as a regex
    # alternation; anchoring it makes grep() match whole tokens only, so the
    # call returns the token when the cell contains it and character(0) otherwise.
    out <- do.call(cbind, lapply(vals, function(i) {
      as.character(lapply(foo, function(x) {
        grep(paste0("^(", x, ")$"), i, value = TRUE)
      }))
    }))
    
    # as.character() rendered the empty matches as the literal string
    # "character(0)"; turn those cells into proper missing values.
    apply(out, 2, function(x) ifelse(x == "character(0)", NA, x))
    
         [,1] [,2] [,3]
    [1,] "A"  "B"  "C" 
    [2,] "A"  "B"  NA  
    [3,] NA   "B"  "C" 
    [4,] "A"  NA   NA  
    [5,] NA   NA   "C" 
    
    0 讨论(0)
  • 2021-02-20 05:23

    Simply do:

    # Split every pipe-delimited entry into its tokens ("\\|" is a literal pipe).
    splt <- strsplit(as.character(df$FOO), "\\|")
    # Sorted set of all tokens seen anywhere; each one becomes an output column.
    all_val <- sort(unique(unlist(splt)))
    # One row per entry: keep the tokens the entry contains, NA for the rest.
    # rbind() over a list (rather than t(sapply())) keeps one row per entry even
    # when there is only a single distinct token.
    do.call(rbind, lapply(splt, function(x) {
      row <- all_val
      row[!(row %in% x)] <- NA
      row
    }))
    
    
    #     [,1] [,2] [,3]
    #[1,] "A"  "B"  "C" 
    #[2,] "A"  "B"  NA  
    #[3,] NA   "B"  "C" 
    #[4,] "A"  NA   NA  
    #[5,] NA   NA   "C" 
    

    data:

    # Sample input: a single column of pipe-delimited tokens.
    df <- data.frame(
      FOO = c("A|B|C", "A|B", "B|C", "A", "C")
    )
    

    Please note:

    My version uses only base R (no libraries needed) and is general.

    It would also work with:

    # Same input with an extra row that introduces new tokens (D and F).
    df <- data.frame(
      FOO = c("A|B|C", "A|B", "B|C", "A", "C", "B|D|F")
    )
    
    0 讨论(0)
  • 2021-02-20 05:23

    You can try a tidyverse approach as well:

    library(tidyverse)
    df %>%
      # Tag every original row so its tokens can be regrouped after splitting.
      rownames_to_column(var = "rowname") %>%
      # One row per token; "[|]" matches a literal pipe.
      separate_rows(FOO, sep = "[|]") %>%
      # Label each distinct token X1, X2, ... following the factor level order.
      mutate(L = factor(FOO, labels = paste0("X", seq_len(n_distinct(FOO))))) %>%
      # Back to wide format: one column per label, NA where a token is absent.
      spread(key = L, value = FOO) %>%
      # Drop the helper row identifier.
      select(-rowname)
        X1   X2   X3
    1    A    B    C
    2    A    B <NA>
    3 <NA>    B    C
    4    A <NA> <NA>
    5 <NA> <NA>    C
    

    It also works in general, e.g. with df <- data.frame(FOO = c('A|B|C', 'A|B', 'B|C', 'A', 'C', 'B|D|F')). In addition, you can set the level order yourself (e.g. B > C > A) by passing levels = c("B", "C", "A") to the factor function in the mutate step.

    0 讨论(0)
  • 2021-02-20 05:29

    Overlooked that OP asked for a base R solution. Please try @AndreElrico's, @r.user.05apr's or @milan's solutions.


    This can be done with cSplit_e from the splitstackshape package:

    library(splitstackshape)
    # Expand the pipe-delimited FOO column into one column per distinct value
    # (FOO_A, FOO_B, FOO_C in the output shown below).
    cSplit_e(
      data = df,
      split.col = "FOO",
      sep = "|",
      mode = "value",        # cells carry the value itself (see output below)
      type = "character",
      fill = " ",            # absent values show up as a blank, not NA
      drop = TRUE            # the original FOO column is not in the output
    )
    #  FOO_A FOO_B FOO_C
    #1     A     B     C
    #2     A     B      
    #3           B     C
    #4     A            
    #5                 C
    

    It also works for the following df (see OP's comment above).

    # Extended example; the outer parentheses print df1 right after assigning it.
    (df1 <- data.frame(
      FOO = c("A|B|C", "A|B", "B|C", "A", "C", "B|D|F")
    ))
    #    FOO
    #1 A|B|C
    #2   A|B
    #3   B|C
    #4     A
    #5     C
    #6 B|D|F
    
    # Same call as before on the extended data, with every argument named.
    cSplit_e(
      data = df1,
      split.col = "FOO",
      sep = "|",
      mode = "value",
      type = "character",
      drop = TRUE,
      fill = " "
    )
    #  FOO_A FOO_B FOO_C FOO_D FOO_F
    #1     A     B     C            
    #2     A     B                  
    #3           B     C            
    #4     A                        
    #5                 C            
    #6           B           D     F
    
    0 讨论(0)
  • 2021-02-20 05:43

    In base R:

    df <- data.frame(FOO = c('A|B|C', 'A|B', 'B|C', 'A', 'C'))
    
    # Split each entry on the literal pipe; as.character() covers factor input.
    dummy <- strsplit(as.character(df$FOO), "[|]")
    # Sorted distinct tokens: one output column per token.
    values <- sort(unique(unlist(dummy)))
    
    # One row per entry: the token where the entry contains it, a blank otherwise.
    # Pure base R, so no reshape2::dcast() is needed.
    want <- do.call(rbind, lapply(dummy, function(x) {
      ifelse(values %in% x, values, " ")
    }))
    want <- as.data.frame(want, stringsAsFactors = FALSE)
    names(want) <- paste0("X", seq_along(values))
    want
    # X1 X2 X3
    #1  A  B  C
    #2  A  B   
    #3     B  C
    #4  A      
    #5        C
    
    0 讨论(0)
提交回复
热议问题