How to reshape tabular data to one row per group

后端 未结 5 1293
轻奢々
轻奢々 2020-12-21 23:57

I am an R (and coding) novice, and I am looking for a way to reconfigure Table A, shown below, into Table B.

Table A:

type   x1  x2  x3  
A      4   6            


        
相关标签:
5条回答
  • 2020-12-22 00:17
    # Example input: several rows per `type`, with an unequal number of rows
    # per group (three for A, two for B).
    a <- data.frame(
      type = c("A", "A", "A", "B", "B"),
      x1 = c(4, 7, 9, 1, 2),
      x2 = c(6, 4, 6, 3, 7),
      x3 = c(9, 1, 2, 8, 9)
    )

    # Number the rows within each type. This index becomes the suffix of the
    # wide column names (x1.1, x1.2, ...).
    a$timevar <- ave(seq_along(a$type), a$type, FUN = seq_along)

    # reshape() fills type/index combinations that do not exist with NA, so the
    # shorter groups need no manual padding. Padding by assigning
    # c(i, NA, NA, NA) would turn x1..x3 into character columns, because that
    # vector is character as a whole.
    b <- reshape(a, timevar = "timevar", idvar = "type", direction = "wide")
    b

    #   type x1.1 x2.1 x3.1 x1.2 x2.2 x3.2 x1.3 x2.3 x3.3
    # 1    A    4    6    9    7    4    1    9    6    2
    # 4    B    1    3    8    2    7    9   NA   NA   NA
    
    0 讨论(0)
  • 2020-12-22 00:17

    Here is an attempt. The solution is not very concise; it is only meant as a hint, and I think a lot of it can be improved.

    But in the end we have to introduce NAs here :(

    # Example input as whitespace-separated text.
    zz <- "type   x1  x2  x3  
    A      4   6   9  
    A      7   4   1  
    A      9   6   2   
    B      1   3   8  
    B      2   7   9"

    dA <- read.table(text = zz, header = TRUE)

    # One vector per type: that type's rows flattened row by row.
    # lapply() always returns a list. sapply() would simplify to a matrix when
    # every type has the same number of rows, and the padding step below would
    # then run over single cells instead of over groups.
    tmp <- lapply(
      unique(dA$type),
      function(x) as.vector(t(dA[dA$type == x, -1]))
    )

    # Indexing a vector past its end yields NA, which pads the shorter groups
    # up to the length of the longest one.
    width <- max(lengths(tmp))
    wide <- do.call(rbind, lapply(tmp, function(v) v[seq_len(width)]))
    wide

    #      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
    # [1,]    4    6    9    7    4    1    9    6    2
    # [2,]    1    3    8    2    7    9   NA   NA   NA
    
    0 讨论(0)
  • 2020-12-22 00:22

    Variation on @Hack-R's answer:

    # Number the rows within each type. The carrier vector is character, so the
    # counter is stored as "1", "2", ...
    A$num <- ave(as.character(A$type), A$type, FUN = seq_along)

    # Put type and num (columns 1 and 5) beside the stacked measurements
    # (columns 2-4). stack() returns `values` and `ind`, the source column name.
    # NOTE(review): the positions assume A holds exactly type, x1, x2, x3.
    tmp <- cbind(A[c(1, 5)], stack(A[2:4]))

    # One label per variable/occurrence pair, e.g. "x1.1".
    tmp$time <- with(tmp, paste(ind, num, sep = "."))

    reshape(
      tmp[c("type", "time", "values")],
      direction = "wide",
      idvar = "type",
      timevar = "time"
    )

    #  type values.x1.1 values.x1.2 values.x1.3 values.x2.1 values.x2.2 values.x2.3 values.x3.1 values.x3.2 values.x3.3
    #1    A           4           7           9           6           4           6           9           1           2
    #4    B           1           2          NA           3           7          NA           8           9          NA
    

    And a dplyr version for fun:

    library(dplyr)
    library(tidyr)

    # pivot_longer()/pivot_wider() replace the superseded gather()/spread().
    # NOTE(review): start from the original A; a helper column such as `num`
    # would be pivoted together with x1..x3.
    A %>%
      # Long form: one row per (type, variable, value).
      pivot_longer(-type, names_to = "var", values_to = "value") %>%
      # Occurrence counter of each variable within each type.
      group_by(type, var) %>%
      mutate(time = seq_along(var)) %>%
      ungroup() %>%
      # Build the wide column label "<time>_<var>", e.g. "1_x1".
      unite(grpvar, c(time, var)) %>%
      # Back to one row per type; combinations that do not exist become NA.
      pivot_wider(names_from = grpvar, values_from = value)

    # Result (2 rows x 10 columns):
    #
    #   type  1_x1  1_x2  1_x3  2_x1  2_x2  2_x3  3_x1  3_x2  3_x3
    # 1 A        4     6     9     7     4     1     9     6     2
    # 2 B        1     3     8     2     7     9    NA    NA    NA
    
    0 讨论(0)
  • 2020-12-22 00:27

    A bit late to the party, but this can also be done quite easily with the dcast function of the data.table package because you can use multiple value.var's in it:

    library(data.table)

    # Cast to wide with one column per variable/occurrence pair: rowid(type)
    # supplies the occurrence number within each type, and sep = "" joins the
    # variable name and that number without a separator (x11, x12, ...).
    dcast(
      setDT(d),
      type ~ rowid(type),
      value.var = c("x1", "x2", "x3"),
      sep = ""
    )
    

    which gives:

       type x11 x12 x13 x21 x22 x23 x31 x32 x33
    1:    A   4   7   9   6   4   6   9   1   2
    2:    B   1   2  NA   3   7  NA   8   9  NA
    

    You can also do this in base R with:

    # Running row index within each type; x1 is only used as a carrier vector
    # of the right length.
    d[["num"]] <- ave(d[["x1"]], d[["type"]], FUN = seq_along)

    # Spread every remaining column across the index, naming columns x11, x12, ...
    reshape(d, direction = "wide", idvar = "type", timevar = "num", sep = "")
    
    0 讨论(0)
  • 2020-12-22 00:38

    To me, this solution seems pretty straightforward

    # Split the value columns (everything but the first column) by type.
    groups <- split(d[-1], d[["type"]])

    # Row count of the largest group; every group is padded to this size.
    n_max <- max(vapply(groups, nrow, integer(1)))

    # Pad each group to n_max rows (row indices past the end give NA rows) and
    # flatten it with unlist(), which names the elements x11, x12, ..., x21, ...
    # Padding first gives every group the same names. Without it a one-row
    # group would be named x1, x2, ... and match nothing.
    ld <- lapply(groups, function(g) unlist(g[seq_len(n_max), , drop = FALSE]))

    # All groups now share one name set, so the first group's names are the
    # full set. Reduce(unique, ...) would also return only the first group's
    # names, because unique()'s second argument is `incomparables`, not a
    # second set to merge.
    ldNames <- names(ld[[1]])

    # Index every element by the common names and bind the rows together.
    do.call(rbind, lapply(ld, function(x) x[ldNames]))
    #   x11 x12 x13 x21 x22 x23 x31 x32 x33
    # A   4   7   9   6   4   6   9   1   2
    # B   1   2  NA   3   7  NA   8   9  NA
    

    If the order of the output above is not satisfactory, you can rearrange:

    # Save the row-bound result: one row per type, one column per name.
    d2 <- do.call(rbind, lapply(ld, function(x) x[ldNames]))

    # ldNames is ordered variable by variable (x11, x12, ..., x21, x22, ...).
    # Lay the names out with one matrix row per variable, then read the matrix
    # column by column to get occurrence-by-occurrence order (x11, x21, x31,
    # x12, ...). The number of variables fixes the row count; using it as the
    # column count is only right when it equals the largest group size.
    ldNames_sorted <- c(matrix(ldNames, nrow = ncol(d) - 1, byrow = TRUE))

    # Apply the new order; drop = FALSE keeps a matrix even with a single type.
    d2 <- d2[, ldNames_sorted, drop = FALSE]
    #  x11 x21 x31 x12 x22 x32 x13 x23 x33
    #A   4   6   9   7   4   1   9   6   2
    #B   1   3   8   2   7   9  NA  NA  NA
    

    To add a column for type instead of using row names, one method would be:

    # Copy the row names of d2 into a leading `type` column.
    data.frame(type = rownames(d2), d2)
    
    0 讨论(0)
提交回复
热议问题