I\'m looking to transform a data frame of 660 columns into 3 columns just by stacking them on each other per 3 columns without manually re-arranging (since I have 660 column
rbind.data.frame
requires that all columns match up. So use setNames to replace the names of the C:D columns:
rbind( dat[1:2], setNames(dat[3:4], names(dat[1:2])) )
A B
1 1 4
2 2 5
3 3 6
4 7 10
5 8 11
6 9 12
To generalize that to multiple columns use do.call and lapply:
dat <- setNames( as.data.frame( matrix(1:36, ncol=12) ), LETTERS[1:12])
dat
#----
A B C D E F G H I J K L
1 1 4 7 10 13 16 19 22 25 28 31 34
2 2 5 8 11 14 17 20 23 26 29 32 35
3 3 6 9 12 15 18 21 24 27 30 33 36
do.call( rbind, lapply( seq(1,12, by=3), function(x) setNames(dat[x:(x+2)], LETTERS[1:3]) ))
A B C
1 1 4 7
2 2 5 8
3 3 6 9
4 10 13 16
5 11 14 17
6 12 15 18
7 19 22 25
8 20 23 26
9 21 24 27
10 28 31 34
11 29 32 35
12 30 33 36
The 12 would be replaced by 660 and everything else should work.
Here is another base R
option
i1 <- c(TRUE, FALSE)
`row.names<-`(data.frame(A= unlist(df1[i1]), B = unlist(df1[!i1])), NULL)
# A B
#1 1 4
#2 2 5
#3 3 6
#4 7 10
#5 8 11
#6 9 12
Or another option is melt
from data.table
library(data.table)
i1 <- seq(1, ncol(df1), by = 2)
i2 <- seq(2, ncol(df1), by = 2)
melt(setDT(df1), measure = list(i1, i2), value.name = c("A", "B"))
rbindlist
from data.table
package can also be used for the task and seems to be much more efficient.
# EXAMPLE DATA
df1 <- read.table(text = '
Col1 Col2 Col3 Col4
1 2 3 4
5 6 7 8
1 2 3 4
5 6 7 8', header = TRUE)
library(data.table)
library(microbenchmark)
library(purrr)
microbenchmark(
Map = as.data.frame(Map(c, df1[,1:2], df1[, 3:4])),
Reshape = reshape(df1, direction="long", varying=split(names(df1), rep(seq_len(ncol(df1)/2), 2))),
Purr = df1 %>% unclass() %>% # convert to non-data.frame list
split(names(.)[seq(length(.) / 2)]) %>% # split columns by indexed names
map_df(simplify),
DataTable = rbindlist(list(df1[,1:2], df1[, 3:4])),
Mapply = data.frame(mapply(c, df1[,1:2], df1[, 3:4], SIMPLIFY=FALSE)),
Rbind = rbind(df1[, 1:2],setnames(df1[, 3:4],names(df1[,1:2])))
)
The results are:
Unit: microseconds
expr min lq mean median uq max neval cld
Map 214.724 232.9380 246.2936 244.1240 255.9240 343.611 100 bc
Reshape 716.962 739.8940 778.7912 749.7550 767.6725 2834.192 100 e
Purr 309.559 324.6545 339.2973 334.0440 343.4290 551.746 100 d
DataTable 98.228 111.6080 122.7753 119.2320 129.2640 189.614 100 a
Mapply 233.577 258.2605 271.1881 270.7895 281.6305 339.291 100 c
Rbind 206.001 221.1515 228.5956 226.6850 235.2670 283.957 100 b
A classical split-apply-combine approach will scale flexibly:
as.data.frame(lapply(split(unclass(df),
names(df)[seq(ncol(df) / 2)]),
unlist, use.names = FALSE))
## A B
## 1 1 4
## 2 2 5
## 3 3 6
## 4 7 10
## 5 8 11
## 6 9 12
or with a hint of purrr
,
library(purrr)
df %>% unclass() %>% # convert to non-data.frame list
split(names(.)[seq(length(.) / 2)]) %>% # split columns by indexed names
map_df(simplify) # simplify each split to vector, coerce back to data.frame
## # A tibble: 6 × 2
## A B
## <int> <int>
## 1 1 4
## 2 2 5
## 3 3 6
## 4 7 10
## 5 8 11
## 6 9 12
reshape
to the rescue:
reshape(df, direction="long", varying=split(names(df), rep(seq_len(ncol(df)/2), 2)))
# time A B id
#1.1 1 1 4 1
#2.1 1 2 5 2
#3.1 1 3 6 3
#1.2 2 7 10 1
#2.2 2 8 11 2
#3.2 2 9 12 3