Is there an elegant/fastR way to combine all pairs of columns in a data.frame?
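For example, using `mapply()` and `paste()` we can turn this data.frame into one where each pair of adjacent columns is pasted together (e.g. `a.1` and `a.2` become a single column).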
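It's amusing to note that the OP's solution appears to be the fastest one. (Caveat: the timings below should be re-checked on a fresh data.frame. Calling `setDT()` on the function argument converts the shared `mydf` to a data.table by reference, after which `mydf[, j]` in `f1()` and `f.jazurro()` may no longer extract columns, which would make those two look artificially fast.)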
# Paste every odd-positioned column of `mydf` to the column on its right,
# separated by ".".
#
# Args:
#   mydf: a data.frame (or any list of equal-length columns) laid out as
#         consecutive pairs: columns 1-2, 3-4, ...
# Returns:
#   Whatever mapply() simplifies the per-pair results to; for inputs with two
#   or more rows this is a character matrix with one column per pair, named
#   after the first column of each pair.
f1 <- function(mydf) {
  # Index the plain list of columns instead of using `mydf[, j]`: the matrix
  # style form collapses to a bare vector when `j` selects a single column
  # (a two-column input), which makes mapply() walk over elements rather than
  # columns, and its meaning changes with the class of `mydf`.
  cols <- as.list(mydf)
  left <- cols[seq(1, length(cols), by = 2)]
  right <- cols[seq(2, length(cols), by = 2)]
  mapply(function(x, y) paste(x, y, sep = "."), left, right)
}
# Pair up alternating columns of `mydf` and paste each pair with a "."
# separator. A recycled logical mask selects the odd-positioned columns
# (TRUE, FALSE) and the even-positioned columns (FALSE, TRUE).
# The result is whatever mapply() simplifies the per-pair strings to.
f.thelatemail <- function(mydf) {
  odd_cols <- mydf[c(TRUE, FALSE)]
  even_cols <- mydf[c(FALSE, TRUE)]
  mapply(paste, odd_cols, even_cols, MoreArgs = list(sep = "."))
}
require(dplyr)
# dplyr variant: build the two pasted columns by name and keep only them.
# The column names a.1/a.2/b.1/b.2 are hard-coded, so `mydf` must contain
# exactly those columns; the outputs are called x1 and x2.
f.on_the_shores_of_linux_sea <- function(mydf) {
  mydf %>%
    transmute(
      x1 = paste0(a.1, ".", a.2),
      x2 = paste0(b.1, ".", b.2)
    )
}
# Paste each odd-positioned column of `mydf` to its right-hand neighbour and
# bind the results column-wise.
#
# Args:
#   mydf: a data.frame laid out as consecutive column pairs (1-2, 3-4, ...).
# Returns:
#   The cbind() of one pasted character vector per pair (no column names are
#   attached).
f.jazurro <- function(mydf) {
  odd <- seq(1, ncol(mydf), by = 2)
  # `[[` always extracts the j-th column. `mydf[, j]` with a local variable is
  # class-dependent (a data.table evaluates `j` as an expression), so it can
  # silently paste the index numbers instead of the column contents.
  pasted <- lapply(odd, function(j) {
    paste(mydf[[j]], mydf[[j + 1]], sep = ".")
  })
  # Plain do.call() keeps this function independent of the magrittr pipe.
  do.call(cbind, pasted)
}
library(data.table)
# data.table variant: preallocate a result table with one column per pair and
# fill each column by reference with set().
#
# Args:
#   mydf: a data.frame (or data.table) laid out as consecutive column pairs.
# Returns:
#   A data.table with ncol(mydf) / 2 columns (default names from
#   as.data.table() on a matrix), each holding one pasted pair.
f.akrun <- function(mydf) {
  res <- as.data.table(matrix(NA, ncol = ncol(mydf) / 2, nrow = nrow(mydf)))
  indx <- seq(1, ncol(mydf), 2)
  # Deliberately no setDT(mydf) here: it would convert the caller's object to
  # a data.table by reference, changing the input seen by every function
  # called afterwards. `[[` extracts columns from a data.frame just as well.
  for (j in seq_along(indx)) {
    set(res, i = NULL, j = j,
        value = paste(mydf[[indx[j]]], mydf[[indx[j] + 1]], sep = "."))
  }
  res
}
# Benchmark input: 26 rows of two column pairs, repeated 5000 times.
mydf <- data.frame(
  a.1 = letters,
  a.2 = 26:1,
  b.1 = letters,
  b.2 = 1:26
)
mydf <- mydf[rep(seq_len(nrow(mydf)), times = 5000), ]
library(rbenchmark)
# Time every candidate on the same input.
benchmark(
  f1(mydf),
  f.thelatemail(mydf),
  f.on_the_shores_of_linux_sea(mydf),
  f.jazurro(mydf),
  f.akrun(mydf)
)
Results:
# test replications elapsed relative user.self sys.self user.child sys.child
# 5 f.akrun(mydf) 100 14.000 75.269 13.673 0.296 0 0
# 4 f.jazurro(mydf) 100 0.388 2.086 0.314 0.071 0 0
# 3 f.on_the_shores_of_linux_sea(mydf) 100 15.585 83.790 15.293 0.280 0 0
# 2 f.thelatemail(mydf) 100 26.416 142.022 25.736 0.639 0 0
# 1 f1(mydf) 100 0.186 1.000 0.169 0.017 0 0
[Updated Benchmark]
I've added one solution from @thelatemail, which I missed in the original answer, and one solution from @akrun:
# Same pairing as f.thelatemail, but Map() keeps the per-pair results as a
# list, which is then assembled into a data.frame (one column per pair,
# named after the odd-positioned column of each pair).
f.thelatemail2 <- function(mydf) {
  odd_cols <- mydf[c(TRUE, FALSE)]
  even_cols <- mydf[c(FALSE, TRUE)]
  pasted <- Map(paste, odd_cols, even_cols, sep = ".")
  data.frame(pasted)
}
# data.table variant that overwrites the first column of each pair with the
# pasted value (by reference, on a private copy) and returns those columns.
#
# Args:
#   mydf: a data.frame (or data.table) laid out as consecutive column pairs.
# Returns:
#   A data.table holding the odd-positioned columns, each now containing its
#   pair pasted together with ".".
f.akrun2 <- function(mydf) {
  # Copy first, then convert the copy. Calling setDT() on `mydf` itself would
  # turn the caller's object into a data.table by reference.
  mydf2 <- copy(mydf)
  setDT(mydf2)
  indx <- as.integer(seq(1, ncol(mydf2), 2))
  for (j in indx) {
    set(mydf2, i = NULL, j = j,
        value = paste(mydf2[[j]], mydf2[[j + 1]], sep = "."))
  }
  mydf2[, indx, with = FALSE]
}
Benchmark:
library(rbenchmark)
# Re-run the comparison with the two additional candidates included.
benchmark(
  f1(mydf),
  f.thelatemail(mydf),
  f.thelatemail2(mydf),
  f.on_the_shores_of_linux_sea(mydf),
  f.jazurro(mydf),
  f.akrun(mydf),
  f.akrun2(mydf)
)
# test replications elapsed relative user.self sys.self user.child sys.child
# 6 f.akrun(mydf) 100 13.247 69.356 12.897 0.340 0 0
# 7 f.akrun2(mydf) 100 12.746 66.733 12.405 0.339 0 0
# 5 f.jazurro(mydf) 100 0.327 1.712 0.254 0.073 0 0
# 4 f.on_the_shores_of_linux_sea(mydf) 100 16.347 85.586 15.838 0.445 0 0
# 2 f.thelatemail(mydf) 100 26.307 137.733 25.536 0.708 0 0
# 3 f.thelatemail2(mydf) 100 15.938 83.445 15.136 0.750 0 0
# 1 f1(mydf) 100 0.191 1.000 0.156 0.036 0 0