I recently got a computer with several cores and am learning to use parallel computing. I'm fairly proficient with lapply
and was told that parLapply works in a very similar way.
An alternative method, provided by Martin Morgan, would work here as well.
This method supplies the objects to each node in the cluster directly in the parLapply
call, with no need to use clusterExport:
library(parallel)
# Demo input: one sentence repeated 20 times.
text.var <- rep("I like cake and ice cream so much!", times = 20)
# Number of elements to iterate over.
ntv <- length(text.var)
# Interval (in elements) used by the examples to decide when to call gc().
gc.rate <- 10
# Describe a piece of text by the character count of each of its words.
#
# i: character input; it is lower-cased and split on single spaces.
# Returns a single string with the per-word character counts joined by " | ".
pos <- function(i) {
  # One character vector of words per input element.
  words <- strsplit(tolower(i), " ")
  # sapply is kept on purpose: its simplification rules decide the shape that
  # paste() sees, and therefore the exact output for vector input.
  word_nchar <- sapply(words, nchar)
  paste(word_nchar, collapse = " | ")
}
# Number of workers: taken from the "cl.cores" option, falling back to 4.
mc <- getOption("cl.cores", 4)
cl <- makeCluster(mc)

# Everything the worker function needs is handed over as an extra argument of
# parLapply, so nothing has to be exported to the workers beforehand.
parLapply(
  cl,
  seq_len(ntv),
  function(i, pos, text.var, ntv, gc.rate) {
    word_lengths <- pos(text.var[i])
    # Collect garbage on every gc.rate-th index.
    if (i %% gc.rate == 0) {
      gc()
    }
    word_lengths
  },
  pos = pos,
  text.var = text.var,
  ntv = ntv,
  gc.rate = gc.rate
)
# Sequential reference result: pos() applied to each element of text.var,
# with a gc() call on every gc.rate-th index.
out1 <- lapply(seq_len(ntv), function(i) {
  x <- pos(text.var[i])
  if (i %% gc.rate == 0) gc()
  x
})

# The worker function below uses pos, text.var and gc.rate as free variables.
# They are looked up on the workers, not in this session, so they have to be
# copied to every node before parLapply is called.
clusterExport(cl, varlist = c("text.var", "gc.rate", "pos"))

# Parallel version of the same computation, run on the cluster `cl`.
out2 <- parLapply(cl, seq_len(ntv), function(i) {
  x <- pos(text.var[i])
  if (i %% gc.rate == 0) gc()
  x
})
> identical(out1,out2)
# [1] TRUE
# library() stops right here if rbenchmark is not installed; require() would
# only return FALSE and let the benchmark() call fail later.
library(rbenchmark)

# Time the sequential lapply version against the parLapply version.
# The parallel expression assumes pos, text.var and gc.rate are already
# available on the workers of `cl` (e.g. via clusterExport).
benchmark(
  lapply(seq_len(ntv), function(i) {
    x <- pos(text.var[i])
    if (i %% gc.rate == 0) gc()
    return(x)
  }),
  parLapply(cl, seq_len(ntv), function(i) {
    x <- pos(text.var[i])
    if (i %% gc.rate == 0) gc()
    return(x)
  })
)
test
#1 lapply(seq_len(ntv), function(i) {\n x <- pos(text.var[i])\n if (i%%gc.rate == 0) \n gc()\n return(x)\n})
#2 parLapply(cl, seq_len(ntv), function(i) {\n x <- pos(text.var[i])\n if (i%%gc.rate == 0) \n gc()\n return(x)\n})
# replications elapsed relative user.self sys.self user.child sys.child
#1 100 20.03 3.453448 20.31 0.05 NA NA
#2 100 5.80 1.000000 0.22 0.03 NA NA
> cl
socket cluster with 2 nodes on host ‘localhost’
You need to export those variables to the other R processes in the cluster:
# Worker count comes from the "cl.cores" option (default 4).
mc <- getOption("cl.cores", 4)
cl <- makeCluster(mc)

# Copy the objects that the worker function refers to onto every node.
exported_names <- c("text.var", "ntv", "gc.rate", "pos")
clusterExport(cl, exported_names)