问题
I have made progress on getting the parallel processing part working, but saving the vector of fetch distances is not working properly. The errors I get are:
df_Test_Fetch <- data.frame(x_lake_length)
Error in data.frame(x_lake_length) : object 'x_lake_length' not found
write.table(df_Test_Fetch,file="C:/tempTest_Fetch.csv",row.names=TRUE,col.names=TRUE, sep=",")
Error in is.data.frame(x) : object 'df_Test_Fetch' not found
I have tried altering the code below so that the result of the foreach step is assigned to x_lake_length, but that did not output the vector as I hoped. How can I get the actual results saved to a CSV file? I am running a Windows 8 computer with R x64 3.3.0.
Thank you in advance, Jen
Here is the full code.
# Make sure there is no pre-existing data from an earlier run.
rm(x_lake_length)
# Libraries ---------------------------------------------------------------
# Install pacman if it is missing, then use it to load everything else.
if (!require("pacman")) install.packages("pacman")
pacman::p_load(lakemorpho,rgdal,maptools,sp,doParallel,foreach,
doParallel)
# HPC ---------------------------------------------------------------------
# Start a cluster on all but two of the detected cores and register it as the
# foreach backend.
cores_2_use <- detectCores() - 2
cl <- makeCluster(cores_2_use, useXDR = F)
clusterSetRNGStream(cl, 9956)
registerDoParallel(cl, cores_2_use)
# Data --------------------------------------------------------------------
ogrDrivers()
# Example shapefiles shipped with rgdal.
dsn <- system.file("vectors", package = "rgdal")[1]
# the line below is commented out but when I run the script on my data the line below is what I use instead of the one above
# then making the name changes as needed
# dsn<-setwd("J:\\Elodea\\ByHUC6\\")
ogrListLayers(dsn)
ogrInfo(dsn=dsn, layer="trin_inca_pl03")
# Temporarily switch into the data folder to query the .shp file by name,
# then switch back.
owd <- getwd()
setwd(dsn)
ogrInfo(dsn="trin_inca_pl03.shp", layer="trin_inca_pl03")
setwd(owd)
x <- readOGR(dsn=dsn, layer="trin_inca_pl03")
summary(x)
# Analysis ----------------------------------------------------------------
# myfun computes lakeMaxLength() for row i of x and stores it in a vector
# named x_lake_length. That vector is local to the function, and the last
# expression evaluated is Sys.sleep(0.1), so the vector is never returned.
myfun <- function(x,i){tmp<-lakeMorphoClass(x[i,],NULL,NULL,NULL)
x_lake_length<-vector("numeric",length = nrow(x))
x_lake_length[i]<-lakeMaxLength(tmp,200)
print(i)
Sys.sleep(0.1)}
# The value of foreach() is not assigned to anything here, so no
# x_lake_length object is created outside myfun.
foreach(i = 1:nrow(x),.combine=cbind,.packages=c("lakemorpho","rgdal")) %dopar% (
myfun(x,i)
)
options(digits=10)
# This is the line that raises "object 'x_lake_length' not found".
df_Test_Fetch <- data.frame(x_lake_length)
write.table(df_Test_Fetch,file="C:/temp/Test_Fetch.csv",row.names=TRUE,col.names=TRUE, sep=",")
print(proc.time())
回答1:
I think this is what you want, though without understanding the subject matter I can't be 100% sure.
What I did was add a `return()` to your parallelized function and assign the value of that returned object to `x_lake_length` when you call `foreach`. But I'm only guessing that that's what you were trying to do, so please correct me if I'm wrong.
# Make sure there is no stale result from an earlier run. The exists() guard
# avoids the warning rm() gives when the object is not there.
if (exists("x_lake_length", inherits = FALSE)) {
  rm(x_lake_length)
}
# Libraries ---------------------------------------------------------------
# requireNamespace() only checks that pacman is installed; pacman::p_load()
# then installs (if needed) and attaches the packages the script uses.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load(lakemorpho, rgdal, maptools, sp, doParallel, foreach)
# HPC ---------------------------------------------------------------------
# Leave two cores free, but never request fewer than one worker:
# detectCores() may return NA or a count of two or less.
cores_2_use <- max(1L, detectCores() - 2L, na.rm = TRUE)
cl <- makeCluster(cores_2_use, useXDR = FALSE)
clusterSetRNGStream(cl, 9956)
# The worker count is already fixed by the cluster object itself.
registerDoParallel(cl)
# Data --------------------------------------------------------------------
ogrDrivers()
# Example shapefiles shipped with rgdal.
dsn <- system.file("vectors", package = "rgdal")[1]
# For your own data, point dsn at the folder that holds the shapefiles
# instead, e.g.
# dsn <- "J:\\Elodea\\ByHUC6\\"
# and change layer_name to match.
layer_name <- "trin_inca_pl03"
ogrListLayers(dsn)
ogrInfo(dsn = dsn, layer = layer_name)
# Query the .shp file through its full path instead of changing the working
# directory, so a failure here cannot leave the session in another folder.
ogrInfo(dsn = file.path(dsn, paste0(layer_name, ".shp")), layer = layer_name)
x <- readOGR(dsn = dsn, layer = layer_name)
summary(x)
# Analysis ----------------------------------------------------------------
# Maximum lake length (fetch) for a single lake.
#   x  spatial object returned by readOGR(), one row per lake
#   i  index of the row (lake) to process
# Returns the value of lakeMaxLength() for that lake. The second argument,
# 200, is passed through unchanged (the point density, per the lakemorpho
# documentation -- confirm it suits your data).
myfun <- function(x, i) {
  tmp <- lakeMorphoClass(x[i, ], NULL, NULL, NULL)
  # Return just this lake's value. Returning a full-length vector from every
  # iteration would make foreach build an n-by-n matrix of mostly zeros.
  return(lakeMaxLength(tmp, 200))
}
# One value per lake, concatenated in row order into a plain numeric vector.
# seq_len() keeps the loop empty when x has no rows (1:0 would iterate twice).
x_lake_length <- foreach(
  i = seq_len(nrow(x)),
  .combine = c,
  .packages = c("lakemorpho", "rgdal")
) %dopar% {
  myfun(x, i)
}
# The workers are not needed past this point; release them.
stopCluster(cl)
# Output ------------------------------------------------------------------
# Affects how numbers are printed to the console (e.g. proc.time() below).
options(digits = 10)
df_Test_Fetch <- data.frame(x_lake_length)
out_file <- "C:/temp/Test_Fetch.csv"
# Create the target folder if it is missing so the write cannot fail on it.
dir.create(dirname(out_file), showWarnings = FALSE, recursive = TRUE)
# col.names = NA writes an empty header cell above the row-name column, so the
# header lines up with the data when the file is opened as a CSV.
write.table(df_Test_Fetch, file = out_file, row.names = TRUE,
            col.names = NA, sep = ",")
print(proc.time())
来源:https://stackoverflow.com/questions/37910490/parallel-proccessing-in-r-doparallel-foreach-save-data