问题
I have a simulation done with the below function in R
:
## Load packages and prepare multicore process
library(forecast)
library(future.apply)
plan(multisession)
library(parallel)
library(foreach)
library(doParallel)
n_cores <- detectCores()
cl <- makeCluster(n_cores)
registerDoParallel(cores = detectCores())
set.seed(1)
bootstrap1 <- function(n, phi){
ts <- arima.sim(n, model = list(ar=phi, order = c(1, 1, 0)), sd = 1)
#ts <- numeric(n)
#ts[1] <- rnorm(1)
#for(i in 2:length(ts))
# ts[i] <- 2 * ts[i - 1] + rnorm(1)
########################################################
## create a vector of block sizes
t <- length(ts) # the length of the time series
lb <- seq(n-2)+1 # vector of block sizes to be 1 < l < n (i.e to be between 1 and n exclusively)
########################################################
## This section create matrix to store block means
BOOTSTRAP <- matrix(nrow = 1, ncol = length(lb))
colnames(BOOTSTRAP) <-lb
#BOOTSTRAP <- list(length(lb))
########################################################
## This section use foreach function to do detail in the brace
BOOTSTRAP <- foreach(b = 1:length(lb), .combine = 'cbind') %dopar%{
l <- lb[b]# block size at each instance
m <- ceiling(t / l) # number of blocks
blk <- split(ts, rep(1:m, each=l, length.out = t)) # divides the series into blocks
######################################################
res<-sample(blk, replace=T, 1000) # resamples the blocks
res.unlist <- unlist(res, use.names = FALSE) # unlist the bootstrap series
train <- head(res.unlist, round(length(res.unlist) - 10)) # Train set
test <- tail(res.unlist, length(res.unlist) - length(train)) # Test set
nfuture <- forecast::forecast(train, model = forecast::auto.arima(train), lambda=0, biasadj=TRUE, h = length(test))$mean # makes the `forecast of test set
RMSE <- Metrics::rmse(test, nfuture) # RETURN RMSE
BOOTSTRAP[b] <- RMSE
}
BOOTSTRAPS <- matrix(BOOTSTRAP, nrow = 1, ncol = length(lb))
colnames(BOOTSTRAPS) <- lb
BOOTSTRAPS
return(list("BOOTSTRAPS" = BOOTSTRAPS))
}
I use for loop
to print its result three times.
for (i in 1:3) { set.seed(1)
print(bootstrap1(10, 0.5))
}
I have the below result:
## 2 3 4 5 6 7 8 9
##[1,] 1.207381 1.447382 1.282099 0.9311434 0.8481634 1.006494 0.9829584 1.205194
## 2 3 4 5 6 7 8 9
##[1,] 1.404846 1.262756 1.50738 1.188452 0.8981125 1.001651 1.349721 1.579556
## 2 3 4 5 6 7 8 9
##[1,] 1.265196 1.080703 1.074807 1.430653 0.9166268 1.12537 0.9492137 1.201763
If I have to run this several times I will be getting a different result.
I want the way I can set the seed such that the three-round will be distinct while if I run with the set seed, I will get the same three-distinct result using R
.
回答1:
We could specify the kind
in set.seed
. If we are doing this inside the loop, it will return the same values
for (i in 1:3) {
set.seed(1, kind = "L'Ecuyer-CMRG")
print(bootstrap1(10, 0.5))
}
#$BOOTSTRAPS
# 2 3 4 5 6 7 8 9
#[1,] 4.189426 6.428085 3.672116 3.893026 2.685741 3.821201 3.286509 4.062811
#$BOOTSTRAPS
# 2 3 4 5 6 7 8 9
#[1,] 4.189426 6.428085 3.672116 3.893026 2.685741 3.821201 3.286509 4.062811
#$BOOTSTRAPS
# 2 3 4 5 6 7 8 9
#[1,] 4.189426 6.428085 3.672116 3.893026 2.685741 3.821201 3.286509 4.062811
If the intention is to return different values for each iteration in for
loop and get the same result on subsequent runs, specify the set.seed
outside the loop
1) First run
set.seed(1, kind = "L'Ecuyer-CMRG")
for (i in 1:3) {
print(bootstrap1(10, 0.5))
}
#$BOOTSTRAPS
# 2 3 4 5 6 7 8 9
#[1,] 4.189426 6.428085 3.672116 3.893026 2.685741 3.821201 3.286509 4.062811
#$BOOTSTRAPS
# 2 3 4 5 6 7 8 9
#[1,] 1.476428 1.806258 2.071091 2.09906 2.014298 1.032776 2.573738 1.831142
#$BOOTSTRAPS
# 2 3 4 5 6 7 8 9
#[1,] 2.248546 1.838302 2.345557 1.696614 2.06357 1.502569 1.912556 1.906049
2) Second run
set.seed(1, kind = "L'Ecuyer-CMRG")
for (i in 1:3) {
print(bootstrap1(10, 0.5))
}
#$BOOTSTRAPS
# 2 3 4 5 6 7 8 9
#[1,] 4.189426 6.428085 3.672116 3.893026 2.685741 3.821201 3.286509 4.062811
#$BOOTSTRAPS
# 2 3 4 5 6 7 8 9
#[1,] 1.476428 1.806258 2.071091 2.09906 2.014298 1.032776 2.573738 1.831142
#$BOOTSTRAPS
# 2 3 4 5 6 7 8 9
#[1,] 2.248546 1.838302 2.345557 1.696614 2.06357 1.502569 1.912556 1.906049
According to ?set.seed
"L'Ecuyer-CMRG": - A ‘combined multiple-recursive generator’ from L'Ecuyer (1999), each element of which is a feedback multiplicative generator with three integer elements: thus the seed is a (signed) integer vector of length 6. The period is around 2^191. The 6 elements of the seed are internally regarded as 32-bit unsigned integers. Neither the first three nor the last three should be all zero, and they are limited to less than 4294967087 and 4294944443 respectively. This is not particularly interesting of itself, but provides the basis for the multiple streams used in package parallel.
来源:https://stackoverflow.com/questions/64236344/how-do-i-set-seed-for-simulation-in-r-to-attain-reproducibility-on-windows-os