R nested foreach %dopar% in outer loop and %do% in inner loop

前端 未结 1 1341
不知归路
不知归路 2021-01-05 16:51

I'm running the following script in R. If I use a %do% rather than a %dopar% the script works fine. However, if in the outer loop I use a %dopar% the loop runs forever with…

相关标签:
1条回答
  • 2021-01-05 17:04

    The proper way to nest foreach loops is to use the %:% operator. See the example below, which I have tested on Windows.

    # Example: time the same nested foreach run sequentially (%do%) and in
    # parallel (%dopar%) on a snow cluster.
    library(foreach)
    library(doSNOW)
    
    # Start a 4-worker cluster and register it as the backend used by %dopar%.
    # NOTE(review): the cluster is never stopped in this snippet; call
    # stopCluster(cl) once all parallel work is finished.
    NumberOfCluster <- 4
    cl <- makeCluster(NumberOfCluster) 
    registerDoSNOW(cl) 
    
    # Number of random draws per (i, j) combination.
    N <- 1e6
    
    # Sequential baseline: for every (i, j) pair take the mean of N normal draws
    # with mean i and sd j. The inner results are combined with c() and the
    # outer ones with rbind(), giving a 10 x 10 matrix.
    system.time(foreach(i = 1:10, .combine = rbind) %:%
                  foreach(j = 1:10, .combine = c) %do% mean(rnorm(N, i, j)))
    
    # Same computation, but the (i, j) tasks are sent to the registered workers.
    system.time(foreach(i = 1:10, .combine = rbind) %:%
                  foreach(j = 1:10, .combine = c) %dopar% mean(rnorm(N, i, j)))
    

    Output:

    > system.time(foreach(i = 1:10, .combine = rbind) %:%
    +               foreach(j = 1:10, .combine = c) %do% mean(rnorm(N, i, j)))
       user  system elapsed 
       7.38    0.23    7.64 
    > system.time(foreach(i = 1:10, .combine = rbind) %:%
    +               foreach(j = 1:10, .combine = c) %dopar% mean(rnorm(N, i, j)))
       user  system elapsed 
       0.09    0.00    2.14 
    

    The scheme for using nested loops is as follows:

    # %:% joins the loop headers; the body follows one %do% / %dopar% operator
    foreach(i) %:% foreach(j) %dopar% {foo(i, j)}
    

    The %:% operator is used to nest several foreach loops. You cannot do any computation between the nested loop headers. In your case you have to use two separate loops, for example:

    # Step 1: a plain loop over i computes the per-i values up front
    x <- foreach(i = 1:10, .combine = c) %dopar% {
      2^i
    }
    
    # Step 2: the nested loop over (i, j) only reads the precomputed x[i]
    foreach(i = 1:10, .combine = rbind) %:%
      foreach(j = 1:10, .combine = c) %dopar% {
        x[i] + j
      }
    

    Untested code:

    library(data.table)
    library(foreach)
    library(doSNOW)
    
    # Start a two-worker cluster and register it as the %dopar% backend.
    NumberOfCluster <- 2
    cl <- makeCluster(NumberOfCluster)
    registerDoSNOW(cl)
    
    # Per-UNSPSC precomputation. Each task returns BOTH the term-importance
    # table and the ABNs: objects assigned inside a %dopar% body stay on the
    # worker, and only the value of the last expression is sent back.
    per_unspsc <- foreach(i = UNSPSC_list,
                          .packages = c("data.table", "dplyr"),
                          .verbose = TRUE) %dopar% {
      in_class <- which(substr(terms_list_by_UNSPSC$UNSPSC, 1, 6) == i)
      terms <- as.data.table(
        unique(gsub(" ", "", unlist(terms_list_by_UNSPSC$Terms[in_class])))
      )
      # dplyr joins take `by`; `on` is data.table's own join syntax
      temp <- inner_join(N_of_UNSPSCs_by_Term, terms, by = "V1")
      temp$V2 <- 1 / as.numeric(temp$V2)
      temp <- temp[order(temp$V2, decreasing = TRUE), ]
      names(temp) <- c("Term", "Imp")
      same_family <- which(
        substr(UNSPSCs_per_ABN$UNSPSC, 1, 4) == substr(i, 1, 4)
      )
      list(temp = temp, ABNs = unique(UNSPSCs_per_ABN[same_family, 1]))
    }
    
    # Split the combined results into two lists aligned with UNSPSC_list.
    ABNs <- lapply(per_unspsc, `[[`, "ABNs")
    term_importance <- lapply(per_unspsc, `[[`, "temp")
    
    # Nested loop over (UNSPSC, training row). The outer loop iterates over
    # positions so the unnamed per-UNSPSC lists can be indexed with [[k]].
    # seq_len() keeps the inner loop empty when train has zero rows.
    predictions <- foreach(k = seq_along(UNSPSC_list),
                           .packages = c("data.table", "dplyr"),
                           .verbose = TRUE) %:%
      foreach(j = seq_len(nrow(train)), .combine = "c",
              .packages = "dplyr") %dopar% {
        temp <- term_importance[[k]]
        descr <- names(which(!is.na(train[j, ])))
        candidate <- unlist(predict_all[j, 1])
        if (candidate %in% unlist(ABNs[[k]]) ||
              !candidate %in% unlist(suppliers)) {
          sum(temp$Imp[temp$Term %in% descr])
        } else {
          0
        }
      }
    
    # All parallel work is done; release the worker processes.
    stopCluster(cl)
    
    # save() takes object names, not expressions, so bind each element to a
    # name first. File names are the same as the original paste() call produced.
    for (k in seq_along(predictions)) {
      prediction <- predictions[[k]]
      save(prediction, file = paste("Predictions", k, ".rda", sep = "_"))
    }
    
    0 讨论(0)
提交回复
热议问题