R: distm with Big Memory

Something like this works for me:

library(bigmemory)
library(foreach)
library(geosphere)  # provides distm() and distHaversine()

# Split 1:m into nb contiguous blocks of roughly block.size elements;
# returns one row per block: (lower, upper, size)
CutBySize <- function(m, block.size, nb = ceiling(m / block.size)) {
  int <- m / nb
  upper <- round(1:nb * int)
  lower <- c(1, upper[-nb] + 1)
  size <- c(upper[1], diff(upper))
  cbind(lower, upper, size)
}

# Expand one (lower, upper) row of the intervals matrix into a full index sequence
seq2 <- function(lims) {
  seq(lims[1], lims[2])
}

n <- nrow(list1)
# File-backed n x n matrix, so the full distance matrix never has to fit in RAM
a <- big.matrix(n, n, backingfile = "my_dist.bk",
                descriptorfile = "my_dist.desc")

intervals <- CutBySize(n, block.size = 1000)
K <- nrow(intervals)

# Use half of the available cores
doParallel::registerDoParallel(parallel::detectCores() / 2)
foreach(j = 1:K) %dopar% {           # parallelize over column blocks
  ind_j <- seq2(intervals[j, ])
  foreach(i = j:K) %do% {            # only blocks on or above the diagonal
    ind_i <- seq2(intervals[i, ])
    # Distances within the same set of points, so each block pair is computed once
    tmp <- distm(list1[ind_i, c('longitude', 'latitude')],
                 list1[ind_j, c('longitude', 'latitude')],
                 fun = distHaversine)
    a[ind_i, ind_j] <- tmp
    a[ind_j, ind_i] <- t(tmp)  # mirror the block across the diagonal
    NULL  # avoid returning the large block from each iteration
  }
}
doParallel::stopImplicitCluster()
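
Because the matrix is file-backed, you can reattach it from the descriptor file later instead of recomputing anything; here is a minimal sketch (the 5x5 preview is just for illustration). The same attach.big.matrix() call is also how each worker would typically get the matrix if your backend does not fork (e.g. a PSOCK cluster on Windows):

# Re-open the file-backed result in a later session (or inside a worker)
a <- attach.big.matrix("my_dist.desc")
a[1:5, 1:5]  # read back a small block of distances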

I repeated your list 1000 times to test with 10K rows.
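
If you want to reproduce that kind of test, a minimal sketch of one way to do it (assuming the original data frame is called list1 with longitude/latitude columns; the 1000x repetition is purely for benchmarking):

# Hypothetical test setup: stack 1000 copies of the small coordinate data frame
list1 <- list1[rep(seq_len(nrow(list1)), times = 1000), ]
nrow(list1)  # roughly 10K rows if the original had about 10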
