Sum of subvectors of a vector in R

后端未结

关注

 6  706

Given a vector x of length k, I would like to obtain a k by k matrix X where X[i,j] is the sum of x[i] + ... + x[j]. The

相关标签:

6条回答

无人共我

2021-01-04 19:11

We can use outer():

mySum <- function(i,j) sum(x[i:j])
outer(1:10, 1:10, Vectorize(mySum))

EDIT: You could also go for a solution by foreach:

library(foreach)
mySum <- function(j) sum(x[i:j])
mySum <- Vectorize(mySum)
foreach(i = 1:10, .combine = 'rbind') %do% mySum(1:10)

and maybe run it in parallel instead.

0 讨论(0)

我在风中等你

2021-01-04 19:19

Here's another approach which seems to be significantly faster than OP's for loop (by factor ~30) and faster than the other answers currently present (by factor >=18):

n <- 5
x <- 1:5
z <- lapply(1:n, function(i) cumsum(x[i:n]))
m <- mapply(function(y, l) c(rep(NA, n-l), y), z, lengths(z))
m[upper.tri(m)] <- t(m)[upper.tri(m)]
m

#     [,1] [,2] [,3] [,4] [,5]
#[1,]    1    3    6   10   15
#[2,]    3    2    5    9   14
#[3,]    6    5    3    7   12
#[4,]   10    9    7    4    9
#[5,]   15   14   12    9    5

Benchmarks (scroll down for results)

library(microbenchmark)
n <- 100
x <- 1:n

f1 <- function() {
  X <- matrix(0,n,n)
  for(i in 1:n) {
    for(j in 1:n) {
      X[i,j] <- sum(x[i:j])
    }
  }
  X
}

f2 <- function() {
  mySum <- function(i,j) sum(x[i:j])
  outer(1:n, 1:n, Vectorize(mySum))
}

f3 <- function() {
  matrix(apply(expand.grid(1:n, 1:n), 1, function(y) sum(x[y[2]:y[1]])), n, n)
}

f4 <- function() {
  z <- lapply(1:n, function(i) cumsum(x[i:n]))
  m <- mapply(function(y, l) c(rep(NA, n-l), y), z, lengths(z))
  m[upper.tri(m)] <- t(m)[upper.tri(m)]
  m
}

f5 <- function() {
  X <- diag(x)
  for(i in 1:(n-1)) {
    for(j in 1:(n-i)){
      X[j+i,j] <- X[j,j+i] <- X[j+i,j+i] + X[j+i-1,j]
    }  
  }
  X
}

microbenchmark(f1(), f2(), f3(), f4(), f5(), times = 25L, unit = "relative")
#Unit: relative
# expr      min       lq     mean   median       uq      max neval
# f1() 29.90113 29.01193 30.82411 31.15412 32.51668 35.93552    25
# f2() 29.46394 30.93101 31.79682 31.88397 34.52489 28.74846    25
# f3() 56.05807 53.82641 53.63785 55.36704 55.62439 45.94875    25
# f4()  1.00000  1.00000  1.00000  1.00000  1.00000  1.00000    25
# f5() 16.30136 17.46371 18.86259 17.87850 21.19914 23.68106    25

all.equal(f1(), f2())
#[1] TRUE
all.equal(f1(), f3())
#[1] TRUE
all.equal(f1(), f4())
#[1] TRUE
all.equal(f1(), f5())
#[1] TRUE

Updated with the edited function by Neal Fultz.

0 讨论(0)

长发绾君心

2021-01-04 19:22
You don't need to repeatedly recalculate the sums in the inner loop, instead, you can build up the matrix by subdiagonal using the fact that a cell equals the cell above it plus the cell on the diagonal to the right. This should reduce the order of the algorithm from O(n^3) to O(n^2).

Here's a quick and dirty implementation:
```
X <- diag(x)

for(i in 1:9) {
    for(j in 1:(10-i)){
        X[j+i,j] <- X[j,j+i] <- X[j+i,j+i] + X[j+i-1,j]
    }  
}
```
EDIT:

As others have pointed out, you can get a little more speed and simplicity by using cumsum and vectorizing the inner loop:
```
n <- length(x)
X <- diag(x)
for(i in 1:n) {
    X[i:n,i] <- X[i,i:n] <- cumsum(x[i:n])
}
```
0 讨论(0)
发布评论:

提交评论
- 加载中...

不要未来只要你来

2021-01-04 19:25

Here is an Rcpp function that is almost a literal translation of your code:

set.seed(1)
x <- rnorm(10)

X <- matrix(0,10,10)
for(i in 1:10) 
  for(j in 1:10)
    X[i,j] <- sum(x[i:j])

library(inline)
library(Rcpp)

cppFunction(
  'NumericMatrix allSums(NumericVector x) {
        int n = x.length();
        NumericMatrix X(n, n);
        for (int i = 0; i < n; ++i) {
          for (int j = 0; j < n; ++j) {
             for (int k = i; k <= j; ++k) {
               X(i,j) += x(k);
             }
            X(j,i) = X(i,j);
          }
        }
        return X;
    }')

Y <- allSums(x)
all.equal(X, Y)
#[1] TRUE

But of course, the algorithm can be improved:

cppFunction(
  'NumericMatrix allSums2(NumericVector x) {
        int n = x.length();
        NumericMatrix X(n, n);
        X(0,0) = x(0);
        for (int j = 0; j < n; ++j) {
          if (j > 0) {
            X(0,j) = X(0, j-1) + x(j);
            X(j,0) = X(0,j);
          }
          for (int i = 1; i < n; ++i) {
            X(i,j) = X(i-1,j) - x(i-1); 
            X(j,i) = X(i,j);
            }
          }
        return X;
    }')

Z <- allSums2(x)
all.equal(X, Z)
#[1] TRUE

Some benchmarks:

library(microbenchmark)
n <- 100
x <- 1:n

f4 <- function(x, n) {
  z <- lapply(1:n, function(i) cumsum(x[i:n]))
  m <- mapply(function(y, l) c(rep(NA, n-l), y), z, lengths(z))
  m[upper.tri(m)] <- t(m)[upper.tri(m)]
  m
}


microbenchmark(f4(x, n), allSums(x), allSums2(x), times = 25)#
#Unit: microseconds
#       expr      min       lq      mean   median       uq      max neval cld
#   f4(x, n)  933.441  938.061 1121.0901  975.633 1045.232 2635.561    25  b 
# allSums(x) 1385.533 1391.693 1466.4784 1395.080 1408.630 2996.803    25   c
#allSums2(x)  127.499  129.038  198.8475  133.965  139.201 1737.844    25 a

0 讨论(0)

余生分开走

2021-01-04 19:27

In addition to the excellent answers already provided, here is a super fast base R solution:

subVecSum <- function(v, s) {
    t <- c(0L, cumsum(v))
    n1 <- s+1L
    m <- matrix(0L,s,s)
    for (i in 4L:n1) {
        m[i-2L,1L:(i-3L)] <- t[i-1L]-t[1L:(i-3L)]
        m[i-2L,i-2L] <- v[i-2L]
        m[i-2L,(i-1L):s] <- t[i:n1]-t[i-2L]
    }
    m[1L,] <- t[-1L]; m[s,] <- t[n1]-t[1L:s]
    m
}

In fact, according to the benchmarks below, it is the fastest base R solution (@Roland's Rcpp solution is still the fastest). It also gets faster, relatively speaking, as the size of the vector increases (I only compared f4 (provided by @docendo) as it is the fastest base R solution thus far and @Roland's Rcpp implementation. You will note that I'm using the modified f4 function as defined by @Roland).

## We first compile the functions.. no need to compile the Rcpp
## function as it is already done by calling cppFunction
c.f4 <- compiler::cmpfun(f4)
c.subVS1 <- compiler::cmpfun(subVecSum)

n <- 100
x <- 1:n
microbenchmark(c.f4(x,n), c.subVS1(x,n), allSums2(x), times = 1000, unit = "relative")
Unit: relative
          expr       min        lq     mean    median        uq       max neval cld
    c.f4(x, n) 11.355013 11.262663 9.231756 11.545315 12.074004 1.0819186  1000   c
c.subVS1(x, n)  7.795879  7.592643 5.414135  7.624209  8.080471 0.8490876  1000  b 
   allSums2(x)  1.000000  1.000000 1.000000  1.000000  1.000000 1.0000000  1000 a  

n <- 500
x <- 1:n
microbenchmark(c.f4(x,n), c.subVS1(x,n), allSums2(x), times = 500, unit = "relative")
Unit: relative
          expr      min       lq     mean   median       uq       max neval cld
    c.f4(x, n) 6.231426 6.585118 6.442567 6.438163 6.882862 10.124428   500   c
c.subVS1(x, n) 3.548766 3.271089 3.137887 2.881520 3.604536  8.854241   500  b 
   allSums2(x) 1.000000 1.000000 1.000000 1.000000 1.000000  1.000000   500 a  

n <- 1000
x <- 1:n
microbenchmark(c.f4(x,n), c.subVS1(x,n), allSums2(x), times = 100, unit = "relative")
Unit: relative
          expr      min        lq      mean    median        uq      max neval cld
    c.f4(x, n) 7.779537 16.352334 11.489506 15.529351 14.447210 3.639483   100   c
c.subVS1(x, n) 2.637996  2.951763  2.937385  2.726569  2.692099 1.211545   100  b 
   allSums2(x) 1.000000  1.000000  1.000000  1.000000  1.000000 1.000000   100 a  

identical(c.f4(x,n), c.subVS1(x,n), as.integer(allSums2(x)))  ## gives the same results
[1] TRUE

This algorithm takes advantage of only calculating cumsum(v) one time and utilizing indexing from there. For really large vectors, the efficiency is comparable to the Rcpp solution provided by @Roland. Observe:

n <- 5000
x <- 1:n
microbenchmark(c.subVS1(x,n), allSums2(x), times = 10, unit = "relative")
Unit: relative
          expr      min       lq     mean   median       uq      max neval cld
c.subVS1(x, n) 1.900718 1.865304 1.854165 1.865396 1.769996 1.837354    10   b
   allSums2(x) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    10  a 


n <- 10000
x <- 1:n
microbenchmark(c.subVS1(x,n), allSums2(x), times = 10, unit = "relative")
Unit: relative
          expr      min      lq     mean   median       uq     max neval cld
c.subVS1(x, n) 1.503538 1.53851 1.493883 1.526843 1.496783 1.29196    10   b
   allSums2(x) 1.000000 1.00000 1.000000 1.000000 1.000000 1.00000    10  a

Not bad, for base R, however Rcpp stills rules the day!!!

0 讨论(0)

囚心锁ツ

2021-01-04 19:33

You can also try this:

x <- 1:10

matrix(apply(expand.grid(1:10, 1:10), 1, function(y) sum(x[y[2]:y[1]])), 10, 10)
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    1    3    6   10   15   21   28   36   45    55
 [2,]    3    2    5    9   14   20   27   35   44    54
 [3,]    6    5    3    7   12   18   25   33   42    52
 [4,]   10    9    7    4    9   15   22   30   39    49
 [5,]   15   14   12    9    5   11   18   26   35    45
 [6,]   21   20   18   15   11    6   13   21   30    40
 [7,]   28   27   25   22   18   13    7   15   24    34
 [8,]   36   35   33   30   26   21   15    8   17    27
 [9,]   45   44   42   39   35   30   24   17    9    19
[10,]   55   54   52   49   45   40   34   27   19    10

0 讨论(0)