Suppose I have a vector c(1, 2, 3, 4) with no duplicated values. I need a vector c(1 * 2, 1 * 3, 1 * 4, 2 * 3, 2 * 4, 3 * 4), i.e. the multiplication is done over all possible pairs of the vector's values.
Here is the "outer + upper triangular part" option:
# Build the full 4 x 4 multiplication table, then keep only the entries
# strictly above the diagonal, so each unordered pair appears exactly once.
m <- outer(seq_len(4), seq_len(4))
as.numeric(m[upper.tri(m, diag = FALSE)])
#[1] 2 3 6 4 8 12
Another method is to index the elements of the upper/lower triangular part of a matrix directly and then calculate the pairwise products for those elements (adapted from this post):
#' Pairwise products of all distinct pairs of elements of a vector
#'
#' Enumerates the index pairs (i, j) with i < j in the order in which
#' `upper.tri()` walks a matrix (column by column) and multiplies the
#' corresponding elements of `x`.
#'
#' @param x A numeric vector.
#' @return A vector of length `length(x) * (length(x) - 1) / 2` holding
#'   `x[i] * x[j]` for every pair i < j; empty when `x` has fewer than two
#'   elements.
upperouter <- function(x) {
  N <- length(x)
  # Row indices 1, 1:2, ..., 1:N and the matching column indices 1, 2, 2, ...
  i <- sequence(seq_len(N))
  j <- rep(seq_len(N), seq_len(N))
  # Drop the diagonal (i == j), then index into x itself rather than 1:N so
  # the result is correct for arbitrary values, not only when x is 1:N.
  off_diag <- i != j
  x[i[off_diag]] * x[j[off_diag]]
}
upperouter(1:4)
#[1] 2 3 6 4 8 12
It's interesting to compare the different methods in a microbenchmark analysis for a larger vector (e.g. 1:100):
#' Pairwise products of all distinct pairs of elements of a vector
#'
#' Generates the index pairs (i, j) with i < j directly, without building
#' the full outer-product matrix, and multiplies the corresponding elements
#' of `x`. Pairs are ordered as `upper.tri()` orders them (column by column).
#'
#' @param x A numeric vector.
#' @return A vector with one product `x[i] * x[j]` per pair i < j; empty when
#'   `x` has fewer than two elements.
upperouter <- function(x) {
  N <- length(x)
  # i runs 1, 1:2, ..., 1:N; j repeats each column index as often as needed.
  i <- sequence(seq_len(N))
  j <- rep(seq_len(N), seq_len(N))
  # Keep only off-diagonal pairs and look the values up in x (not in 1:N),
  # so inputs other than 1:N produce the right products.
  keep <- i != j
  x[i[keep]] * x[j[keep]]
}
#' Map linear positions in a strict lower triangle to (row, column) indices
#'
#' For an n x n matrix, converts the positions k = 1, ..., n * (n - 1) / 2 of
#' the below-diagonal entries, counted column by column, into their row and
#' column indices using a closed-form expression (no matrix is allocated).
#'
#' @param n Size of the square matrix.
#' @return A two-column matrix with columns `i` (row) and `j` (column), where
#'   i > j in every row; it has zero rows when `n` is 0 or 1.
finv <- function(n) {
  # seq_len() yields an empty sequence when there are no pairs, whereas
  # 1:0 would produce c(1, 0) and two bogus index rows.
  k <- seq_len(n * (n - 1) / 2)
  # Column index: invert the cumulative count of entries per column.
  j <- floor(((2 * n + 1) - sqrt((2 * n - 1) ^ 2 - 8 * (k - 1))) / 2)
  # Row index: offset of k within column j, shifted below the diagonal.
  i <- j + k - (2 * n - j) * (j - 1) / 2
  cbind(i, j)
}
# Time five ways of computing the pairwise products of the elements of 1:N.
N <- 100
library(microbenchmark)
res <- microbenchmark(
# combn: apply prod() to every 2-element combination of 1:N.
combn = combn(1:N, 2, prod),
# outer: build the full N x N product table, keep the part above the diagonal.
outer = {
m <- outer(1:N, 1:N)
as.numeric(m[upper.tri(m)])
},
# upperouter: generate the (i, j) index pairs directly, no N x N matrix.
upperouter = {
upperouter(1:N)
},
# finv: closed-form (row, column) indices from finv(), then multiply.
finv = {
vec <- 1:N
ind <- finv(length(vec))
vec[ind[, 2]] * vec[ind[, 1]]
},
# sapply: multiply each element by 1:N to form the N x N table, then keep
# the part above the diagonal.
sapply = {
m <- sapply(1:N, "*", 1:N)
as.numeric(m[upper.tri(m)])
})
# Print the timing summary (one row per expression).
res
#Unit: microseconds
# expr min lq mean median uq max neval
# combn 6584.938 6896.0545 7584.8084 7035.9575 7886.5720 12020.626 100
# outer 106.791 113.6535 157.3774 138.9205 160.5985 950.706 100
# upperouter 201.943 210.1515 277.0989 227.6370 259.1975 2806.962 100
# finv 308.447 324.1960 442.3220 332.7250 375.3490 4128.325 100
# sapply 232.805 249.9080 298.3674 283.8580 315.9145 556.463 100
# Plot the benchmark timings; presumably autoplot() dispatches to a method
# for microbenchmark results -- confirm against the microbenchmark docs.
library(ggplot2)
autoplot(res)