with C++ through Rcpp
library(Rcpp)
cppFunction('NumericVector seqOfLogical(LogicalVector lv) {
size_t n = lv.size();
NumericVector res(n);
int foundCounter = 0;
for (size_t i = 0; i < n; i++) {
if (lv[i] == 1) {
foundCounter++;
} else {
foundCounter = 0;
}
res[i] = foundCounter;
}
return res;
}')
seqOfLogical(x)
# [1] 0 0 1 2 3 0 1 2 0 0 0 1 2 3 4
Benchmarks
library(microbenchmark)
set.seed(1)
x <- sample(c(T,F), size = 1e6, replace = T)
microbenchmark(
symbolix = { symbolix(x) },
thelatemail1 = { thelatemail1(x) },
thelatemail2 = { thelatemail2(x) },
wen = { wen(x) },
maurits = { maurits(x) },
#mhammer = { mhammer(x) }, ## this errors
times = 5
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# symbolix 2.760152 4.579596 34.60909 4.833333 22.31126 138.5611 5
# thelatemail1 154.050925 189.784368 235.16431 235.982093 262.33704 333.6671 5
# thelatemail2 138.876834 146.197278 158.66718 148.547708 179.80223 179.9119 5
# wen 780.432786 898.505231 1091.39099 1093.702177 1279.33318 1404.9816 5
# maurits 1002.267323 1043.590621 1136.35624 1086.967756 1271.38803 1277.5675 5
functions
symbolix <- function(x) {
seqOfLogical(x)
}
thelatemail1 <- function(x) {
r <- rle(x)
x[x] <- sequence(r$l[r$v])
return(x)
}
thelatemail2 <- function(x) {
x[x] <- sequence(with(rle(x), lengths[values]))
return(x)
}
maurits <- function(x) {
unlist(Map(function(l, v) if (!isTRUE(v)) rep(0, l) else 1:l, rle(x)$lengths, rle(x)$values))
}
wen <- function(A) {
B=data.table::rleid(A)
B=ave(B,B,FUN = seq_along)
B[!A]=0
B
}
mhammer <- function(x) {
x_counts <- x
for(i in seq_along(x)) {
if(x[i] == 1) { x_counts[i] <- x_counts[i] + x_counts[i-1] }
}
return(x_counts)
}