I'd like to have a function which returns the starting indices of matching subsequences of a vector. For example:
# Small character vector used to illustrate the question.
y <- c("a", "a", "a", "b", "c")
# Reproducible benchmark data: 12000 draws (with replacement) from 1..6,
# searched for the consecutive pattern 2, 3, 4.
set.seed(0)
a <- sample(1:6, 12000, replace = TRUE)
b <- 2:4
# Return the starting indices of every occurrence of `b` as a contiguous
# subsequence of `a`.
#
# a: vector to search in.
# b: pattern vector to look for.
#
# Returns an integer vector of 1-based start positions in `a`; integer(0)
# when there is no match, when `b` is empty, or when `b` is longer than `a`.
# Positions whose comparison involves NA are never reported, because
# which() drops NA.
vecIn <- function(a, b) {
  n_a <- length(a)
  n_b <- length(b)
  # Guard: if `b` were longer than `a`, the window below would run
  # backwards (e.g. 2:1) and could report positions that do not exist.
  if (n_b == 0L || n_b > n_a) {
    return(integer(0))
  }
  n_starts <- n_a - n_b + 1L
  # For the k-th pattern element, compare it against all of `a` and keep the
  # slice aligned so that entry i refers to the window starting at a[i].
  hits <- lapply(seq_len(n_b), function(k) {
    (a == b[[k]])[k + seq_len(n_starts) - 1L]
  })
  # A window matches when every one of its n_b element comparisons is TRUE.
  which(Reduce(`+`, hits) == n_b)
}
> vecIn(a,b)
[1] 2 154 986 1037 1046 1257 1266 1750 2375 2677 3184 3206
[13] 3499 3526 3882 4238 4311 4388 4437 4580 4714 4766 4827 5046
[25] 5279 5629 6153 6842 6856 6919 7200 7516 7520 7707 7824 7859
[37] 8140 8191 8687 9208 9281 9313 10022 10320 10617 10720 10958 11179
[49] 11567 11591 11698 11811
library(zoo)
library(rbenchmark)
# String-based search: collapse both vectors into single strings and let
# gregexpr() find the pattern string inside the data string.
#
# Returns the gregexpr() result unchanged (a list holding match positions
# with their "match.length" attribute). Positions are character offsets in
# the collapsed string, so they equal element indices of `a` only when every
# element of `a` prints as exactly one character.
func1 <- function(a, b) {
  haystack <- paste0(a, collapse = "")
  needle <- paste0(b, collapse = "")
  gregexpr(needle, haystack)
}
# Rolling-window search: slide a window as wide as `b` along `a`, test each
# window against `b` with identical(), and return the positions that are TRUE.
# Relies on rollapply() from the zoo package being attached.
func2 <- function(a, b) {
  window_width <- length(b)
  is_match <- rollapply(a, window_width, identical, b)
  which(is_match)
}
# func3 is the vecIn function defined earlier, aliased so that the three
# candidates being compared follow the same func1/func2/func3 naming.
func3 <- vecIn
Some benchmarks:
# Time the three implementations on the same inputs `a` and `b` using
# rbenchmark; the table that follows reports 100 replications per expression.
benchmark(func1(a,b), func2(a,b), func3(a,b))
test replications elapsed relative user.self sys.self user.child
1 func1(a, b) 100 0.673 5.904 0.680 0.000 0
2 func2(a, b) 100 28.808 252.702 28.198 0.672 0
3 func3(a, b) 100 0.114 1.000 0.116 0.000 0
sys.child
1 0
2 0
3 0
Try `rollapply` from the zoo package:
library(zoo)
# Slide a window of width 2 along y, compare each window with the target
# pair via identical(), and report the start positions of the matching windows.
which(rollapply(y, 2, identical, c("a", "a")))
## [1] 1 2
# Same search for the pair "a", "b", which occurs only once in y.
which(rollapply(y, 2, identical, c("a", "b")))
## [1] 3