I have an xts of 1033 daily returns points for 5 currency pairs on which I want to run a rolling window regression, but rollapply is not working for my defined function which us
G. Grothendieck's answer is correct but you can do it faster with the rollRegres
package as the following example shows (the roll_regres.fit
function is ~118 times faster)
# simulate data
set.seed(101)
n <- 1000
wdth = 100
X <- matrix(rnorm(10 * n), n, 10)
y <- drop(X %*% runif(10)) + rnorm(n)
Z <- cbind(y, X)
# assign other function
dolm <- function(x)
coef(lm.fit(x[, -1], x[, 1]))
# show that they yield the same
library(zoo)
library(rollRegres)
all.equal(
rollapply(Z, wdth, FUN = dolm,
by.column = FALSE, align = "right", fill = NA_real_),
roll_regres.fit(X, y, wdth)$coefs,
check.attributes = FALSE)
#R [1] TRUE
# benchmark
library(compiler)
dolm <- cmpfun(dolm)
microbenchmark::microbenchmark(
newnew = roll_regres.fit(X, y, wdth),
prev = rollapply(Z, wdth, FUN = dolm,
by.column = FALSE, align = "right", fill = NA_real_),
times = 10)
#R Unit: microseconds
#R expr min lq mean median uq max neval
#R newnew 884.938 950.914 1026.134 1025.581 1057.581 1242.075 10
#R prev 111057.822 111903.649 118867.761 116857.726 122087.160 141362.229 10
You can also use the roll_regres
function from the package if you want to use a R formula instead.
A third options would be to update the R matrix in a QR decomposition as done in the code below. You can speed this up by doing it in C++ but than you will need the dchud
and dchdd
subroutines from LINPACK (or another function to update R)
library(SamplerCompare) # for LINPACK `chdd` and `chud`
roll_coef <- function(X, y, width){
n <- nrow(X)
p <- ncol(X)
out <- matrix(NA_real_, n, p)
is_first <- TRUE
i <- width
while(i <= n){
if(is_first){
is_first <- FALSE
qr. <- qr(X[1:width, ])
R <- qr.R(qr.)
# Use X^T for the rest
X <- t(X)
XtY <- drop(tcrossprod(y[1:width], X[, 1:width]))
} else {
x_new <- X[, i]
x_old <- X[, i - width]
# update R
R <- .Fortran(
"dchud", R, p, p, x_new, 0., 0L, 0L,
0., 0., numeric(p), numeric(p),
PACKAGE = "SamplerCompare")[[1]]
# downdate R
R <- .Fortran(
"dchdd", R, p, p, x_old, 0., 0L, 0L,
0., 0., numeric(p), numeric(p), integer(1),
PACKAGE = "SamplerCompare")[[1]]
# update XtY
XtY <- XtY + y[i] * x_new - y[i - width] * x_old
}
coef. <- .Internal(backsolve(R, XtY, p, TRUE, TRUE))
out[i, ] <- .Internal(backsolve(R, coef., p, TRUE, FALSE))
i <- i + 1
}
out
}
# simulate data
set.seed(101)
n <- 1000
wdth = 100
X <- matrix(rnorm(10 * n), n, 10)
y <- drop(X %*% runif(10)) + rnorm(n)
Z <- cbind(y, X)
# assign other function
dolm <- function(x)
coef(lm.fit(x[, -1], x[, 1]))
# show that they yield the same
library(zoo)
all.equal(
rollapply(Z, wdth, FUN = dolm,
by.column = FALSE, align = "right", fill = NA_real_),
roll_coef(X, y, wdth),
check.attributes = FALSE)
#R> [1] TRUE
# benchmark
library(compiler)
roll_coef <- cmpfun(roll_coef)
dolm <- cmpfun(dolm)
microbenchmark::microbenchmark(
new = roll_coef(X, y, wdth),
prev = rollapply(Z, wdth, FUN = dolm,
by.column = FALSE, align = "right", fill = NA_real_),
times = 10)
#R> Unit: milliseconds
#R> expr min lq mean median uq max neval cld
#R> new 8.631319 9.010579 9.808525 9.659665 9.973741 11.87083 10 a
#R> prev 118.257128 121.734860 124.489826 122.882318 127.195410 135.21280 10 b
The solution above requires that you form the model.matrix
and model.response
first but this is just three calls (one extra to model.frame
) prior to the call to roll_coef
.