Applying a rolling window regression to an XTS series in R

前端 未结 2 1700
悲哀的现实
悲哀的现实 2021-02-04 13:52

I have an xts of 1033 daily returns points for 5 currency pairs on which I want to run a rolling window regression, but rollapply is not working for my defined function which us

2条回答
  •  无人及你
    2021-02-04 13:54

    New answer

    G. Grothendieck's answer is correct but you can do it faster with the rollRegres package as the following example shows (the roll_regres.fit function is ~118 times faster)

    # simulate data
    set.seed(101)
    n <- 1000
    wdth = 100
    X <- matrix(rnorm(10 * n), n, 10)
    y <- drop(X %*% runif(10)) + rnorm(n)
    Z <- cbind(y, X)
    
    # assign other function
    dolm <- function(x)
      coef(lm.fit(x[, -1], x[, 1]))
    
    # show that they yield the same
    library(zoo)
    library(rollRegres)
    all.equal(
      rollapply(Z, wdth, FUN = dolm,
                by.column = FALSE,  align = "right", fill = NA_real_),
      roll_regres.fit(X, y, wdth)$coefs,
      check.attributes = FALSE)
    #R [1] TRUE
    
    # benchmark
    library(compiler)
    dolm <- cmpfun(dolm)
    
    microbenchmark::microbenchmark(
      newnew = roll_regres.fit(X, y, wdth),
      prev   = rollapply(Z, wdth, FUN = dolm,
                         by.column = FALSE,  align = "right", fill = NA_real_),
      times = 10)
    #R Unit: microseconds
    #R expr        min         lq       mean     median         uq        max neval
    #R newnew    884.938    950.914   1026.134   1025.581   1057.581   1242.075    10
    #R   prev 111057.822 111903.649 118867.761 116857.726 122087.160 141362.229    10
    

    You can also use the roll_regres function from the package if you want to use a R formula instead.

    Old answer

    A third options would be to update the R matrix in a QR decomposition as done in the code below. You can speed this up by doing it in C++ but than you will need the dchud and dchdd subroutines from LINPACK (or another function to update R)

    library(SamplerCompare) # for LINPACK `chdd` and `chud`
    roll_coef <- function(X, y, width){
      n <- nrow(X)
      p <- ncol(X)
      out <- matrix(NA_real_, n, p)
    
      is_first <- TRUE
      i <- width 
      while(i <= n){
        if(is_first){
          is_first <- FALSE
          qr. <- qr(X[1:width, ])
          R <- qr.R(qr.)
    
          # Use X^T for the rest
          X <- t(X)
    
          XtY <- drop(tcrossprod(y[1:width], X[, 1:width]))
        } else {
          x_new <- X[, i]
          x_old <- X[, i - width]
    
          # update R 
          R <- .Fortran(
            "dchud", R, p, p, x_new, 0., 0L, 0L, 
            0., 0., numeric(p), numeric(p), 
            PACKAGE = "SamplerCompare")[[1]]
    
          # downdate R
          R <- .Fortran(
            "dchdd", R, p, p, x_old, 0., 0L, 0L, 
            0., 0., numeric(p), numeric(p), integer(1),
            PACKAGE = "SamplerCompare")[[1]]
    
          # update XtY
          XtY <- XtY + y[i] * x_new - y[i - width] * x_old
        }
    
        coef.    <- .Internal(backsolve(R, XtY, p, TRUE, TRUE))
        out[i, ] <- .Internal(backsolve(R, coef., p, TRUE, FALSE))
    
        i <- i + 1
      }
    
      out
    }
    
    # simulate data
    set.seed(101)
    n <- 1000
    wdth = 100
    X <- matrix(rnorm(10 * n), n, 10)
    y <- drop(X %*% runif(10)) + rnorm(n)
    Z <- cbind(y, X)
    
    # assign other function
    dolm <- function(x) 
      coef(lm.fit(x[, -1], x[, 1]))
    
    # show that they yield the same
    library(zoo)
    all.equal(
      rollapply(Z, wdth, FUN = dolm,  
                by.column = FALSE,  align = "right", fill = NA_real_),
      roll_coef(X, y, wdth), 
      check.attributes = FALSE)
    #R> [1] TRUE
    
    # benchmark
    library(compiler)
    roll_coef <- cmpfun(roll_coef)
    dolm <- cmpfun(dolm)
    microbenchmark::microbenchmark(
      new =  roll_coef(X, y, wdth),
      prev = rollapply(Z, wdth, FUN = dolm,  
                       by.column = FALSE,  align = "right", fill = NA_real_), 
      times = 10)
    #R> Unit: milliseconds
    #R>  expr        min         lq       mean     median         uq       max neval cld
    #R>   new   8.631319   9.010579   9.808525   9.659665   9.973741  11.87083    10  a 
    #R>  prev 118.257128 121.734860 124.489826 122.882318 127.195410 135.21280    10   b
    

    The solution above requires that you form the model.matrix and model.response first but this is just three calls (one extra to model.frame) prior to the call to roll_coef.

提交回复
热议问题