Recursive regression in R

后端未结

关注

 3  1237

清酒与你 2021-02-10 05:41

Say I have a data frame in R as follows:

> set.seed(1)
> X <- runif(50, 0, 1)
> Y <- runif(50, 0, 1)
> df <- data.frame(X,Y)
> head(df)


      
      
        
          3条回答        

        
                    
            
            
                         
                
              
              
                
                   死守一世寂寞
                                             
                
                
                (楼主)
            
              
              
                2021-02-10 05:54
              

            
            
                        
The solution Greg proposes with biglm is faster than the solution LyzandeR suggest with lm but still quite slow. There is a lot of overhead which can be avoid with the function I show below. I figure you can make it considerably faster if you do it all C++ with Rcpp

# simulate data
set.seed(101)
n <- 1000
X <- matrix(rnorm(10 * n), n, 10)
y <- drop(10 + X %*% runif(10)) + rnorm(n)
dat <- data.frame(y = y, X)

# assign wrapper for biglm
biglm_wrapper <- function(formula, data, min_window_size){
  mf <- model.frame(formula, data)
  X <- model.matrix(terms(mf), mf)
  y - model.response(mf)

  n <- nrow(X)
  p <- ncol(X)
  storage.mode(X) <- "double"
  storage.mode(y) <- "double"
  w <- 1
  qr <- list(
    d = numeric(p), rbar = numeric(choose(p, 2)), 
    thetab = numeric(p), sserr = 0, checked = FALSE, tol = numeric(p))
  nrbar = length(qr$rbar)
  beta. <- numeric(p)

  out <- NULL
  for(i in 1:n){
    row <- X[i, ] # will be over written
    qr[c("d", "rbar", "thetab", "sserr")] <- .Fortran(
      "INCLUD", np = p, nrbar = nrbar, weight = w, xrow = row, yelem = y[i], 
      d = qr$d, rbar = qr$rbar, thetab = qr$thetab, sserr = qr$sserr, ier = i - 0L, 
      PACKAGE = "biglm")[
        c("d", "rbar", "thetab", "sserr")]

    if(i >= min_window_size){
      tmp <- .Fortran(
        "REGCF", np = p, nrbar = nrbar, d = qr$d, rbar = qr$rbar,
        thetab = qr$thetab, tol = qr$tol, beta = beta., nreq = p, ier = i,
        PACKAGE = "biglm")
      Coef <- tmp$beta

      # compute vcov. See biglm/R/vcov.biglm.R
      R <- diag(p)
      R[row(R) > col(R)] <- qr$rbar
      R <- t(R)
      R <- sqrt(qr$d) * R
      ok <- qr$d != 0
      R[ok, ok] <- chol2inv(R[ok, ok, drop = FALSE])
      R[!ok, ] <- NA
      R[ ,!ok] <- NA
      out <- c(out, list(cbind(
        coef = Coef, 
        SE   = sqrt(diag(R * qr$sserr / (i - p + sum(!ok)))))))
    }
  }

  out
}

# assign function to compare with 
library(biglm)
f2 <- function(formula, data, min_window_size){
  fit1 <- biglm(formula, data = data[1:min_window_size, ])
  data.split <- 
    split(data, c(rep(NA,min_window_size),1:(nrow(data) - min_window_size)))
  out4 <- Reduce(update, data.split, init=fit1, accumulate=TRUE)
  lapply(out4, function(x) summary(x)$mat[, c("Coef", "SE")])
}

# show that the two gives the same
frm <- y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10
r1 <- biglm_wrapper(frm, dat, 25)
r2 <- f2(frm, dat, 25)
all.equal(r1, r2, check.attributes = FALSE)
#R> [1] TRUE

# show run time
microbenchmark::microbenchmark(
  r1 = biglm_wrapper(frm, dat, 25), 
  r2 = f2(frm, dat, 25), 
  r3 = lapply(
    25:nrow(dat), function(x) lm(frm, data = dat[1:x , ])),
  times = 5)
#R> Unit: milliseconds
#R>  expr        min         lq       mean    median         uq        max neval cld
#R>    r1   43.72469   44.33467   44.79847   44.9964   45.33536   45.60124     5 a  
#R>    r2 1101.51558 1161.72464 1204.68884 1169.4580 1246.74321 1344.00278     5  b 
#R>    r3 2080.52513 2232.35939 2231.02060 2253.1420 2260.74737 2328.32908     5   c

    
             
                                                        
            
            
              
                
                0
              
                   
                
               讨论(0)
              
                                                  
              
              
                          
             
       
          
              
                                       
     查看其它3个回答


            
                         
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
                              			
        
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复