Speed up WMA (Weighted Moving Average) calculation

后端 未结 1 1659
失恋的感觉
失恋的感觉 2021-02-10 21:43

I am trying to calculate exponential moving average on 15 day bars, but want to see \"evolution\" of the 15 day bar EMA on each (end of) day/bar. So, this means that I have 15 d

1条回答
  •  予麋鹿
    予麋鹿 (楼主)
    2021-02-10 21:45

    I have not find a satisfactory solution for my question using R. So I took the old tool, c language, and results are better than I would have ever expected. Thanks for "pushing" me using this great tools of Rcpp, inline etc. Amazing. I guess, whenever I have performance requirements in the future and can not be met using R I will add C to R and performance is there. So, please see below my code and resolution of the performance issues.

    # How to speedup cumulative EMA calculation
    # 
    ###############################################################################
    
    library(quantmod)
    library(Rcpp)
    library(inline)
    library(rbenchmark)
    
    do.call.rbind <- function(lst) {
        while(length(lst) > 1) {
            idxlst <- seq(from=1, to=length(lst), by=2)
    
            lst <- lapply(idxlst, function(i) {
                        if(i==length(lst)) { return(lst[[i]]) }
    
                        return(rbind(lst[[i]], lst[[i+1]]))
                    })
        }
        lst[[1]]
    }
    
    to.period.cumulative <- function(x, name=NULL, period="days", numPeriods=15) {
        if(is.null(name))
            name <- deparse(substitute(x))
    
        cnames <- c("Open", "High", "Low", "Close")
        if (has.Vo(x)) 
            cnames <- c(cnames, "Volume")
    
        cnames <- paste(name, cnames, sep=".") 
    
        if (quantmod:::is.OHLCV(x)) {
            x <- quantmod:::OHLCV(x)
            out <- do.call.rbind( 
                    lapply(split(x, f=period, k=numPeriods), 
                            function(x) cbind(rep(first(x[,1]), NROW(x[,1])), 
                                    cummax(x[,2]), cummin(x[,3]), x[,4], cumsum(x[,5]))))
        } else if (quantmod:::is.OHLC(x)) {
            x <- OHLC(x)
            out <- do.call.rbind( 
                    lapply(split(x, f=period, k=numPeriods), 
                            function(x) cbind(rep(first(x[,1]), NROW(x[,1])), 
                                    cummax(x[,2]), cummin(x[,3]), x[,4])))
        } else {
            stop("Object does not have OHLC(V).")
        }
    
        colnames(out) <- cnames
    
        return(out)
    }
    
    EMA.cumulative<-function(cumulativeBars, nEMA = 4, period="days", numPeriods=15) {
        barsEndptCl <- Cl(cumulativeBars[endpoints(cumulativeBars, on=period, k=numPeriods)])
    
        # TODO: This is sloooooooooooooooooow... 
        outEMA <- do.call.rbind(
                lapply(split(Cl(cumulativeBars), period), 
                        function(x) {
                            previousFullBars <- barsEndptCl[index(barsEndptCl) < last(index(x)), ]
                            if (NROW(previousFullBars) >= (nEMA - 1)) {
                                    last(EMA(last(rbind(previousFullBars, x), n=(nEMA + 1)), n=nEMA))
                            } else {
                                xts(NA, order.by=index(x))
                            }
                        }))
    
        colnames(outEMA) <- paste("EMA", nEMA, sep="")
    
        return(outEMA)
    }
    
    EMA.c.c.code <- '
        /* Initalize loop and PROTECT counters */
        int i, P=0;
    
        /* ensure that cumbars and fullbarsrep is double */
        if(TYPEOF(cumbars) != REALSXP) {
          PROTECT(cumbars = coerceVector(cumbars, REALSXP)); P++;
        }
    
        /* Pointers to function arguments */
        double *d_cumbars = REAL(cumbars);
        int i_nper = asInteger(nperiod);
        int i_n = asInteger(n);
        double d_ratio = asReal(ratio);
    
        /* Input object length */
        int nr = nrows(cumbars);
    
        /* Initalize result R object */
        SEXP result;
        PROTECT(result = allocVector(REALSXP,nr)); P++;
        double *d_result = REAL(result);
    
        /* Find first non-NA input value */
        int beg = i_n*i_nper - 1;
        d_result[beg] = 0;
        for(i = 0; i <= beg; i++) {
            /* Account for leading NAs in input */
            if(ISNA(d_cumbars[i])) {
                d_result[i] = NA_REAL;
                beg++;
                d_result[beg] = 0;
                continue;
            }
            /* Set leading NAs in output */
            if(i < beg) {
                d_result[i] = NA_REAL;
            }
            /* Raw mean to start EMA - but only on full bars*/
            if ((i != 0) && (i%i_nper == (i_nper - 1))) {
                d_result[beg] += d_cumbars[i] / i_n;
            }
        }
    
        /* Loop over non-NA input values */
        int i_lookback = 0;
        for(i = beg+1; i < nr; i++) {
            i_lookback = i%i_nper;
    
            if (i_lookback == 0) {
                i_lookback = 1;
            } 
            /*Previous result should be based only on full bars*/
            d_result[i] = d_cumbars[i] * d_ratio + d_result[i-i_lookback] * (1-d_ratio);
        }
    
        /* UNPROTECT R objects and return result */
        UNPROTECT(P);
        return(result);
    '
    
    EMA.c.c <- cfunction(signature(cumbars="numeric", nperiod="numeric", n="numeric",     ratio="numeric"), EMA.c.c.code)
    
    EMA.cumulative.c<-function(cumulativeBars, nEMA = 4, period="days", numPeriods=15) {
        ratio <- 2/(nEMA+1)
    
        outEMA <- EMA.c.c(cumbars=Cl(cumulativeBars), nperiod=numPeriods, n=nEMA, ratio=ratio)  
    
        outEMA <- reclass(outEMA, Cl(cumulativeBars))
    
        colnames(outEMA) <- paste("EMA", nEMA, sep="")
    
        return(outEMA)
    }
    
    getSymbols("SPY", from="2010-01-01")
    
    SPY.cumulative <- to.period.cumulative(SPY, name="SPY")
    
    system.time(
            SPY.EMA <- EMA.cumulative(SPY.cumulative)
    )
    
    system.time(
            SPY.EMA.c <- EMA.cumulative.c(SPY.cumulative)
    )
    
    
    res <- benchmark(EMA.cumulative(SPY.cumulative), EMA.cumulative.c(SPY.cumulative),
            columns=c("test", "replications", "elapsed", "relative", "user.self", "sys.self"),
            order="relative",
            replications=10)
    
    print(res)
    

    EDIT: To give an indication of performance improvement over my cumbersome (I am sure it can be made better, since in effect I have created double for loop) R here is a print out:

    > print(res)
                                  test replications elapsed relative user.self
    2 EMA.cumulative.c(SPY.cumulative)           10   0.026    1.000     0.024
    1   EMA.cumulative(SPY.cumulative)           10  57.732 2220.462    56.755
    

    So, by my standards, a SF type of improvement...

    0 讨论(0)
提交回复
热议问题