Is the “*apply” family really not vectorized?

前端 未结 4 1749
萌比男神i
萌比男神i 2020-11-22 05:25

So we are used to say to every R new user that \"apply isn\'t vectorized, check out the Patrick Burns R Inferno Circle 4\" which says (I quote):

<
4条回答
  •  囚心锁ツ
    2020-11-22 05:53

    First of all, in your example you make tests on a "data.frame" which is not fair for colMeans, apply and "[.data.frame" since they have an overhead:

    system.time(as.matrix(m))  #called by `colMeans` and `apply`
    #   user  system elapsed 
    #   1.03    0.00    1.05
    system.time(for(i in 1:ncol(m)) m[, i])  #in the `for` loop
    #   user  system elapsed 
    #  12.93    0.01   13.07
    

    On a matrix, the picture is a bit different:

    mm = as.matrix(m)
    system.time(colMeans(mm))
    #   user  system elapsed 
    #   0.01    0.00    0.01 
    system.time(apply(mm, 2, mean))
    #   user  system elapsed 
    #   1.48    0.03    1.53 
    system.time(for(i in 1:ncol(mm)) mean(mm[, i]))
    #   user  system elapsed 
    #   1.22    0.00    1.21
    

    Regading the main part of the question, the main difference between lapply/mapply/etc and straightforward R-loops is where the looping is done. As Roland notes, both C and R loops need to evaluate an R function in each iteration which is the most costly. The really fast C functions are those that do everything in C, so, I guess, this should be what "vectorised" is about?

    An example where we find the mean in each of a "list"s elements:

    (EDIT May 11 '16 : I believe the example with finding the "mean" is not a good setup for the differences between evaluating an R function iteratively and compiled code, (1) because of the particularity of R's mean algorithm on "numeric"s over a simple sum(x) / length(x) and (2) it should make more sense to test on "list"s with length(x) >> lengths(x). So, the "mean" example is moved to the end and replaced with another.)

    As a simple example we could consider the finding of the opposite of each length == 1 element of a "list":

    In a tmp.c file:

    #include 
    #define USE_RINTERNALS 
    #include 
    #include 
    
    /* call a C function inside another */
    double oppC(double x) { return(ISNAN(x) ? NA_REAL : -x); }
    SEXP sapply_oppC(SEXP x)
    {
        SEXP ans = PROTECT(allocVector(REALSXP, LENGTH(x)));
        for(int i = 0; i < LENGTH(x); i++) 
            REAL(ans)[i] = oppC(REAL(VECTOR_ELT(x, i))[0]);
    
        UNPROTECT(1);
        return(ans);
    }
    
    /* call an R function inside a C function;
     * will be used with 'f' as a closure and as a builtin */    
    SEXP sapply_oppR(SEXP x, SEXP f)
    {
        SEXP call = PROTECT(allocVector(LANGSXP, 2));
        SETCAR(call, install(CHAR(STRING_ELT(f, 0))));
    
        SEXP ans = PROTECT(allocVector(REALSXP, LENGTH(x)));     
        for(int i = 0; i < LENGTH(x); i++) { 
            SETCADR(call, VECTOR_ELT(x, i));
            REAL(ans)[i] = REAL(eval(call, R_GlobalEnv))[0];
        }
    
        UNPROTECT(2);
        return(ans);
    }
    

    And in R side:

    system("R CMD SHLIB /home/~/tmp.c")
    dyn.load("/home/~/tmp.so")
    

    with data:

    set.seed(007)
    myls = rep_len(as.list(c(NA, runif(3))), 1e7)
    
    #a closure wrapper of `-`
    oppR = function(x) -x
    
    for_oppR = compiler::cmpfun(function(x, f)
    {
        f = match.fun(f)  
        ans = numeric(length(x))
        for(i in seq_along(x)) ans[[i]] = f(x[[i]])
        return(ans)
    })
    

    Benchmarking:

    #call a C function iteratively
    system.time({ sapplyC =  .Call("sapply_oppC", myls) }) 
    #   user  system elapsed 
    #  0.048   0.000   0.047 
    
    #evaluate an R closure iteratively
    system.time({ sapplyRC =  .Call("sapply_oppR", myls, "oppR") }) 
    #   user  system elapsed 
    #  3.348   0.000   3.358 
    
    #evaluate an R builtin iteratively
    system.time({ sapplyRCprim =  .Call("sapply_oppR", myls, "-") }) 
    #   user  system elapsed 
    #  0.652   0.000   0.653 
    
    #loop with a R closure
    system.time({ forR = for_oppR(myls, "oppR") })
    #   user  system elapsed 
    #  4.396   0.000   4.409 
    
    #loop with an R builtin
    system.time({ forRprim = for_oppR(myls, "-") })
    #   user  system elapsed 
    #  1.908   0.000   1.913 
    
    #for reference and testing 
    system.time({ sapplyR = unlist(lapply(myls, oppR)) })
    #   user  system elapsed 
    #  7.080   0.068   7.170 
    system.time({ sapplyRprim = unlist(lapply(myls, `-`)) }) 
    #   user  system elapsed 
    #  3.524   0.064   3.598 
    
    all.equal(sapplyR, sapplyRprim)
    #[1] TRUE 
    all.equal(sapplyR, sapplyC)
    #[1] TRUE
    all.equal(sapplyR, sapplyRC)
    #[1] TRUE
    all.equal(sapplyR, sapplyRCprim)
    #[1] TRUE
    all.equal(sapplyR, forR)
    #[1] TRUE
    all.equal(sapplyR, forRprim)
    #[1] TRUE
    

    (Follows the original example of mean finding):

    #all computations in C
    all_C = inline::cfunction(sig = c(R_ls = "list"), body = '
        SEXP tmp, ans;
        PROTECT(ans = allocVector(REALSXP, LENGTH(R_ls)));
    
        double *ptmp, *pans = REAL(ans);
    
        for(int i = 0; i < LENGTH(R_ls); i++) {
            pans[i] = 0.0;
    
            PROTECT(tmp = coerceVector(VECTOR_ELT(R_ls, i), REALSXP));
            ptmp = REAL(tmp);
    
            for(int j = 0; j < LENGTH(tmp); j++) pans[i] += ptmp[j];
    
            pans[i] /= LENGTH(tmp);
    
            UNPROTECT(1);
        }
    
        UNPROTECT(1);
        return(ans);
    ')
    
    #a very simple `lapply(x, mean)`
    C_and_R = inline::cfunction(sig = c(R_ls = "list"), body = '
        SEXP call, ans, ret;
    
        PROTECT(call = allocList(2));
        SET_TYPEOF(call, LANGSXP);
        SETCAR(call, install("mean"));
    
        PROTECT(ans = allocVector(VECSXP, LENGTH(R_ls)));
        PROTECT(ret = allocVector(REALSXP, LENGTH(ans)));
    
        for(int i = 0; i < LENGTH(R_ls); i++) {
            SETCADR(call, VECTOR_ELT(R_ls, i));
            SET_VECTOR_ELT(ans, i, eval(call, R_GlobalEnv));
        }
    
        double *pret = REAL(ret);
        for(int i = 0; i < LENGTH(ans); i++) pret[i] = REAL(VECTOR_ELT(ans, i))[0];
    
        UNPROTECT(3);
        return(ret);
    ')                    
    
    R_lapply = function(x) unlist(lapply(x, mean))                       
    
    R_loop = function(x) 
    {
        ans = numeric(length(x))
        for(i in seq_along(x)) ans[i] = mean(x[[i]])
        return(ans)
    } 
    
    R_loopcmp = compiler::cmpfun(R_loop)
    
    
    set.seed(007); myls = replicate(1e4, runif(1e3), simplify = FALSE)
    all.equal(all_C(myls), C_and_R(myls))
    #[1] TRUE
    all.equal(all_C(myls), R_lapply(myls))
    #[1] TRUE
    all.equal(all_C(myls), R_loop(myls))
    #[1] TRUE
    all.equal(all_C(myls), R_loopcmp(myls))
    #[1] TRUE
    
    microbenchmark::microbenchmark(all_C(myls), 
                                   C_and_R(myls), 
                                   R_lapply(myls), 
                                   R_loop(myls), 
                                   R_loopcmp(myls), 
                                   times = 15)
    #Unit: milliseconds
    #            expr       min        lq    median        uq      max neval
    #     all_C(myls)  37.29183  38.19107  38.69359  39.58083  41.3861    15
    #   C_and_R(myls) 117.21457 123.22044 124.58148 130.85513 169.6822    15
    #  R_lapply(myls)  98.48009 103.80717 106.55519 109.54890 116.3150    15
    #    R_loop(myls) 122.40367 130.85061 132.61378 138.53664 178.5128    15
    # R_loopcmp(myls) 105.63228 111.38340 112.16781 115.68909 128.1976    15
    

提交回复
热议问题