问题
I am trying to implement following Rcpp code by calling from R. The computing time is extremely slow. There are lots of for loops involved.
#include <RcppArmadillo.h>
using namespace Rcpp;
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::export]]
arma::mat qpart(
const int& n,
const int& p,
const int& m,
arma::vec& G,
arma::vec& ftime,
arma::vec& cause,
arma::mat& covs,
arma::mat& S1byS0hat,
arma::vec& S0hat,
arma::vec& expz){
arma::mat q(n,p);
q.zeros();
for(int u=0;u<n;++u){
arma::mat q1(1,p);
q1.zeros();
for(int iprime=0;iprime<n;++iprime){
for(int i=0;i<n;++i){
if(cause(iprime)==1 & cause(i)>1 & (ftime(i) < ftime(u)) & (ftime(u) <= ftime(iprime))){
q1 += (covs.row(i) - S1byS0hat.row(iprime))*G(iprime)/G(i)*expz(i)/S0hat(iprime);
}
}
}
q.row(u) = q1/(m*m);
}
return q;
}
Following is an example in R.
#### In R ########
n = 2000
m = 500
p=3
G = runif(n)
ftime = runif(n,0.01,5)
cause = c(rep(0,600),rep(1,1000),rep(2,400))
covs = matrix(rnorm(n*p),n,p)
S1byS0hat = matrix(rnorm(n*p),p,n)
S0hat = rnorm(n)
expz = rnorm(n)
system.time( qpart(n,p,m,G,ftime,cause,covs,t(S1byS0hat),S0hat,expz))
user system elapsed
21.5 0.0 21.5
As we can see, the computing time is very high.
Same code implemented in R and the computing time is very high.
q = matrix(0,n,p)
for(u in 1 : n){
q1 <- matrix(0,p,1)
for(iprime in 1 : n){
for(i in 1 : n){
if(cause[iprime]==1 & cause[i]>1 & (time[i]<time[u]) & (time[u] <= time[iprime])){
q1 = q1 + (covs[i,] - S1byS0hat[,iprime])*G[iprime]/G[i]*expz[i]/S0hat[iprime]
}
}
}
q[u,] = q1/(m*m)
}
Following is the formula that I am trying to implement.
回答1:
Some conditions depends only on u
and iprime
so you can check them much before.
You can also precompute some stuff. This gives:
arma::mat qpart2(
double m,
arma::vec& ftime,
arma::vec& cause,
arma::mat& covs,
arma::mat& S1byS0hat,
arma::vec& G_div_S0hat,
arma::vec& expz_div_G){
double m2 = m * m;
int n = covs.n_rows;
int p = covs.n_cols;
arma::mat q(n, p, arma::fill::zeros);
for (int u = 0; u < n; u++) {
double ftime_u = ftime(u);
for (int iprime = 0; iprime < n; iprime++) {
if (cause(iprime) == 1 && ftime_u <= ftime(iprime)) {
for (int i = 0; i < n; i++) {
if (cause(i) > 1 && ftime(i) < ftime_u) {
double coef = G_div_S0hat(iprime) * expz_div_G(i);
for (int j = 0; j < p; j++) {
q(u, j) += (covs(i, j) - S1byS0hat(iprime, j)) * coef;
}
}
}
}
}
for (int j = 0; j < p; j++) q(u, j) /= m2;
}
return q;
}
Using qpart2(m, ftime, cause, covs, t(S1byS0hat), G / S0hat, expz / G)
takes 3.7 sec (vs 32 sec for your code).
**
Small remarks:
- Is there a reason why you are using arma structures instead of Rcpp ones?
- You should access matrices by columns, not by rows, it should be a bit faster because they are stored column-wise.
来源:https://stackoverflow.com/questions/59911329/how-to-make-rcpp-code-efficient-with-multiple-for-loops