For two logical vectors, `x` and `y`, of length > 1E8, what is the fastest way to calculate the 2x2 cross tabulations?
I suspect the answer is to w
Here is an answer using Rcpp sugar.
# Benchmark inputs: two random logical vectors, each of length N.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
N <- 1e8
x <- sample(c(TRUE, FALSE), N, replace = TRUE)
y <- sample(c(TRUE, FALSE), N, replace = TRUE)
# Count the four cells of the 2x2 cross tabulation of two logical vectors.
#
# v1, v2: logical vectors (or objects supporting `&`, `!` and `sum`).
# Returns the counts in the order:
#   v1 & v2, v1 & !v2, !v1 & v2, !v1 & !v2.
func_logical <- function(v1, v2) {
  both <- sum(v1 & v2)
  only_v1 <- sum(v1 & !v2)
  only_v2 <- sum(!v1 & v2)
  neither <- sum(!v1 & !v2)
  c(both, only_v1, only_v2, neither)
}
library(Rcpp)
library(inline)
# Compile a C++ routine (inline + Rcpp sugar) that returns the four cell
# counts of the 2x2 cross tabulation of x and y as an integer vector, in the
# order: x & y, x & !y, !x & y, !x & !y.
# Both arguments are read as Rcpp::LogicalVector; an error is raised when
# their lengths differ.
# NOTE(review): elements are combined with plain multiplication, so NA values
# are not treated specially -- confirm inputs are NA-free.
doCrossTab1 <- cxxfunction(signature(x="integer", y = "integer"), body='
Rcpp::LogicalVector Vx(x);
Rcpp::LogicalVector Vy(y);
// The element-wise products below are only meaningful for equal lengths.
if (Vx.size() != Vy.size()) Rcpp::stop("x and y must have the same length");
Rcpp::IntegerVector V(4);
V[0] = sum(Vx*Vy);
V[1] = sum(Vx*!Vy);
V[2] = sum(!Vx*Vy);
V[3] = sum(!Vx*!Vy);
return( wrap(V));
'
, plugin="Rcpp")
# Time the Rcpp sugar version on the full-size inputs.
system.time(doCrossTab1(x, y))

# library() stops immediately if bit is not installed; require() would only
# warn and return FALSE, deferring the failure to the as.bit() call.
library(bit)

# Time the bit-package approach. The conversion to bit vectors happens inside
# the timed region, so it is included in the reported time.
system.time({
  xb <- as.bit(x)
  yb <- as.bit(y)
  func_logical(xb, yb)
})
which results in:
> system.time(doCrossTab1(x,y))
user system elapsed
1.067 0.002 1.069
> system.time(
+ {
+ xb <- as.bit(x)
+ yb <- as.bit(y)
+ func_logical(xb,yb)
+ })
user system elapsed
1.451 0.001 1.453
So, we can get a little speed up over the bit package, though I'm surprised at how competitive the times are.
Update: In honor of Iterator, here is an Rcpp iterator solution:
# Compile a C++ routine (inline + Rcpp) that walks x and y in lockstep with
# iterators and returns the four cell counts of the 2x2 cross tabulation as an
# integer vector, in the order: x & y, x & !y, !x & y, !x & !y.
# An error is raised when the lengths differ, because the loop only checks the
# x iterator against its end and would otherwise run past the end of a
# shorter y.
# NOTE(review): elements are combined with plain multiplication, so NA values
# are not treated specially -- confirm inputs are NA-free.
doCrossTab2 <- cxxfunction(signature(x="integer", y = "integer"), body='
Rcpp::LogicalVector Vx(x);
Rcpp::LogicalVector Vy(y);
// ity is advanced together with itx but never bounds-checked itself.
if (Vx.size() != Vy.size()) Rcpp::stop("x and y must have the same length");
Rcpp::IntegerVector V(4);
V[0]=V[1]=V[2]=V[3]=0;
LogicalVector::iterator itx = Vx.begin();
LogicalVector::iterator ity = Vy.begin();
while(itx!=Vx.end()){
V[0] += (*itx)*(*ity);
V[1] += (*itx)*(!*ity);
V[2] += (!*itx)*(*ity);
V[3] += (!*itx)*(!*ity);
itx++;
ity++;
}
return( wrap(V));
'
, plugin="Rcpp")
# Time the iterator-based version on the same x and y; the timing recorded
# for this call is reproduced below.
system.time(doCrossTab2(x, y))
#    user  system elapsed
#   0.780   0.001   0.782