I am trying to manipulate column data in a two-column matrix and output it as a data.frame.
The matrix that I have is in this format where both the values in the sta
You could use Rcpp:
# Example input: an 8 x 2 numeric matrix with one (start, end) pair per row.
# NA in the second column marks a row that carries no end value.
# Values are listed column by column (R's default fill order).
start_end <- matrix(
  c(
    1, 2, 3, 7, 8, 11, 12, 14,     # column 1: starts
    6, 9, 15, NA, NA, NA, NA, NA   # column 2: ends
  ),
  ncol = 2L
)
library(Rcpp)
# fun(): pair every start with the smallest end that is >= that start.
#
# Argument: Mat, a two-column integer matrix (column 1 = starts,
#   column 2 = ends). NA entries in either column are dropped before matching.
# Returns: a data.frame with columns `start` (the sorted non-NA starts) and
#   `end` (the matched end, or NA when no end >= start exists, including the
#   case where the end column holds no non-NA value at all).
#
# NOTE: the C++ source lives inside a single-quoted R string, so it must not
# contain any single-quote character (also not inside comments).
cppFunction('
DataFrame fun(const IntegerMatrix& Mat) {
  IntegerVector start = na_omit(Mat(_, 0)); // starts without NAs
  std::sort(start.begin(), start.end());    // ascending starts
  IntegerVector end = na_omit(Mat(_, 1));   // ends without NAs
  std::sort(end.begin(), end.end());        // ascending ends

  const R_xlen_t n_start = start.length();
  const R_xlen_t n_end = end.length();
  IntegerVector res(n_start, NA_INTEGER);   // stays NA unless an end is matched

  R_xlen_t j = 0;
  for (R_xlen_t i = 0; i < n_start; i++) {
    // Both vectors are sorted, so j only ever moves forward.
    // The bound is tested before end[j] is read (safe for an empty end vector).
    while (j < n_end && end[j] < start[i]) {
      j++;
    }
    if (j == n_end) break; // ends exhausted: all remaining starts keep NA
    res[i] = end[j];       // smallest end that is >= start[i]
  }
  return DataFrame::create(_["start"] = start, _["end"] = res);
}
')
# Match each start to its end, then collapse all starts that share an end
# into a single comma-separated string (one output row per end value).
library(data.table)
Res <- fun(start_end)
setDT(Res)  # turn the returned data.frame into a data.table by reference
Res[, list(start = paste0(start, collapse = ",")), by = "end"]
# Printed result:
#    end    start
# 1:   6    1,2,3
# 2:   9      7,8
# 3:  15 11,12,14