I am creating a DataFrame to hold parsed haproxy HTTP log files, which have quite a few fields (25+).
If I add more than 20 vectors (one for each field), I get the c
The other common approach with Rcpp is to just use an outer list containing as many DataFrame objects as you need (with each limited by the number of elements provided via the old-school macro expansion / repetition in the corresponding header).
In (untested) code:
// Build each group of columns as its own DataFrame, so that every create()
// call stays within the number of arguments Rcpp provides overloads for,
// then hand them back together as one named list.
Rcpp::DataFrame a = Rcpp::DataFrame::create(/* ... */);
Rcpp::DataFrame b = Rcpp::DataFrame::create(/* ... */);
Rcpp::DataFrame c = Rcpp::DataFrame::create(/* ... */);
return Rcpp::List::create(Rcpp::Named("a") = a,
                          Rcpp::Named("b") = b,
                          Rcpp::Named("c") = c);
Yes, you have hit a hard limit -- Rcpp
is limited by the C++98 standard, which requires explicit code bloat to support 'variadic' arguments. Essentially, a new overload must be generated for each create
function used, and to avoid choking the compiler Rcpp
just provides up to 20.
A workaround would be to use a 'builder' class, where you successively add elements, and then convert to DataFrame
at the end. A simple example of such a class -- we create a ListBuilder
object, for which we successively add
new columns. Try running Rcpp::sourceCpp()
with this file to see the output.
#include <Rcpp.h>
using namespace Rcpp;
class ListBuilder {
public:
ListBuilder() {};
~ListBuilder() {};
inline ListBuilder& add(std::string const& name, SEXP x) {
names.push_back(name);
// NOTE: we need to protect the SEXPs we pass in; there is
// probably a nicer way to handle this but ...
elements.push_back(PROTECT(x));
return *this;
}
inline operator List() const {
List result(elements.size());
for (size_t i = 0; i < elements.size(); ++i) {
result[i] = elements[i];
}
result.attr("names") = wrap(names);
UNPROTECT(elements.size());
return result;
}
inline operator DataFrame() const {
List result = static_cast<List>(*this);
result.attr("class") = "data.frame";
result.attr("row.names") = IntegerVector::create(NA_INTEGER, XLENGTH(elements[0]));
return result;
}
private:
std::vector<std::string> names;
std::vector<SEXP> elements;
ListBuilder(ListBuilder const&) {}; // not safe to copy
};
// Demonstrates ListBuilder: packs the three arguments into a data frame
// whose columns are named "foo", "bar" and "baz", in that order.
// [[Rcpp::export]]
DataFrame test_builder(SEXP x, SEXP y, SEXP z) {
  ListBuilder builder;
  builder.add("foo", x);
  builder.add("bar", y);
  builder.add("baz", z);
  // The builder's conversion operator produces the DataFrame on return.
  return builder;
}
/*** R
test_builder(1:5, letters[1:5], rnorm(5))
*/
PS: With Rcpp11, we have variadic functions and hence the limitations are removed.