I am using a datatype of std::vector< std::vector< T > >
to store a 2D matrix/array. I would like to determine the unique rows of this matrix. I am l
You should also consider using hashing: it preserves row ordering and could be faster than sort/unique (amortized O(m*n) if alteration of the original is permitted, O(2*m*n) if a copy is required) -- especially noticeable for large matrices. (On small matrices you are probably better off with Billy's solution, since his requires no additional memory allocation to keep track of the hashes.)
Anyway, taking advantage of Boost.Unordered, here's what you can do:
#include <vector>
#include <boost/foreach.hpp>
#include <boost/ref.hpp>
#include <boost/typeof/typeof.hpp>
#include <boost/unordered_set.hpp>
namespace boost {
template< typename T >
size_t hash_value(const boost::reference_wrapper< T >& v) {
return boost::hash_value(v.get());
}
template< typename T >
bool operator==(const boost::reference_wrapper< T >& lhs, const boost::reference_wrapper< T >& rhs) {
return lhs.get() == rhs.get();
}
}
// destructive, but fast if the original copy is no longer required
template
void uniqueRows_inplace(std::vector >& A)
{
boost::unordered_set< boost::reference_wrapper< std::vector< T > const > > unique(A.size());
for (BOOST_AUTO(it, A.begin()); it != A.end(); ) {
if (unique.insert(boost::cref(*it)).second) {
++it;
} else {
A.erase(it);
}
}
}
// returning a copy (extra copying cost)
template
void uniqueRows_copy(const std::vector > &A,
std::vector< std::vector< T > > &ret)
{
ret.reserve(A.size());
boost::unordered_set< boost::reference_wrapper< std::vector< T > const > > unique;
BOOST_FOREACH(const std::vector< T >& row, A) {
if (unique.insert(boost::cref(row)).second) {
ret.push_back(row);
}
}
}