I want to sort a large array of integers (say 1 million elements) lexicographically.
Example:
input [] = { 100, 21 , 22 , 99 , 1 , 927 }
sorted[] = { 1 , 100 , 21 , 22 , 927 , 99 }
Here's another algorithm which does some of the computation before sorting. It seems to be quite fast, despite the additional copying (see comparisons).
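Note: the input values must be non-negative and no larger than `std::numeric_limits<int>::max()/10`; otherwise padding a value with zeros up to the maximum digit count can overflow `int`.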
N.B. you can optimize `count_digits` and `my_pow10`; for example, see "Three Optimization Tips for C++" from Andrei Alexandrescu and "Any way faster than pow() to compute an integer power of 10 in C++?"
Helpers:
#include <algorithm>
#include <iostream>
#include <iterator>
#include <limits>
#include <random>
#include <utility>
#include <vector>
// Counts the decimal digits of `p` by dividing by 10 until nothing is left.
// Straightforward, non-optimized implementation.
// Note: the result is `0` for `p == 0`, because the loop body never runs.
int count_digits(int p)
{
    int digits = 0;
    while (p != 0)
    {
        p /= 10;   // drop the least significant decimal digit
        ++digits;
    }
    return digits;
}
// Computes 10 raised to the power `exp` with a plain multiplication loop
// (non-optimized). The result is accumulated in an `int`, so `exp` must be
// small enough for 10^exp to fit into `int`.
int my_pow10(unsigned exp)
{
    int power = 1;
    while (exp > 0)
    {
        power *= 10;
        --exp;
    }
    return power;
}
Algorithm (note - not in-place):
// Helper that gives every integer the same number of digits.
//
// Returns a pair {key, p}: `key` is `p` with zeros appended on the right so
// that it has `maxDigits` decimal digits, and `p` is the unmodified value.
//
// Parameters:
//   p          the value to pad
//   maxDigits  the digit count every key is padded to; must be at least
//              count_digits(p), since the difference is passed to my_pow10
//              as an unsigned exponent
//
// The padded key is computed with a plain multiplication, so it must fit
// into the arithmetic type of `p * my_pow10(...)`.
template<class T, class U>
std::pair<T, T> lexicographic_pair_helper(T const p, U const maxDigits)
{
    auto const digits = count_digits(p);
    // append zeros so that `l` has `maxDigits` digits
    auto const l = static_cast<T>( p * my_pow10(maxDigits - digits) );
    return {l, p};
}
// Vector of (zero-padded key, original value) pairs, where both members have
// the value type of the iterator `RaIt`.
template<class RaIt>
using pair_vec
    = std::vector<std::pair<typename std::iterator_traits<RaIt>::value_type,
                            typename std::iterator_traits<RaIt>::value_type>>;

// Sorts the integers in [p_beg, p_end) lexicographically, i.e. in the order
// of their decimal representations. Not in-place: the input range is left
// untouched and a new vector is returned.
//
// Each element is first turned into a pair {key, value} by
// lexicographic_pair_helper, where `key` is the value padded with trailing
// zeros to the digit count of the largest element. The pairs are then sorted
// by key, with ties (e.g. 1 and 100, both padded to 100) broken by the
// original value, so the shorter number comes first.
//
// Returns the sorted pairs; the original values are in `.second`.
// An empty input range yields an empty vector.
template<class RaIt>
pair_vec<RaIt> lexicographic_sort(RaIt p_beg, RaIt p_end)
{
    if(p_beg == p_end) return {};

    // The largest element determines how many digits every key is padded to.
    auto const max = *std::max_element(p_beg, p_end);
    auto const maxDigits = count_digits(max);

    pair_vec<RaIt> result;
    result.reserve( static_cast<typename pair_vec<RaIt>::size_type>(
                        std::distance(p_beg, p_end)) );

    for(auto i = p_beg; i != p_end; ++i)
        result.push_back( lexicographic_pair_helper(*i, maxDigits) );

    // std::pair's operator< compares `first`, then `second` -- exactly the
    // ordering needed here, so no custom comparator is required.
    std::sort(result.begin(), result.end());

    return result;
}
Usage example:
// Usage example: prints a small fixed input, sorts it with lexicographic_sort
// and prints the original values in their sorted order.
int main()
{
    std::vector<int> input = { 100, 21 , 22 , 99 , 1 , 927 };

    // generate some numbers
    // (disabled; enable to append random values in [0, max()/10] to `input`)
    /*{
        constexpr int number_of_elements = 1E6;
        std::random_device rd;
        std::mt19937 gen( rd() );
        std::uniform_int_distribution<>
            dist(0, std::numeric_limits<int>::max()/10);
        for(int i = 0; i < number_of_elements; ++i)
            input.push_back( dist(gen) );
    }*/

    std::cout << "unsorted: ";
    for(auto const& e : input) std::cout << e << ", ";
    std::cout << "\n\n";

    auto sorted = lexicographic_sort(input.begin(), input.end());

    // Each result element is a pair; `.second` holds the original value.
    std::cout << "sorted: ";
    for(auto const& e : sorted) std::cout << e.second << ", ";
    std::cout << "\n\n";
}