Sort by proxy (or: sort one container by the contents of another) in C++

前端 未结 7 865
失恋的感觉
失恋的感觉 2020-12-19 02:34

I have a set of data which is split into two arrays (let\'s call them data and keys). That is, for any given item with an index i, I c

相关标签:
7条回答
  • 2020-12-19 03:18

    This problem really got me thinking. I came up with a solution that makes use of some C++0x features to get a very STL-like parallel_sort algorithm. In order to perform the sort "in-place", I had to write a back_remove_iterator as the counterpart of back_insert_iterator to allow the algorithm to read from and write to the same container. You can skip over those parts and go straight to the interesting stuff.

    I haven't put it through any hardcore testing, but it seems reasonably efficient in both time and space, principally due to the use of std::move() to prevent unnecessary copying.

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>
    
    
    //
    // An input iterator that removes elements from the back of a container.
    // Provided only because the standard library neglects one.
    //
    template<class Container>
    class back_remove_iterator :
        public std::iterator<std::input_iterator_tag, void, void, void, void> {
    public:
    
    
        back_remove_iterator() : container(0) {}
        explicit back_remove_iterator(Container& c) : container(&c) {}
    
        back_remove_iterator& operator=
            (typename Container::const_reference value) { return *this; }
    
        typename Container::value_type operator*() {
    
            typename Container::value_type value(container->back());
            container->pop_back();
            return value;
    
        } // operator*()
    
        back_remove_iterator& operator++() { return *this; }
        back_remove_iterator operator++(int) { return *this; }
    
    
        Container* container;
    
    
    }; // class back_remove_iterator
    
    
    //
    // Equivalence operator for back_remove_iterator. An iterator compares equal
    // to the end iterator either if it is default-constructed or if its
    // container is empty.
    //
    template<class Container>
    bool operator==(const back_remove_iterator<Container>& a,
        const back_remove_iterator<Container>& b) {
    
        return !a.container ? !b.container || b.container->empty() :
            !b.container ? !a.container || a.container->empty() :
            a.container == b.container;
    
    } // operator==()
    
    
    //
    // Inequivalence operator for back_remove_iterator.
    //
    template<class Container>
    bool operator!=(const back_remove_iterator<Container>& a,
        const back_remove_iterator<Container>& b) {
    
        return !(a == b);
    
    } // operator!=()
    
    
    //
    // A handy way to default-construct a back_remove_iterator.
    //
    template<class Container>
    back_remove_iterator<Container> back_remover() {
    
        return back_remove_iterator<Container>();
    
    } // back_remover()
    
    
    //
    // A handy way to construct a back_remove_iterator.
    //
    template<class Container>
    back_remove_iterator<Container> back_remover(Container& c) {
    
        return back_remove_iterator<Container>(c);
    
    } // back_remover()
    
    
    //
    // A comparison functor that sorts std::pairs by their first element.
    //
    template<class A, class B>
    struct sort_pair_by_first {
    
        bool operator()(const std::pair<A, B>& a, const std::pair<A, B>& b) {
    
            return a.first < b.first;
    
        } // operator()()
    
    }; // struct sort_pair_by_first
    
    
    //
    // Performs a parallel sort of the ranges [keys_first, keys_last) and
    // [values_first, values_last), preserving the ordering relation between
    // values and keys. Sends key and value output to keys_out and values_out.
    //
    // This works by building a vector of std::pairs, sorting them by the key
    // element, then returning the sorted pairs as two separate sequences. Note
    // the use of std::move() for a vast performance improvement.
    //
    template<class A, class B, class I, class J, class K, class L>
    void parallel_sort(I keys_first, I keys_last, J values_first, J values_last,
                       K keys_out, L values_out) {
    
        typedef std::vector< std::pair<A, B> > Pairs;
        Pairs sorted;
    
        while (keys_first != keys_last)
            sorted.push_back({std::move(*keys_first++), std::move(*values_first++)});
    
        std::sort(sorted.begin(), sorted.end(), sort_pair_by_first<A, B>());
    
        for (auto i = sorted.begin(); i != sorted.end(); ++i)
            *keys_out++ = std::move(i->first),
            *values_out++ = std::move(i->second);
    
    } // parallel_sort()
    
    
    int main(int argc, char** argv) {
    
        //
        // There is an ordering relation between keys and values,
        // but the sets still need to be sorted. Sounds like a job for...
        //
        std::vector<int> keys{0, 3, 1, 2};
        std::vector<std::string> values{"zero", "three", "one", "two"};
    
        //
        // parallel_sort! Unfortunately, the key and value types do need to
        // be specified explicitly. This could be helped with a utility
        // function that accepts back_remove_iterators.
        //
        parallel_sort<int, std::string>
            (back_remover(keys), back_remover<std::vector<int>>(),
            back_remover(values), back_remover<std::vector<std::string>>(),
            std::back_inserter(keys), std::back_inserter(values));
    
        //
        // Just to prove that the mapping is preserved.
        //
        for (unsigned int i = 0; i < keys.size(); ++i)
            std::cout << keys[i] << ": " << values[i] << '\n';
    
        return 0;
    
    } // main()
    

    I hope this proves useful, or at least entertaining.

    0 讨论(0)
  • 2020-12-19 03:22

    It turns out that Boost contains an iterator which does pretty much what the paired_iterator from my other answer does:

    Boost.Iterator Zip Iterator

    This seems like the best option.

    0 讨论(0)
  • 2020-12-19 03:23

    You could use a map:

    int main() {
      vector<int> keys;
      vector<string> data;
      keys.push_back(5); data.push_back("joe");
      keys.push_back(2); data.push_back("yaochun");
      keys.push_back(1); data.push_back("holio");
    
      // load the keys and data to the map (they will automatically be inserted in sorted order by key)
      map<int, string> sortedVals;
      for(int i = 0; i < (int)keys.size(); ++i) {
        sortedVals[keys[i]] = data[i];
      }
    
      // copy the map values back to vectors  
      int ndx=0;
      for(map<int, string>::iterator it = sortedVals.begin(); it != sortedVals.end(); ++it) {
        keys[ndx] = it->first;
        data[ndx] = it->second;
        ++ndx;
      }
    
      // verify
      for(int i = 0; i < (int)keys.size(); ++i) {
        cout<<keys[i]<<" "<<data[i]<<endl;
      }
    
      return 0;
    }
    

    Here's the output:

    ---------- Capture Output ----------
    > "c:\windows\system32\cmd.exe" /c c:\temp\temp.exe
    1 holio
    2 yaochun
    5 joe
    
    > Terminated with exit code 0.
    
    0 讨论(0)
  • 2020-12-19 03:24

    You can use functors to do the sorting, for example:

    template <class T>
    struct IndexFunctor {
      IndexFunctor(const std::vector<T>& v_) : v(v_) {}
      bool operator ()(int a, int b) const {
        return v[a] < v[b];
      }
      const std::vector<T>& v;
    };
    
    template <class K, class D>
    void SortByKeys(std::vector<K>& keys, std::vector<D>& data) {
      // Calculate the valid order inside a permutation array p.
      const int n = static_cast<int>(keys.size());
      std::vector<int> p(n);
      for (int i = 0; i < n; ++i) p[i] = i;
      std::sort(p.begin(), p.end(), IndexFunctor(keys));
    
      // Reorder the keys and data in temporary arrays, it cannot be done in place.
      std::vector<K> aux_keys(n);
      std::vector<D> aux_data(n);
      for (int i = 0; i < n; ++i) {
        aux_keys[i] = keys[p[i]];
        aux_data[i] = data[p[i]];
      }
    
      // Assign the ordered vectors by swapping, it should be faster.
      keys.swap(aux_keys);
      data.swap(aux_data);
    }
    
    0 讨论(0)
  • 2020-12-19 03:26

    Here is a sample implementation which defines a new iterator type to provide a paired view over two sequences. I have tried to make it standards compliant and correct, but since the C++ standard is hideously complex in its details, I am almost certain that I have failed. I will say that this code appears to work when built with clang++ or g++.

    This code is not recommended for general use, since it is longer and less understandable than the other answers, and possibly invokes the dreaded 'undefined behaviour'.

    However, it does have the advantage of constant time and space overhead since it provides a view on existing data rather than actually building a temporary alternate representation or permutation vector. The most obvious (to me) performance problem with this code is that individual elements of the two containers will have to be copied during the swap operation. Despite several attempts, I have not found a way to successfully specialise std::swap such that std::sort or std::random_shuffle will avoid using the default temporary-copy based swap implementation. It is possible that use of the C++0x rvalue reference system (see std::move and Jon Purdy's answer) could solve this.

    #ifndef HDR_PAIRED_ITERATOR
    #define HDR_PAIRED_ITERATOR
    
    #include <iterator>
    
    /// pair_view mostly looks like a std::pair,
    /// and can decay to a std::pair, but is really a pair of references
    template <typename ItA, typename ItB>
    struct pair_view {
        typedef typename ItA::value_type first_type;
        typedef typename ItB::value_type second_type;
    
        typedef std::pair<first_type, second_type> pair_type;
    
        pair_view() {}
        pair_view(const ItA &a, const ItB &b):
            first(*a), second(*b) {}
    
        pair_view &operator=(const pair_view &x)
            { first = x.first; second = x.second; return *this; }
        pair_view &operator=(const pair_type &x)
            { first = x.first; second = x.second; return *this; }
    
        typename ItA::reference first;
        typename ItB::reference second;
        operator pair_type() const
            { return std::make_pair(first, second); }
    
        friend bool operator==(const pair_view &a, const pair_view &b)
            { return (a.first == b.first) && (a.second == b.second); }
        friend bool operator<(const pair_view &a, const pair_view &b)
            { return (a.first < b.first) || ((a.first == b.first) && (a.second < b.second)); }
        friend bool operator!=(const pair_view &a, const pair_view &b)
            { return !(a == b); }
        friend bool operator>(const pair_view &a, const pair_view &b)
            { return (b < a); }
        friend bool operator<=(const pair_view &a, const pair_view &b)
            { return !(b < a); }
        friend bool operator>=(const pair_view &a, const pair_view &b)
            { return !(a < b); }
    
        friend bool operator==(const pair_view &a, const pair_type &b)
            { return (a.first == b.first) && (a.second == b.second); }
        friend bool operator<(const pair_view &a, const pair_type &b)
            { return (a.first < b.first) || ((a.first == b.first) && (a.second < b.second)); }
        friend bool operator!=(const pair_view &a, const pair_type &b)
            { return !(a == b); }
        friend bool operator>(const pair_view &a, const pair_type &b)
            { return (b < a); }
        friend bool operator<=(const pair_view &a, const pair_type &b)
            { return !(b < a); }
        friend bool operator>=(const pair_view &a, const pair_type &b)
            { return !(a < b); }
    
        friend bool operator==(const pair_type &a, const pair_type &b)
            { return (a.first == b.first) && (a.second == b.second); }
        friend bool operator<(const pair_type &a, const pair_type &b)
            { return (a.first < b.first) || ((a.first == b.first) && (a.second < b.second)); }
        friend bool operator!=(const pair_type &a, const pair_type &b)
            { return !(a == b); }
        friend bool operator>(const pair_type &a, const pair_type &b)
            { return (b < a); }
        friend bool operator<=(const pair_type &a, const pair_type &b)
            { return !(b < a); }
        friend bool operator>=(const pair_type &a, const pair_type &b)
            { return !(a < b); }
    };
    
    template <typename ItA, typename ItB>
    struct paired_iterator {
        // --- standard iterator traits
        typedef typename pair_view<ItA, ItB>::pair_type value_type;
        typedef pair_view<ItA, ItB> reference;
        typedef paired_iterator<ItA,ItB> pointer;
    
        typedef typename std::iterator_traits<ItA>::difference_type difference_type;
        typedef std::random_access_iterator_tag iterator_category;
    
        // --- methods not required by the Random Access Iterator concept
        paired_iterator(const ItA &a, const ItB &b):
            a(a), b(b) {}
    
        // --- iterator requirements
    
        // default construction
        paired_iterator() {}
    
        // copy construction and assignment
        paired_iterator(const paired_iterator &x):
            a(x.a), b(x.b) {}
        paired_iterator &operator=(const paired_iterator &x)
            { a = x.a; b = x.b; return *this; }
    
        // pre- and post-increment
        paired_iterator &operator++()
            { ++a; ++b; return *this; }
        paired_iterator operator++(int)
            { paired_iterator tmp(*this); ++(*this); return tmp; }
    
        // pre- and post-decrement
        paired_iterator &operator--()
            { --a; --b; return *this; }
        paired_iterator operator--(int)
            { paired_iterator tmp(*this); --(*this); return tmp; }
    
        // arithmetic
        paired_iterator &operator+=(const difference_type &n)
            { a += n; b += n; return *this; }
        friend paired_iterator operator+(const paired_iterator &x, const difference_type &n)
            { return paired_iterator(x.a+n, x.b+n); }
        friend paired_iterator operator+(const difference_type &n, const paired_iterator &x)
            { return paired_iterator(x.a+n, x.b+n); }
        paired_iterator &operator-=(const difference_type &n)
            { a -= n; b -= n; return *this; }
        friend paired_iterator operator-(const paired_iterator &x, const difference_type &n)
            { return paired_iterator(x.a-n, x.b-n); }
        friend difference_type operator-(const paired_iterator &x, const paired_iterator &y)
            { return (x.a - y.a); }
    
        // (in-)equality and ordering
        friend bool operator==(const paired_iterator &x, const paired_iterator &y)
            { return (x.a == y.a) && (x.b == y.b); }
        friend bool operator<(const paired_iterator &x, const paired_iterator &y)
            { return (x.a < y.a); }
    
        // derived (in-)equality and ordering operators
        friend bool operator!=(const paired_iterator &x, const paired_iterator &y)
            { return !(x == y); }
        friend bool operator>(const paired_iterator &x, const paired_iterator &y)
            { return (y < x); }
        friend bool operator<=(const paired_iterator &x, const paired_iterator &y)
            { return !(y < x); }
        friend bool operator>=(const paired_iterator &x, const paired_iterator &y)
            { return !(x < y); }
    
        // dereferencing and random access
        reference operator*() const
            { return reference(a,b); }
        reference operator[](const difference_type &n) const
            { return reference(a+n, b+n); }
    private:
        ItA a;
        ItB b;
    };
    
    template <typename ItA, typename ItB>
    paired_iterator<ItA, ItB> make_paired_iterator(const ItA &a, const ItB &b)
    { return paired_iterator<ItA, ItB>(a, b); }
    
    #endif
    
    
    #include <vector>
    #include <algorithm>
    #include <iostream>
    
    template <typename ItA, typename ItB>
    void print_kvs(const ItA &k0, const ItB &v0, const ItA &kn, const ItB &vn) {
        ItA k(k0);
        ItB v(v0);
        while (k != kn || v != vn) {
            if (k != kn && v != vn)
                std::cout << "[" << *k << "] = " << *v << "\n";
            else if (k != kn)
                std::cout << "[" << *k << "]\n";
            else if (v != vn)
                std::cout << "[?] = " << *v << "\n";
    
            if (k != kn) ++k;
            if (v != vn) ++v;
        }
        std::cout << std::endl;
    }
    
    int main() {
        std::vector<int> keys;
        std::vector<std::string> data;
    
        keys.push_back(0); data.push_back("zero");
        keys.push_back(1); data.push_back("one");
        keys.push_back(2); data.push_back("two");
        keys.push_back(3); data.push_back("three");
        keys.push_back(4); data.push_back("four");
        keys.push_back(5); data.push_back("five");
        keys.push_back(6); data.push_back("six");
        keys.push_back(7); data.push_back("seven");
        keys.push_back(8); data.push_back("eight");
        keys.push_back(9); data.push_back("nine");
    
        print_kvs(keys.begin(), data.begin(), keys.end(), data.end());
    
        std::cout << "Shuffling\n";
        std::random_shuffle(
            make_paired_iterator(keys.begin(), data.begin()),
            make_paired_iterator(keys.end(), data.end())
        );
    
        print_kvs(keys.begin(), data.begin(), keys.end(), data.end());
    
        std::cout << "Sorting\n";
        std::sort(
            make_paired_iterator(keys.begin(), data.begin()),
            make_paired_iterator(keys.end(), data.end())
        );
    
        print_kvs(keys.begin(), data.begin(), keys.end(), data.end());
    
        std::cout << "Sort descending\n";
        std::sort(
            make_paired_iterator(keys.begin(), data.begin()),
            make_paired_iterator(keys.end(), data.end()),
            std::greater< std::pair<int,std::string> >()
        );
    
        print_kvs(keys.begin(), data.begin(), keys.end(), data.end());
    
        return 0;
    }
    
    0 讨论(0)
  • 2020-12-19 03:26

    I don't know whether following exploitation of knowing of std::swap implementation details is UB or not. I think "no".

    #include <iostream>
    #include <iomanip>
    
    #include <type_traits>
    #include <utility>
    #include <iterator>
    #include <algorithm>
    #include <numeric>
    #include <deque>
    #include <forward_list>
    #include <vector>
    
    #include <cstdlib>
    #include <cassert>
    
    template< typename pattern_iterator, typename target_iterator >
    void
    pattern_sort(pattern_iterator pbeg, pattern_iterator pend, target_iterator tbeg, target_iterator tend)
    {
        //assert(std::distance(pbeg, pend) == std::distance(tbeg, tend));
        using pattern_traits = std::iterator_traits< pattern_iterator >;
        using target_traits = std::iterator_traits< target_iterator >;
        static_assert(std::is_base_of< std::forward_iterator_tag, typename pattern_traits::iterator_category >{});
        static_assert(std::is_base_of< std::forward_iterator_tag, typename target_traits::iterator_category >{});
        struct iterator_adaptor
        {
    
            iterator_adaptor(typename pattern_traits::reference pattern,
                             typename target_traits::reference target)
                : p(&pattern)
                , t(&target)
            { ; }
    
            iterator_adaptor(iterator_adaptor &&)
                : p(nullptr)
                , t(nullptr)
            { ; }
    
            void
            operator = (iterator_adaptor && rhs) &
            {
                if (!!rhs.p) {
                    assert(!!rhs.t);
                    std::swap(p, rhs.p);
                    std::iter_swap(t, rhs.t);
                }
            }
    
            bool
            operator < (iterator_adaptor const & rhs) const
            {
                return (*p < *rhs.p);
            }
    
        private :
    
            typename pattern_traits::pointer p;
            typename target_traits::pointer t;
    
        };
        std::deque< iterator_adaptor > proxy_; // std::vector can be used instead
        //proxy_.reserve(static_cast< std::size_t >(std::distance(pbeg, pend))); // it's (maybe) worth it if proxy_ is std::vector and if walking through whole [tbeg, tend) range is not too expensive operation (in case if target_iterator is worse then RandomAccessIterator)
        auto t = tbeg;
        auto p = pbeg;
        while (p != pend) {
            assert(t != tend);
            proxy_.emplace_back(*p, *t);
            ++p;
            ++t;
        }
        std::sort(std::begin(proxy_), std::end(proxy_));
    }
    
    int
    main()
    {
        std::forward_list< int > keys{5, 4, 3, 2, 1};
        std::vector< std::size_t > indices(static_cast< std::size_t >(std::distance(std::cbegin(keys), std::cend(keys))));
        std::iota(std::begin(indices), std::end(indices), std::size_t{0}); // indices now: 0, 1, 2, 3, 4    
        std::copy(std::cbegin(keys), std::cend(keys), std::ostream_iterator< int >(std::cout, " ")); std::cout << std::endl;
        std::copy(std::cbegin(indices), std::cend(indices), std::ostream_iterator< std::size_t >(std::cout, " ")); std::cout << std::endl;
        pattern_sort(std::cbegin(keys), std::cend(keys), std::begin(indices), std::end(indices)); std::cout << std::endl;
        std::copy(std::cbegin(keys), std::cend(keys), std::ostream_iterator< int >(std::cout, " ")); std::cout << std::endl;
        std::copy(std::cbegin(indices), std::cend(indices), std::ostream_iterator< std::size_t >(std::cout, " ")); std::cout << std::endl;
        // now one can use indices to access keys and data to as ordered containers
        return EXIT_SUCCESS;
    }
    
    0 讨论(0)
提交回复
热议问题