Hash an arbitrary precision value (boost::multiprecision::cpp_int)

后端 未结 2 1737
余生分开走
余生分开走 2020-11-28 15:53

I need to compute the hash of an arbitrary-precision value (from Boost.Multiprecision); I use the cpp_int backend. So far, I have come up with the following code:

         


        
相关标签:
2条回答
  • 2020-11-28 16:24

    Just a note that I've added native hashing support (for Boost.Hash and std::hash) to the git develop branch. It works for all the number types, including those from GMP etc. Unfortunately, that code won't be released until Boost 1.62.

    The other answer, which (ab)uses serialization support, is actually extremely cool and really rather clever ;) However, it wouldn't work if you wanted to use a vector-based hasher like CityHash, so I added an example to the docs that does this by accessing the limbs directly: https://htmlpreview.github.io/?https://github.com/boostorg/multiprecision/blob/develop/doc/html/boost_multiprecision/tut/hash.html Either direct limb access or the serialization tip will work with all previous releases, of course.

    0 讨论(0)
  • 2020-11-28 16:34

    You can (ab)use the serialization support:

    Support for serialization comes in two forms: Classes number, debug_adaptor, logged_adaptor and rational_adaptor have "pass through" serialization support which requires the underlying backend to be serializable.

    Backends cpp_int, cpp_bin_float, cpp_dec_float and float128 have full support for Boost.Serialization.

    So, let me cobble something together that works with boost and std unordered containers:

    // Prints a blank line, the (compiler-generated) signature of this
    // instantiation, and then one "<mapped value>\t<key>" line per entry,
    // in the container's own iteration order.
    template <typename Map>
    void test(Map const& map) {
        std::cout << "\n";
        std::cout << __PRETTY_FUNCTION__;
        std::cout << "\n";

        for (const auto& entry : map) {
            const auto& key   = entry.first;
            const auto& label = entry.second;
            std::cout << label << "\t" << key << "\n";
        }
    }
    
    int main() {
        using boost::multiprecision::cpp_int;
    
        test(std::unordered_map<cpp_int, std::string> {
            { cpp_int(1) << 111, "one"   },
            { cpp_int(2) << 222, "two"   },
            { cpp_int(3) << 333, "three" },
        });
    
        test(boost::unordered_map<cpp_int, std::string> {
            { cpp_int(1) << 111, "one"   },
            { cpp_int(2) << 222, "two"   },
            { cpp_int(3) << 333, "three" },
        });
    }
    

    Let's forward the relevant hash<> implementations to our own hash_impl specialization that uses Multiprecision and Serialization:

    namespace std {
        // Lets std::unordered_map and friends hash Boost.Multiprecision
        // numbers by inheriting operator() from mp_hashing::hash_impl.
        template <class Backend>
        struct hash<boost::multiprecision::number<Backend> >
            : mp_hashing::hash_impl<boost::multiprecision::number<Backend> > {
        };
    }
    
    namespace boost {
        // Lets boost::unordered_map and friends hash Boost.Multiprecision
        // numbers by inheriting operator() from mp_hashing::hash_impl.
        template <class Backend>
        struct hash<multiprecision::number<Backend> >
            : mp_hashing::hash_impl<multiprecision::number<Backend> > {
        };
    }
    

    Now, of course, this raises the question: how is hash_impl implemented?

    template <typename T> struct hash_impl {
        size_t operator()(T const& v) const {
            using namespace boost;
            size_t seed = 0;
            {
                iostreams::stream<hash_sink> os(seed);
                archive::binary_oarchive oa(os, archive::no_header | archive::no_codecvt);
                oa << v;
            }
            return seed;
        }
    };
    

    This looks pretty simple. That's because Boost is awesome, and writing a hash_sink device for use with Boost Iostreams is just the following straightforward exercise:

    namespace io = boost::iostreams;

    // Boost.Iostreams sink device that, instead of storing the bytes written
    // to it, folds a hash of every written chunk into a caller-owned seed.
    struct hash_sink {
        using char_type = char;
        using category  = io::sink_tag;

        // Keeps the address of `seed_ref`; the seed is updated on every write().
        hash_sink(size_t& seed_ref) : seed_(&seed_ref) {}

        // Hashes the chunk [s, s + n), combines it into the seed, and reports
        // all `n` characters as consumed.
        std::streamsize write(const char* s, std::streamsize n) {
            const size_t chunk_hash = boost::hash_range(s, s + n);
            boost::hash_combine(*seed_, chunk_hash);
            return n;
        }

      private:
        size_t* seed_;
    };
    

    Full Demo:

    Live On Coliru

    #include <iostream>
    #include <iomanip>
    
    #include <boost/archive/binary_oarchive.hpp>
    #include <boost/multiprecision/cpp_int.hpp>
    #include <boost/multiprecision/cpp_int/serialize.hpp>
    #include <boost/iostreams/device/back_inserter.hpp>
    #include <boost/iostreams/stream_buffer.hpp>
    #include <boost/iostreams/stream.hpp>
    
    #include <boost/functional/hash.hpp>
    
    namespace mp_hashing {
        namespace io = boost::iostreams;
    
        struct hash_sink {
            hash_sink(size_t& seed_ref) : _ptr(&seed_ref) {}
    
            typedef char         char_type;
            typedef io::sink_tag category;
    
            std::streamsize write(const char* s, std::streamsize n) {
                boost::hash_combine(*_ptr, boost::hash_range(s, s+n));
                return n;
            }
          private:
            size_t* _ptr;
        };
    
        template <typename T> struct hash_impl {
            size_t operator()(T const& v) const {
                using namespace boost;
                size_t seed = 0;
                {
                    iostreams::stream<hash_sink> os(seed);
                    archive::binary_oarchive oa(os, archive::no_header | archive::no_codecvt);
                    oa << v;
                }
                return seed;
            }
        };
    }
    
    #include <unordered_map>
    #include <boost/unordered_map.hpp>
    
    namespace std {
        // Lets std::unordered_map and friends hash Boost.Multiprecision
        // numbers by inheriting operator() from mp_hashing::hash_impl.
        template <class Backend>
        struct hash<boost::multiprecision::number<Backend> >
            : mp_hashing::hash_impl<boost::multiprecision::number<Backend> > {
        };
    }
    
    namespace boost {
        // Lets boost::unordered_map and friends hash Boost.Multiprecision
        // numbers by inheriting operator() from mp_hashing::hash_impl.
        template <class Backend>
        struct hash<multiprecision::number<Backend> >
            : mp_hashing::hash_impl<multiprecision::number<Backend> > {
        };
    }
    
    // Prints a blank line, the (compiler-generated) signature of this
    // instantiation, and then one "<mapped value>\t<key>" line per entry,
    // in the container's own iteration order.
    template <typename Map>
    void test(Map const& map) {
        std::cout << "\n";
        std::cout << __PRETTY_FUNCTION__;
        std::cout << "\n";

        for (const auto& entry : map) {
            const auto& key   = entry.first;
            const auto& label = entry.second;
            std::cout << label << "\t" << key << "\n";
        }
    }
    
    int main() {
        using boost::multiprecision::cpp_int;
    
        test(std::unordered_map<cpp_int, std::string> {
            { cpp_int(1) << 111, "one"   },
            { cpp_int(2) << 222, "two"   },
            { cpp_int(3) << 333, "three" },
        });
    
        test(boost::unordered_map<cpp_int, std::string> {
            { cpp_int(1) << 111, "one"   },
            { cpp_int(2) << 222, "two"   },
            { cpp_int(3) << 333, "three" },
        });
    }
    

    Prints

    void test(const Map&) [with Map = std::unordered_map<boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<> >, std::basic_string<char> >]
    one 2596148429267413814265248164610048
    three   52494017394792286184940053450822912768476066341437098474218494553838871980785022157364316248553291776
    two 13479973333575319897333507543509815336818572211270286240551805124608
    
    void test(const Map&) [with Map = boost::unordered::unordered_map<boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<> >, std::basic_string<char> >]
    three   52494017394792286184940053450822912768476066341437098474218494553838871980785022157364316248553291776
    two 13479973333575319897333507543509815336818572211270286240551805124608
    one 2596148429267413814265248164610048
    

    As you can see, the differences in implementation between Boost's and the standard library's unordered_map show up as different iteration orders, even though the hashes are identical.

    0 讨论(0)
提交回复
热议问题