atoi on a character array with lots of integers

前端 未结 2 402
感情败类
感情败类 2021-01-16 15:02

I have a code in which the character array is populated by integers (converted to char arrays), and read by another function which reconverts it back to integers. I have use

相关标签:
2条回答
  • 2021-01-16 15:23

    You are copying only 4 characters (the size of a pointer on your system, not the length of the string). Numbers with 4 or more characters are therefore left without a null terminator, leading to runaway strings in the input to atoi

     sizeof(str.c_str()) //i.e. sizeof(char*) = 4 (32 bit systems)
    

    should be

     str.length() + 1
    

    Otherwise the characters will not be null-terminated.

    STL Only:

    make_testdata(): see all the way down

    Why don't you use streams...?

    #include <sstream>
    #include <iostream>
    #include <algorithm>
    #include <iterator>
    #include <string>
    #include <vector>
    
    int main()
    {
        // Integers to round-trip through a textual stream.
        const std::vector<int> original = make_testdata();
    
        // Serialize: write every value followed by a tab character.
        std::ostringstream writer;
        std::ostream_iterator<int> sink(writer, "\t");
        std::copy(original.begin(), original.end(), sink);
    
        // Deserialize: extract ints from the generated text until extraction fails.
        std::stringstream reader(writer.str());
        std::vector<int> clone;
        std::istream_iterator<int> cursor(reader);
        std::istream_iterator<int> finish;
        std::copy(cursor, finish, std::back_inserter(clone));
    
        //verify that clone now contains the original random data:
        //bool ok = std::equal(original.begin(), original.end(), clone.begin());
    
        return 0;
    }
    

    You could do it a lot faster in plain C with atoi/itoa and some tweaks, but I reckon you should be using binary transmission (see Boost Spirit Karma and protobuf for good libraries) if you need the speed.

    Boost Karma/Qi:

    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/karma.hpp>
    
    // Short aliases for the Boost.Spirit parser (qi) and generator (karma) namespaces.
    namespace qi=::boost::spirit::qi;
    namespace karma=::boost::spirit::karma;
    
    // Separator character placed between the serialized integers: a NUL byte.
    static const char delimiter = '\0';
    
    int main()
    {
        std::vector<int> data = make_testdata();
    
        std::string astext;
    //  astext.reserve(3 * sizeof(data[0]) * data.size()); // heuristic pre-alloc
        std::back_insert_iterator<std::string> out(astext);
    
        {
            using namespace karma;
            generate(out, delimit(delimiter) [ *int_ ], data);
        //  generate_delimited(out, *int_, delimiter, data); // equivalent
        //  generate(out, int_ % delimiter, data); // somehow much slower!
        }
    
        std::string::const_iterator begin(astext.begin()), end(astext.end());
        std::vector<int> clone;
        qi::parse(begin, end, qi::int_ % delimiter, clone);
    
        //verify that clone now contains the original random data:
        //bool ok = std::equal(data.begin(), data.end(), clone.begin());
    
        return 0;
    }
    

    If you wanted to do architecture independent binary serialization instead, you'd use this tiny adaptation making things a zillion times faster (see benchmark below...):

    // Binary variant: emit every int through karma's big_dword generator instead of as text.
    karma::generate(out, *karma::big_dword, data);
    // ...
    // Read the values back with the matching qi::big_dword parser.
    qi::parse(begin, end, *qi::big_dword, clone);
    

    Boost Serialization

    The best performance can be reached when using Boost Serialization in binary mode:

    #include <sstream>
    #include <boost/archive/binary_oarchive.hpp>
    #include <boost/archive/binary_iarchive.hpp>
    #include <boost/serialization/vector.hpp>
    
    int main()
    {
        std::vector<int> data = make_testdata();
    
        std::stringstream ss;
        {
            boost::archive::binary_oarchive oa(ss);
            oa << data;
        }
    
        std::vector<int> clone;
        {
            boost::archive::binary_iarchive ia(ss);
            ia >> clone;
        }
    
        //verify that clone now contains the original random data:
        //bool ok = std::equal(data.begin(), data.end(), clone.begin());
    
        return 0;
    }
    

    Testdata

    (common to all versions above)

    #include <boost/random.hpp>
    
    // generates a deterministic pseudo-random vector of 32Mio ints
    std::vector<int> make_testdata()
    {
        std::vector<int> testdata;
    
        testdata.resize(2 << 24);
        std::generate(testdata.begin(), testdata.end(), boost::mt19937(0));
    
        return testdata;
    }
    

    Benchmarks

    I benchmarked it by

    • using input data of 2<<24 (33554432) random integers
    • not displaying output (we don't want to measure the scrolling performance of our terminal)
    • the rough timings were
      • STL only version isn't too bad actually at 12.6s
      • Karma/Qi text version now runs in 5.1s (it originally ran in 18s), thanks to Arlen's hint at generate_delimited :)
      • Karma/Qi binary version (big_dword) in only 1.4s (roughly 3-4x as fast; originally 12x, before the text version was sped up)
      • Boost Serialization takes the cake with around 0.8s (or, when substituting text archives for the binary ones, around 13s)
    0 讨论(0)
  • 2021-01-16 15:27

    There is absolutely no reason for the Karma/Qi text version to be any slower than the STL version. I improved @sehe's implementation of the Karma/Qi text version to reflect that claim.

    The following Boost Karma/Qi text version is more than twice as fast as the STL version:

    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/karma.hpp>
    #include <boost/random.hpp>
    #include <boost/spirit/include/phoenix_core.hpp>
    #include <boost/spirit/include/phoenix_operator.hpp>
    #include <boost/spirit/include/phoenix_stl.hpp>
    
    // Namespace aliases that keep the Spirit and Phoenix names used below short.
    namespace ascii = boost::spirit::ascii;
    namespace qi = boost::spirit::qi;
    namespace karma = boost::spirit::karma;
    namespace phoenix = boost::phoenix;
    
    
    // Writes the integers in v to sink as space-delimited text using Karma.
    //   sink: output iterator receiving the generated characters (taken by reference).
    //   v:    values to serialize.
    template <typename OutputIterator>
    void generate_numbers(OutputIterator& sink, const std::vector<int>& v){
      karma::generate_delimited(sink, *karma::int_, ascii::space, v);
    }
    
    // Parses integers from [first, last), skipping ASCII whitespace, and appends each one to v.
    //   first, last: iterator range over the text to parse.
    //   v:           destination vector; parsed values are pushed onto its end.
    template <typename Iterator>
    void parse_numbers(Iterator first, Iterator last, std::vector<int>& v){
      // The semantic action attached to int_ pushes every parsed value onto v.
      qi::phrase_parse(
          first, last,
          *qi::int_[phoenix::push_back(phoenix::ref(v), qi::_1)],
          ascii::space);
    }
    
    int main(int argc, char* argv[]){
    
      // A fixed seed keeps the test data deterministic.
      static boost::mt19937 rng(0);
      std::vector<int> source(2 << 24);
      std::generate(source.begin(), source.end(), rng);
    
      // Render the numbers as text into a string.
      std::string text;
      std::back_insert_iterator<std::string> sink(text);
      generate_numbers(sink, source);
    
      //std::cout << text << std::endl;
    
      // Parse the text back into a second vector.
      std::string::const_iterator first = text.begin();
      std::string::const_iterator last = text.end();
      std::vector<int> clone;
      parse_numbers(first, last, clone);
    
      //verify that clone now contains the original random data:
      //std::copy(clone.begin(), clone.end(), std::ostream_iterator<int>(std::cout, ","));
    
      return 0;
    }
    
    0 讨论(0)
提交回复
热议问题