Move the string out of a std::ostringstream

。_饼干妹妹 提交于 2019-11-27 04:29:18

问题


If I construct a string made of a list of space separated floating point values using std::ostringstream:

// Stream every value of floatData into ss, each followed by a single space.
// Iterating the container directly avoids narrowing floatData.size() into an
// unsigned int, which would truncate the element count on 64-bit platforms.
std::ostringstream ss;
for (const auto& value : floatData)
{
    ss << value << " ";
}

Then I get the result in a std::string:

// ss.str() hands back the accumulated text by value; textValues is built from that copy.
std::string textValues(ss.str());

However, this will cause an unnecessary deep copy of the string contents, as ss will not be used anymore.

Is there any way to construct the string without copying the entire content?


回答1:


std::ostringstream offers no public interface to access its in-memory buffer unless it non-portably supports pubsetbuf (but even then your buffer is fixed-size, see cppreference example)

If you want to torture some string streams, you could access the buffer using the protected interface:

#include <iostream>
#include <sstream>
#include <vector>

// std::stringbuf with one extra accessor that exposes the protected
// put-area pointer to outside code.
struct my_stringbuf : std::stringbuf {
    // Returns pbase(), the start of the output sequence. The characters are
    // not NUL-terminated unless the caller wrote a terminator into the stream.
    // pptr() (one past the last written character) might be useful too.
    const char* my_str() const
    {
        const char* const put_area_begin = this->pbase();
        return put_area_begin;
    }
};

int main()
{
    // A plain std::ostream writes through the custom buffer, so the buffer's
    // storage can be read back afterwards via my_str().
    my_stringbuf buf;
    std::ostream ss(&buf);

    std::vector<float> v = {1.1, -3.4, 1/7.0};
    for (const float value : v)
        ss << value << ' ';

    // Write a terminating '\0' so the raw pointer can be printed as a C string.
    ss << std::ends;
    std::cout << buf.my_str() << '\n';
}

The standard C++ way of directly accessing an auto-resizing output stream buffer is offered by std::ostrstream, deprecated in C++98, but still standard C++14 and counting.

#include <iostream>
#include <strstream>
#include <vector>

int main()
{
    std::ostrstream ss;

    std::vector<float> v = {1.1, -3.4, 1/7.0};
    for (const float value : v)
        ss << value << ' ';

    // Write a terminating '\0' so the character array can be printed as a C string.
    ss << std::ends;

    // str() returns a pointer into the stream's own character array: direct access!
    const char* buffer = ss.str();
    std::cout << buffer << '\n';

    // Unfreeze the buffer again before the stream goes out of scope (abomination).
    ss.freeze(false);
}

However, I think the cleanest (and the fastest) solution is boost.karma

#include <iostream>
#include <string>
#include <vector>
#include <boost/spirit/include/karma.hpp>
namespace karma = boost::spirit::karma;
int main()
{
    std::vector<float> values = {1.1, -3.4, 1/7.0};
    std::string text;

    // Generate straight into `text` through a back-insert iterator.
    // NOTE(review): `karma::double_ % ' '` is presumably Karma's list syntax
    // (elements separated by one space) - confirm against the Boost.Spirit docs.
    karma::generate(std::back_inserter(text), karma::double_ % ' ', values);

    std::cout << text << '\n'; // here's your string
}



回答2:


I implemented "outstringstream" class, which I believe does exactly what you need (see take_str() method). I partially used code from: What is wrong with my implementation of overflow()?

#include <ostream>
#include <string>
#include <utility>

// Output-only string stream whose accumulated text can be moved out without
// copying it.
//
// The object is its own stream buffer (private base) and its own ostream
// (public base). No put area is installed: every character written to the
// stream reaches overflow() or xsputn(), which append directly to m_str.
// As a result m_str always holds exactly the text written so far, so str()
// and take_str() never expose unused buffer space, and std::basic_string
// takes care of (amortised) growth.
//
// The template parameter is named CharT so that it cannot be confused with
// the char_type typedefs inherited from both base classes.
template <typename CharT>
class basic_outstringstream : private std::basic_streambuf<CharT, std::char_traits<CharT>>,
                              public std::basic_ostream<CharT, std::char_traits<CharT>>
{
    using traits_type = std::char_traits<CharT>;
    using base_buf_type = std::basic_streambuf<CharT, traits_type>;
    using base_stream_type = std::basic_ostream<CharT, traits_type>;
    using int_type = typename base_buf_type::int_type;

    // The text written so far; nothing more, nothing less.
    std::basic_string<CharT> m_str;

    // Called for every single-character write because there is no put area.
    // Appends ch unless it is eof; always reports success.
    int_type overflow(int_type ch) override
    {
        if (!traits_type::eq_int_type(ch, traits_type::eof()))
            m_str.push_back(traits_type::to_char_type(ch));

        return traits_type::not_eof(ch);
    }

    // Bulk write: appends count characters starting at s in one call.
    // Returns the number of characters consumed.
    std::streamsize xsputn(const CharT* s, std::streamsize count) override
    {
        if (count <= 0)
            return 0;

        m_str.append(s, static_cast<std::size_t>(count));
        return count;
    }

public:

    // Creates an empty stream and pre-allocates room for reserveSize characters.
    explicit basic_outstringstream(std::size_t reserveSize = 8)
        : base_stream_type(this)
    {
        m_str.reserve(reserveSize);
    }

    // Adopts str as the initial content (no copy); later output is appended to it.
    explicit basic_outstringstream(std::basic_string<CharT>&& str)
        : base_stream_type(this), m_str(std::move(str))
    {
    }

    // Copies str as the initial content; later output is appended to it.
    explicit basic_outstringstream(const std::basic_string<CharT>& str)
        : base_stream_type(this), m_str(str)
    {
    }

    // Read-only view of the text written so far.
    const std::basic_string<CharT>& str() const
    {
        return m_str;
    }

    // Moves the accumulated text out and leaves the stream empty and reusable.
    // Returned by value (callers binding the result to a string or to auto&&
    // keep working): returning a reference to the member would let the caller
    // steal the storage while the stream still considers it its own.
    std::basic_string<CharT> take_str()
    {
        std::basic_string<CharT> taken = std::move(m_str);
        m_str.clear(); // a moved-from string is valid but unspecified; make it empty
        return taken;
    }

    // Discards the accumulated text.
    // Note: this hides std::basic_ios::clear(iostate); call that one through
    // the std::basic_ios base if the stream's error state has to be reset.
    void clear()
    {
        m_str.clear();
    }
};

using outstringstream = basic_outstringstream<char>;
using woutstringstream = basic_outstringstream<wchar_t>;



回答3:


+1 for the Boost Karma by @Cubbi and the suggestion to "create your own streambuf-derived type that does not make a copy, and give that to the constructor of a basic_istream<>.".

A more generic answer, though, is missing, and sits between these two. It uses Boost Iostreams:

using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;

Here's a demo program:

Live On Coliru

#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/stream_buffer.hpp>

namespace bio = boost::iostreams;

using string_buf = bio::stream_buffer<bio::back_insert_device<std::string> >;

// any code that uses ostream
void foo(std::ostream& os) {
    // Fixed greeting first.
    os << "Hello world ";

    // 42 in hexadecimal with its base prefix; both flags stay set on os afterwards.
    os << std::hex << std::showbase << 42;
    os << " ";

    // Boolean written as text rather than as a number, then end of line.
    os << std::boolalpha << (1==1);
    os << "\n";
}

#include <iostream>

// Demo: run foo() against an ostream whose stream buffer appends to `output`.
int main() {
    std::string output;
    output.reserve(100); // optional: pre-allocate if you know roughly how large the output will be, or the minimum size it needs

    // NOTE(review): the inner scope presumably exists so that `os` and `buf`
    // are destroyed (and any buffered characters flushed into `output`) before
    // `output` is read below - confirm against the Boost.Iostreams docs.
    {
        string_buf buf(output);
        std::ostream os(&buf);
        foo(os);
    }

    std::cout << "Output contains: " << output;
}

Note that you can trivially replace the std::string with std::wstring, or std::vector<char> etc.

Even better, you can use it with the array_sink device and have a fixed-size buffer. That way you can avoid any buffer allocation whatsoever with your Iostreams code!

Live On Coliru

#include <boost/iostreams/device/array.hpp>

using array_buf = bio::stream_buffer<bio::basic_array<char>>;

// ...

// Demo: run foo() against an ostream whose stream buffer writes into a fixed char array.
int main() {
    // All 100 bytes start as 0, so text shorter than the array is still
    // NUL-terminated when the array is printed as a C string below.
    char output[100] = {0};

    // NOTE(review): the inner scope presumably exists so that `os` and `buf`
    // are destroyed (and any buffered characters flushed into `output`) before
    // `output` is read below - confirm against the Boost.Iostreams docs.
    {
        array_buf buf(output);
        std::ostream os(&buf);
        foo(os);
    }

    std::cout << "Output contains: " << output;
}

Both programs print:

Output contains: Hello world 0x2a true



回答4:


Update: In the face of people's continued dislike of this answer, I thought I'd make an edit and explain.

  1. No, there is no way to avoid a string copy (stringbuf has the same interface)

  2. It will never matter. It's actually more efficient that way. (I will try to explain this)

Imagine writing a version of stringbuf that keeps a perfect, moveable std::string available at all times. (I have actually tried this).

Adding characters is easy - we simply use push_back on the underlying string.

OK, but what about removing characters (reading from the buffer)? We'll have to move some pointer to account for the characters we've removed, all well and good.

However, we have a problem - the contract we're keeping that says we'll always have a std::string available.

So whenever we remove characters from the stream, we'll need to erase them from the underlying string. That means shuffling all the remaining characters down (memmove/memcpy). Because this contract must be kept every time the flow of control leaves our private implementation, this in practice means having to erase characters from the string every time we call getc or gets on the string buffer. This translates to a call to erase on every >> operation on the stream.

Then of course there's the problem of implementing the pushback buffer. If you pushback characters into the underlying string, you've got to insert them at position 0 - shuffling the entire buffer up.

The long and short of it is that you can write an ostream-only stream buffer purely for building a std::string. You'll still need to deal with all the reallocations as the underlying buffer grows, so in the end you get to save exactly one string copy. So perhaps we go from 4 string copies (and calls to malloc/free) to 3, or 3 to 2.

You'll also need to deal with the problem that the streambuf interface is not split into istreambuf and ostreambuf. This means you still have to offer the input interface and either throw exceptions or assert if someone uses it. This amounts to lying to users - we've failed to implement an expected interface.

For this tiny improvement in performance, we must pay the cost of:

  1. developing a (quite complex, when you factor in locale management) software component.

  2. suffering the loss of flexibility of having a streambuf which only supports output operations.

  3. Laying landmines for future developers to step on.




回答5:


I adapted the very good @Kuba answer to fix some issues (unfortunately he's currently unresponsive). In particular:

  • added a safe_pbump to handle 64 bit offsets;
  • return a string_view instead of string (internal string doesn't have the right size of the buffer);
  • resize the string to current buffer size on the move semantics take_str method;
  • fixed take_str method move semantics with init before return;
  • removed a useless memcpy on init method;
  • renamed the template parameter char_type to CharT to avoid ambiguity with basic_streambuf::char_type;
  • used string::data() and pointer arithmetic instead of possible undefined behavior using string::front() and string::back() as pointed by @LightnessRacesinOrbit;
  • Implementation with streambuf composition.
#pragma once

#include <cstdlib>
#include <limits>
#include <ostream>
#include <string>
#if __cplusplus >= 201703L
#include <string_view>
#endif

namespace usr
{
    // Output-only string stream whose accumulated text can be moved out
    // without copying it.
    //
    // The stream owns a private stream buffer (composition) that uses a
    // std::basic_string as its put area. The string is kept larger than the
    // text written so far; the number of characters actually written is
    // pptr() - pbase() and is reported by size().
    template <typename CharT>
    class basic_outstringstream : public std::basic_ostream<CharT, std::char_traits<CharT>>
    {
        using traits_type = std::char_traits<CharT>;
        using base_stream_type = std::basic_ostream<CharT, traits_type>;

        // Stream buffer whose put area is the storage of m_str.
        class buffer : public std::basic_streambuf<CharT, std::char_traits<CharT>>
        {
            using base_buf_type = std::basic_streambuf<CharT, traits_type>;
            using int_type = typename base_buf_type::int_type;

        private:
            // Advances the put pointer by off, in several steps if necessary:
            // pbump() only takes an int, see
            // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47921
            void safe_pbump(std::streamsize off)
            {
                const std::streamsize step = std::numeric_limits<int>::max();

                while (off > step)
                {
                    this->pbump(static_cast<int>(step));
                    off -= step;
                }
                while (off < -step)
                {
                    this->pbump(static_cast<int>(-step));
                    off += step;
                }

                this->pbump(static_cast<int>(off));
            }

            // Makes the whole of m_str the put area and positions the put
            // pointer after the first `used` characters.
            void reset_put_area(std::size_t used)
            {
                // &m_str[0] is valid even for an empty string (it designates
                // the terminator), so no const_cast on data() is needed.
                CharT* const first = &m_str[0];
                this->setp(first, first + m_str.size());
                this->safe_pbump(static_cast<std::streamsize>(used));
            }

            // Treats the current content of m_str as already written, so
            // that further output is appended after it.
            void init()
            {
                reset_put_area(m_str.size());
            }

        protected:
            // Called when the put area is full: enlarges the string, rebuilds
            // the put area around the new storage and stores ch.
            int_type overflow(int_type ch) override
            {
                if (traits_type::eq_int_type(ch, traits_type::eof()))
                    return traits_type::not_eof(ch);

                // Must be read before resize(): resizing may reallocate and
                // leave pbase()/pptr() pointing at released storage.
                const std::size_t used = size();

                // Use capacity that is already allocated (e.g. the reserve()
                // done by the constructor) before doubling the string.
                const std::size_t current = m_str.size();
                std::size_t target = m_str.capacity();
                if (target <= current)
                    target = (current == 0) ? 1 : current * 2;
                m_str.resize(target);

                reset_put_area(used);
                *this->pptr() = traits_type::to_char_type(ch);
                this->pbump(1);

                return ch;
            }

        public:
            // Starts empty with room reserved for reserveSize characters.
            explicit buffer(std::size_t reserveSize)
            {
                m_str.reserve(reserveSize);
                init();
            }

            // Adopts str as the initial content without copying it.
            explicit buffer(std::basic_string<CharT>&& str)
                : m_str(std::move(str))
            {
                init();
            }

            // Copies str as the initial content.
            explicit buffer(const std::basic_string<CharT>& str)
                : m_str(str)
            {
                init();
            }

        public:
            // Number of characters written so far.
            std::size_t size() const
            {
                return static_cast<std::size_t>(this->pptr() - this->pbase());
            }

#if __cplusplus >= 201703L
            // View of the characters written so far. It refers to the
            // internal storage, so it is only usable until the next write,
            // take_str() or clear().
            std::basic_string_view<CharT> str() const
            {
                return std::basic_string_view<CharT>(m_str.data(), size());
            }
#endif
            // Moves the written text out and leaves the buffer empty and
            // ready for reuse.
            std::basic_string<CharT> take_str()
            {
                // Cut off the unused tail of the put area first.
                m_str.resize(size());
                std::basic_string<CharT> ret = std::move(m_str);
                m_str.clear(); // a moved-from string is valid but unspecified
                init();
                return ret;
            }

            // Discards everything written so far.
            void clear()
            {
                m_str.clear();
                init();
            }

            // Pointer to the first written character; only the first size()
            // characters are meaningful and no terminator is guaranteed at
            // position size().
            const CharT * data() const
            {
                return m_str.data();
            }

        private:
            std::basic_string<CharT> m_str;
        };

    public:
        // Creates an empty stream with room reserved for reserveSize characters.
        explicit basic_outstringstream(std::size_t reserveSize = 8)
            : base_stream_type(nullptr), m_buffer(reserveSize)
        {
            // m_buffer is constructed after the base class, so it is attached here.
            this->rdbuf(&m_buffer);
        }

        // Adopts str as the initial content without copying it; later output
        // is appended.
        explicit basic_outstringstream(std::basic_string<CharT>&& str)
            : base_stream_type(nullptr), m_buffer(std::move(str))
        {
            this->rdbuf(&m_buffer);
        }

        // Copies str as the initial content; later output is appended.
        explicit basic_outstringstream(const std::basic_string<CharT>& str)
            : base_stream_type(nullptr), m_buffer(str)
        {
            this->rdbuf(&m_buffer);
        }

#if __cplusplus >= 201703L
        // View of the text written so far; only usable until the next write,
        // take_str() or clear().
        std::basic_string_view<CharT> str() const
        {
            return m_buffer.str();
        }
#endif
        // Moves the written text out; the stream is empty and reusable afterwards.
        std::basic_string<CharT> take_str()
        {
            return m_buffer.take_str();
        }

        // Pointer to the first written character (size() characters are valid).
        const CharT * data() const
        {
            return m_buffer.data();
        }

        // Number of characters written so far.
        std::size_t size() const
        {
            return m_buffer.size();
        }

        // Discards the written text.
        // Note: this hides std::basic_ios::clear(iostate).
        void clear()
        {
            m_buffer.clear();
        }

    private:
        buffer m_buffer;
    };

    using outstringstream = basic_outstringstream<char>;
    using woutstringstream = basic_outstringstream<wchar_t>;
}


来源:https://stackoverflow.com/questions/26266525/move-the-string-out-of-a-stdostringstream

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!