The question is how to convert wstring to string?
I have next example :
#include
#include
int main()
{
std::wstr
This solution is inspired in dk123's solution, but uses a locale dependent codecvt facet. The result is in locale encoded string instead of UTF-8 (if it is not set as locale):
std::string w2s(const std::wstring &var)
{
static std::locale loc("");
auto &facet = std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>(loc);
return std::wstring_convert<std::remove_reference<decltype(facet)>::type, wchar_t>(&facet).to_bytes(var);
}
std::wstring s2w(const std::string &var)
{
static std::locale loc("");
auto &facet = std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>(loc);
return std::wstring_convert<std::remove_reference<decltype(facet)>::type, wchar_t>(&facet).from_bytes(var);
}
I was searching for it, but I can't find it. Finally I found that I can get the right facet from std::locale
using the std::use_facet()
function with the right typename. Hope this helps.
Here is a worked-out solution based on the other suggestions:
#include <string>
#include <iostream>
#include <clocale>
#include <locale>
#include <vector>
int main() {
std::setlocale(LC_ALL, "");
const std::wstring ws = L"ħëłlö";
const std::locale locale("");
typedef std::codecvt<wchar_t, char, std::mbstate_t> converter_type;
const converter_type& converter = std::use_facet<converter_type>(locale);
std::vector<char> to(ws.length() * converter.max_length());
std::mbstate_t state;
const wchar_t* from_next;
char* to_next;
const converter_type::result result = converter.out(state, ws.data(), ws.data() + ws.length(), from_next, &to[0], &to[0] + to.size(), to_next);
if (result == converter_type::ok or result == converter_type::noconv) {
const std::string s(&to[0], to_next);
std::cout <<"std::string = "<<s<<std::endl;
}
}
This will usually work for Linux, but will create problems on Windows.
In case anyone else is interested: I needed a class that could be used interchangeably wherever either a string
or wstring
was expected. The following class convertible_string
, based on dk123's solution, can be initialized with either a string
, char const*
, wstring
or wchar_t const*
and can be assigned to by or implicitly converted to either a string
or wstring
(so can be passed into a functions that take either).
class convertible_string
{
public:
// default ctor
convertible_string()
{}
/* conversion ctors */
convertible_string(std::string const& value) : value_(value)
{}
convertible_string(char const* val_array) : value_(val_array)
{}
convertible_string(std::wstring const& wvalue) : value_(ws2s(wvalue))
{}
convertible_string(wchar_t const* wval_array) : value_(ws2s(std::wstring(wval_array)))
{}
/* assignment operators */
convertible_string& operator=(std::string const& value)
{
value_ = value;
return *this;
}
convertible_string& operator=(std::wstring const& wvalue)
{
value_ = ws2s(wvalue);
return *this;
}
/* implicit conversion operators */
operator std::string() const { return value_; }
operator std::wstring() const { return s2ws(value_); }
private:
std::string value_;
};
Default encoding on:
- Windows UTF-16.
- Linux UTF-8.
- MacOS UTF-8.
This code have two forms to convert std::string to std::wstring and std::wstring to std::string. If you negate #if defined WIN32, you get the same result.
1. std::string to std::wstring
• MultiByteToWideChar WinAPI
• _mbstowcs_s_l
#if defined WIN32
#include <windows.h>
#endif
std::wstring StringToWideString(std::string str)
{
if (str.empty())
{
return std::wstring();
}
size_t len = str.length() + 1;
std::wstring ret = std::wstring(len, 0);
#if defined WIN32
int size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, &str[0], str.size(), &ret[0], len);
ret.resize(size);
#else
size_t size = 0;
_locale_t lc = _create_locale(LC_ALL, "en_US.UTF-8");
errno_t retval = _mbstowcs_s_l(&size, &ret[0], len, &str[0], _TRUNCATE, lc);
_free_locale(lc);
ret.resize(size - 1);
#endif
return ret;
}
2. std::wstring to std::string
• WideCharToMultiByte WinAPI
• _wcstombs_s_l
std::string WidestringToString(std::wstring wstr)
{
if (wstr.empty())
{
return std::string();
}
#if defined WIN32
int size = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, &wstr[0], wstr.size(), NULL, 0, NULL, NULL);
std::string ret = std::string(size, 0);
WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, &wstr[0], wstr.size(), &ret[0], size, NULL, NULL);
#else
size_t size = 0;
_locale_t lc = _create_locale(LC_ALL, "en_US.UTF-8");
errno_t err = _wcstombs_s_l(&size, NULL, 0, &wstr[0], _TRUNCATE, lc);
std::string ret = std::string(size, 0);
err = _wcstombs_s_l(&size, &ret[0], size, &wstr[0], _TRUNCATE, lc);
_free_locale(lc);
ret.resize(size - 1);
#endif
return ret;
}
3. On windows you need to print unicode, using WinAPI.
• WriteConsole
#if defined _WIN32
void WriteLineUnicode(std::string s)
{
std::wstring unicode = StringToWideString(s);
WriteConsole(GetStdHandle(STD_OUTPUT_HANDLE), unicode.c_str(), unicode.length(), NULL, NULL);
std::cout << std::endl;
}
void WriteUnicode(std::string s)
{
std::wstring unicode = StringToWideString(s);
WriteConsole(GetStdHandle(STD_OUTPUT_HANDLE), unicode.c_str(), unicode.length(), NULL, NULL);
}
void WriteLineUnicode(std::wstring ws)
{
WriteConsole(GetStdHandle(STD_OUTPUT_HANDLE), ws.c_str(), ws.length(), NULL, NULL);
std::cout << std::endl;
}
void WriteUnicode(std::wstring ws)
{
WriteConsole(GetStdHandle(STD_OUTPUT_HANDLE), ws.c_str(), ws.length(), NULL, NULL);
}
4. On main program.
#if defined _WIN32
int wmain(int argc, WCHAR ** args)
#else
int main(int argc, CHAR ** args)
#endif
{
std::string source = u8"ÜüΩωЙ你月曜日\na
// Embarcadero C++ Builder
// convertion string to wstring
string str1 = "hello";
String str2 = str1; // typedef UnicodeString String; -> str2 contains now u"hello";
// convertion wstring to string
String str2 = u"hello";
string str1 = UTF8string(str2).c_str(); // -> str1 contains now "hello"