Hi I was trying to output unicode string to a console with iostreams and failed.
I found this: Using unicode font in c++ console app and this snippet work
I have verified a solution here using Visual Studio 2010. Via this MSDN article and MSDN blog post. The trick is an obscure call to _setmode(..., _O_U16TEXT)
.
Solution:
#include <iostream>
#include <io.h>
#include <fcntl.h>
int wmain(int argc, wchar_t* argv[])
{
_setmode(_fileno(stdout), _O_U16TEXT);
std::wcout << L"Testing unicode -- English -- Ελληνικά -- Español." << std::endl;
}
Screenshot:
First, sorry I probably don't have the fonts required so I cannot test it yet.
Something looks a bit fishy here
// the following is said to be working
SetConsoleOutputCP(CP_UTF8); // output is in UTF8
wchar_t s[] = L"èéøÞǽлљΣæča";
int bufferSize = WideCharToMultiByte(CP_UTF8, 0, s, -1, NULL, 0, NULL, NULL);
char* m = new char[bufferSize];
WideCharToMultiByte(CP_UTF8, 0, s, -1, m, bufferSize, NULL, NULL);
wprintf(L"%S", m); // <-- upper case %S in wprintf() is used for MultiByte/utf-8
// lower case %s in wprintf() is used for WideChar
printf("%s", m); // <-- does this work as well? try it to verify my assumption
while
// the following is said to have problem
SetConsoleOutputCP(CP_UTF8);
utf8_locale = locale(old_locale,
new boost::program_options::detail::utf8_codecvt_facet());
wcout.imbue(utf8_locale);
wcout << L"¡Hola!" << endl; // <-- you are passing wide char.
// have you tried passing the multibyte equivalent by converting to utf8 first?
int bufferSize = WideCharToMultiByte(CP_UTF8, 0, s, -1, NULL, 0, NULL, NULL);
char* m = new char[bufferSize];
WideCharToMultiByte(CP_UTF8, 0, s, -1, m, bufferSize, NULL, NULL);
cout << m << endl;
what about
// without setting locale to UTF8, you pass WideChars
wcout << L"¡Hola!" << endl;
// set locale to UTF8 and use cout
SetConsoleOutputCP(CP_UTF8);
cout << utf8_encoded_by_converting_using_WideCharToMultiByte << endl;
SetConsoleCP() and chcp does not the same!
Take this program snippet:
SetConsoleCP(65001) // 65001 = UTF-8
static const char s[]="tränenüberströmt™\n";
DWORD slen=lstrlen(s);
WriteConsoleA(GetStdHandle(STD_OUTPUT_HANDLE),s,slen,&slen,NULL);
The source code must be saved as UTF-8 without BOM (Byte Order Mark; Signature). Then, the Microsoft compiler cl.exe takes the UTF-8 strings as-is.
If this code is saved with BOM, cl.exe transcodes the string to ANSI (i.e. CP1252), which doesn't match to CP65001 (= UTF-8).
Change the display font to Lucidia Console, otherwise, UTF-8 output will not work at all.
chcp
850
test.exe
tr├ñnen├╝berstr├ÂmtÔäó
chcp
65001
- This setting has changed by SetConsoleCP()
but with no useful effect.
chcp 65001
test.exe
tränenüberströmt™
- All OK now.
Tested with: German Windows XP SP3
The wcout must have the locale set differently to the CRT. Here's how it can be fixed:
int _tmain(int argc, _TCHAR* argv[])
{
char* locale = setlocale(LC_ALL, "English"); // Get the CRT's current locale.
std::locale lollocale(locale);
setlocale(LC_ALL, locale); // Restore the CRT.
std::wcout.imbue(lollocale); // Now set the std::wcout to have the locale that we got from the CRT.
std::wcout << L"¡Hola!";
std::cin.get();
return 0;
}
I just tested it, and it displays the string here absolutely fine.
Default encoding on:
- Windows UTF-16.
- Linux UTF-8.
- MacOS UTF-8.
My solution Steps, includes null chars \0 (avoid truncated). Without using functions on windows.h header:
- Add Macros to detect Platform.
#if defined (_WIN32)
#define WINDOWSLIB 1
#elif defined (__ANDROID__) || defined(ANDROID)//Android
#define ANDROIDLIB 1
#elif defined (__APPLE__)//iOS, Mac OS
#define MACOSLIB 1
#elif defined (__LINUX__) || defined(__gnu_linux__) || defined(__linux__)//_Ubuntu - Fedora - Centos - RedHat
#define LINUXLIB 1
#endif
- Create conversion functions std::wstring to std::string or viceversa.
#include <locale>
#include <iostream>
#include <string>
#ifdef WINDOWSLIB
#include <Windows.h>
#endif
using namespace std::literals::string_literals;
// Convert std::wstring to std::string
std::string WidestringToString(const std::wstring& wstr, const std::string& locale)
{
if (wstr.empty())
{
return std::string();
}
size_t pos;
size_t begin = 0;
std::string ret;
size_t size;
#ifdef WINDOWSLIB
_locale_t lc = _create_locale(LC_ALL, locale.c_str());
pos = wstr.find(static_cast<wchar_t>(0), begin);
while (pos != std::wstring::npos && begin < wstr.length())
{
std::wstring segment = std::wstring(&wstr[begin], pos - begin);
_wcstombs_s_l(&size, nullptr, 0, &segment[0], _TRUNCATE, lc);
std::string converted = std::string(size, 0);
_wcstombs_s_l(&size, &converted[0], size, &segment[0], _TRUNCATE, lc);
ret.append(converted);
begin = pos + 1;
pos = wstr.find(static_cast<wchar_t>(0), begin);
}
if (begin <= wstr.length()) {
std::wstring segment = std::wstring(&wstr[begin], wstr.length() - begin);
_wcstombs_s_l(&size, nullptr, 0, &segment[0], _TRUNCATE, lc);
std::string converted = std::string(size, 0);
_wcstombs_s_l(&size, &converted[0], size, &segment[0], _TRUNCATE, lc);
converted.resize(size - 1);
ret.append(converted);
}
_free_locale(lc);
#elif defined LINUXLIB
std::string currentLocale = setlocale(LC_ALL, nullptr);
setlocale(LC_ALL, locale.c_str());
pos = wstr.find(static_cast<wchar_t>(0), begin);
while (pos != std::wstring::npos && begin < wstr.length())
{
std::wstring segment = std::wstring(&wstr[begin], pos - begin);
size = wcstombs(nullptr, segment.c_str(), 0);
std::string converted = std::string(size, 0);
wcstombs(&converted[0], segment.c_str(), converted.size());
ret.append(converted);
ret.append({ 0 });
begin = pos + 1;
pos = wstr.find(static_cast<wchar_t>(0), begin);
}
if (begin <= wstr.length()) {
std::wstring segment = std::wstring(&wstr[begin], wstr.length() - begin);
size = wcstombs(nullptr, segment.c_str(), 0);
std::string converted = std::string(size, 0);
wcstombs(&converted[0], segment.c_str(), converted.size());
ret.append(converted);
}
setlocale(LC_ALL, currentLocale.c_str());
#elif defined MACOSLIB
#endif
return ret;
}
// Convert std::string to std::wstring
std::wstring StringToWideString(const std::string& str, const std::string& locale)
{
if (str.empty())
{
return std::wstring();
}
size_t pos;
size_t begin = 0;
std::wstring ret;
size_t size;
#ifdef WINDOWSLIB
_locale_t lc = _create_locale(LC_ALL, locale.c_str());
pos = str.find(static_cast<char>(0), begin);
while (pos != std::string::npos) {
std::string segment = std::string(&str[begin], pos - begin);
std::wstring converted = std::wstring(segment.size() + 1, 0);
_mbstowcs_s_l(&size, &converted[0], converted.size(), &segment[0], _TRUNCATE, lc);
converted.resize(size - 1);
ret.append(converted);
ret.append({ 0 });
begin = pos + 1;
pos = str.find(static_cast<char>(0), begin);
}
if (begin < str.length()) {
std::string segment = std::string(&str[begin], str.length() - begin);
std::wstring converted = std::wstring(segment.size() + 1, 0);
_mbstowcs_s_l(&size, &converted[0], converted.size(), &segment[0], _TRUNCATE, lc);
converted.resize(size - 1);
ret.append(converted);
}
_free_locale(lc);
#elif defined LINUXLIB
std::string currentLocale = setlocale(LC_ALL, nullptr);
setlocale(LC_ALL, locale.c_str());
pos = str.find(static_cast<char>(0), begin);
while (pos != std::string::npos) {
std::string segment = std::string(&str[begin], pos - begin);
std::wstring converted = std::wstring(segment.size(), 0);
size = mbstowcs(&converted[0], &segment[0], converted.size());
converted.resize(size);
ret.append(converted);
ret.append({ 0 });
begin = pos + 1;
pos = str.find(static_cast<char>(0), begin);
}
if (begin < str.length()) {
std::string segment = std::string(&str[begin], str.length() - begin);
std::wstring converted = std::wstring(segment.size(), 0);
size = mbstowcs(&converted[0], &segment[0], converted.size());
converted.resize(size);
ret.append(converted);
}
setlocale(LC_ALL, currentLocale.c_str());
#elif defined MACOSLIB
#endif
return ret;
}
- Print std::string. Check RawString Suffix.
Linux Code. Print directly std::string using std::cout.
If you have std::wstring.
1. Convert to std::string.
2. Print with std::cout.
std::wstring x = L"\0\001日本ABC\0DE\0F\0G