Output unicode strings in Windows console app

后端 未结 11 940
花落未央
花落未央 2020-11-21 11:20

Hi, I was trying to output a Unicode string to the console with iostreams and failed.

I found this: "Using unicode font in c++ console app", and this snippet works

相关标签:
11条回答
  • 2020-11-21 12:01

    I have verified a solution here using Visual Studio 2010. Via this MSDN article and MSDN blog post. The trick is an obscure call to _setmode(..., _O_U16TEXT).

    Solution:

    #include <iostream>
    #include <io.h>
    #include <fcntl.h>
    
    // Entry point: puts stdout into UTF-16 text mode, then writes one wide
    // string containing English, Greek and Spanish text.
    int wmain(int /*argc*/, wchar_t* /*argv*/[])
    {
        // Switch the stdout file descriptor to UTF-16 translation before any output.
        const int stdoutDescriptor = _fileno(stdout);
        _setmode(stdoutDescriptor, _O_U16TEXT);

        const wchar_t* const message = L"Testing unicode -- English -- Ελληνικά -- Español.";
        std::wcout << message;
        std::wcout << std::endl;
        return 0;
    }
    

    Screenshot:

    Unicode in console

    0 讨论(0)
  • 2020-11-21 12:03

    First, sorry I probably don't have the fonts required so I cannot test it yet.

    Something looks a bit fishy here

    // the following is said to be working
    SetConsoleOutputCP(CP_UTF8); // output is in UTF8
    wchar_t s[] = L"èéøÞǽлљΣæča";
    int bufferSize = WideCharToMultiByte(CP_UTF8, 0, s, -1, NULL, 0, NULL, NULL);
    char* m = new char[bufferSize]; 
    WideCharToMultiByte(CP_UTF8, 0, s, -1, m, bufferSize, NULL, NULL);
    wprintf(L"%S", m); // <-- upper case %S in wprintf() is used for MultiByte/utf-8
                       //     lower case %s in wprintf() is used for WideChar
    printf("%s", m); // <-- does this work as well? try it to verify my assumption
    

    while

    // the following is said to have problem
    SetConsoleOutputCP(CP_UTF8);
    utf8_locale = locale(old_locale,
                         new boost::program_options::detail::utf8_codecvt_facet());
    wcout.imbue(utf8_locale);
    wcout << L"¡Hola!" << endl; // <-- you are passing wide char.
    // have you tried passing the multibyte equivalent by converting to utf8 first?
    int bufferSize = WideCharToMultiByte(CP_UTF8, 0, s, -1, NULL, 0, NULL, NULL);
    char* m = new char[bufferSize]; 
    WideCharToMultiByte(CP_UTF8, 0, s, -1, m, bufferSize, NULL, NULL);
    cout << m << endl;
    

    what about

    // Suggested alternative, step 1: write the wide string through wcout
    // before the console output code page has been changed.
    wcout << L"¡Hola!" << endl;
    // Step 2: switch the console output code page to UTF-8 and write the
    // already UTF-8 encoded bytes through the narrow stream cout.
    SetConsoleOutputCP(CP_UTF8);
    // NOTE(review): the identifier below is a placeholder for a char buffer
    // produced with WideCharToMultiByte(CP_UTF8, ...); it is not declared here.
    cout << utf8_encoded_by_converting_using_WideCharToMultiByte << endl;
    
    0 讨论(0)
  • 2020-11-21 12:06

    SetConsoleCP() and chcp do not do the same thing!

    Take this program snippet:

    // Demonstration: set the console code page from code, then write a UTF-8
    // encoded narrow string straight to the console.
    // NOTE(review): per the Windows console documentation SetConsoleCP() sets the
    // *input* code page; the output code page is set with SetConsoleOutputCP().
    // The call is kept as written because the surrounding text discusses it.
    SetConsoleCP(65001);  // 65001 = UTF-8
    static const char s[]="tränenüberströmt™\n";
    // lstrlenA matches the char array and WriteConsoleA regardless of whether
    // the project is built with UNICODE defined.
    DWORD slen=lstrlenA(s);
    // slen is passed both as the number of chars to write and as the
    // out-parameter that receives the number actually written.
    WriteConsoleA(GetStdHandle(STD_OUTPUT_HANDLE),s,slen,&slen,NULL);
    

    The source code must be saved as UTF-8 without a BOM (Byte Order Mark; signature). The Microsoft compiler cl.exe then takes the UTF-8 strings as-is.
    If this code is saved with a BOM, cl.exe transcodes the string to ANSI (i.e. CP1252), which does not match CP65001 (= UTF-8).

    Change the display font to Lucida Console; otherwise, UTF-8 output will not work at all.

    • Type: chcp
    • Answer: 850
    • Type: test.exe
    • Answer: tr├ñnen├╝berstr├ÂmtÔäó
    • Type: chcp
    • Answer: 65001 - This setting has been changed by SetConsoleCP() but with no useful effect.
    • Type: chcp 65001
    • Type: test.exe
    • Answer: tränenüberströmt™ - All OK now.

    Tested with: German Windows XP SP3

    0 讨论(0)
  • 2020-11-21 12:07

    The wcout must have the locale set differently to the CRT. Here's how it can be fixed:

    // Entry point: makes std::wcout use the same locale as the C runtime, prints
    // a wide string, then waits for a key press.
    int _tmain(int argc, _TCHAR* argv[])
    {
        // setlocale() switches the CRT to the "English" locale and returns the
        // resulting locale name, or a null pointer when the request cannot be honored.
        const char* crtLocaleName = setlocale(LC_ALL, "English");
        if (crtLocaleName != nullptr)
        {
            // Build the C++ locale first: the pointer returned by setlocale() may
            // be invalidated by later setlocale() calls.
            const std::locale crtLocale(crtLocaleName);
            setlocale(LC_ALL, crtLocale.name().c_str()); // Re-apply the same locale to the CRT.
            std::wcout.imbue(crtLocale); // Give std::wcout the locale the CRT is using.
        }
        // When the locale is unavailable, wcout keeps its current locale instead
        // of constructing std::locale from a null pointer (which throws).
        std::wcout << L"¡Hola!";
        std::cin.get();
        return 0;
    }
    

    I just tested it, and it displays the string here absolutely fine.

    0 讨论(0)
  • 2020-11-21 12:09

    Default encoding on:

    • Windows UTF-16.
    • Linux UTF-8.
    • MacOS UTF-8.

    My solution, step by step. It preserves embedded null characters (\0), so strings are not truncated, and it does not use any functions from the windows.h header:

    1. Add Macros to detect Platform.
    // Platform tags: at most one of WINDOWSLIB / ANDROIDLIB / MACOSLIB / LINUXLIB
    // is defined (to 1). Android is tested before Linux, and Windows first.
    #ifdef _WIN32
    #  define WINDOWSLIB 1   /* Windows */
    #elif defined(__ANDROID__) || defined(ANDROID)
    #  define ANDROIDLIB 1   /* Android */
    #elif defined(__APPLE__)
    #  define MACOSLIB 1     /* iOS, Mac OS */
    #elif defined(__LINUX__) || defined(__gnu_linux__) || defined(__linux__)
    #  define LINUXLIB 1     /* Ubuntu - Fedora - Centos - RedHat */
    #endif
    
    2. Create conversion functions from std::wstring to std::string and vice versa.
    #include <clocale>
    #include <cstdlib>
    #include <iostream>
    #include <locale>
    #include <stdexcept>
    #include <string>
    #ifdef WINDOWSLIB
    #include <Windows.h>
    #endif
    
    using namespace std::literals::string_literals;
    
    // Convert std::wstring to std::string
    //
    // Converts wstr to a multibyte string using the C runtime conversion of the
    // locale named by `locale`. Embedded L'\0' characters are preserved: the
    // input is split at every L'\0', each piece is converted on its own, and the
    // pieces are joined again with '\0'.
    //
    // Parameters:
    //   wstr   - wide string to convert; may contain embedded nulls.
    //   locale - locale name handed to _create_locale (WINDOWSLIB) or setlocale
    //            (every other platform). If the name is not accepted, the
    //            conversion runs under the locale that is currently active.
    // Returns: the converted string; empty when wstr is empty.
    // Throws:  std::runtime_error when a character cannot be represented in the
    //          selected locale.
    // Note: outside WINDOWSLIB the global C locale is switched temporarily with
    // setlocale and restored before returning (also when an exception is thrown),
    // so concurrent locale-dependent calls in other threads can be affected.
    std::string WidestringToString(const std::wstring& wstr, const std::string& locale)
    {
        if (wstr.empty())
        {
            return std::string();
        }

    #ifdef WINDOWSLIB
        // Owns the CRT locale object so it is released on every exit path.
        struct LocaleHandle
        {
            _locale_t handle;
            ~LocaleHandle()
            {
                if (handle != nullptr)
                {
                    _free_locale(handle);
                }
            }
        } lc{ _create_locale(LC_ALL, locale.c_str()) };

        // Converts one null-free piece of the input.
        const auto convertSegment = [&lc](const std::wstring& segment) -> std::string
        {
            // Measuring call: `required` receives the byte count including the
            // terminating null, or 0 when a character cannot be converted.
            size_t required = 0;
            _wcstombs_s_l(&required, nullptr, 0, segment.c_str(), _TRUNCATE, lc.handle);
            if (required == 0)
            {
                throw std::runtime_error("WidestringToString: character not representable in the selected locale");
            }
            std::string converted(required, '\0');
            _wcstombs_s_l(&required, &converted[0], converted.size(), segment.c_str(), _TRUNCATE, lc.handle);
            if (required == 0)
            {
                throw std::runtime_error("WidestringToString: character not representable in the selected locale");
            }
            converted.resize(required - 1); // drop the terminator the CRT wrote
            return converted;
        };
    #else
        // Generic C-library path, used for Linux and every other non-Windows
        // platform (so macOS and Android no longer yield an empty result).
        const char* const activeName = std::setlocale(LC_ALL, nullptr);
        // Restores the previously active global locale when the scope is left.
        struct LocaleRestorer
        {
            std::string saved;
            bool armed;
            ~LocaleRestorer()
            {
                if (armed)
                {
                    std::setlocale(LC_ALL, saved.c_str());
                }
            }
        } restorer{ activeName != nullptr ? activeName : "", activeName != nullptr };
        std::setlocale(LC_ALL, locale.c_str());

        // Converts one null-free piece of the input.
        const auto convertSegment = [](const std::wstring& segment) -> std::string
        {
            // With a null destination wcstombs only reports the byte count
            // (terminator excluded), or (size_t)-1 for an unconvertible character.
            const size_t required = std::wcstombs(nullptr, segment.c_str(), 0);
            if (required == static_cast<size_t>(-1))
            {
                throw std::runtime_error("WidestringToString: character not representable in the selected locale");
            }
            std::string converted(required, '\0');
            if (required != 0)
            {
                std::wcstombs(&converted[0], segment.c_str(), converted.size());
            }
            return converted;
        };
    #endif

        std::string ret;
        size_t begin = 0;
        for (;;)
        {
            const size_t pos = wstr.find(L'\0', begin);
            const size_t end = (pos == std::wstring::npos) ? wstr.length() : pos;
            ret += convertSegment(wstr.substr(begin, end - begin));
            if (pos == std::wstring::npos)
            {
                break; // last piece: no separator follows it
            }
            ret.push_back('\0'); // re-insert the embedded null that split the pieces
            begin = pos + 1;
        }

        return ret;
    }
    
    // Convert std::string to std::wstring
    //
    // Converts the multibyte string str to a wide string using the C runtime
    // conversion of the locale named by `locale`. Embedded '\0' bytes are
    // preserved: the input is split at every '\0', each piece is converted on
    // its own, and the pieces are joined again with L'\0'.
    //
    // Parameters:
    //   str    - multibyte string to convert; may contain embedded nulls.
    //   locale - locale name handed to _create_locale (WINDOWSLIB) or setlocale
    //            (every other platform). If the name is not accepted, the
    //            conversion runs under the locale that is currently active.
    // Returns: the converted wide string; empty when str is empty.
    // Throws:  std::runtime_error when str contains a byte sequence that is not
    //          valid in the selected locale.
    // Note: outside WINDOWSLIB the global C locale is switched temporarily with
    // setlocale and restored before returning (also when an exception is thrown),
    // so concurrent locale-dependent calls in other threads can be affected.
    std::wstring StringToWideString(const std::string& str, const std::string& locale)
    {
        if (str.empty())
        {
            return std::wstring();
        }

    #ifdef WINDOWSLIB
        // Owns the CRT locale object so it is released on every exit path.
        struct LocaleHandle
        {
            _locale_t handle;
            ~LocaleHandle()
            {
                if (handle != nullptr)
                {
                    _free_locale(handle);
                }
            }
        } lc{ _create_locale(LC_ALL, locale.c_str()) };

        // Converts one null-free piece of the input.
        const auto convertSegment = [&lc](const std::string& segment) -> std::wstring
        {
            // A wide string never needs more elements than the multibyte form
            // has bytes; one extra element holds the terminator the CRT writes.
            std::wstring converted(segment.size() + 1, L'\0');
            size_t written = 0; // includes the terminator; 0 signals an invalid sequence
            _mbstowcs_s_l(&written, &converted[0], converted.size(), segment.c_str(), _TRUNCATE, lc.handle);
            if (written == 0)
            {
                throw std::runtime_error("StringToWideString: invalid multibyte sequence for the selected locale");
            }
            converted.resize(written - 1);
            return converted;
        };
    #else
        // Generic C-library path, used for Linux and every other non-Windows
        // platform (so macOS and Android no longer yield an empty result).
        const char* const activeName = std::setlocale(LC_ALL, nullptr);
        // Restores the previously active global locale when the scope is left.
        struct LocaleRestorer
        {
            std::string saved;
            bool armed;
            ~LocaleRestorer()
            {
                if (armed)
                {
                    std::setlocale(LC_ALL, saved.c_str());
                }
            }
        } restorer{ activeName != nullptr ? activeName : "", activeName != nullptr };
        std::setlocale(LC_ALL, locale.c_str());

        // Converts one null-free piece of the input.
        const auto convertSegment = [](const std::string& segment) -> std::wstring
        {
            std::wstring converted(segment.size(), L'\0');
            if (segment.empty())
            {
                return converted;
            }
            // mbstowcs returns the number of wide characters written
            // (terminator excluded), or (size_t)-1 for an invalid sequence.
            const size_t written = std::mbstowcs(&converted[0], segment.c_str(), converted.size());
            if (written == static_cast<size_t>(-1))
            {
                throw std::runtime_error("StringToWideString: invalid multibyte sequence for the selected locale");
            }
            converted.resize(written);
            return converted;
        };
    #endif

        std::wstring ret;
        size_t begin = 0;
        for (;;)
        {
            const size_t pos = str.find('\0', begin);
            const size_t end = (pos == std::string::npos) ? str.length() : pos;
            ret += convertSegment(str.substr(begin, end - begin));
            if (pos == std::string::npos)
            {
                break; // last piece: no separator follows it
            }
            ret.push_back(L'\0'); // re-insert the embedded null that split the pieces
            begin = pos + 1;
        }

        return ret;
    }
    
    3. Print the std::string. Check the RawString suffix.

    Linux code: print the std::string directly using std::cout.
    If you have a std::wstring:
    1. Convert it to std::string.
    2. Print it with std::cout.

    std::wstring x = L"\0\001日本ABC\0DE\0F\0G                                                                    
    0 讨论(0)
提交回复
热议问题