I have one more question. Suppose I have a std::wstring that looks like this:
ドイツ語で検索していてこちらのサイトにたどり着きました。
How could I convert it into a hex-encoded (percent-escaped) UTF-8 string?
Here's a version that converts from UTF-16 (wchar) to hex-encoded UTF-8 using the Win32-specific WideCharToMultiByte() function.
#include <windows.h>
#include <stdio.h>
#include <string>
#include <iostream>
// Converts a UTF-16 wide string to UTF-8 and returns every resulting byte
// percent-escaped as "%XX" (two upper-case hex digits), e.g. L"A" -> "%41".
//
// input: the wide string to encode. All input.size() characters are
//        converted, including any embedded L'\0'.
// Returns the escaped string, or an empty string when the input is empty, is
// too long for the int length parameter of the Win32 API, or the conversion
// fails.
std::string wstring_to_utf8_hex(const std::wstring &input)
{
    std::string output;

    // WideCharToMultiByte() takes the length as an int and fails for a length
    // of 0, so reject both cases up front. The round-trip comparison detects
    // sizes that do not fit in an int.
    const int cchInput = static_cast<int>(input.size());
    if (cchInput <= 0 ||
        static_cast<std::wstring::size_type>(cchInput) != input.size()) {
        return output;
    }

    // First call: query the number of UTF-8 bytes required. Passing an
    // explicit length (instead of -1) converts the whole string rather than
    // stopping at the first L'\0', and no terminator is added to the result.
    const int cbNeeded = WideCharToMultiByte(CP_UTF8, 0, input.data(), cchInput,
                                             NULL, 0, NULL, NULL);
    if (cbNeeded <= 0) {
        return output;
    }

    // Second call: perform the conversion into a self-releasing buffer.
    std::string utf8(static_cast<std::string::size_type>(cbNeeded), '\0');
    const int cbWritten = WideCharToMultiByte(CP_UTF8, 0, input.data(), cchInput,
                                              &utf8[0], cbNeeded, NULL, NULL);
    if (cbWritten <= 0) {
        return output;
    }

    static const char kHexDigits[] = "0123456789ABCDEF";
    output.reserve(static_cast<std::string::size_type>(cbWritten) * 3);
    for (int i = 0; i < cbWritten; ++i) {
        // Go through unsigned char so bytes >= 0x80 index the table correctly.
        const unsigned char byte = static_cast<unsigned char>(utf8[i]);
        output.push_back('%');
        output.push_back(kHexDigits[byte >> 4]);
        output.push_back(kHexDigits[byte & 0x0F]);
    }
    return output;
}
// Demo driver: encodes a Japanese sample sentence and prints the
// percent-escaped UTF-8 form on standard output.
int main(int, char*[])
{
    const std::wstring sample = L"ドイツ語で検索していてこちらのサイトにたどり着きました。";
    const std::string encoded = wstring_to_utf8_hex(sample);

    std::cout << "result=";
    std::cout << encoded;
    std::cout << std::endl;
    return 0;
}
To go the other way, you'll need to do some parsing to decode the hex values into a UTF-8 buffer, and then call the complementary MultiByteToWideChar() function to convert it back into a wchar array.
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string>
#include <iostream>
// Returns the value (0-15) of the hexadecimal digit c, or -1 when c is not a
// hexadecimal digit. Plain comparisons are used instead of <ctype.h> so that
// bytes >= 0x80 (negative when char is signed) are handled safely.
static int unhexlify_nibble(char c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    return -1;
}

// Decodes percent-escapes in input and returns the resulting byte string.
//
// Recognised escapes:
//   %XX      two hex digits (either case)  -> the byte 0xXX
//   %#NNN;   a run of decimal digits        -> the low 8 bits of NNN; the
//                                              trailing ';' is optional
// Anything else, including a '%' that does not start a valid escape, is
// copied through unchanged. The whole of input is processed, including any
// embedded '\0' characters.
std::string unhexlify(const std::string &input)
{
    const std::string::size_type size = input.size();
    std::string output;
    output.reserve(size);

    std::string::size_type pos = 0;
    while (pos < size) {
        // Both escape forms need at least two characters after the '%'.
        if (input[pos] == '%' && size - pos >= 3) {
            const int high = unhexlify_nibble(input[pos + 1]);
            const int low = (high < 0) ? -1 : unhexlify_nibble(input[pos + 2]);
            if (low >= 0) {
                output.push_back(static_cast<char>(high * 16 + low));
                pos += 3;
                continue;
            }

            const char first = input[pos + 2];
            if (input[pos + 1] == '#' && first >= '0' && first <= '9') {
                // Only the low byte is kept, so accumulate modulo 256; this
                // cannot overflow however many digits follow.
                unsigned int value = 0;
                pos += 2;
                while (pos < size && input[pos] >= '0' && input[pos] <= '9') {
                    value = (value * 10u + static_cast<unsigned int>(input[pos] - '0')) & 0xFFu;
                    ++pos;
                }
                output.push_back(static_cast<char>(value));
                if (pos < size && input[pos] == ';') {
                    ++pos;
                }
                continue;
            }
        }

        output.push_back(input[pos]);
        ++pos;
    }
    return output;
}
// Decodes a percent-escaped UTF-8 string (as accepted by unhexlify()) and
// converts the resulting UTF-8 bytes to a UTF-16 wide string.
//
// input: the escaped text, e.g. "%E3%83%89".
// Returns the decoded wide string, or an empty string when the decoded input
// is empty, is too long for the int length parameter of the Win32 API, or the
// conversion fails.
std::wstring utf8_hex_to_wstring(const std::string &input)
{
    std::wstring output;
    const std::string utf8 = unhexlify(input);

    // MultiByteToWideChar() takes the length as an int and fails for a length
    // of 0, so reject both cases up front. The round-trip comparison detects
    // sizes that do not fit in an int.
    const int cbInput = static_cast<int>(utf8.size());
    if (cbInput <= 0 ||
        static_cast<std::string::size_type>(cbInput) != utf8.size()) {
        return output;
    }

    // First call: query the number of wide characters required. Passing an
    // explicit length (instead of -1) converts every decoded byte, so a
    // decoded "%00" no longer cuts the result short.
    const int cchNeeded = MultiByteToWideChar(CP_UTF8, 0, utf8.data(), cbInput,
                                              NULL, 0);
    if (cchNeeded <= 0) {
        return output;
    }

    // Second call: perform the conversion into a self-releasing buffer.
    std::wstring wide(static_cast<std::wstring::size_type>(cchNeeded), L'\0');
    const int cchWritten = MultiByteToWideChar(CP_UTF8, 0, utf8.data(), cbInput,
                                               &wide[0], cchNeeded);
    if (cchWritten <= 0) {
        return output;
    }

    wide.resize(static_cast<std::wstring::size_type>(cchWritten));
    output.swap(wide);
    return output;
}
// Demo driver: decodes the percent-escaped UTF-8 form of a Japanese sample
// sentence and reports whether it matches the original wide string.
int main(int, char*[])
{
    const std::string encoded = "%E3%83%89%E3%82%A4%E3%83%84%E8%AA%9E%E3%81%A7%E6%A4%9C%E7%B4%A2%E3%81%97%E3%81%A6%E3%81%84%E3%81%A6%E3%81%93%E3%81%A1%E3%82%89%E3%81%AE%E3%82%B5%E3%82%A4%E3%83%88%E3%81%AB%E3%81%9F%E3%81%A9%E3%82%8A%E7%9D%80%E3%81%8D%E3%81%BE%E3%81%97%E3%81%9F%E3%80%82";
    const std::wstring expected = L"ドイツ語で検索していてこちらのサイトにたどり着きました。";
    const std::wstring decoded = utf8_hex_to_wstring(encoded);

    const char *verdict = "no";
    if (decoded == expected) {
        verdict = "yes";
    }
    std::cout << "match?=" << verdict << std::endl;
    return 0;
}