问题
I am trying to write a function that converts a Shift-JIS string to a UTF-8 string using iconv.
I wrote the code below:
#include <iconv.h>
#include <iostream>
#include <stdio.h>
using namespace std;
// Size in bytes of each fixed char buffer declared in main.
#define BUF_SIZE 1024
// Global byte counter; sjis2utf8 passes its address to iconv as both the
// input and the output bytes-left argument.
size_t z = (size_t) BUF_SIZE-1;
// Converts text_sjis (Shift-JIS) into text_utf8 (UTF-8) with iconv.
// Opens a fresh conversion descriptor on every call and closes it before
// returning. Always returns true; the results of iconv_open and iconv are
// not checked.
bool sjis2utf8( char* text_sjis, char* text_utf8 )
{
iconv_t ic;
ic = iconv_open("UTF8", "SJIS"); // sjis->utf8
// The same global z is passed for both inbytesleft and outbytesleft, so
// its value carries over from one call to the next.
iconv(ic , &text_sjis, &z, &text_utf8, &z);
iconv_close(ic);
return true;
}
// Converts two sample strings into the same output buffer and prints the
// buffer after each conversion.
int main(void)
{
char hello[BUF_SIZE] = "hello";
char bye[BUF_SIZE] = "bye";
// Initial contents of the output buffer before any conversion.
char tmp[BUF_SIZE] = "something else";
sjis2utf8(hello, tmp);
cout << tmp << endl;
sjis2utf8(bye, tmp);
cout << tmp << endl;
}
The expected output is:
hello
bye
but the actual output is:
hello
hello
Does anyone know why this phenomenon occurs? What's wrong with my program?
Note that "hello" and "bye" are Japanese Shift-JIS strings in my original program, but I changed them here to make the program easier to read.
回答1:
I think you are misusing the iconv function by passing it the global variable z. The first time you call sjis2utf8, z is decremented to 0. The second call to sjis2utf8 has no effect (z == 0) and leaves tmp unchanged.
From the iconv documentation:
// Prototype of iconv as quoted from its documentation.
size_t iconv (iconv_t cd,
const char* * inbuf, size_t * inbytesleft,
char* * outbuf, size_t * outbytesleft);
The iconv function converts one multibyte character at a time, and for each character conversion it increments *inbuf and decrements *inbytesleft by the number of converted input bytes, it increments *outbuf and decrements *outbytesleft by the number of converted output bytes, and it updates the conversion state contained in cd.
You should use two separate variables for the buffer lengths:
// One counter per buffer, so the decrements iconv applies to the input
// count do not affect the output count (and vice versa).
size_t il = BUF_SIZE - 1 ;
size_t ol = BUF_SIZE - 1 ;
iconv(ic, &text_sjis, &il, &text_utf8, &ol) ;
Then check the return value of iconv and the remaining buffer lengths to confirm that the conversion succeeded.
回答2:
#include <iconv.h>
#include <iostream>
#include <stdio.h>
#include <string.h>
using namespace std;
// Size in bytes of each fixed char buffer used in main.
const size_t BUF_SIZE=1024;
// Owns an iconv conversion descriptor for the lifetime of the object so the
// same descriptor can be reused for any number of conversions.
class IConv {
    iconv_t ic_;  // (iconv_t)-1 when iconv_open failed

    // The descriptor must be closed exactly once, so copying is disabled
    // (declared private and left undefined).
    IConv(const IConv&);
    IConv& operator=(const IConv&);

public:
    // Opens a descriptor converting from encoding `from` to encoding `to`.
    // If the pair is not supported, every call to convert() returns false.
    IConv(const char* to, const char* from)
        : ic_(iconv_open(to, from)) { }

    // Closes the descriptor only if it was opened successfully.
    ~IConv() {
        if (ic_ != (iconv_t)-1) {
            iconv_close(ic_);
        }
    }

    // Converts the NUL-terminated string `input` into `output`.
    //
    // input    - NUL-terminated source text. The terminator is converted as
    //            well, so on success `output` is NUL-terminated. Text whose
    //            encoding contains embedded zero bytes cannot be passed this
    //            way; such callers need to supply the length themselves.
    // output   - destination buffer.
    // out_size - in: bytes available in `output`; out: bytes still unused.
    //
    // Returns true on success; false if the descriptor is invalid, an
    // argument is null, or iconv reports an error.
    bool convert(char* input, char* output, size_t& out_size) {
        if (ic_ == (iconv_t)-1 || input == NULL || output == NULL) {
            return false;
        }
        // Return to the initial shift state so that an earlier failed or
        // partial conversion cannot influence this one.
        iconv(ic_, NULL, NULL, NULL, NULL);

        size_t inbufsize = strlen(input) + 1;  // include the terminator
        const size_t rc = iconv(ic_, &input, &inbufsize, &output, &out_size);
        // iconv returns (size_t)-1 on error and a count (normally 0) on
        // success, so the result must be compared, not converted to bool.
        return rc != static_cast<size_t>(-1);
    }
};
int main(void)
{
char hello[BUF_SIZE] = "hello";
char bye[BUF_SIZE] = "bye";
char tmp[BUF_SIZE] = "something else";
IConv ic("UTF8","SJIS");
size_t outsize = BUF_SIZE;//you will need it
ic.convert(hello, tmp, outsize);
cout << tmp << endl;
outsize = BUF_SIZE;
ic.convert(bye, tmp, outsize);
cout << tmp << endl;
}
- based on Kleist's answer
回答3:
You must pass the length of the input string as the third parameter of iconv.
Try:
//...
int len = strlen(text_sjis);
iconv(ic , &text_sjis, &len, &text_utf8, &z);
//...
回答4:
// Prototype of iconv; inbytesleft and outbytesleft are in/out parameters.
size_t iconv (iconv_t cd,
const char* * inbuf, size_t * inbytesleft,
char* * outbuf, size_t * outbytesleft);
iconv changes the value pointed to by inbytesleft. So after your first run, z is 0. To fix this, you should calculate the length of inbuf and store it in a local variable before each conversion.
It is described here: http://www.gnu.org/s/libiconv/documentation/libiconv/iconv.3.html
And since you tagged this as C++, I would suggest wrapping everything up in a nice little class. As far as I can tell from the documentation, you can reuse the iconv_t obtained from iconv_open for as many conversions as you'd like.
#include <iconv.h>
#include <iostream>
#include <stdio.h>
#include <string.h>
using namespace std;
// Size in bytes of each fixed char buffer used in main.
const size_t BUF_SIZE = 1024;
// Owns an iconv conversion descriptor for the lifetime of the object so the
// same descriptor can be reused for any number of conversions.
class IConv {
    iconv_t ic_;  // (iconv_t)-1 when iconv_open failed

    // The descriptor must be closed exactly once, so copying is disabled
    // (declared private and left undefined).
    IConv(const IConv&);
    IConv& operator=(const IConv&);

public:
    // Opens a descriptor converting from encoding `from` to encoding `to`.
    // If the pair is not supported, every call to convert() returns false.
    IConv(const char* to, const char* from)
        : ic_(iconv_open(to, from)) { }

    // Closes the descriptor only if it was opened successfully.
    ~IConv() {
        if (ic_ != (iconv_t)-1) {
            iconv_close(ic_);
        }
    }

    // Converts the NUL-terminated string `input` into `output`.
    //
    // input      - NUL-terminated source text (the terminator itself is not
    //              fed to iconv).
    // output     - destination buffer; always NUL-terminated on return when
    //              the arguments are valid, even after a failed conversion.
    // outbufsize - total size of `output` in bytes, terminator included.
    //
    // Returns true on success; false if the descriptor is invalid, an
    // argument is null, `outbufsize` is 0, or iconv reports an error (for
    // example when the converted text does not fit).
    bool convert(char* input, char* output, size_t outbufsize) {
        if (ic_ == (iconv_t)-1 || input == NULL || output == NULL
            || outbufsize == 0) {
            return false;
        }
        // Return to the initial shift state so that an earlier failed or
        // partial conversion cannot influence this one.
        iconv(ic_, NULL, NULL, NULL, NULL);

        size_t inbufsize = strlen(input);
        size_t outleft = outbufsize - 1;  // keep one byte for the terminator
        const size_t rc = iconv(ic_, &input, &inbufsize, &output, &outleft);
        // iconv advanced `output` past the last byte written; terminate
        // there so stale buffer contents never show through.
        *output = '\0';
        // iconv returns (size_t)-1 on error and a count (normally 0) on
        // success, so the result must be compared, not converted to bool.
        return rc != static_cast<size_t>(-1);
    }
};
int main(void)
{
char hello[BUF_SIZE] = "hello";
char bye[BUF_SIZE] = "bye";
char tmp[BUF_SIZE] = "something else";
IConv ic("UTF8","SJIS");
ic.convert(hello, tmp, BUF_SIZE);
cout << tmp << endl;
ic.convert(bye, tmp, BUF_SIZE);
cout << tmp << endl;
}
来源:https://stackoverflow.com/questions/8104154/iconv-only-works-once