java char encoding for strange string from API

 ̄綄美尐妖づ 提交于 2019-12-22 11:25:41

问题


I'm having a strange issue with a response I get from an API. I'm using Apache HttpClient to fetch the response. The response headers contain the following:

Content-Type=[application/json; charset=utf-16]
Transfer-Encoding=[chunked]
X-Powered-By=[ASP.NET] // Yes, people using ASP.NET

Based on this, when I read the response, the body looks as follows:

笀∀匀琀愀琀甀猀䌀漀搀攀∀㨀㈀ 

So I tried the following.

String body = "笀∀匀琀愀琀甀猀䌀漀搀攀∀㨀㈀";
// Placeholder literal: it stands for the individual charset names that were tried one at a time.
String charSetString = "utf-8|utf-16|utf-16le, all possible combination";
// new String(byte[]) decodes with the platform default charset, so this round trip
// starts from text that has already been decoded once, not from the bytes on the wire.
body = new String(body.getBytes(Charset.forName(charSetString)));
// Drops every character outside the 7-bit ASCII range.
body = body.replaceAll("[^\\x00-\\x7F]", "");

But no luck. So I started looking at the first character. The first character of the actual response should be {, so I converted the first character of the received response to its numeric code:

(int)body.charAt(0) // numeric value of the first char of the received body; 31488 (0x7B00) for this response

The value is 31488, whereas the ASCII value of { is 123. Since 31488 / 256 = 123, and converting 123 to a char gives me {, I did the following:

// Each received char carries the real character in its high byte (e.g. 0x7B00 for '{'),
// so dividing by 256 moves that byte back into the low position.
// A StringBuilder avoids re-copying the whole string on every iteration.
StringBuilder decoded = new StringBuilder(body.length());
for (int i = 0; i < body.length(); i++) {
    decoded.append((char) (body.charAt(i) / 256));
}
String encoded = decoded.toString();

And it worked. But this is such a bad conversion to need for a single API. What am I missing? What exactly is the charset of the response if I get 31488 for {?

Update

My API call code.

import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.http.impl.client.HttpClientBuilder;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.ResponseEntity;
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
import org.springframework.util.SerializationUtils;
import org.springframework.web.client.RestTemplate;


public class HTTPClientManager {
    RestTemplate restTemplate = null;

    public void setup() {

        HttpComponentsClientHttpRequestFactory clientHttpRequestFactory = null;
            clientHttpRequestFactory = new HttpComponentsClientHttpRequestFactory(
                    HttpClientBuilder.create().build());
        clientHttpRequestFactory.setReadTimeout(5 * 1000);
        clientHttpRequestFactory.setConnectTimeout(5 * 1000);
        restTemplate = new RestTemplate(clientHttpRequestFactory);
    }

    public static void main(String...strings) throws FileNotFoundException, IOException {
        HTTPClientManager ht = new HTTPClientManager();
        ht.setup();
        Map<String, Object> properties = new LinkedHashMap<>();
        properties.put(Const.METHOD, "GET");
        properties.put(Const.URL, strings[0]);
        properties.put(Const.CHAR_SET, "UTF-16LE");

        Map<String, Object> ob = ht.getResponse(properties);
        try {
            String res = ob.get(Const.RESPONSE).toString();
            System.out.println("Response ->>>>>>>>> \n " + res);
        }catch(Exception e) {
            e.printStackTrace();
        }

       try (FileOutputStream fos = new FileOutputStream("response")) {
            fos.write(SerializationUtils.serialize(ob));
       }
    }

    public static class Const {
        public static final String REQUEST = "request";
        public static final String URL = "url";
        public static final String CHAR_SET = "charSet";
        public static final String RESPONSE = "response";
        public static final String METHOD = "method";
        public static final String REQUEST_HEADER = "reqHeader";
        public static final String RESPONSE_HEADER = "resHeader";
    }



    public Map<String, Object> getResponse(Map<String, Object> properties) {
        HttpHeaders headers = new HttpHeaders();
        HttpEntity requestEntity = null;
        Map<String, Object> responseReturn = new LinkedHashMap<>();
        HttpMethod method = null;

        if (properties.get(Const.METHOD).toString().equals("GET")) {
            method = HttpMethod.GET;
            requestEntity = new HttpEntity<String>("", headers);
        } else if (properties.get(Const.METHOD).toString().equals("POST")) {
            method = HttpMethod.POST;
            requestEntity = new HttpEntity<String>(properties.get(Const.REQUEST).toString(), headers);
        }else if (properties.get(Const.METHOD).toString().equals("PUT")) {
            method = HttpMethod.PUT;
            requestEntity = new HttpEntity<String>(properties.get(Const.REQUEST).toString(), headers);
        }else if (properties.get(Const.METHOD).toString().equals("DELETE")) {
            method = HttpMethod.DELETE;
            requestEntity = new HttpEntity<String>(properties.get(Const.REQUEST).toString(), headers);
        }
        ResponseEntity<String> response = null;
        try {
            response = restTemplate.exchange(properties.get(Const.URL).toString(), method, requestEntity, String.class);
            String body = response.getBody();
            if(properties.get(Const.CHAR_SET) != null) {
                try {
                body = new String(body.getBytes(Charset.forName(properties.get(Const.CHAR_SET).toString())));
                body = body.replaceAll("[^\\x00-\\x7F]", "");
                }catch(Exception e) {
                    e.printStackTrace();
                }
            }
            responseReturn.put(Const.RESPONSE, body!=null?body:"");
            responseReturn.put(Const.RESPONSE_HEADER, response.getHeaders());
        } catch (org.springframework.web.client.HttpClientErrorException |org.springframework.web.client.HttpServerErrorException  exception) {
            exception.printStackTrace();
        }catch(org.springframework.web.client.ResourceAccessException exception){
            exception.printStackTrace();
        }catch(Exception exception){
            exception.printStackTrace();
        }

        return responseReturn;
    }

}

回答1:


I think your problem is that you are making a wrong assumption that the reply comes in UTF-16, i.e. your line Content-Type=[application/json; charset=utf-16] is wrong. Try removing the charset part (Content-Type=[application/json]) or setting it to UTF-8 (Content-Type=[application/json; charset=utf-8]) and see what happens. I believe that the reply you are getting is: {"StatusCode":2. I'm not sure why the reply is seemingly missing '}' at the end, but other than that it makes sense. BTW, I managed to interpret your reply by converting your reply string to a Unicode sequence. That gave me the sequence: \u7b00\u2200\u5300\u7400\u6100\u7400\u7500\u7300\u4300\u6f00\u6400\u6500\u2200\u3a00\u3200. This gave me the idea that by forcing the response to be interpreted as UTF-16 you messed up the content. So when I changed the sequence to \u007b\u0022\u0053\u0074\u0061\u0074\u0075\u0073\u0043\u006f\u0064\u0065\u0022\u003a\u0032 and converted it back to a String from the Unicode escapes, I got {"StatusCode":2.

BTW, if you're interested in a tool to convert any string to a Unicode sequence and vice versa, you can use the MgntUtils open source library (written by me). All I had to do to convert your response string is:

String result = "笀∀匀琀愀琀甀猀䌀漀搀攀∀㨀㈀";
        result = StringUnicodeEncoderDecoder.encodeStringToUnicodeSequence(result);
        System.out.println(result);

Here is the link to the article that describes the utilities in the library and where to get it (Available both on github and Maven central)

In the article, look for the paragraph "String Unicode converter" for an explanation of this feature. The library also contains a simple HTTP client feature (not described in the article, but described in its Javadoc).



来源:https://stackoverflow.com/questions/50996742/java-char-encoding-for-strange-string-from-api

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!