I am new to Node.js, and I am trying to use the request module to scrape a website. I am having a problem with the encoding: the target website uses Big5 as its encoding, and I wish to convert it to UTF-8.
I use iconv-lite to decode Big5 to UTF-8.
You should also set `encoding: null` so that request returns the raw, undecoded page body as a Buffer instead of a string.
Here is some sample code:
var iconv = require('iconv-lite');
var request = require('request');

// Fetch the page as raw bytes (encoding: null) and decode it from Big5
// ourselves; letting request decode it would garble the Big5 byte
// sequences before iconv-lite ever sees them.
request({ url: 'http://amis.afa.gov.tw/v-asp/v101r.asp', encoding: null }, function (err, response, body) {
  if (err) {
    // Surface transport failures instead of silently printing nothing.
    console.error('Request failed:', err);
    return;
  }
  if (response.statusCode !== 200) {
    console.error('Unexpected HTTP status:', response.statusCode);
    return;
  }
  // With encoding: null, body is already a Buffer, so it can be passed to
  // iconv-lite directly; wrapping it in the deprecated `new Buffer()`
  // constructor would only make a needless copy.
  var str = iconv.decode(body, 'big5');
  console.log(str);
});
And the output is:
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=big5">
<title>v101r</title>
<meta name="GENERATOR" content="Microsoft FrontPage 4.0">
<meta name="Microsoft Theme" content="none, default">
</head>
<body>
<p align="center">查無結果!</p>
<p align="center"><font color="#800080">請使用瀏覽器工具列中</font><em><font
size="4" color="#000080">[上一頁]</font></em><font color="#800080">回到前一輸入條件畫面繼續查詢</font></p>
</body>
</html>
I am using Node.js 0.10.20 on Red Hat Enterprise Linux 6.4, with iconv-lite 0.2.11 and request 2.27.0.
Might I suggest my codepage library:
var request = require('request');
var codepage = require('codepage');

// Fetch the page as raw bytes (encoding: null) and decode it with code
// page 950, the code page number passed to codepage for this Big5 page.
request({ url: 'http://amis.afa.gov.tw/v-asp/v101r.asp', encoding: null }, function (err, response, body) {
  if (err) {
    // Surface transport failures instead of silently printing nothing.
    console.error('Request failed:', err);
    return;
  }
  if (response.statusCode !== 200) {
    console.error('Unexpected HTTP status:', response.statusCode);
    return;
  }
  // With encoding: null, body is already a Buffer, so there is no need to
  // wrap it in the deprecated `new Buffer()` constructor.
  var str = codepage.utils.decode(950, body);
  console.log(str);
});
yields
... <p align="center"><font color="#800080">請使用瀏覽器工具列中</font><em><font
size="4" color="#000080">[上一頁]</font></em><font color="#800080">回到前一輸入條件畫面繼續查詢</font></p>