/**
 * Created by Administrator on 2016/9/22 0022.
 *
 * Small crawler script: starting from `initUrl`, it downloads the image(s)
 * found on each page into ./img, follows the "next page" link (or the link
 * to the next gallery when the current one is exhausted), and records the
 * URL to resume from in ./logs.
 */

// Required modules
var http = require('http');
var fs = require('fs');
var cheerio = require('cheerio');
var request = require('request');
var iconv = require('iconv-lite');

// Crawl settings. Constructing the object starts the crawl immediately
// (the function declaration below is hoisted, so calling it here is fine).
new GetSexyPics({
    initUrl: 'http://www.mm131.com/xinggan/2655_20.html', // first page to request
    maxNum: 100, // upper bound for `index`; (maxNum - index) pages are crawled
    intervalTime: 800, // delay between two page requests, in milliseconds
    index: 0 // number of pages processed so far, starts at 0
});

/**
 * Crawler constructor. Stores the settings on `this.setParams`, creates the
 * output folders and starts crawling right away.
 *
 * @param {Object} params - crawl settings
 * @param {string} params.initUrl - URL of the first page to fetch
 * @param {number} params.maxNum - crawling stops once `index` reaches this value
 * @param {number} params.intervalTime - delay (ms) before the next page is requested
 * @param {number} params.index - running page counter; incremented once per page
 */
function GetSexyPics(params) {
    // Declared locally: the original assigned to an undeclared `_this`,
    // which leaked a global (and is a ReferenceError in strict mode).
    var _this = this;
    _this.setParams = params;

    /**
     * Creates ./<name> if it does not exist yet.
     *
     * Done synchronously so the folder is guaranteed to exist before the
     * first download or log write. The original used the deprecated
     * `fs.exists` plus a callback-less `fs.mkdir`, which throws on current
     * Node versions and raced with the first request.
     *
     * @param {string} name - folder name relative to the working directory
     */
    this.createFolder = function (name) {
        try {
            fs.mkdirSync('./' + name);
        } catch (err) {
            // An already existing folder is the expected case on re-runs.
            if (err.code !== 'EEXIST') {
                console.log(err);
            }
        }
    };

    /**
     * Fetches one page, downloads its image(s), writes the resume URL to the
     * log file and schedules the request for the following page.
     *
     * @param {string} initUrl - URL of the page to fetch
     */
    this.getSexyPics = function (initUrl) {
        var chunks = [];

        http.get(initUrl, function (res) {
            res.on('data', function (chunk) {
                // Keep the raw bytes. Decoding chunk by chunk would corrupt
                // any multi-byte gb2312 character split across two chunks.
                chunks.push(chunk);
            });

            res.on('end', function () {
                // Convert the complete body from gb2312 in one go.
                var resData = iconv.decode(Buffer.concat(chunks), 'gb2312');
                // Parse the page with cheerio.
                var $ = cheerio.load(resData);

                // Count this page.
                _this.setParams.index = _this.setParams.index + 1;
                var images = {
                    title: $('.content h5').text().trim(), // picture title
                    index: _this.setParams.index // number of pages requested so far
                };

                // Download the image(s) of the current page.
                downloadImg($, images.title, images.index);

                // Next page of the current gallery; when there is none, fall
                // back to the link to the next gallery.
                var nextPage = $('.content-page a.page-ch:last-child').attr('href');
                var newUrl = nextPage
                    ? 'http://www.mm131.com/xinggan/' + nextPage
                    : $('.updown .updown_r').attr('href');

                if (!newUrl) {
                    // Neither link is present. Without this guard the crawler
                    // would request and log the literal string "undefined".
                    console.log('No next page link found on ' + initUrl + '; stopping.');
                    return;
                }
                newUrl = encodeURI(newUrl);

                // Keep going until the configured maximum is reached.
                if (_this.setParams.index < _this.setParams.maxNum) {
                    setTimeout(function () {
                        _this.getSexyPics(newUrl);
                    }, _this.setParams.intervalTime);
                }

                // Persist the URL to start from next time; it has to be
                // copied into `initUrl` by hand before the next run.
                saveLogs('下次需要执行的初始化地址是(替换initUrl即可)---' + newUrl);
            });
        }).on('error', function (e) {
            // Single handler: the original registered two handlers on the
            // same request, so every failure was logged twice.
            console.log(e);
            console.log("错误:" + e.message);
        });

        /**
         * Overwrites the log file with the given text.
         *
         * @param {string} logs - text to store
         */
        function saveLogs(logs) {
            fs.writeFile('./logs/' + '日志' + '.txt', logs, 'utf-8', function (err) {
                if (err) {
                    console.log(err);
                }
            });
        }

        /**
         * Downloads every image matched by `.content-pic a img` into ./img,
         * named after the page title plus the image's file extension.
         *
         * @param {Function} $ - cheerio instance loaded with the page
         * @param {string} imgTil - page title, used as the file name
         * @param {number} idx - running page counter, used for logging only
         */
        function downloadImg($, imgTil, idx) {
            // The title comes from the remote page: strip path separators so
            // it cannot point outside ./img.
            var fileName = imgTil.replace(/[\\\/]/g, '_');

            $('.content-pic a img').each(function () {
                var src = $(this).attr('src');
                if (!src) {
                    // No usable address on this <img>; skip it.
                    return;
                }
                var imgUrl = encodeURI(src); // image address
                var suffix = imgUrl.substring(imgUrl.lastIndexOf('.')); // file extension
                console.log('第' + idx + '张图片---' + imgTil + '---' + imgUrl);

                // Without 'error' listeners a failed download or write would
                // raise an unhandled 'error' event and kill the process.
                var target = fs.createWriteStream('./img/' + fileName + suffix);
                target.on('error', function (err) {
                    console.log(err);
                });
                request(imgUrl)
                    .on('error', function (err) {
                        console.log(err);
                    })
                    .pipe(target);
            });
        }
    };

    /**
     * Creates the output folders and requests the first page.
     */
    this.init = function () {
        this.createFolder('img');
        this.createFolder('logs');
        this.getSexyPics(_this.setParams.initUrl);
    };

    this.init();
}
// If you find this useful, please give it a like...
// Source: https://www.cnblogs.com/leyi/p/5903068.html