【NodeJS】爬虫之使用 eventproxy 控制并发

匿名 (未验证) 提交于 2019-12-03 00:21:02

eventproxy 帮你管理这些异步操作是否已经全部完成,并在全部完成后把所有结果一次性交给回调函数处理。

源代码:

var eventproxy = require('eventproxy');
var superagent = require('superagent');
var cheerio = require('cheerio');
var url = require('url');

// Front page that is crawled; relative topic links are resolved against it.
var cnodeUrl = 'https://cnodejs.org/';

/*
 * Crawler flow:
 *   1. Fetch the front page and collect the absolute URL of every topic link.
 *   2. Fetch every topic page (all requests are issued at once, no throttling).
 *   3. Once one 'topic_html' event per topic URL has been emitted, extract the
 *      title and the first reply from each page and print the result list.
 */
superagent.get(cnodeUrl)
  .end(function (err, res) {
    if (err) {
      return console.error(err);
    }

    var topicUrls = [];
    var $ = cheerio.load(res.text);
    $('#topic_list .topic_title').each(function (idx, element) {
      var rawHref = $(element).attr('href');
      // An element without an href would make url.resolve throw; skip it.
      if (!rawHref) {
        return;
      }
      topicUrls.push(url.resolve(cnodeUrl, rawHref));
    });

    var ep = new eventproxy();

    // Registered before any request is started so that no emit can be missed.
    // `pairs` holds the [topicUrl, topicHtml] payloads passed to ep.emit below.
    ep.after('topic_html', topicUrls.length, function (pairs) {
      var topics = pairs.map(function (topicPair) {
        var topicUrl = topicPair[0];
        var topicHtml = topicPair[1];
        var $topic = cheerio.load(topicHtml);
        return {
          title: $topic('.topic_full_title').text().trim(),
          href: topicUrl,
          comment1: $topic('.reply_content').eq(0).text().trim(),
        };
      });

      console.log('final:');
      console.log(topics);
    });

    topicUrls.forEach(function (topicUrl) {
      superagent.get(topicUrl)
        .end(function (err, res) {
          if (err || !res) {
            // Still emit, with empty HTML, so the 'topic_html' count reaches
            // topicUrls.length and the final callback runs; the failed topic
            // shows up with an empty title and comment.
            console.error('fetch ' + topicUrl + ' failed', err);
            ep.emit('topic_html', [topicUrl, '']);
            return;
          }
          console.log('fetch ' + topicUrl + ' successful');
          ep.emit('topic_html', [topicUrl, res.text]);
        });
    });
  });

效果演示图:

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!