问题
I am trying to scrape a Quora log page like this Quora question log, going all the way to the bottom of the page to get the name of the person who added the question. I am using this code for PhantomJS:
// Load a single page in PhantomJS and dump its HTML to a file.
var page = require('webpage').create();
var fs = require('fs');
var output = './temp_htmls/test1.html';

// 'url' is a placeholder for the address of the page to scrape.
page.open('url', function (status) {
    // Do not write a dump (and do not report success) for a page that
    // never loaded; PhantomJS passes 'success' or 'fail' to this callback.
    if (status !== 'success') {
        console.error('webpage did not open successfully');
        phantom.exit(1);
        // phantom.exit() is not guaranteed to halt this callback at once,
        // so return explicitly to skip the write below.
        return;
    }

    // page.content is the page's HTML at the moment this callback runs.
    // Nothing here scrolls the page, so content that is only loaded on
    // scroll will not be part of the dump.
    fs.write(output, page.content, 'w');
    phantom.exit();
});
But this only fetches a single page of content and cannot fetch the complete web page all the way to the end. Can someone point out what I am missing with PhantomJS to make this work?
EDIT: I tried different approaches, as mentioned in the link in the comment (Link in comment), and came up with this code:
// Open a page in PhantomJS, repeatedly scroll to its current bottom so that
// lazily loaded content can be requested, and dump the HTML after every step.
var system = require('system');
var fs = require('fs');
var output = './temp_htmls/test1.html';
var webpage = require('webpage').create();
webpage.viewportSize = { width: 1280, height: 800 };
webpage.scrollPosition = { top: 0, left: 0 };
// NOTE(review): read from the command line but not used anywhere below.
var userid = system.args[1];
var profileUrl = "http://www.twitter.com/barackobama";

webpage.open(profileUrl, function (status) {
    // Anything other than 'success' means the page is not usable.
    if (status !== 'success') {
        console.error('webpage did not open successfully');
        phantom.exit(1);
        // phantom.exit() is not guaranteed to halt this callback at once;
        // return so the scroll timer below is never started after a failure.
        return;
    }

    var maxScrolls = 10;      // number of scroll steps before exiting
    var scrollDelayMs = 3000; // pause between steps so new content can load
    var scrollCount = 0;

    // Runs inside the page context and reports the current document height.
    var queryFn = function () {
        return document.body.scrollHeight;
    };

    var timer = setInterval(function () {
        var top = webpage.evaluate(queryFn);
        scrollCount++;
        console.log("1:");
        // Move just past the current bottom of the document.
        webpage.scrollPosition = { top: top + 1, left: 0 };
        console.log("top = " + top); // increments properly for twitter
        // Overwrite the dump on every step so the file always holds the
        // most recent state of the page.
        fs.write(output, webpage.content, 'w');
        if (scrollCount >= maxScrolls) {
            // Stop the timer first so no further tick runs during shutdown.
            clearInterval(timer);
            phantom.exit();
        }
    }, scrollDelayMs);
});
This works for Twitter's infinite scroll, and the value of top printed by console.log for Twitter is fine. But for the Quora log link (in the original question), it does not increment, so scrolling does not seem to be working for Quora. What changes do I need to make? Any help?
来源:https://stackoverflow.com/questions/33146879/scrape-dynamic-loading-pages-with-phantomjs