问题
I am trying to click a 'next' button N times and grab the page source each time. I understand that I can run an arbitrary function on the remote website, so instead of click() I just use the remote function nextPage(). How do I run the following an arbitrary number of times:
// Asker's attempt: print the first page's HTML, then repeatedly call the
// page-side nextPage() and print the HTML after each call.
var casper = require('casper').create();
casper.start('http://www.example.com', function() {
// Dump the source of the initially loaded page, followed by a separator.
this.echo(this.getHTML());
this.echo('-------------------------');
// numTimes: how many times the callback below is handed to repeat().
// count: page counter that the callback tries to pass to nextPage().
var numTimes = 4, count = 2;
casper.repeat(numTimes, function() {
// NOTE(review): this callback is handed to thenEvaluate, so it presumably
// executes in the remote page context where the outer `count` is not
// visible; the value would have to be passed as an extra argument to
// thenEvaluate instead - confirm against the CasperJS docs.
this.thenEvaluate(function() {
nextPage(++count);
});
// Once the previous step is done, print the current page source and a separator.
this.then(function() {
this.echo(this.getHTML());
this.echo('-------------------------');
});
});
});
'i' here is an index I tried to use in a javascript for loop.
So tl;dr: I want to click 'next', print the page source, click 'next', print the page source, click 'next'... and continue that N times.
回答1:
First, you can pass a value to the remote page context (i.e. to the thenEvaluate function) like this:
// Bump the counter on the Casper side first, then hand the new value to the
// page-side callback as an explicit argument.
count += 1;
this.thenEvaluate(function (pageNumber) {
    // Executes inside the remote page, where nextPage() lives.
    nextPage(pageNumber);
}, count);
However, Casper#repeat might not be a good function to use here, as the loop would NOT wait for each page to load before capturing its content.
You may prefer to devise event-based chaining instead.
The work-flow of the code would be:
1. Have a global variable (or at least variables accessible to the functions mentioned below) to store the count and the limit.
2. Listen to the load.finished event, grab the HTML there, and then call the next page.
A simplified code can be:
// Event-driven paging: every finished page load prints the page source and
// then asks the remote page to move on, until `limit` pages have been handled.
var casper = require('casper').create();
var limit = 5;
var count = 1;

casper.on('load.finished', function onLoadFinished(status) {
    // Print the page source on success, or a failure notice otherwise.
    if (status === 'success') {
        this.echo(this.getHTML());
        this.echo('-------------------------');
    } else {
        this.echo('Failed to load page.');
    }

    // Advance the counter regardless of the load outcome, then stop once the
    // limit has been passed.
    count += 1;
    if (count > limit) {
        this.echo('Finished!');
        return;
    }

    // Ask the remote page to navigate; the counter is passed in explicitly
    // because the callback runs in the page context.
    this.evaluate(function (remoteCount) {
        nextPage(remoteCount);
        // Logged from inside the page (this line was a later addition to the answer).
        console.log(remoteCount);
        return remoteCount;
    }, count);
});

casper.start('http://www.example.com').run();
NOTE: If your pages run a heavy load of JS processing etc., you may also want to add a wait before calling nextPage:
// Pause for 1000 ms, then trigger the next page from inside the remote page.
this.wait(1000, function afterDelay() {
    this.evaluate(function (remoteCount) {
        nextPage(remoteCount);
    }, count);
});
[EDIT ADDED] The following event listeners will help you debug.
// Forward whatever the remote page reports through 'remote.message' to the
// local console, tagged so its origin is obvious.
casper.on('remote.message', function onRemoteMessage(message) {
    var line = '[Remote Page] ' + message;
    console.log(line);
});

// Report errors raised by the web page: the message first (using the "ERROR"
// style), then the trace pretty-printed as JSON with a 4-space indent.
casper.on('page.error', function onPageError(message, stack) {
    casper.echo('[Remote Page Error] ' + message, 'ERROR');
    var prettyTrace = JSON.stringify(stack, null, 4);
    casper.echo('[Remote Error trace] ' + prettyTrace);
});
回答2:
You could try using Casper#repeat.
This should do, for the most part, what you want:
// Repeat the "go to next page, then print its source" pair numTimes times.
var numTimes = 10;
var count = 1;

casper.repeat(numTimes, function () {
    // Increment on the Casper side and pass the value into the page callback.
    count += 1;
    this.thenEvaluate(function (pageNumber) {
        nextPage(pageNumber);
    }, count);

    // Follow-up step: print the current page source and a separator.
    this.then(function () {
        this.echo(this.getHTML());
        this.echo('-------------------------');
    });
});
回答3:
// Collect the links up front, then schedule one thenOpen step per link.
var global_page_links = [];

casper.then(function () {
    // Push five entries; substitute YOUR_LINK with the real URLs to visit.
    var remaining = 5;
    while (remaining > 0) {
        global_page_links.push(YOUR_LINK);
        remaining -= 1;
    }

    this.each(global_page_links, function (self, link) {
        // Skip empty/falsy entries.
        if (!link) {
            return;
        }
        self.thenOpen(link, function () {
            console.log("OPENED: " + this.getCurrentUrl());
            // Do the per-page work here (evaluate(), etc.).
        });
    });
});
This answers the question of how to use a for() loop in CasperJS to open several links.
来源:https://stackoverflow.com/questions/18835159/how-to-for-loop-in-casperjs