I\'m using PhantomJS v1.4.1 to load some web pages. I don\'t have access to their server-side, I just getting links pointing to them. I\'m using obsolete version of Phantom
In my program, I use some logic to judge if it was onload: watching it's network request, if there was no new request on past 200ms, I treat it onload.
Use this, after onLoadFinish().
function onLoadComplete(page, callback){
var waiting = []; // request id
var interval = 200; //ms time waiting new request
var timer = setTimeout( timeout, interval);
var max_retry = 3; //
var counter_retry = 0;
function timeout(){
if(waiting.length && counter_retry < max_retry){
timer = setTimeout( timeout, interval);
counter_retry++;
return;
}else{
try{
callback(null, page);
}catch(e){}
}
}
//for debug, log time cost
var tlogger = {};
bindEvent(page, 'request', function(req){
waiting.push(req.id);
});
bindEvent(page, 'receive', function (res) {
var cT = res.contentType;
if(!cT){
console.log('[contentType] ', cT, ' [url] ', res.url);
}
if(!cT) return remove(res.id);
if(cT.indexOf('application') * cT.indexOf('text') != 0) return remove(res.id);
if (res.stage === 'start') {
console.log('!!received start: ', res.id);
//console.log( JSON.stringify(res) );
tlogger[res.id] = new Date();
}else if (res.stage === 'end') {
console.log('!!received end: ', res.id, (new Date() - tlogger[res.id]) );
//console.log( JSON.stringify(res) );
remove(res.id);
clearTimeout(timer);
timer = setTimeout(timeout, interval);
}
});
bindEvent(page, 'error', function(err){
remove(err.id);
if(waiting.length === 0){
counter_retry = 0;
}
});
function remove(id){
var i = waiting.indexOf( id );
if(i < 0){
return;
}else{
waiting.splice(i,1);
}
}
function bindEvent(page, evt, cb){
switch(evt){
case 'request':
page.onResourceRequested = cb;
break;
case 'receive':
page.onResourceReceived = cb;
break;
case 'error':
page.onResourceError = cb;
break;
case 'timeout':
page.onResourceTimeout = cb;
break;
}
}
}