I'm using PhantomJS v1.4.1 to load some web pages. I don't have access to their server side; I am just given links pointing to them. I'm using an obsolete version of Phantom.
I would rather periodically check the document.readyState status (https://developer.mozilla.org/en-US/docs/Web/API/document.readyState). Although this approach is a bit clunky, you can be sure that inside the onPageReady function you are working with a fully loaded document.
// PhantomJS page object used to load and inspect the target document.
var page = require("webpage").create();

// Address of the page to open.
var url = "http://example.com/index.html";
/**
 * Prints the current document's full markup to the console and then
 * terminates PhantomJS.
 */
function onPageReady() {
    // Handed to page.evaluate; it reads the page's own `document`.
    function serializeDocument() {
        return document.documentElement.outerHTML;
    }

    console.log(page.evaluate(serializeDocument));
    phantom.exit();
}
// Open the target URL, then poll document.readyState until the document
// reports "complete" before handing over to onPageReady.
page.open(url, function (status) {
    // A failed load must not fall through to polling: there is no real
    // document to wait for, so report the failure and exit non-zero.
    if (status !== "success") {
        console.log("Unable to load " + url + " (status: " + status + ")");
        phantom.exit(1);
        return;
    }

    // Schedules one readiness check; re-arms itself until the page is ready.
    function checkReadyState() {
        // A fresh setTimeout per tick (rather than setInterval) guarantees
        // that a slow check can never overlap with the next one.
        setTimeout(function () {
            var readyState = page.evaluate(function () {
                return document.readyState;
            });

            if ("complete" === readyState) {
                onPageReady();
            } else {
                checkReadyState();
            }
        });
    }

    checkReadyState();
});
Additional explanation:
Using nested setTimeout calls instead of setInterval prevents checkReadyState invocations from "overlapping" and causing race conditions when an execution takes longer than expected. setTimeout has a default delay of 4 ms (https://stackoverflow.com/a/3580085/1011156), so active polling will not drastically affect program performance.
document.readyState === "complete" means that the document is completely loaded, including all of its resources (https://html.spec.whatwg.org/multipage/dom.html#current-document-readiness).
I found this solution useful in a NodeJS app. I use it only in desperate cases, because it relies on a fixed timeout to wait for the full page load rather than detecting when the page is actually ready.
The second argument is the callback function which is going to be called once the response is ready.
// The "phantom" module used by fullLoad. Declared with `var` so the binding
// is module-local instead of an implicit global (which throws in strict mode).
var phantom = require('phantom');
/**
 * Opens `anUrl` in a new PhantomJS page, waits a fixed delay after a
 * successful open, then passes the document's inner HTML to `callbackDone`.
 *
 * @param {string} anUrl - Address of the page to load.
 * @param {function(?string)} callbackDone - Receives
 *     `document.documentElement.innerHTML` on success, or `null` when the
 *     page could not be opened.
 * @param {number} [delayMs=5000] - Milliseconds to wait after a successful
 *     open before reading the HTML.
 */
var fullLoad = function (anUrl, callbackDone, delayMs) {
    var settleDelay = typeof delayMs === 'number' ? delayMs : 5000;

    phantom.create(function (ph) {
        ph.createPage(function (page) {
            page.open(anUrl, function (status) {
                if (status !== 'success') {
                    console.error("phantom: error opening " + anUrl, status);
                    ph.exit();
                    // Still notify the caller; otherwise it would wait forever.
                    callbackDone(null);
                    return;
                }

                // Fixed wait: gives the page time to finish before its HTML is read.
                setTimeout(function () {
                    page.evaluate(function () {
                        return document.documentElement.innerHTML;
                    }, function (result) {
                        // Exit the PhantomJS instance before handing over the
                        // result (flagged as essential by the original author).
                        ph.exit();
                        callbackDone(result);
                    });
                }, settleDelay);
            });
        });
    });
};
/**
 * Example consumer for the value handed over by fullLoad.
 *
 * @param {string} htmlBody - Markup passed in by the caller.
 */
function callback(htmlBody) {
    // do something with htmlBody
}
// Example invocation: 'your/url/' is a placeholder for the address to load;
// `callback` is passed as the handler that receives the result.
fullLoad('your/url/', callback);