phantomjs not waiting for “full” page load

前端 未结 14 995
南旧
南旧 2020-11-22 13:48

I'm using PhantomJS v1.4.1 to load some web pages. I don't have access to their server side; I'm just getting links pointing to them. I'm using an obsolete version of Phantom

相关标签:
14条回答
  • 2020-11-22 14:41

    I found this approach useful in some cases:

    // onConsoleMessage is a callback property of the page object, so the handler
    // has to be assigned to it; invoking it like a method registers nothing.
    // `msg` is the text the page passed to console.log and friends.
    page.onConsoleMessage = function(msg) {
      // do something e.g. page.render
    };
    

    Then, if you own the page, put a script like this inside it:

    <script>
      // Once the window's load event fires, log a marker message to the console.
      window.onload = function(){
        console.log('page loaded');
      }
    </script>
    
    0 讨论(0)
  • 2020-11-22 14:41

    This is an old question, but I was looking for a way to detect a full page load in SpookyJS (which uses CasperJS and PhantomJS) and didn't find one, so I wrote my own script, using the same approach as the user deemstone. The idea is that if the page has neither started a request nor received a response for a given amount of time, the wait ends and the page is treated as loaded.

    On casper.js file (if you installed it globally, the path would be something like /usr/local/lib/node_modules/casperjs/modules/casper.js) add the following lines:

    At the top of the file with all the global vars:

    // Delay, in milliseconds, handed to setTimeout by resetTimeout.
    var waitResponseInterval = 500;
    // Id of the pending timer; stays null until a timer has been armed.
    var reqResInterval = null;
    // Flipped to true by the timer callback once it fires.
    var reqResFinished = false;
    // No-op placeholder; the real implementation is assigned inside createPage.
    var resetTimeout = function () {};
    

    Then inside function "createPage(casper)" just after "var page = require('webpage').create();" add the following code:

     // (Re)arm the idle timer. Every call cancels the timer armed by the previous
     // call, so the callback only fires after waitResponseInterval ms without
     // another call to resetTimeout.
     resetTimeout = function() {
         // Cancel the previously scheduled timer, if there is one.
         if (reqResInterval) {
             clearTimeout(reqResInterval);
         }

         // Runs when the timer expires: raise the flag, then report the load
         // as finished through the page's own onLoadFinished handler.
         var onIdle = function() {
             reqResFinished = true;
             page.onLoadFinished("success");
         };

         reqResInterval = setTimeout(onIdle, waitResponseInterval);
     };
     // Arm the timer once right away.
     resetTimeout();
    

    Then inside "page.onResourceReceived = function onResourceReceived(resource) {" on the first line add:

     // Restart the idle timer every time this handler runs.
     resetTimeout()
    

    Do the same for "page.onResourceRequested = function onResourceRequested(requestData, request) {"

    Finally, on "page.onLoadFinished = function onLoadFinished(status) {" on the first line add:

     // Ignore this call unless the idle timer has already raised reqResFinished.
     if(!reqResFinished)
     {
          return
     }
     // Clear the flag again so the next load has to wait for the timer as well.
     reqResFinished = false
    

    And that's it, hope this one helps someone in trouble like I was. This solution is for casperjs but works directly for Spooky.

    Good luck !

    0 讨论(0)
  • 2020-11-22 14:42

    You could try a combination of the waitfor and rasterize examples:

    /**
     * See https://github.com/ariya/phantomjs/blob/master/examples/waitfor.js
     *
     * Poll a test condition until it becomes truthy or a timeout elapses. Useful
     * for waiting on a server response or for a UI change (fadeIn, etc.) to occur.
     *
     * The condition is evaluated once immediately and then re-checked on every
     * poll tick. When it is fulfilled, `onReady` runs exactly once; when the
     * timeout elapses first, a message is logged and `phantom.exit(1)` is called.
     * Polling is stopped before either outcome is handled, so a throwing
     * `onReady` is not invoked again on the next tick.
     *
     * NOTE(review): string arguments are handed to eval(); only pass trusted strings.
     *
     * @param testFx javascript condition that evaluates to a boolean,
     * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')") or
     * as a callback function.
     * @param onReady what to do when testFx condition is fulfilled,
     * it can be passed in as a string of javascript to evaluate or
     * as a callback function.
     * @param timeOutMillis the max amount of time to wait. If not specified (or falsy), 3 sec is used.
     * @param pollIntervalMillis optional delay between two checks. If not specified (or falsy), 250 ms is used.
     */
    function waitFor(testFx, onReady, timeOutMillis, pollIntervalMillis) {
        var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000; //< Default max timeout is 3s
        var pollMillis = pollIntervalMillis ? pollIntervalMillis : 250; //< Default: check every 250ms
        var start = new Date().getTime();

        // Evaluate the condition, accepting either a code string or a callback.
        var check = function() {
            return typeof(testFx) === "string" ? eval(testFx) : testFx();
        };

        var condition = check();
        var interval = setInterval(function() {
            if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
                // Not timed out yet and condition not yet fulfilled: check again
                condition = check();
                return;
            }

            // Either outcome is final: stop polling before dispatching so that
            // neither branch can run a second time.
            clearInterval(interval);

            if (!condition) {
                // Timed out while the condition was still 'false'
                console.log("'waitFor()' timeout");
                phantom.exit(1);
                return;
            }

            // Condition fulfilled
            console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
            if (typeof(onReady) === "string") {
                eval(onReady);
            } else {
                onReady();
            }
        }, pollMillis);
    }
    
    // Render a URL to a file once every resource requested by the page has
    // reached the 'end' stage (or give up after 10 seconds).
    // Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]
    var page = require('webpage').create(), system = require('system'), address, output, size;

    if (system.args.length < 3 || system.args.length > 5) {
        console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]');
        console.log('  paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
        phantom.exit(1);
    } else {
        address = system.args[1];
        output = system.args[2];
        // The paper size argument is only honoured when the output file ends in ".pdf".
        if (system.args.length > 3 && system.args[2].substr(-4) === ".pdf") {
            size = system.args[3].split('*');
            // "W*H" gives an explicit width/height; anything else is used as a format name.
            page.paperSize = size.length === 2 ? {
                width : size[0],
                height : size[1],
                margin : '0px'
            } : {
                format : system.args[3],
                orientation : 'portrait',
                margin : {
                    left : "5mm",
                    top : "8mm",
                    right : "5mm",
                    bottom : "9mm"
                }
            };
        }
        if (system.args.length > 4) {
            page.zoomFactor = system.args[4];
        }
        // Last reported stage of every resource, indexed by the resource id.
        var resources = [];
        page.onResourceRequested = function(request) {
            resources[request.id] = request.stage;
        };
        page.onResourceReceived = function(response) {
            resources[response.id] = response.stage;
        };
        page.open(address, function(status) {
            if (status !== 'success') {
                console.log('Unable to load the address!');
                // Report the failure through the exit status, like the usage error above.
                phantom.exit(1);
            } else {
                waitFor(function() {
                    // Ready only once every tracked resource has reached the 'end' stage.
                    // Index 0 is skipped; presumably resource ids start at 1 (verify).
                    for ( var i = 1; i < resources.length; ++i) {
                        if (resources[i] !== 'end') {
                            return false;
                        }
                    }
                    return true;
                }, function() {
                   page.render(output);
                   phantom.exit();
                }, 10000);
            }
        });
    }
    
    0 讨论(0)
  • 2020-11-22 14:45

    This is the code I use:

    var system = require('system');
    var page = require('webpage').create();

    // Open the page, then poll every 200 ms until the '.qrcode img' element
    // exposes a src, giving up after 50 attempts.
    page.open('http://....', function(){
          console.log(page.content);
          var k = 0;

          var loop = setInterval(function(){
              var qrcode = page.evaluate(function(s) {
                 // The element may not exist yet; reading .src from null would
                 // throw inside the page on every poll until it appears.
                 var el = document.querySelector(s);
                 return el ? el.src : null;
              }, '.qrcode img');

              k++;
              if (qrcode){
                 console.log('dataURI:', qrcode);
                 clearInterval(loop);
                 phantom.exit();
                 return;
              }

              if (k === 50) {
                 // 10 sec timeout: stop polling before asking PhantomJS to exit
                 clearInterval(loop);
                 phantom.exit();
              }
          }, 200);
      });
    

    Basically, this relies on you knowing that the page is fully loaded once a given element appears in the DOM, so the script simply waits until that happens.

    0 讨论(0)
  • 2020-11-22 14:47

    I use a personal blend of the PhantomJS waitfor.js example.

    This is my main.js file:

    'use strict';

    // Loads waitFor() from lib/waitFor.js; the result of injectJs is kept but not inspected.
    var wasSuccessful = phantom.injectJs('./lib/waitFor.js');
    var page = require('webpage').create();

    page.open('http://foo.com', function(status) {
      // Bail out early when the page could not be opened.
      if (status !== 'success') {
        console.log('error');
        phantom.exit(1);
        return;
      }

      // Condition polled by waitFor: true once the document reports 'complete'.
      var isDocumentComplete = function() {
        return page.evaluate(function() {
          return 'complete' === document.readyState;
        });
      };

      // Runs once the condition holds: read the text of #foo with the injected
      // jQuery, then quit.
      var onDocumentComplete = function() {
        var fooText = page.evaluate(function() {
          return $('#foo').text();
        });

        phantom.exit();
      };

      page.includeJs('https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min.js', function() {
        waitFor(isDocumentComplete, onDocumentComplete);
      });
    });
    

    And the lib/waitFor.js file (which is just a copy and paste of the waitFor() function from the PhantomJS waitfor.js example):

    /**
     * Poll a test condition until it becomes truthy or a timeout elapses.
     *
     * The condition is first evaluated on the first poll tick (not at call time).
     * When it is fulfilled, `onReady` runs exactly once; when the timeout elapses
     * first, a message is logged and `phantom.exit(1)` is called. Polling is
     * stopped before either outcome is handled, so a throwing `onReady` is not
     * invoked again on the next tick.
     *
     * NOTE(review): string arguments are handed to eval(); only pass trusted strings.
     *
     * @param testFx condition to poll, either a string of javascript or a callback.
     * @param onReady what to do once the condition is fulfilled, either a string
     * of javascript or a callback.
     * @param timeOutMillis the max amount of time to wait. If not specified (or falsy), 3 sec is used.
     * @param pollIntervalMillis optional delay between two checks. If not specified (or falsy), 250 ms is used.
     */
    function waitFor(testFx, onReady, timeOutMillis, pollIntervalMillis) {
        var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000; //< Default max timeout is 3s
        var pollMillis = pollIntervalMillis ? pollIntervalMillis : 250; //< Default: check every 250ms
        var start = new Date().getTime();
        var condition = false;
        var interval = setInterval(function() {
            if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
                // Not timed out yet and condition not yet fulfilled: check again
                condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
                return;
            }

            // Either outcome is final: stop polling before dispatching so that
            // neither branch can run a second time.
            clearInterval(interval);

            if (!condition) {
                // Timed out while the condition was still 'false'
                console.log("'waitFor()' timeout");
                phantom.exit(1);
                return;
            }

            // Condition fulfilled: do what is supposed to happen once it holds
            if (typeof(onReady) === "string") {
                eval(onReady);
            } else {
                onReady();
            }
        }, pollMillis);
    }
    

    This method is not asynchronous, but at least I am assured that all the resources were loaded before I try using them.

    0 讨论(0)
  • 2020-11-22 14:50

    Another approach is to just ask PhantomJS to wait for a bit after the page has loaded before doing the render, as per the regular rasterize.js example, but with a longer timeout to allow the JavaScript to finish loading additional resources:

    // Open the address and render it after a fixed delay, so that scripts on
    // the page get time to load additional resources.
    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('Unable to load the address!');
            // Signal the failure to the caller with a non-zero exit status.
            phantom.exit(1);
            return;
        }
        window.setTimeout(function () {
            page.render(output);
            phantom.exit();
        }, 1000); // Change timeout as required to allow sufficient time
    });
    
    0 讨论(0)
提交回复
热议问题