phantomjs not waiting for “full” page load

前端 未结 14 971
南旧
南旧 2020-11-22 13:48

I'm using PhantomJS v1.4.1 to load some web pages. I don't have access to their server side; I'm just getting links pointing to them. I'm using an obsolete version of Phantom

相关标签:
14条回答
  • 2020-11-22 14:29

    Maybe you can use the onResourceRequested and onResourceReceived callbacks to detect asynchronous loading. Here's an example of using those callbacks from their documentation:

    // Log every network request and every response of the page as pretty-printed JSON.
    var webpageModule = require('webpage');
    var page = webpageModule.create();

    // Builds a handler that prints `label` followed by the event payload,
    // serialized with a 4-space indent.
    function makeLogger(label) {
        return function (payload) {
            var serialized = JSON.stringify(payload, undefined, 4);
            console.log(label + serialized);
        };
    }

    page.onResourceRequested = makeLogger('Request ');
    page.onResourceReceived = makeLogger('Receive ');

    // `url` is not defined in this snippet; the surrounding script has to provide it.
    page.open(url);
    

    Also, you can look at examples/netsniff.js for a working example.

    0 讨论(0)
  • 2020-11-22 14:34

    Sending mouse-move events while the page is loading should work.

     // Send a click to the page; 200 and 660 are the mouse x/y arguments of sendEvent.
     page.sendEvent('click',200, 660);
    
    // Keep sending mouse-move events for as long as page.loading is truthy.
    // NOTE(review): this is a synchronous busy loop; it relies on sendEvent giving the
    // page a chance to make progress — confirm on the PhantomJS version in use.
    // NOTE(review): events go through phantom.page while the condition reads page.loading —
    // confirm both refer to the same page object.
    do { phantom.page.sendEvent('mousemove'); } while (page.loading);
    

    UPDATE

    When submitting the form, nothing was returned, so the program stopped. The program did not wait for the page to load as it took a few seconds for the redirect to begin.

    Telling it to move the mouse until the URL changes to the home page gave the browser as much time as it needed to change. Then telling it to wait for the page to finish loading allowed the page to fully load before the content was grabbed.

    // Inside the page: click the first element that carries all three classes
    // 'btn', 'btn-primary' and 'btn-block' (the form's submit button, per the accompanying text).
    page.evaluate(function () {
    document.getElementsByClassName('btn btn-primary btn-block')[0].click();
    });
    // Keep sending mouse-move events until the in-page location equals the home-page URL.
    // The loose != lets the Location object be compared with the string.
    // NOTE(review): synchronous busy loop with no upper bound — it never ends if the
    // redirect does not happen.
    do { phantom.page.sendEvent('mousemove'); } while (page.evaluate(function()
    {
    return document.location != "https://www.bestwaywholesale.co.uk/";
    }));
    // Then keep sending mouse-move events while page.loading is truthy.
    do { phantom.page.sendEvent('mousemove'); } while (page.loading);
    
    0 讨论(0)
  • 2020-11-22 14:38

    This is an implementation of Supr's answer. Also it uses setTimeout instead of setInterval as Mateusz Charytoniuk suggested.

    PhantomJS will exit once 1000 ms have passed without any request or response.

    // load the module
    var webpage = require('webpage');
    // get timestamp
    /**
     * Returns the current time in milliseconds since the Unix epoch
     * (the same value Date.now() yields).
     */
    function getTimestamp(){
        var now = new Date();
        return now.getTime();
    }
    
    // Time of the most recent network activity; checkReadyState() compares it
    // against the current time.
    var lastTimestamp = getTimestamp();

    // Records "now" as the moment of the latest request or response.
    function touchTimestamp() {
        lastTimestamp = getTimestamp();
    }

    var page = webpage.create();
    page.onResourceRequested = touchTimestamp;
    page.onResourceReceived = touchTimestamp;

    // `html` is not defined in this snippet; the surrounding script has to provide it.
    page.open(html, function (loadStatus) {
        var loaded = (loadStatus === 'success');
        if (loaded) {
            // do something here
            return;
        }
        // the page could not be loaded: quit with exit code 1
        phantom.exit(1);
    });
    
    /**
     * Polls every 100ms and exits PhantomJS once more than 1000ms have passed
     * since lastTimestamp was last updated; otherwise schedules the next poll.
     */
    function checkReadyState() {
        var pollDelay = 100;
        var poll = function () {
            var idleFor = getTimestamp() - lastTimestamp;
            if (idleFor > 1000) {
                // no request or response for more than 1000ms: we are done
                phantom.exit();
                return;
            }
            checkReadyState();
        };
        setTimeout(poll, pollDelay);
    }
    
    checkReadyState();
    
    0 讨论(0)
  • 2020-11-22 14:38

    This is my solution; it worked for me.

    /**
     * Console hook: when the page logs the marker message 'hey lets take screenshot',
     * poll jQuery.active once a second; as soon as it is 0, wait one more second,
     * render the page to test.png and exit.
     *
     * @param {string} msg - text the page wrote to its console.
     * @param {number} lineNum - unused here.
     * @param {string} sourceId - unused here.
     */
    page.onConsoleMessage = function(msg, lineNum, sourceId) {
        if (msg !== 'hey lets take screenshot') {
            return;
        }

        // Keep the interval id so the poll can really be stopped: the original called
        // clearInterval() without an id, which cancels nothing.
        var pollId = window.setInterval(function(){
            try
            {
                var sta = page.evaluateJavaScript("function(){ return jQuery.active;}");
                // Loose comparison kept on purpose: the exact type handed back by
                // evaluateJavaScript is not visible from this snippet.
                if (sta == 0)
                {
                    // Stop polling before scheduling the render, so a later tick
                    // cannot queue a second render/exit.
                    window.clearInterval(pollId);
                    window.setTimeout(function(){
                        page.render('test.png');
                        phantom.exit();
                    },1000);
                }
            }
            catch(error)
            {
                window.clearInterval(pollId);
                console.log(error);
                phantom.exit(1);
            }
        },1000);
    };
    
    
    // Load `address`; on success, re-set the page content with an inline script inserted
    // before </body> that logs the marker message from window.onload.
    page.open(address, function (status) {
        var loaded = (status === "success");
        if (loaded) {
            var onloadHook = '<script>window.onload = function(){console.log(\'hey lets take screenshot\');}</script></body>';
            var patchedHtml = page.content.replace('</body>', onloadHook);
            page.setContent(patchedHtml, address);
            return;
        }
        console.log('Unable to load url');
        phantom.exit();
    });
    
    0 讨论(0)
  • 2020-11-22 14:39

    Here is a solution that waits for all resource requests to complete. Once complete it will log the page content to the console and generate a screenshot of the rendered page.

    Although this solution can serve as a good starting point, I have observed it fail so it's definitely not a complete solution!

    I didn't have much luck using document.readyState.

    I was influenced by the waitfor.js example found on the phantomjs examples page.

    var system = require('system');
    var webPage = require('webpage');
    
    // Page under test; the URL comes from system.args[1]
    // (presumably the first argument after the script name — verify).
    var page = webPage.create();
    var url = system.args[1];

    // Fixed 1280x720 viewport.
    page.viewportSize = { width: 1280, height: 720 };

    // ids of requests that were issued and have not yet reached stage 'end'
    var requestsArray = [];

    // Remember every outgoing request by id.
    page.onResourceRequested = function(requestData, networkRequest) {
      requestsArray.push(requestData.id);
    };

    // Forget a request once its response reports stage 'end'.
    page.onResourceReceived = function(response) {
      if (response.stage !== 'end') {
        return;
      }
      var position = requestsArray.indexOf(response.id);
      if (position !== -1) {
        requestsArray.splice(position, 1);
      }
    };
    
    // Open the page, then poll every 500ms until no request is outstanding; at that
    // point print the page content, save a screenshot and exit.
    page.open(url, function(status) {

      // The original ignored `status`, so a failed load was still dumped, rendered
      // and finished through the normal exit path. Report it and exit with code 1.
      if (status !== 'success') {
        console.log('Unable to load url: ' + url);
        phantom.exit(1);
        return;
      }

      var interval = setInterval(function () {

        if (requestsArray.length !== 0) {
          // some requests have not reached stage 'end' yet; check again on the next tick
          return;
        }

        clearInterval(interval);
        var content = page.content;
        console.log(content);
        page.render('yourLoadedPage.png');
        phantom.exit();
      }, 500);
    });
    
    0 讨论(0)
  • 2020-11-22 14:40

    In my program, I use some logic to judge whether the page has finished loading: I watch its network requests, and if there has been no new request in the past 200 ms, I treat the page as loaded.

    Use this after onLoadFinished().

    /**
     * Calls `callback(null, page)` once the page's network traffic has gone quiet.
     *
     * A timer of `interval` ms is started immediately and restarted every time a
     * tracked response (content type starting with 'application' or 'text') reaches
     * stage 'end'. When the timer fires while requests are still pending, the wait is
     * extended, at most `max_retry` times; after that the callback runs regardless.
     *
     * The callback is invoked at most once: responses that finish after completion no
     * longer re-arm the timer (the original could call the callback repeatedly).
     *
     * Side effect: assigns page.onResourceRequested, page.onResourceReceived and
     * page.onResourceError, replacing any handlers previously set on `page`.
     *
     * @param {Object} page - page object whose onResource* hooks are used.
     * @param {Function} callback - invoked as callback(null, page).
     */
    function onLoadComplete(page, callback){
        var waiting = [];  // ids of requests that were issued and are not finished yet
        var interval = 200;  // ms of quiet time to wait for
        var max_retry = 3;  // how many times the wait may be extended while requests are pending
        var counter_retry = 0;
        var finished = false;  // true once callback has been invoked
        var tlogger = {};  // debug only: response start time keyed by request id
        var timer = setTimeout( timeout, interval);

        function timeout(){
            if(finished) return;
            if(waiting.length && counter_retry < max_retry){
                timer = setTimeout( timeout, interval);
                counter_retry++;
                return;
            }
            finished = true;
            try{
                callback(null, page);
            }catch(e){
                // Stay best-effort (do not rethrow), but no longer hide the failure.
                console.log('[onLoadComplete] callback threw: ', e);
            }
        }

        bindEvent(page, 'request', function(req){
            waiting.push(req.id);
        });

        bindEvent(page, 'receive', function (res) {
            var cT = res.contentType;
            if(!cT){
                console.log('[contentType] ', cT, ' [url] ', res.url);
                return remove(res.id);
            }
            // Only responses whose content type starts with 'application' or 'text' are
            // waited for; anything else is dropped from the pending list right away.
            var tracked = cT.indexOf('application') === 0 || cT.indexOf('text') === 0;
            if(!tracked) return remove(res.id);

            if (res.stage === 'start') {
                console.log('!!received start: ', res.id);
                tlogger[res.id] = new Date();
            }else if (res.stage === 'end') {
                console.log('!!received end: ', res.id, (new Date() - tlogger[res.id]) );
                delete tlogger[res.id];  // the timing entry is not needed any more
                remove(res.id);

                // Once the callback has run, late responses must not restart the timer.
                if(finished) return;
                clearTimeout(timer);
                timer = setTimeout(timeout, interval);
            }
        });

        bindEvent(page, 'error', function(err){
            remove(err.id);
            if(waiting.length === 0){
                counter_retry = 0;
            }
        });

        // Drops `id` from the pending list; does nothing when it is not present.
        function remove(id){
            var i = waiting.indexOf( id );
            if(i < 0){
                return;
            }
            waiting.splice(i,1);
        }

        // Maps a short event name onto the matching page.onResource* hook.
        function bindEvent(page, evt, cb){
            switch(evt){
                case 'request':
                    page.onResourceRequested = cb;
                    break;
                case 'receive':
                    page.onResourceReceived = cb;
                    break;
                case 'error':
                    page.onResourceError = cb;
                    break;
                case 'timeout':
                    page.onResourceTimeout = cb;
                    break;
            }
        }
    }
    
    0 讨论(0)
提交回复
热议问题