save html output of page after execution of the page's javascript

后端 未结 7 915
情话喂你
情话喂你 2020-11-29 21:12

There is a site I am trying to scrape that first loads an HTML/JS page, modifies the form input fields using JavaScript, and then POSTs the form. How can I get the final HTML output of the POSTed page?

相关标签:
7条回答
  • 2020-11-29 21:44

    After 2 long days of struggling and frustration, I finally got my similar issue solved. What did the trick was the waitfor.js example on the official PhantomJS website. Be happy!

    "use strict";
    
    /**
     * Poll a condition every 250ms until it becomes truthy or a timeout elapses.
     *
     * The condition is re-evaluated on each tick while it is still falsy and the
     * timeout has not been reached. On the first tick after the condition turned
     * truthy, polling is stopped and `onReady` is invoked. If the timeout elapses
     * while the condition is still falsy, polling is stopped and the script exits
     * through `phantom.exit(1)`.
     *
     * @param {Function|string} testFx - Condition to poll; called with no
     *     arguments, or evaluated with `eval` when given as a string.
     * @param {Function|string} onReady - Action to run once the condition holds;
     *     called with no arguments, or evaluated with `eval` when given as a string.
     * @param {number} [timeOutMillis] - Maximum time to wait in milliseconds.
     *     Any falsy value (including 0) selects the default of 3000.
     */
    function waitFor(testFx, onReady, timeOutMillis) {
        var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default max timeout is 3s
            start = new Date().getTime(),
            condition = false,
            interval = setInterval(function() {
                var elapsed = new Date().getTime() - start;

                if (elapsed < maxtimeOutMillis && !condition) {
                    // Not timed out yet and condition not yet fulfilled: test again.
                    // NOTE(review): string arguments go through eval(); only pass
                    // trusted, hard-coded strings (or, better, functions).
                    condition = (typeof(testFx) === "string" ? eval(testFx) : testFx());
                    return;
                }

                // Either outcome below is final. Stop the timer first, so that an
                // exception thrown by onReady cannot leave the interval running and
                // re-trigger onReady on every subsequent tick.
                clearInterval(interval);

                if (!condition) {
                    // Timed out while the condition is still falsy.
                    console.log("'waitFor()' timeout");
                    phantom.exit(1);
                    return;
                }

                // Condition fulfilled: do what is supposed to happen next.
                console.log("'waitFor()' finished in " + elapsed + "ms.");
                if (typeof(onReady) === "string") {
                    eval(onReady);
                } else {
                    onReady();
                }
            }, 250); //< repeat check every 250ms
    }
    
    
    // Create the PhantomJS web page object used to load and inspect the site.
    var page = require('webpage').create();
    
    // Open Twitter on 'sencha' profile and, once the page has loaded, wait for
    // the sign-in dropdown to become visible before exiting.
    page.open("http://twitter.com/#!/sencha", function (status) {
        // Check for page load success
        if (status !== "success") {
            console.log("Unable to access network");
            // Terminate explicitly; without this the script would keep running
            // after a failed load. Exit code 1 matches waitFor()'s timeout path.
            phantom.exit(1);
            return;
        }

        // Wait for 'signin-dropdown' to be visible
        waitFor(function() {
            // Check in the page if a specific element is now visible.
            // NOTE(review): relies on `$` (presumably jQuery) being defined by
            // the loaded page itself - confirm for the target site.
            return page.evaluate(function() {
                return $("#signin-dropdown").is(":visible");
            });
        }, function() {
            console.log("The sign-in dialog should be visible now.");
            phantom.exit();
        });
    });
    
    0 讨论(0)
提交回复
热议问题