CasperJS running out of memory

早过忘川 提交于 2019-12-24 11:05:20

问题


I'm running the following script with CasperJS, and about a third of the way through the array it starts running out of swap space and the machine becomes extremely slow. What am I doing wrong here?

searchPages is an array of 54 numbers corresponding to a URL value for a search page.

// Run loadSearch once for every entry in searchPages.
casper.each(searchPages, function (self, entry) {
    loadSearch(self, entry);
});


/**
 * Queues one search on the given Casper instance and captures a screenshot
 * of the result page.
 *
 * Opens the global `url`, sets the locale drop-down to option `index`, fills
 * the leave/return dates with the same day of the month one and two months
 * from today, clicks the search button, then waits up to 20 seconds for
 * `#destinationForPackage` before capturing the page.
 *
 * @param {Object} casper - Casper instance the steps are queued on.
 * @param {number} index - Value assigned to the locale select's selectedIndex.
 */
function loadSearch(casper,index){
    var now = new Date();

    // Formats "M/D/YYYY" for today's day-of-month, `monthsAhead` months from
    // now. Building the target month through Date rolls December over into
    // the next year instead of producing month 13 or 14, and the day is
    // clamped so that e.g. the 31st maps to the last day of a shorter month.
    function dateMonthsAhead(monthsAhead) {
        var firstOfTarget = new Date(now.getFullYear(), now.getMonth() + monthsAhead, 1);
        // Day 0 of the following month is the last day of the target month.
        var lastDay = new Date(firstOfTarget.getFullYear(), firstOfTarget.getMonth() + 1, 0).getDate();
        var day = Math.min(now.getDate(), lastDay);
        return (firstOfTarget.getMonth() + 1) + "/" + day + "/" + firstOfTarget.getFullYear();
    }

    var dateStart = dateMonthsAhead(1);
    var dateEnd = dateMonthsAhead(2);

    casper.thenOpen(url,function(){
        // Each evaluate() call runs its function inside the loaded page.
        this.evaluate(function(j) {
            document.querySelector('select[name="searchParameters.localeId"]').selectedIndex = j;
        },index);
        this.evaluate(function(start) {
            $("#leaveDate").val(start);
        },dateStart);
        this.evaluate(function(end) {
            $("#returnDate").val(end);
        },dateEnd);
        this.evaluate(function() {
            $("#OSB_btn").click();
        });

        this.waitForSelector('#destinationForPackage', function() {
            if (this.exists('#destinationForPackage')){
                // Text of the currently selected destination option.
                var name = casper.evaluate(function() {
                    return $("#destinationForPackage option[value='" + $("#destinationForPackage").val() + "']").text()
                });
                // "Going To" is skipped: no capture is taken for that label.
                if (name != "Going To"){
                    if (name == null){
                        console.log("it's null");
                    }else{
                        // Replace every slash, not just the first, so the
                        // name cannot introduce extra path segments.
                        name = name.replace(/\//g,"_");
                        casper.capture('Captures/Searches/search_' + name + '.jpg');
                        console.log("Capturing search_" + name);
                    }
                }
            }else{
                console.log("Still doesn't exist...retry");
                loadSearch(casper,index);
            }

        },function(){
            // Timeout handler: only logs, the search is not retried.
            console.log("Search page timed-out.");  
        },20000);
    });
}

And it adds about 3GB per loop.


回答1:


Well, it turns out this is a very well-known issue with PhantomJS. It has been an open bug for 3+ years, and apparently it has something to do with Qt WebKit. Nonetheless, I was able to solve it by closing each page during the loop and opening a new Phantom page. It's a bit of a hacky workaround, but the memory consumption is far lower. However, after about 200 pages it still has pretty high memory usage (1GB+), so I break up my scripts into blocks of 200 and just start the next one upon completion. Here is the finished product, which completes successfully without too much memory usage. It uses less on macOS than on Windows for some reason.

// Open the start URL, then run loadSearch once for every entry in
// searchPages. Inside the each() callback the Casper instance is passed on
// via `this`.
casper
    .start(url, function () {
        this.echo('continuing captures...');
    })
    .each(searchPages, function (self, entry) {
        loadSearch(this, entry);
    });

function loadSearch(casper,index){
    var currentTime = new Date();
    var month = currentTime.getMonth() + 1;
    var day = currentTime.getDate() + 1;
    var year = currentTime.getFullYear();
    var dateStart = month + "/" + day + "/" + year;
    var fortnightAway = new Date(+new Date + 12096e5);
    var dateEnd = fortnightAway.getMonth() + 1 + "/" + fortnightAway.getDate() + "/" + fortnightAway.getFullYear();

    casper.page.close();
    casper.page = require('webpage').create();

    casper.thenOpen(url,function(){
        var myfile = "data-"+year + "-" + month + "-" + day+".html";
        this.evaluate(function(j) {
            document.querySelector('select[name="searchParameters.localeId"]').selectedIndex = j;
        },index);
        this.evaluate(function(start) {
            $("#leaveDate").val(start);
        },dateStart);
        this.evaluate(function(end) {
            $("#returnDate").val(end);
        },dateEnd);
        this.evaluate(function() {
            $("#OSB_btn").click();
        });
        this.waitForSelector('#destinationForPackage', function() {
            if (this.exists('#destinationForPackage')){
                var name = casper.evaluate(function() {
                    return $("#destinationForPackage option[value='" + $("#destinationForPackage").val() + "']").text()
                });
                if (name != "Going To"){
                    if (name == null){
                        console.log("it's null");
                    }else{
                        name = name.replace("/","_");
                        name = name.replace("/","_");
                        casper.capture('Captures/Searches/search_' + name + '.jpg');
                        console.log("Capturing search_" + name);
                    }
                }
            }else{
                console.log("Search failed to load. Retrying");
                loadSearch(casper,index);
            }

        },function(){
            console.log("Search page timed-out. Retrying");
            loadSearch(casper,index);
        },20000);
    });
}



回答2:


There might be a better solution to the original issue, but for a quick fix on running out of memory, try setTimeout to make the recursive call without winding up the stack...

// Defer the retry to a fresh timer callback so it does not run inside the
// current call stack. A plain function expression is used because arrow
// functions are ES2015 syntax, which PhantomJS does not parse.
setTimeout(function () {
    loadSearch(casper, index);
}, 0);

(This idea assumes that the memory issue is the result of too much recursive depth over a long wait time).



来源:https://stackoverflow.com/questions/44348193/casperjs-running-out-of-memory

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!