问题
I am desperately trying to take something like 200 screenshots in a single run. My first attempt was to follow the guidelines, with a simple script invoked 200 times:
// Renders one screenshot of `url` into the file `fname` using a dedicated
// PhantomJS process, then reports the outcome through `callback`.
// NOTE(review): `phantom`, `url`, `fname`, `callback`, `phInstance` and
// `sitepage` are expected to be provided by the surrounding script — confirm.
phantom.create()
  .then(function(instance) {
    console.log("1 - instance")
    phInstance = instance;
    return instance.createPage();
  })
  .then(function(page) {
    console.log("2 - page")
    sitepage = page;
    return page.open(url);
  })
  .then(function(status) {
    console.log("3 - render")
    // Return every intermediate promise so a rejection anywhere reaches the
    // single failure handler at the end of the chain.
    return sitepage.property('clipRect', {top: 0, left: 0, width:3000,height:890});
  })
  .then(function() {
    return sitepage.render(fname);
  })
  .then(function(finished) {
    console.log("\t\t\t---> finished");
    return sitepage.close();
  })
  .then(function() {
    // Only the instance is shut down here: `phantom.exit()` belongs to scripts
    // running inside PhantomJS, not to the phantomjs-node module.
    phInstance.exit();
    callback({msg: 'ok'});
  }, function(err) {
    // Failure path: make sure the PhantomJS process does not outlive the job
    // and that the caller is always notified.
    console.error("screenshot failed for " + url, err);
    if (typeof phInstance !== 'undefined' && phInstance) {
      phInstance.exit();
    }
    callback({msg: 'error', error: err});
  });
This approach kind of works, but it is really overwhelming for the CPU: doing things this way spawns 200 phantom processes, which quickly eat up all the memory.
A more efficient way would be to create a single phantom instance and then drive it to open and render one page at a time, something that could be done with a phantom script, like so:
// PhantomJS script: reads one host name per line from sites.txt and renders a
// screenshot of each into screens/<host>.png, one page at a time, reusing a
// single page object.
var content, counter, f, fs, grab_screen, img, lines, next_screen, page, system, url;

page = require('webpage').create();
system = require('system');
fs = require('fs');
content = '';
lines = [];
url = '';
img = '';
counter = 0;

page.viewportSize = {
  width: 1200,
  height: 800
};
page.settings.userAgent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36';

f = fs.open("sites.txt", "r");
try {
  content = f.read();
} finally {
  // Release the file handle even when the read throws.
  f.close();
}

// Accept LF and CRLF line endings and drop blank entries (for example the one
// produced by a trailing newline), so no screenshot is attempted for "http:///".
lines = content.split(/\r?\n/)
  .map(function(line) {
    return line.trim();
  })
  .filter(function(line) {
    return line.length > 0;
  });

// Opens the site at index `counter`, renders it after a short delay, then
// hands over to next_screen() for the following entry.
grab_screen = function() {
  var site;
  site = lines[counter];
  url = 'http://' + site + '/';
  img = 'screens/' + site + '.png';
  console.log("Grabbing screen for: " + url);
  return page.open(url, function(status) {
    // The 200 ms delay gives the page a moment to paint before rendering.
    return window.setTimeout(function() {
      if (status === 'success') {
        page.render(img);
      } else {
        // Do not write an image for a page that failed to load.
        console.log("Failed to load " + url + " (status: " + status + "), skipping");
      }
      counter++;
      return next_screen();
    }, 200);
  });
};

// Continues with the next site, or terminates PhantomJS when all are done.
next_screen = function() {
  console.log("On to " + counter + " ...");
  if (counter < lines.length) {
    return grab_screen();
  } else {
    return phantom.exit();
  }
};

next_screen();
So I was wondering how to achieve the same thing with phantomjs-node.
回答1:
I finally solved my problem with two things:
- realizing that Node.js is NOT multithreaded;
- using a single phantom instance to render multiple URLs.
Here's how it came out:
/**
 * Renders the screenshot described by `shots[id]` with the shared PhantomJS
 * instance and then continues with `shots[id + 1]`. After the last shot it
 * calls `cb("files_written", {})` and `generatePDF()`.
 *
 * If any step fails, the error is logged, the page is closed and the batch
 * stops at that shot.
 *
 * NOTE(review): relies on `shots`, `fnames`, `mt_per_snap`, `phInstance`,
 * `sitepage`, `requestSync`, `cb` and `generatePDF` from the enclosing scope.
 *
 * @param {number} id - Index into `shots` of the screenshot to take.
 * @returns {Promise} Settles once this shot and all following ones are done.
 */
var webshot = function(id) {
  var shot = shots[id];
  // Keep the page in a local so later steps never depend on whatever the
  // shared `sitepage` variable happens to hold at that moment.
  var page = null;

  console.log('makeshot ', shot.url);
  requestSync("POST", "http://localhost:4041/options/set", { json:{ opts:JSON.stringify(shot.options) } });

  return phInstance.createPage()
    .then(function(_page) {
      console.log("2 - page")
      page = _page;
      sitepage = _page;
      return page.open(shot.url);
    })
    .then(function(status) {
      console.log("3 - render %s / %s", id, shots.length);
      return page.property('clipRect', {top: 0, left: 0, width:1500,height:220});
    })
    .then(function() {
      return page.render(shot.fname);
    })
    .then(function(finished) {
      console.log("\t\t\t---> finished");
      var rendered = page;
      // Cleared first so the failure handler never closes the page twice.
      page = null;
      return rendered.close();
    })
    .then(function() {
      var packId = shot.options.pack_id;
      fnames[Math.ceil(parseInt(packId, 10) / mt_per_snap) - 1] = "localhost_" + packId + ".png";
      if (id < shots.length - 1) {
        return webshot(id + 1);
      }
      console.log("all done: %s files has been written", shots.length);
      // invoke pdf generation for the pdf page
      cb("files_written", { });
      generatePDF();
    })
    .catch(function(err) {
      console.error('webshot failed for %s:', shot.url, err);
      if (page) {
        // Release the page; a failure to close is logged, not rethrown.
        return page.close().catch(function(closeErr) {
          console.error('could not close page for %s:', shot.url, closeErr);
        });
      }
    });
};
So, long story short: I have put the page I wanted to render in a separate script, which I feed with variables before taking each shot, and this solves the "multithreading problem". In addition, I have a single variable named phInstance, which is initialized as follows:
/**
 * Starts the shared PhantomJS process and stores it in `phInstance`.
 *
 * The promise is returned so callers can wait until `phInstance` is set
 * before taking screenshots, and can handle a failed start-up.
 *
 * @returns {Promise} Resolves with the created instance.
 */
var initPhantom = function() {
  return phantom.create()
    .then(function(instance) {
      console.log("1 - instance")
      phInstance = instance;
      return instance;
    });
};
Remember to kill the phantom instance once you're done; otherwise it will stay alive and keep consuming your resources.
回答2:
You could try something like webshot. I'm using it with async.js; however, I sometimes get "Error: PhantomJS exited with return value 1". I have not yet found out why.
// Takes a screenshot of every URL in `links` with webshot, driven by async.map.
// NOTE(review): async.map starts all iteratees concurrently; for large batches
// a bounded variant such as mapLimit/mapSeries may be preferable — confirm.
async.map(
  links,
  function(link, cb) {
    var config = {}; // your webshot options
    // A raw URL contains characters such as ':' and '/', which path.join would
    // turn into nested directories; encoding it yields one unique folder name.
    var folder = encodeURIComponent(link);
    var file = path.join('./', 'screenshots', folder, 'screenshot.png');
    webshot(link, file, config, function(err) {
      cb(err, link);
    });
  },
  function(e, links) {
    if (e) {
      // Surface the failure instead of silently dropping it.
      console.error('screenshot batch failed:', e);
      return;
    }
    // done
  }
);
Resources:
https://www.npmjs.com/package/webshot https://www.npmjs.com/package/asyncjs
来源:https://stackoverflow.com/questions/36694591/batch-screenshots-with-phantom-for-nodejs