Batch screenshots with phantom for nodejs

房东的猫 提交于 2020-02-25 13:09:59

问题


I am desperately trying to process something like 200 screenshots in a single shot. My first attempt was to follow the guidelines with a simple script invoked 200 times:

// Renders one screenshot with phantomjs-node: start a PhantomJS process, open
// `url` in a new page, clip the capture area and write it to `fname`.
// Relies on names supplied by the surrounding script: `url`, `fname`,
// `callback`, and the shared `phInstance` / `sitepage` variables.
phantom.create()
  .then(function(instance) {
      console.log("1 - instance")
      phInstance = instance;
      return instance.createPage();
  })
  .then(function(page) {
      console.log("2 - page")
      sitepage = page;
      return page.open(url);
  })
  .then(function(status) {
      console.log("3 - render")
      // Return each step so a failure propagates to the handler at the end of
      // the chain instead of being lost inside a nested promise.
      return sitepage.property('clipRect', {top: 0, left: 0, width:3000,height:890});
  })
  .then(function() {
      return sitepage.render(fname);
  })
  .then(function(finished) {
      console.log("\t\t\t---> finished");
      sitepage.close();
      // Only the instance owns the PhantomJS process; the `phantom` module
      // itself exposes no exit(), so calling it would throw here.
      phInstance.exit();
      callback({msg: 'ok'})
  })
  .catch(function(err) {
      console.error("screenshot failed:", err);
      // Do not leave an orphaned PhantomJS process behind when a step fails.
      if (typeof phInstance !== 'undefined' && phInstance) {
          phInstance.exit();
      }
  });

This approach kind of works, but it's really overwhelming for the CPU. The problem is that this way of doing things leads to 200 phantom processes that quickly eat up all the memory.

A more efficient way of doing so would be to create a single phantom instance and then drive it to open one page at a time and render it, something that could be done with a phantom script, like so:

  // PhantomJS script (runs inside the PhantomJS runtime, not Node): reads one
  // host name per line from sites.txt and renders each site, one page at a
  // time, to screens/<host>.png using a single page object.
  var content, counter, f, fs, grab_screen, img, lines, next_screen, page, system, url;
  page = require('webpage').create();
  system = require('system');
  fs = require('fs');
  content = '';
  lines = [];
  url = '';
  img = '';
  counter = 0;

  page.viewportSize = {
    width: 1200,
    height: 800
  };

  page.settings.userAgent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36';

  f = fs.open("sites.txt", "r");

  try {
    content = f.read();
  } finally {
    // Release the file handle even if reading fails.
    f.close();
  }

  // Strip stray whitespace / carriage returns and drop blank lines, so a
  // trailing newline in sites.txt does not produce a bogus "http:///" capture.
  lines = content.split("\n").map(function(line) {
    return line.trim();
  }).filter(function(line) {
    return line.length > 0;
  });

  // Opens the site at the current `counter` position, renders it after a short
  // delay, then advances to the next entry.
  grab_screen = function() {
    var site;
    site = lines[counter];
    url = 'http://' + site + '/';
    img = 'screens/' + site + '.png';
    console.log("Grabbing screen for: " + url);
    return page.open(url, function(status) {
      if (status !== 'success') {
        // Surface load failures; the render below is still attempted.
        console.log("Failed to load " + url + " (status: " + status + ")");
      }
      // Give the page 200 ms to settle before taking the capture.
      return window.setTimeout(function() {
        page.render(img);
        counter++;
        return next_screen();
      }, 200);
    });
  };

  // Processes the next site, or exits PhantomJS once every line is done.
  next_screen = function() {
    console.log("On to " + counter + " ...");
    if (counter < lines.length) {
      return grab_screen();
    } else {
      return phantom.exit();
    }
  };

  next_screen();

So I was wondering how to achieve that with phantomjs-node.


回答1:


I finally solved my problem with two things:

  1. Realizing that Node.js is NOT multithreaded.
  2. Using a single instance of phantom, to render multiple urls.

here's how it came out:

  /**
   * Renders shots[id] with the shared PhantomJS instance, then moves on to the
   * next shot until the whole `shots` list has been written.
   *
   * Relies on names owned by the surrounding script: `shots`, `phInstance`,
   * `sitepage`, `fnames`, `mt_per_snap`, `requestSync`, `cb`, `generatePDF`.
   *
   * @param {number} id - index into `shots` of the screenshot to take.
   * @returns {Promise} settles once this shot and all following ones are
   *   done; rejects with the first error encountered.
   */
  var webshot = function(id) {
      var shot = shots[id];
      var page = null;
      console.log('makeshot ', shot.url);
      // Push this shot's options to the local render page before opening it.
      requestSync("POST", "http://localhost:4041/options/set", { json:{ opts:JSON.stringify(shot.options) } });
      return phInstance.createPage()
        .then(function(_page) {
            console.log("2 - page")
            // Keep the shared variable in sync, but work on a local reference.
            page = sitepage = _page;
            return _page.open(shot.url);
        })
        .then(function(status) {
            console.log("3 - render %s / %s", id, shots.length);
            // Returning each step keeps failures on this chain instead of
            // losing them in a nested, unobserved promise.
            return page.property('clipRect', {top: 0, left: 0, width:1500,height:220});
        })
        .then(function() {
            return page.render(shot.fname);
        })
        .then(function(finished) {
            console.log("\t\t\t---> finished");
            page.close();
            page = null; // already closed; nothing left to clean up on failure
            // Explicit radix so pack_id is always parsed as decimal.
            fnames[Math.ceil(parseInt(shot.options.pack_id, 10)/mt_per_snap)-1] = "localhost_" + shot.options.pack_id + ".png";
            if(id<shots.length-1) {
              return webshot(id + 1);
            }
            console.log("all done: %s files has been written", shots.length);
            // invoke pdf generation for the pdf page
            cb("files_written", {  });
            generatePDF();
        })
        .catch(function(err) {
            var rethrow = function() { throw err; };
            if (!page) {
              throw err;
            }
            // Close the page that was still open when the failure happened,
            // then report the original error to the caller.
            var openPage = page;
            page = null;
            return Promise.resolve(openPage.close()).then(rethrow, rethrow);
        });
  }

So, long story short: I have put the page I wanted to render in a separate script, which I feed with variables before making the shot, and this solves the "multithreading problem". Afterwards, I have a single variable named phInstance, which is declared as follows:

  /**
   * Starts the single shared PhantomJS process and stores it in `phInstance`.
   *
   * @returns {Promise} resolves with the created instance once `phInstance`
   *   has been assigned; rejects if phantom.create() fails, so callers can
   *   wait for readiness and handle start-up errors.
   */
  var initPhantom = function() {
    return phantom.create()
        .then(function(instance) {
            console.log("1 - instance")
            phInstance = instance;
            return instance;
        })
  }

Remember to kill the phantom instance once you're done; otherwise it will stay there and suck up your resources for good.




回答2:


You could try something like webshot. I'm using it with async.js, however I sometimes get Error: PhantomJS exited with return value 1. Have not yet found out why.

// Take one screenshot per link with bounded concurrency: plain async.map
// starts every capture at once, i.e. one PhantomJS process per link.
async.mapLimit(
    links,
    5, // maximum number of captures running at the same time
    function(link, cb) {
        var config = {}; // your webshot options
        // Encode the link so it is usable as a single, unique folder name.
        var folder = encodeURIComponent(link);
        var file = path.join('./', 'screenshots', folder, 'screenshot.png');
        webshot(link, file, config, function(err) {
            cb(err, link);
        });
    },
    function(e, links) {
        if (e) {
            // Surface the first failure instead of silently dropping it.
            console.error('screenshot batch failed:', e);
            return;
        }
        // done
    }
);

Resources:

https://www.npmjs.com/package/webshot https://www.npmjs.com/package/asyncjs



来源:https://stackoverflow.com/questions/36694591/batch-screenshots-with-phantom-for-nodejs

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!