Memory leak in Node.js scraper

前端 未结 4 1802
小鲜肉
小鲜肉 2021-02-04 13:23

This is a simple scraper written in JavaScript with Node.js, for scraping Wikipedia for periodic table element data. The dependencies are jsdom for DOM manipulation and chain-ga

4条回答
  •  余生分开走
    2021-02-04 13:48

    I think I have a better workaround: reuse a single jsdom instance and swap in each new page by setting the window.document.innerHTML property. This solved my memory leak problems!

        // jsdom leaks memory when many instances are created,
        // so cache a single instance and swap out its innerHTML
        var dom = require('jsdom');
        // The single shared window; stays undefined until the first dom.env call succeeds.
        var win;
        // Non-null only while the shared window is being created: holds the
        // { html, fnCallback } pairs of calls that arrived in the meantime.
        var pendingCalls = null;

        /**
         * Loads `html` into one shared window and hands that window's jQuery
         * to `fnCallback`.
         *
         * The first call creates the window through dom.env. Calls made while
         * that creation is still in flight are queued and replayed, in order,
         * on the same window once it is ready, so only one window is ever
         * created. Later calls replace the document markup via
         * document.innerHTML and invoke the callback immediately.
         *
         * @param {string} html - Markup to load into the shared document.
         * @param {function} fnCallback - Called with the shared window's jQuery.
         * @throws {Error} 'failed to init dom' (original error on `.cause`),
         *     raised from inside the dom.env callback when creation fails.
         *     Calls queued behind the failed creation are dropped; the next
         *     useJQuery call attempts creation again.
         */
        var useJQuery = function(html, fnCallback) {
            if (win) {
                win.document.innerHTML = html;
                fnCallback(win.jQuery);
                return;
            }

            if (pendingCalls) {
                // Creation already in progress: wait for it instead of
                // asking dom.env for a second window.
                pendingCalls.push({ html: html, fnCallback: fnCallback });
                return;
            }

            pendingCalls = [];
            var defEnv = {
                html: html,
                scripts: ['jquery-1.5.min.js']
            };
            dom.env(defEnv, function (err, window) {
                var queued = pendingCalls;
                // Reset first so that a failure leaves room for a retry.
                pendingCalls = null;

                if (err) {
                    var failure = new Error('failed to init dom');
                    failure.cause = err;
                    throw failure;
                }

                win = window;
                fnCallback(window.jQuery);

                // Replay the calls that were waiting, in arrival order.
                for (var i = 0; i < queued.length; i++) {
                    win.document.innerHTML = queued[i].html;
                    queued[i].fnCallback(win.jQuery);
                }
            });
        };
        ....
        // Example call: load `html` into the shared window, then use its jQuery.
        useJQuery(html, function (jq) {
            jq('woohoo').val('test');
        });
    

提交回复
热议问题