Simplest way to download and unzip files in Node.js cross-platform?

前端 未结 11 1453
一个人的身影
一个人的身影 2020-11-30 23:44

Just looking for a simple solution to downloading and unzipping .zip or .tar.gz files in Node.js on any operating system.

Not sure if this is built in or if I have to use a separate library.

相关标签:
11条回答
  • 2020-11-30 23:59

    Another working example:

    var zlib = require('zlib');
    var tar = require('tar');
    var ftp = require('ftp');
    
    // Decompressed text of every archive entry, keyed by the entry's path.
    // A prototype-less object is used instead of an array: the keys are
    // arbitrary file names, and a name such as "length" would corrupt an array.
    var files = Object.create(null);

    var conn = new ftp();
    conn.on('connect', function(e)
    {
        conn.auth(function(e)
        {
            if (e)
            {
                throw e;
            }
            conn.get('/tz/tzdata-latest.tar.gz', function(e, stream)
            {
                // A failed get() must stop here; otherwise the stream calls
                // below would fail on their own and hide the real error.
                if (e)
                {
                    console.log('ERROR during get(): ' + e);
                    conn.end();
                    return;
                }

                // Runs once the stream emits 'success': close the connection
                // and dump everything that was collected into `files`.
                stream.on('success', function()
                {
                    conn.end();

                    console.log("Processing files ...");

                    for (var name in files)
                    {
                        var file = files[name];

                        console.log("filename: " + name);
                        console.log(file);
                    }
                    console.log("OK");
                });
                stream.on('error', function(e)
                {
                    console.log('ERROR during get(): ' + e);
                    conn.end();
                });

                console.log("Reading ...");

                // download -> gunzip -> tar parser; each 'entry' is one archive member.
                stream
                .pipe(zlib.createGunzip())
                .pipe(tar.Parse())
                .on("entry", function (e)
                {
                    var filename = e.props["path"];
                    console.log("filename:" + filename);
                    if( files[filename] == null )
                    {
                        files[filename] = "";
                    }
                    // Append each chunk of the entry's data as text.
                    e.on("data", function (c)
                    {
                        files[filename] += c.toString();
                    });
                });
            });
        });
    })
    .connect(21, "ftp.iana.org");
    
    0 讨论(0)
  • 2020-12-01 00:01

    Check out adm-zip.

    ADM-ZIP is a pure JavaScript implementation for zip data compression for NodeJS.

    The library allows you to:

    • decompress zip files directly to disk or in-memory buffers
    • compress files and store them to disk in .zip format or in compressed buffers
    • update content of/add new/delete files from an existing .zip
    0 讨论(0)
  • 2020-12-01 00:01

    I had been looking for this for a long time and found no simple working example, but based on these answers I created the downloadAndUnzip() function.

    The usage is quite simple:

    // Fetch the archive, print the requested entry, and report any failure on stderr.
    downloadAndUnzip('http://your-domain.com/archive.zip', 'yourfile.xml')
        .then((content) => {
            // unzipped content of yourfile.xml in root of archive.zip
            console.log(content);
        })
        .catch((failure) => {
            console.error(failure);
        });
    

    And here is the declaration:

    var AdmZip = require('adm-zip');
    var request = require('request');
    
    /**
     * Download a zip archive and return the text of one entry inside it.
     *
     * @param {string} url - Address of the .zip archive to download.
     * @param {string} fileName - Entry name (path inside the archive) to extract.
     * @returns {Promise<string>} Resolves with the entry's data decoded as UTF-8.
     *     Rejects when the request fails, when the server answers with a
     *     non-2xx status, when the body cannot be read as a zip, or when no
     *     entry is named `fileName`.
     */
    var downloadAndUnzip = function (url, fileName) {

        /**
         * Download a file
         *
         * @param url
         * @returns {Promise} resolves with the response body
         */
        var download = function (url) {
            return new Promise(function (resolve, reject) {
                request({
                    url: url,
                    method: 'GET',
                    encoding: null
                }, function (err, response, body) {
                    if (err) {
                        return reject(err);
                    }
                    // An error page is not an archive: fail here with the real
                    // cause instead of letting the unzip step report a bad zip.
                    if (response.statusCode < 200 || response.statusCode >= 300) {
                        return reject(new Error('Download failed with HTTP status ' + response.statusCode + ': ' + url));
                    }
                    resolve(body);
                });
            });
        };

        /**
         * Unzip a Buffer
         *
         * @param buffer
         * @returns {Promise} resolves with the text of the entry named fileName
         */
        var unzip = function (buffer) {
            return new Promise(function (resolve, reject) {

                var zip = new AdmZip(buffer);
                var zipEntries = zip.getEntries(); // an array of ZipEntry records

                // Only the first match is used, so only that entry is decoded.
                var matches = zipEntries.filter(function (zipEntry) {
                    return zipEntry.entryName === fileName;
                });

                if (matches.length === 0) {
                    reject(new Error('No file found in archive: ' + fileName));
                    return;
                }

                resolve(matches[0].getData().toString('utf8'));
            });
        };


        return download(url)
            .then(unzip);
    };
    
    0 讨论(0)
  • 2020-12-01 00:10

    It's 2017 (October 26th, to be exact).

    For an ancient and pervasive technology such as unzip I would expect there to exist a fairly popular, mature node.js unzip library that is "stagnant" and "unmaintained" because it is "complete".

    However, most libraries appear either to be completely terrible or to have commits as recently as just a few months ago. This is quite concerning... so I've gone through several unzip libraries, read their docs, and tried their examples to try to figure out WTF. For example, I've tried these:

    • thejoshwolfe/yauzl
    • antelle/node-stream-zip
    • ZJONSSON/node-unzipper
    • EvanOxfeld/node-unzip
    • Stuk/jszip
    • kriskowal/zip

    Update 2020: Haven't tried it yet, but there's also archiver

    Top Recommendation: yauzl

    Works great for a completely downloaded file. Not as great for streaming.

    Well documented. Works well. Makes sense.

    2nd Pick: node-stream-zip

    antelle's node-stream-zip seems to be the best

    Install:

    npm install --save node-stream-zip
    

    Usage:

    'use strict';

    var fs = require('fs');
    // Required here because path.resolve/relative/dirname are used below;
    // without it the first 'entry' event throws a ReferenceError.
    var path = require('path');
    var StreamZip = require('node-stream-zip');

    // Directory that archive entries are written into.
    var outputDir = './temp';

    var zip = new StreamZip({
      file: './example.zip'
    , storeEntries: true
    });

    zip.on('error', function (err) { console.error('[ERROR]', err); });

    zip.on('ready', function () {
      console.log('All entries read: ' + zip.entriesCount);
      //console.log(zip.entries());
    });

    // Handles each archive entry: skip unsafe paths and directories, write files to disk.
    zip.on('entry', function (entry) {
      var pathname = path.resolve(outputDir, entry.name);
      var relative = path.relative(outputDir, pathname);

      // An entry escapes outputDir when its relative path climbs upwards
      // ("..", "../x") or is absolute (e.g. another drive on Windows).
      // A name that merely contains ".." (such as "a..b.txt") is fine.
      if ('..' === relative || relative.startsWith('..' + path.sep) || path.isAbsolute(relative)) {
          console.warn("[zip warn]: ignoring maliciously crafted paths in zip file:", entry.name);
          return;
      }

      if ('/' === entry.name[entry.name.length - 1]) {
        console.log('[DIR]', entry.name);
        return;
      }

      console.log('[FILE]', entry.name);
      zip.stream(entry.name, function (err, stream) {
        if (err) { console.error('Error:', err.toString()); return; }

        stream.on('error', function (err) { console.log('[ERROR]', err); return; });

        // example: print contents to screen
        //stream.pipe(process.stdout);

        // example: save contents to file
        fs.mkdir(
          path.dirname(pathname),
          { recursive: true },
          function (err) {
            // Do not try to write into a directory that could not be created.
            if (err) { console.error('[ERROR]', err); return; }

            stream.pipe(fs.createWriteStream(pathname));
          }
        );
      });
    });
    

    Security Warning:

    Not sure if this checks entry.name for maliciously crafted paths that would resolve incorrectly (such as ../../../foo or /etc/passwd).

    You can easily check this yourself by evaluating /\.\./.test(path.relative('./to/dir', path.resolve('./to/dir', entry.name))); a true result means the entry would resolve outside the target directory.

    Pros: (Why do I think it's the best?)

    • can unzip normal files (maybe not some crazy ones with weird extensions)
    • can stream
    • seems to not have to load the whole zip to read entries
    • has examples in normal JavaScript (not compiled)
    • doesn't include the kitchen sink (i.e. url loading, S3, or db layers)
    • uses some existing code from a popular library
    • doesn't have too much senseless hipster or ninja-foo in the code

    Cons:

    • Swallows errors like a hungry hippo
    • Throws strings instead of errors (no stack traces)
    • zip.extract() doesn't seem to work (hence I used zip.stream() in my example)

    Runner up: node-unzipper

    Install:

    npm install --save unzipper
    

    Usage:

    'use strict';
    
    var fs = require('fs');
    var unzipper = require('unzipper');
    
    // Stream the archive through unzipper's parser and print every file entry to stdout.
    fs.createReadStream('./example.zip')
      .pipe(unzipper.Parse())
      .on('entry', function (entry) {
        var fileName = entry.path;
        var type = entry.type; // 'Directory' or 'File'

        console.log();
        if (/\/$/.test(fileName)) {
          console.log('[DIR]', fileName, type);
          // Every entry has to be consumed or drained; unzipper's docs warn
          // that the parse stream halts otherwise.
          entry.autodrain();
          return;
        }

        console.log('[FILE]', fileName, type);

        // TODO: probably also needs the security check

        entry.pipe(process.stdout/*fs.createWriteStream('output/path')*/);
        // NOTE: To ignore use entry.autodrain() instead of entry.pipe()
      });
    

    Pros:

    • Seems to work in a similar manner to node-stream-zip, but less control
    • A more functional fork of unzip
    • Seems to run in serial rather than in parallel

    Cons:

    • Kitchen sink much? Just includes a ton of stuff that's not related to unzipping
    • Reads the whole file (by chunk, which is fine), not just random seeks
    0 讨论(0)
  • 2020-12-01 00:12

    Download and extract for .tar.gz:

    const https = require("https");
    const tar = require("tar");
    
    // Download a .tar.gz and extract it while it streams in.
    https.get("https://url.to/your.tar.gz", function(response) {
      // https.get() does not follow redirects, and an error page is not a
      // tarball, so anything other than a 2xx answer is reported and skipped.
      if (response.statusCode < 200 || response.statusCode >= 300) {
        response.resume(); // discard the body so the connection can be released
        console.error("Download failed with HTTP status " + response.statusCode);
        return;
      }

      response
        .pipe(
          tar.x({
            strip: 1,
            C: "some-dir"
          })
        )
        // Without a listener, an extraction 'error' event would crash the process.
        .on("error", function(err) {
          console.error("Extraction failed:", err);
        });
    }).on("error", function(err) {
      // Connection-level failures (DNS, TLS, reset) are emitted on the request.
      console.error("Download failed:", err);
    });
    
    0 讨论(0)
提交回复
热议问题