Download xlsx from S3 and parse it

前端 未结 5 1682
星月不相逢
星月不相逢 2021-01-05 10:33

I need a service to download an excel file from Amazon S3, then parse with node-xlsx

The problem is that I can't get xlsx to parse the file. When I try to read back

相关标签:
5条回答
  • 2021-01-05 10:50

    fs.writeFile is asynchronous. The file won't be there until the callback is called.

    https://nodejs.org/api/fs.html#fs_fs_writefile_file_data_options_callback

    // Write the file, then report success; the callback runs only once the
    // write has finished (or failed).
    fs.writeFile('message.txt', 'Hello Node.js', function onWritten(err) {
      if (err) {
        throw err;
      }
      console.log('It\'s saved!');
    });
    
    0 讨论(0)
  • 2021-01-05 11:00

    The node-xlsx module requires that the entire xlsx buffer be available. So you cannot pass it a ReadStream like you're currently doing. Try this method which entirely avoids writing to disk:

    // GET /process: fetches the workbook over HTTPS, buffers the whole body in
    // memory (xlsx.parse is handed one complete Buffer), logs every sheet name,
    // and replies 200 on success or 500 on any download/parse failure.
    router.get('/process', (req, res) => {
        const fileName = 'https://some-bucket.s3.amazonaws.com/some-excel-file.xlsx';
        // res.status() alone only sets the code and never ends the response, so
        // send the status explicitly, and at most once.
        const reply = (status) => {
            if (!res.headersSent) res.sendStatus(status);
        };
        https.get(fileName, (response) => {
            if (response.statusCode !== 200) {
                // Not a successful download: drain the body and fail fast
                // instead of feeding it to the parser.
                response.resume();
                reply(500);
                return;
            }
            const chunks = [];
            response
                .on('data', (chunk) => chunks.push(chunk))
                .on('end', () => {
                    try {
                        const book = xlsx.parse(Buffer.concat(chunks));
                        book.forEach((sheet) => console.log('sheet', sheet.name));
                        reply(200);
                    } catch (e) {
                        // A corrupt or non-xlsx payload makes the parser throw.
                        console.error('failed to parse workbook', e);
                        reply(500);
                    }
                })
                .on('error', (e) => {
                    console.error('failed while reading workbook response', e);
                    reply(500);
                });
        }).on('error', (e) => {
            // Connection-level failures are emitted on the request object,
            // not on the response.
            console.error('failed to request workbook', e);
            reply(500);
        });
    });
    
    0 讨论(0)
  • 2021-01-05 11:01

    This is how you can read a file from S3 in Node.js and keep it in memory without first writing the file to some location on disk. It can be used with a combination of S3 and AWS Lambda so that you don't have to write the files to some location on the Lambda.

    Remember that this process is asynchronous.

       // Streams an S3 object into memory and parses it with xlsx once the
       // stream has ended; nothing is written to disk. Bucket and Key are
       // left empty here and must be filled in.
       const params = {
           Bucket: "",
           Key: ""
       };

       const file = s3.getObject(params).createReadStream();
       const buffers = [];

       file.on('data', function (data) {
           buffers.push(data);
       });

       // A stream with no 'error' listener throws the error as an unhandled
       // 'error' event, so report download failures explicitly.
       file.on('error', function (err) {
           console.error("failed to read workbook from S3", err);
       });

       file.on('end', function () {
           // xlsx.parse is given the complete file as a single Buffer.
           const buffer = Buffer.concat(buffers);
           const workbook = xlsx.parse(buffer);
           console.log("workbook", workbook)
       });
    
    0 讨论(0)
  • 2021-01-05 11:11

    Another way of doing this is using exceljs

    const AWS = require('aws-sdk');
    const Excel = require('exceljs');
    
    /**
     * Configures the AWS credentials and opens a read stream for the S3 object.
     *
     * The function stays `async` so existing `await downloadFile()` callers
     * keep receiving a promise.
     *
     * @returns {Promise<object>} Resolves to the stream returned by
     *   `createReadStream()`.
     */
    async function downloadFile(){
        AWS.config.update({
          accessKeyId: AMAZON_ACCESS_KEY,
          secretAccessKey: AMAZON_SECRET_ACCESS_KEY,
        });

        const s3 = new AWS.S3();
        // createReadStream() hands back the stream itself, not a promise, so
        // there is nothing to await here.
        return s3.getObject({ Bucket: 'yor_buket', Key: 'file_name'}).createReadStream();
    }
    
    
    /**
     * Reads an xlsx workbook from a stream and collects the rows of the
     * 'sheet_name' worksheet.
     *
     * @param {object} stream - Readable stream carrying the xlsx file.
     * @returns {Promise<Array>} The `values` of every non-empty row, in order.
     * @throws {Error} If the worksheet 'sheet_name' does not exist. Errors from
     *   reading the workbook reject the returned promise as well.
     */
    async function loadWorkbook(stream){
        // Wait for the read to finish before touching the sheets; collecting
        // rows any earlier would always yield an empty list.
        const workbook = await new Excel.Workbook().xlsx.read(stream);

        const worksheet = workbook.getWorksheet('sheet_name');
        if (!worksheet) {
            throw new Error("worksheet 'sheet_name' not found in workbook");
        }

        const rows = [];
        worksheet.eachRow({ includeEmpty: false }, (row) => {
            rows.push(row.values);
        });
        return rows;
    }
    
    /**
     * Downloads the workbook stream, loads its rows and prints them.
     *
     * @returns {Promise<void>} Settles once the rows have been logged.
     */
    async function loadFromS3(){
        const dataRows = await loadWorkbook(await downloadFile());
        console.log(dataRows);
    }
    
    0 讨论(0)
  • 2021-01-05 11:12

    If you want to use async/await, here is a solution:

        const AWS = require('aws-sdk');
        const XLSX = require('xlsx');
    
        // Hand the access key pair to the SDK configuration up front, before
        // any S3 client is constructed further down.
        const awsCredentials = {
          accessKeyId: AMAZON_ACCESS_KEY,
          secretAccessKey: AMAZON_SECRET_ACCESS_KEY,
        };
        AWS.config.update(awsCredentials);
    
        /**
         * Reads an S3 object fully into memory and hands it to a Node-style callback.
         *
         * @param {string} file - Key of the object inside the bucket.
         * @param {Function} callback - Called as `callback(null, buffer)` when
         *   the stream ends, or `callback(error)` when the stream errors.
         */
        function getBufferFromS3(file, callback){
          const chunks = [];
          const client = new AWS.S3();
          const readable = client.getObject({ Bucket: 'yor_buket', Key: file}).createReadStream();
          // One listener per stream event, registered in data/end/error order.
          const listeners = {
            data: (chunk) => { chunks.push(chunk); },
            end: () => { callback(null, Buffer.concat(chunks)); },
            error: (err) => { callback(err); },
          };
          for (const [eventName, listener] of Object.entries(listeners)) {
            readable.on(eventName, listener);
          }
        }
    
        /**
         * Promise wrapper around the callback-based getBufferFromS3.
         *
         * @param {string} file - Key of the object to read.
         * @returns {Promise<Buffer>} Resolves with the buffer passed to the
         *   callback, or rejects with the error it reports.
         */
        function getBufferFromS3Promise(file) {
          const executor = (fulfil, fail) => {
            const onDone = (error, s3buffer) => (error ? fail(error) : fulfil(s3buffer));
            getBufferFromS3(file, onDone);
          };
          return new Promise(executor);
        }
    
        // create workbook from buffer
        // NOTE: in this require()-based code `await` is only valid inside an
        // async function, so these statements belong in e.g. an async route handler.
        const buffer = await getBufferFromS3Promise(file);
        // State the input type explicitly instead of relying on auto-detection.
        const workbook = XLSX.read(buffer, { type: 'buffer' });


        // If you want to send the workbook as a download to the api end point in node
        const fileName = "Categories.xlsx";
        res.setHeader('Content-disposition', `attachment; filename=${fileName}`);
        res.setHeader('Content-type', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
        const wbout = XLSX.write(workbook, { bookType: 'xlsx', type: 'buffer'});
        // `new Buffer(...)` is deprecated; Buffer.from is the supported replacement.
        res.send(Buffer.from(wbout));
    
    0 讨论(0)
提交回复
热议问题