I need a service that downloads an Excel file from Amazon S3 and then parses it with node-xlsx.
The problem is that I can't get xlsx to parse the file when I try to read it back …
fs.writeFile
is asynchronous. The file will not exist on disk until the callback has been called.
https://nodejs.org/api/fs.html#fs_fs_writefile_file_data_options_callback
// The write finishes asynchronously: the file is only guaranteed to exist
// once this callback has run without an error.
const onWritten = (err) => {
  if (err) {
    throw err;
  }
  console.log('It\'s saved!');
};

fs.writeFile('message.txt', 'Hello Node.js', onWritten);
The node-xlsx
module requires that the entire xlsx buffer be available, so you cannot pass it a ReadStream
as you are currently doing. Try this approach, which avoids writing to disk entirely:
/**
 * GET /process — downloads the workbook over HTTPS, buffers it fully in
 * memory, parses it with xlsx and logs every sheet name.
 *
 * Responds 200 with an empty body on success and 500 with an empty body on
 * any download or parse failure.
 */
router.get('/process', (req, res) => {
  const fileName = 'https://some-bucket.s3.amazonaws.com/some-excel-file.xlsx';

  // Single failure path so the client always gets exactly one answer.
  const fail = (err) => {
    console.error('Failed to process workbook', err);
    if (!res.headersSent) {
      // status() alone only sets the code; end() actually sends the response.
      res.status(500).end();
    }
  };

  https
    .get(fileName, (response) => {
      if (response.statusCode !== 200) {
        // Drain the body so the socket is released, then report the failure.
        response.resume();
        fail(new Error(`Unexpected status code ${response.statusCode}`));
        return;
      }

      const chunks = [];
      response
        .on('data', (chunk) => chunks.push(chunk))
        .on('end', () => {
          try {
            // xlsx.parse needs the complete file, hence the concatenated buffer.
            const book = xlsx.parse(Buffer.concat(chunks));
            book.forEach((sheet) => console.log('sheet', sheet.name));
            res.status(200).end();
          } catch (err) {
            fail(err);
          }
        })
        .on('error', fail);
    })
    // Connection-level errors (DNS, TLS, refused) are emitted on the request.
    .on('error', fail);
});
This is how you can read a file from S3 in Node.js and keep it in memory without first writing it to disk. It can be used with a combination of S3 and AWS Lambda, so that you don't have to write the file to some location on the Lambda.
Remember that this process is asynchronous.
// Location of the object to read; fill in the bucket name and object key.
const params = {
  Bucket: "",
  Key: ""
};

// Stream the object and collect its chunks in memory (nothing touches disk).
const file = s3.getObject(params).createReadStream();
const buffers = [];

file.on('data', function (data) {
  buffers.push(data);
});

// Without an 'error' listener a failed download would surface as an
// unhandled 'error' event and take the process down.
file.on('error', function (err) {
  console.error("Failed to read the object from S3", err);
});

file.on('end', function () {
  // xlsx.parse needs the whole file, so parse only once every chunk arrived.
  const buffer = Buffer.concat(buffers);
  const workbook = xlsx.parse(buffer);
  console.log("workbook", workbook)
});
Another way to do this is with exceljs:
const AWS = require('aws-sdk');
const Excel = require('exceljs');
/**
 * Opens a read stream for the workbook object stored in S3.
 *
 * Sets the SDK's global credentials from AMAZON_ACCESS_KEY and
 * AMAZON_SECRET_ACCESS_KEY before issuing the request.
 *
 * @returns {Promise<object>} Resolves with the stream returned by
 *   `getObject(...).createReadStream()`.
 */
async function downloadFile() {
  AWS.config.update({
    accessKeyId: AMAZON_ACCESS_KEY,
    secretAccessKey: AMAZON_SECRET_ACCESS_KEY,
  });
  const s3 = new AWS.S3();
  // createReadStream() hands the stream back synchronously, so there is
  // nothing to await here; the function stays async only so that callers
  // keep receiving a promise.
  return s3.getObject({ Bucket: 'yor_buket', Key: 'file_name' }).createReadStream();
}
/**
 * Reads an .xlsx workbook from a stream and collects the values of every
 * non-empty row of the worksheet named 'sheet_name'.
 *
 * @param {object} stream - Readable stream containing the .xlsx file.
 * @returns {Promise<Array>} Resolves with one `row.values` entry per row.
 * @throws {Error} Rejects when the read fails or the worksheet is missing.
 */
async function loadWorkbook(stream) {
  const workbook = new Excel.Workbook();
  // Wait for the read to finish before touching the sheet; resolving earlier
  // would always hand back an empty list.
  const loaded = await workbook.xlsx.read(stream);
  const worksheet = loaded.getWorksheet('sheet_name');
  if (!worksheet) {
    throw new Error("Worksheet 'sheet_name' not found in workbook");
  }

  const rows = [];
  worksheet.eachRow({ includeEmpty: false }, (row) => {
    rows.push(row.values);
  });
  return rows;
}
/**
 * Fetches the workbook stream via downloadFile(), converts it to rows with
 * loadWorkbook() and prints those rows to the console.
 *
 * @returns {Promise<void>}
 */
async function loadFromS3() {
  const source = await downloadFile();
  console.log(await loadWorkbook(source));
}
If you want to use async/await, here is a solution:
const AWS = require('aws-sdk');
const XLSX = require('xlsx');
// Register the access key pair on the SDK's global configuration.
const credentials = {
  accessKeyId: AMAZON_ACCESS_KEY,
  secretAccessKey: AMAZON_SECRET_ACCESS_KEY,
};
AWS.config.update(credentials);
/**
 * Downloads an object from the 'yor_buket' bucket and passes its complete
 * contents to a Node-style callback.
 *
 * @param {string} file - Key of the object to download.
 * @param {Function} callback - Called as `callback(null, buffer)` once the
 *   stream ends, or as `callback(error)` when the stream emits an error.
 */
function getBufferFromS3(file, callback) {
  const client = new AWS.S3();
  const chunks = [];
  const source = client.getObject({ Bucket: 'yor_buket', Key: file }).createReadStream();

  source.on('data', (chunk) => {
    chunks.push(chunk);
  });
  source.on('end', () => {
    const whole = Buffer.concat(chunks);
    callback(null, whole);
  });
  source.on('error', (error) => {
    callback(error);
  });
}
/**
 * Promise wrapper around getBufferFromS3.
 *
 * @param {string} file - Key of the object to download.
 * @returns {Promise<Buffer>} Resolves with the buffer handed to the callback,
 *   rejects with the error the callback reports.
 */
function getBufferFromS3Promise(file) {
  const executor = (resolve, reject) => {
    const onDone = (error, s3buffer) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(s3buffer);
    };
    getBufferFromS3(file, onDone);
  };
  return new Promise(executor);
}
// Create a workbook from the buffer downloaded from S3.
const buffer = await getBufferFromS3Promise(file);
const workbook = XLSX.read(buffer);

// If you want to send the workbook as a download from an API endpoint in Node:
const fileName = 'Categories.xlsx';
res.setHeader('Content-disposition', `attachment; filename=${fileName}`);
res.setHeader('Content-type', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
const wbout = XLSX.write(workbook, { bookType: 'xlsx', type: 'buffer' });
// The `new Buffer(...)` constructor is deprecated; Buffer.from() is the
// supported replacement.
res.send(Buffer.from(wbout));