Stream uploading file to S3 on Node.js using formidable and (knox or aws-sdk)

Question


I'm trying to stream upload a file submitted via a form directly to an Amazon S3 bucket, using aws-sdk or knox. Form handling is done with formidable.

My question is: how do I properly use formidable with aws-sdk (or knox) using each of these libraries' latest features for handling streams?

I'm aware that this topic has already been asked here in different flavors, e.g.:

  • How to receive an uploaded file using node.js formidable library and save it to Amazon S3 using knox?
  • node application stream file upload directly to amazon s3
  • Accessing the raw file stream from a node-formidable file upload (and its very useful accepted answer on overriding form.onPart())

However, I believe those answers are a bit outdated and/or off topic (e.g. CORS support, which I don't wish to use for now for various reasons), and, most importantly, make no reference to the latest features of either aws-sdk (see: https://github.com/aws/aws-sdk-js/issues/13#issuecomment-16085442) or knox (notably putStream() or its readableStream.pipe(req) variant, both explained in the docs).

After hours of struggling, I came to the conclusion that I needed some help (disclaimer: I'm quite a newbie with streams).

HTML form:

<form action="/uploadPicture" method="post" enctype="multipart/form-data">
  <input name="picture" type="file" accept="image/*">
  <input type="submit">
</form>

The Express bodyParser middleware is configured this way (defer: true defers the multipart parsing instead of completing it before the route handler runs):

app.use(express.bodyParser({defer: true}))

POST request handler:

uploadPicture = (req, res, next) ->
  form = new formidable.IncomingForm()
  form.parse(req)

  form.onPart = (part) ->
    if not part.filename
      # Let formidable handle all non-file parts (fields)
      form.handlePart(part)
    else
      handlePart(part, form.bytesExpected)

  handlePart = (part, fileSize) ->
    # aws-sdk version
    params =
      Bucket: "mybucket"
      Key: part.filename
      ContentLength: fileSize
      Body: part # passing stream object as body parameter

    awsS3client.putObject(params, (err, data) ->
      if err
        console.log err
      else
        console.log data
    )

However, I'm getting the following error:

{ [RequestTimeout: Your socket connection to the server was not read from or written to within the timeout period. Idle connections will be closed.]
  message: 'Your socket connection to the server was not read from or written to within the timeout period. Idle connections will be closed.',
  code: 'RequestTimeout',
  name: 'RequestTimeout',
  statusCode: 400,
  retryable: false }

A knox version of the handlePart() function, tailored this way, also fails miserably:

handlePart = (part, fileSize) ->
  headers =
    "Content-Length": fileSize
    "Content-Type": part.mime
  knoxS3client.putStream(part, part.filename, headers, (err, res) ->
    if err
      console.log err
    else
      console.log res
  )      

I also get a big res object with a 400 statusCode somewhere.

Region is configured to eu-west-1 in both cases.
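
For reference, a minimal sketch of how the region can be set for both clients (in plain JS; the exact client construction isn't shown above, so the credential values and bucket name are placeholders):

var AWS = require('aws-sdk');
var knox = require('knox');

// aws-sdk: set the region before creating the S3 client
AWS.config.update({ region: 'eu-west-1' });
var awsS3client = new AWS.S3();

// knox: the region is passed when creating the client
var knoxS3client = knox.createClient({
  key: 'AWS_ACCESS_KEY',      // placeholder credentials
  secret: 'AWS_SECRET_KEY',
  bucket: 'mybucket',
  region: 'eu-west-1'
});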

Additional notes:

  • node 0.10.12
  • latest formidable from npm (1.0.14)
  • latest aws-sdk from npm (1.3.1)
  • latest knox from npm (0.8.3)


Answer 1:


Well, according to the creator of Formidable, direct streaming to Amazon S3 is impossible:

The S3 API requires you to provide the size of new files when creating them. This information is not available for multipart/form-data files until they have been fully received. This means streaming is impossible.

Indeed, form.bytesExpected refers to the size of the whole form, not to the size of a single file.

The data must therefore first hit memory or disk on the server before being uploaded to S3.
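
A minimal sketch of the resulting two-step approach (in plain JS rather than the question's CoffeeScript): formidable's default behaviour is to write each file part to a temporary path, so the exact size is known before the S3 request starts. The bucket name and the awsS3client instance are the ones assumed in the question.

var fs = require('fs');
var formidable = require('formidable');

var uploadPicture = function(req, res, next) {
  var form = new formidable.IncomingForm();
  // Let formidable buffer the uploaded file to disk first...
  form.parse(req, function(err, fields, files) {
    if (err) return next(err);
    var file = files.picture; // field name from the HTML form above
    // ...then upload the temp file with an exact ContentLength.
    awsS3client.putObject({
      Bucket: 'mybucket',
      Key: file.name,
      ContentLength: file.size,
      ContentType: file.type,
      Body: fs.createReadStream(file.path)
    }, function(err, data) {
      if (err) return next(err);
      res.send(200);
    });
  });
};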




Answer 2:


Using AWS S3's multipart upload (with s3-upload-stream as the working module) and node-formidable's readable stream, you can pipe the stream to the upload like this:

var formidable = require('formidable');
var http = require('http');
var util = require('util');
var AWS      = require('aws-sdk');
var config = require('./config');
var s3 = new AWS.S3({
    accessKeyId: config.get('S3_ACCESS_KEY'),
    secretAccessKey: config.get('S3_SECRET_KEY'),
    apiVersion: '2006-03-01'
});
var s3Stream = require('s3-upload-stream')(s3);
var bucket = 'bucket-name';
var key = 'abcdefgh';


http.createServer(function(req, res) {

    if (req.url == '/upload' && req.method.toLowerCase() == 'post') {

        var form = new formidable.IncomingForm();
        form.on('progress', function(bytesReceived, bytesExpected) {
            //console.log('onprogress', parseInt( 100 * bytesReceived / bytesExpected ), '%');
        });

        form.on('error', function(err) {
            console.log('err',err);
        });

        // This 'end' is for the client to finish uploading
        // upload.on('uploaded') is when the uploading is
        // done on AWS S3
        form.on('end', function() {
            console.log('ended!!!!', arguments);
        });

        form.on('aborted', function() {
            console.log('aborted', arguments);
        });

        form.onPart = function(part) {
            console.log('part',part);
            // part looks like this
            //    {
            //        readable: true,
            //        headers:
            //        {
            //            'content-disposition': 'form-data; name="upload"; filename="00video38.mp4"',
            //            'content-type': 'video/mp4'
            //        },
            //        name: 'upload',
            //            filename: '00video38.mp4',
            //        mime: 'video/mp4',
            //        transferEncoding: 'binary',
            //        transferBuffer: ''
            //    }

            var start = new Date().getTime();
            var upload = s3Stream.upload({
                "Bucket": bucket,
                "Key": part.filename
            });

            // Optional configuration
            //upload.maxPartSize(20971520); // 20 MB
            upload.concurrentParts(5);

            // Handle errors.
            upload.on('error', function (error) {
                console.log('errr',error);
            });
            upload.on('part', function (details) {
                console.log('part',details);
            });
            upload.on('uploaded', function (details) {
                var end = new Date().getTime();
                console.log('it took',end-start);
                console.log('uploaded',details);
            });

            // Maybe you could add compress like
            // part.pipe(compress).pipe(upload)
            part.pipe(upload);
        };

        form.parse(req, function(err, fields, files) {
            res.writeHead(200, {'content-type': 'text/plain'});
            res.write('received upload:\n\n');
            res.end(util.inspect({fields: fields, files: files}));
        });
        return;
    }

    // show a file upload form
    res.writeHead(200, {'content-type': 'text/html'});
    res.end(
        '<form action="/upload" enctype="multipart/form-data" method="post">'+
        '<input type="text" name="title"><br>'+
        '<input type="file" name="upload" multiple="multiple"><br>'+
        '<input type="submit" value="Upload">'+
        '</form>'
    );
}).listen(8080);
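
Design note: s3-upload-stream wraps S3's multipart upload API. It buffers the incoming part stream into chunks (5 MB by default, the minimum S3 allows for a multipart part, adjustable with maxPartSize()) and uploads up to concurrentParts() of them in parallel, which is why the total file size never needs to be known up front.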



Answer 3:


Since this post is so old, and I believe direct streaming is now supported, I spent a lot of time reading out-of-date answers on this topic...

If it helps anyone: I was able to stream from the client to S3 directly without needing to install extra packages:

https://gist.github.com/mattlockyer/532291b6194f6d9ca40cb82564db9d2a

The server assumes req is a stream object; in my case a File object was passed to xhr.send(), which sends binary data in modern browsers (a minimal client-side sketch follows the server handler below).

const fileUploadStream = (req, res) => {
  //get "body" args from header
  const { id, fn } = JSON.parse(req.get('body'));
  const Key = id + '/' + fn; //upload to s3 folder "id" with filename === fn
  const params = {
    Key,
    Bucket: bucketName, //set somewhere
    Body: req, //req is a stream
  };
  s3.upload(params, (err, data) => {
    if (err) {
      res.send('Error Uploading Data: ' + JSON.stringify(err) + '\n' + JSON.stringify(err.stack));
    } else {
      res.send(Key);
    }
  });
};
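
For context, a minimal client-side sketch of the kind of XHR call the handler above expects. The uploadFile wrapper and the /upload-stream route are assumptions; only the JSON-encoded 'body' header and the raw File body follow the gist's convention:

// file is a File object, e.g. from an <input type="file"> element
function uploadFile(file, id) {
  var xhr = new XMLHttpRequest();
  xhr.open('POST', '/upload-stream'); // hypothetical route mounted on fileUploadStream
  // The server reads id and fn from a JSON-encoded 'body' request header
  xhr.setRequestHeader('body', JSON.stringify({ id: id, fn: file.name }));
  xhr.onload = function() { console.log('stored under key', xhr.responseText); };
  xhr.onerror = function() { console.error('upload failed'); };
  xhr.send(file); // the browser streams the file's binary data as the request body
}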

Yes, it breaks convention, but if you look at the gist it's much cleaner than anything else I found that relies on other packages.

+1 for pragmatism and thanks to @SalehenRahman for his help.




Answer 4:


Try adding ContentType to the upload params (https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#upload-property):

...
  const params = {
      Key,
      Bucket: bucketName,
      Body: req,
      ContentType: 'image/jpg'
  };
  s3.upload(params, (err, data) => {
      if (err) return err;
      console.log(data);
  });
...
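
If the content type isn't known ahead of time, one option (a sketch, assuming the client lets the browser set Content-Type on the XHR request, as send(file) normally does) is to forward the incoming header instead of hardcoding it:

  const params = {
      Key,
      Bucket: bucketName,
      Body: req,
      ContentType: req.headers['content-type'] || 'application/octet-stream'
  };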


Source: https://stackoverflow.com/questions/17309559/stream-uploading-file-to-s3-on-node-js-using-formidable-and-knox-or-aws-sdk
