Question
I'm trying to stream upload a file submitted via a form directly to an Amazon S3 bucket, using aws-sdk or knox. Form handling is done with formidable.
My question is: how do I properly use formidable with aws-sdk (or knox) using each of these libraries' latest features for handling streams?
I'm aware that this topic has already been asked here in different flavors, e.g.:
- How to receive an uploaded file using node.js formidable library and save it to Amazon S3 using knox?
- node application stream file upload directly to amazon s3
- Accessing the raw file stream from a node-formidable file upload (and its very useful accepted answer on overriding form.onPart())
However, I believe the answers are a bit outdated and/or off topic (e.g. CORS support, which I don't wish to use for now for various reasons) and, most importantly, make no reference to the latest features of either aws-sdk (see: https://github.com/aws/aws-sdk-js/issues/13#issuecomment-16085442) or knox (notably putStream() or its readableStream.pipe(req) variant, both explained in the docs).
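For reference, knox's pipe variant looks roughly like the sketch below (adapted from the pattern in the knox README); the client configuration values and the readableStream/filename/fileSize/mimeType variables are placeholders, not working code from this question:
var knox = require('knox');
// Sketch of knox's "readableStream.pipe(req)" variant.
// Note that Content-Length still has to be known before piping starts.
var knoxS3client = knox.createClient({
  key: process.env.AWS_ACCESS_KEY_ID,
  secret: process.env.AWS_SECRET_ACCESS_KEY,
  bucket: 'mybucket',
  region: 'eu-west-1'
});
function pipeToS3(readableStream, filename, fileSize, mimeType) {
  var s3req = knoxS3client.put('/' + filename, {
    'Content-Length': fileSize,
    'Content-Type': mimeType
  });
  s3req.on('response', function(s3res) {
    console.log('S3 answered with status', s3res.statusCode);
  });
  readableStream.pipe(s3req);
}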
After hours of struggling, I came to the conclusion that I needed some help (disclaimer: I'm quite a newbie with streams).
HTML form:
<form action="/uploadPicture" method="post" enctype="multipart/form-data">
  <input name="picture" type="file" accept="image/*">
  <input type="submit">
</form>
Express bodyParser middleware is configured this way:
app.use(express.bodyParser({defer: true}))
POST request handler:
uploadPicture = (req, res, next) ->
  form = new formidable.IncomingForm()
  form.parse(req)

  form.onPart = (part) ->
    if not part.filename
      # Let formidable handle all non-file parts (fields)
      form.handlePart(part)
    else
      handlePart(part, form.bytesExpected)

handlePart = (part, fileSize) ->
  # aws-sdk version
  params =
    Bucket: "mybucket"
    Key: part.filename
    ContentLength: fileSize
    Body: part # passing stream object as body parameter

  awsS3client.putObject(params, (err, data) ->
    if err
      console.log err
    else
      console.log data
  )
However, I'm getting the following error:
{ [RequestTimeout: Your socket connection to the server was not read from or written to within the timeout period. Idle connections will be closed.]
  message: 'Your socket connection to the server was not read from or written to within the timeout period. Idle connections will be closed.',
  code: 'RequestTimeout',
  name: 'RequestTimeout',
  statusCode: 400,
  retryable: false }
A knox version of the handlePart() function, tailored this way, also fails miserably:
handlePart = (part, fileSize) ->
  headers =
    "Content-Length": fileSize
    "Content-Type": part.mime

  knoxS3client.putStream(part, part.filename, headers, (err, res) ->
    if err
      console.log err
    else
      console.log res
  )
I also get a big res object with a 400 statusCode somewhere.
Region is configured to eu-west-1 in both cases.
Additional notes:
node 0.10.12
latest formidable from npm (1.0.14)
latest aws-sdk from npm (1.3.1)
latest knox from npm (0.8.3)
Answer 1:
Well, according to the creator of Formidable, direct streaming to Amazon S3 is impossible:
The S3 API requires you to provide the size of new files when creating them. This information is not available for multipart/form-data files until they have been fully received. This means streaming is impossible.
Indeed, form.bytesExpected refers to the size of the whole form, not the size of a single file.
The data must therefore either hit the memory or the disk on the server first before being uploaded to S3.
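For illustration, here is a minimal, untested sketch of that buffer-to-disk approach: formidable writes the file to a temporary path, and only then is it uploaded with a ContentLength that is actually known. The bucket name and form field name are reused from the question; the awsS3client configuration is an assumption.
var fs = require('fs');
var formidable = require('formidable');
var AWS = require('aws-sdk');

var awsS3client = new AWS.S3({ region: 'eu-west-1' });

function uploadPicture(req, res, next) {
  var form = new formidable.IncomingForm();
  form.parse(req, function(err, fields, files) {
    if (err) return next(err);
    var file = files.picture; // field name from the HTML form above
    awsS3client.putObject({
      Bucket: 'mybucket',
      Key: file.name,
      ContentLength: file.size,             // known once the file is on disk
      Body: fs.createReadStream(file.path)  // stream the temp file, not the request
    }, function(err, data) {
      if (err) return next(err);
      res.send(data);
    });
  });
}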
Answer 2:
Using AWS S3's multipart upload (via the s3-upload-stream module), which does not need to know the total object size in advance, together with node-formidable's readable part stream, you can pipe the upload like this:
var formidable = require('formidable');
var http = require('http');
var util = require('util');
var AWS = require('aws-sdk');
var config = require('./config');

var s3 = new AWS.S3({
  accessKeyId: config.get('S3_ACCESS_KEY'),
  secretAccessKey: config.get('S3_SECRET_KEY'),
  apiVersion: '2006-03-01'
});
var s3Stream = require('s3-upload-stream')(s3);

var bucket = 'bucket-name';
var key = 'abcdefgh';

http.createServer(function(req, res) {
  if (req.url == '/upload' && req.method.toLowerCase() == 'post') {
    var form = new formidable.IncomingForm();

    form.on('progress', function(bytesReceived, bytesExpected) {
      //console.log('onprogress', parseInt( 100 * bytesReceived / bytesExpected ), '%');
    });

    form.on('error', function(err) {
      console.log('err', err);
    });

    // This 'end' is for the client to finish uploading;
    // upload.on('uploaded') is when the uploading is
    // done on AWS S3
    form.on('end', function() {
      console.log('ended!!!!', arguments);
    });

    form.on('aborted', function() {
      console.log('aborted', arguments);
    });

    form.onPart = function(part) {
      console.log('part', part);
      // part looks like this
      // {
      //   readable: true,
      //   headers:
      //   {
      //     'content-disposition': 'form-data; name="upload"; filename="00video38.mp4"',
      //     'content-type': 'video/mp4'
      //   },
      //   name: 'upload',
      //   filename: '00video38.mp4',
      //   mime: 'video/mp4',
      //   transferEncoding: 'binary',
      //   transferBuffer: ''
      // }

      var start = new Date().getTime();

      var upload = s3Stream.upload({
        "Bucket": bucket,
        "Key": part.filename
      });

      // Optional configuration
      //upload.maxPartSize(20971520); // 20 MB
      upload.concurrentParts(5);

      // Handle errors.
      upload.on('error', function (error) {
        console.log('errr', error);
      });

      upload.on('part', function (details) {
        console.log('part', details);
      });

      upload.on('uploaded', function (details) {
        var end = new Date().getTime();
        console.log('it took', end - start);
        console.log('uploaded', details);
      });

      // Maybe you could add compress like
      // part.pipe(compress).pipe(upload)
      part.pipe(upload);
    };

    form.parse(req, function(err, fields, files) {
      res.writeHead(200, {'content-type': 'text/plain'});
      res.write('received upload:\n\n');
      res.end(util.inspect({fields: fields, files: files}));
    });
    return;
  }

  // show a file upload form
  res.writeHead(200, {'content-type': 'text/html'});
  res.end(
    '<form action="/upload" enctype="multipart/form-data" method="post">' +
    '<input type="text" name="title"><br>' +
    '<input type="file" name="upload" multiple="multiple"><br>' +
    '<input type="submit" value="Upload">' +
    '</form>'
  );
}).listen(8080);
Answer 3:
This post is quite old and, I believe, streaming directly is now supported, yet I still spent a lot of time reading out-of-date answers on this topic...
If it helps anyone: I was able to stream from the client directly to S3 without installing extra packages:
https://gist.github.com/mattlockyer/532291b6194f6d9ca40cb82564db9d2a
The server assumes req is a stream object; in my case, a File object was passed to xhr.send(), which sends binary data in modern browsers.
const fileUploadStream = (req, res) => {
  // get "body" args from header
  const { id, fn } = JSON.parse(req.get('body'));
  const Key = id + '/' + fn; // upload to s3 folder "id" with filename === fn
  const params = {
    Key,
    Bucket: bucketName, // set somewhere
    Body: req, // req is a stream
  };
  s3.upload(params, (err, data) => {
    if (err) {
      res.send('Error Uploading Data: ' + JSON.stringify(err) + '\n' + JSON.stringify(err.stack));
    } else {
      res.send(Key);
    }
  });
};
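For context, a minimal client-side sketch matching the assumptions above (a File object sent via xhr.send(), metadata in the custom 'body' header the server reads with req.get('body')); the endpoint and function names here are illustrative, not taken from the gist:
// Sketch: send a File object as the raw request body (modern browsers).
function uploadFile(file, id) {
  var xhr = new XMLHttpRequest();
  xhr.open('POST', '/upload');
  xhr.setRequestHeader('body', JSON.stringify({ id: id, fn: file.name }));
  xhr.onload = function() {
    console.log('uploaded to S3 key:', xhr.responseText);
  };
  xhr.send(file); // the browser sends the file's binary data as the request body
}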
Yes, it breaks convention, but if you look at the gist it's much cleaner than anything else I found that relies on other packages.
+1 for pragmatism and thanks to @SalehenRahman for his help.
Answer 4:
Try adding ContentType to the upload params (https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#upload-property):
...
const params = {
  Key,
  Bucket: bucketName,
  Body: req,
  ContentType: 'image/jpg'
};

s3.upload(params, (err, data) => {
  if (err) return err;
  console.log(data);
});
...
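If the handler receives more than one file type, the content type could be derived from the uploaded file name rather than hard-coded. A small sketch using the mime-types package (an assumption, not part of the original answer), reusing the fn variable from Answer 3:
const mime = require('mime-types');

// Resolve the Content-Type from the file name, with a binary fallback.
const ContentType = mime.lookup(fn) || 'application/octet-stream';

const params = {
  Key,
  Bucket: bucketName,
  Body: req,
  ContentType
};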
Source: https://stackoverflow.com/questions/17309559/stream-uploading-file-to-s3-on-node-js-using-formidable-and-knox-or-aws-sdk