I created an upload script in node.js using express/formidable. It basically works, but I am wondering where and when to check the uploaded file e. g. for the maximum file s
With help from some guys at the node IRC and the node mailing list, here is what I do:
I am using formidable to handle the file upload. Using the progress
event I can check the maximum filesize like this:
form.on('progress', function(bytesReceived, bytesExpected) {
if (bytesReceived > MAX_UPLOAD_SIZE) {
console.log('### ERROR: FILE TOO LARGE');
}
});
Reliably checking the mimetype is much more difficult. The basic Idea is to use the progress
event, then if enough of the file is uploaded use a file --mime-type
call and check the output of that external command. Simplified it looks like this:
// contains the path of the uploaded file,
// is grabbed in the fileBegin event below
var tmpPath;
form.on('progress', function validateMimetype(bytesReceived, bytesExpected) {
var percent = (bytesReceived / bytesExpected * 100) | 0;
// pretty basic check if enough bytes of the file are written to disk,
// might be too naive if the file is small!
if (tmpPath && percent > 25) {
var child = exec('file --mime-type ' + tmpPath, function (err, stdout, stderr) {
var mimetype = stdout.substring(stdout.lastIndexOf(':') + 2, stdout.lastIndexOf('\n'));
console.log('### file CALL OUTPUT', err, stdout, stderr);
if (err || stderr) {
console.log('### ERROR: MIMETYPE COULD NOT BE DETECTED');
} else if (!ALLOWED_MIME_TYPES[mimetype]) {
console.log('### ERROR: INVALID MIMETYPE', mimetype);
} else {
console.log('### MIMETYPE VALIDATION COMPLETE');
}
});
form.removeListener('progress', validateMimetype);
}
});
form.on('fileBegin', function grabTmpPath(_, fileInfo) {
if (fileInfo.path) {
tmpPath = fileInfo.path;
form.removeListener('fileBegin', grabTmpPath);
}
});
The new version of Connect (2.x.) has this already baked into the bodyParser
using the limit middleware: https://github.com/senchalabs/connect/blob/master/lib/middleware/multipart.js#L44-61
I think it's much better this way as you just kill the request when it exceeds the maximum limit instead of just stopping the formidable parser (and letting the request "go on").
More about the limit middleware: http://www.senchalabs.org/connect/limit.html