I'm wondering what would be the best way to check whether a file is binary or ASCII with Node.js?
There appear to be two ways that are not specific to Node.js:
ASCII defines characters 0-127, so if a file's entire contents are byte values in that range then it can be considered an ASCII file.
/**
 * Reports whether every byte of a file lies in the 7-bit ASCII range [0, 127].
 *
 * The whole file is read into memory as a raw Buffer (no encoding), so each
 * element inspected is a single octet.
 *
 * @param {string} filename - Path of the file to inspect.
 * @param {function(boolean, Error=): void} callback - Called once with
 *   `isAscii` (true iff all octets are in [0, 127]; an empty file counts as
 *   ASCII). If the file could not be read, `isAscii` is `false` and the
 *   read error is passed as the second argument; on success the second
 *   argument is omitted.
 */
function fileIsAscii(filename, callback) {
  // Read the file with no encoding for raw buffer access.
  require('fs').readFile(filename, (err, buf) => {
    if (err) {
      // Throwing here would happen inside the asynchronous readFile callback,
      // where the caller has no way to catch it; hand the error over instead.
      callback(false, err);
      return;
    }
    // true iff all octets are in [0, 127].
    callback(buf.every((octet) => octet <= 127));
  });
}
// Example calls: the comments show the value the answer expects for each path.
fileIsAscii('/usr/share/dict/words', (isAscii) => { /* isAscii === true */ });
fileIsAscii('/bin/ls', (isAscii) => { /* isAscii === false */ });
If performance is critical then consider writing a custom C++ function per your linked answer.
I came here from Google, but as I couldn't find a satisfactory answer, I took another approach that works for me:
// Classify a decoded string by whether it contains U+FFFD, the Unicode
// "replacement character".
const string_to_test = "I am just a piece of text";
//const binary_to_test = "��˰!1�H��1�1����!H�=u�!�";
const replacementCharPattern = /\ufffd/;
const looksBinary = replacementCharPattern.test(string_to_test);
console.log(looksBinary ? "I'm 'binary'" : "I'm proper text");
How does it work? If you try to open binary data in the normal way (without using a hex editor), it will run into rendering problems, which show up as a succession of this odd character �, called the "replacement character" (U+FFFD).
Thanks to the comments on this question by David Schwartz, I created istextorbinary to solve this problem.