I have large text files, which range between 30MB
and 10GB
. How can I count the number of lines in a file using Node.js
?
I hav
There is an npm module called count-lines-in-file. I've been using it for smallish (<1000 lines) files and it's worked great so far.
Here is another way without so much nesting.
var fs = require('fs');
filePath = process.argv[2];
fileBuffer = fs.readFileSync(filePath);
to_string = fileBuffer.toString();
split_lines = to_string.split("\n");
console.log(split_lines.length-1);
Best solution I've found is using promises, async, and await. This is also an example of how await for the fulfillment of a promise:
#!/usr/bin/env node
const fs = require('fs');
const readline = require('readline');
function main() {
function doRead() {
return new Promise(resolve => {
var inf = readline.createInterface({
input: fs.createReadStream('async.js'),
crlfDelay: Infinity
});
var count = 0;
inf.on('line', (line) => {
console.log(count + ' ' + line);
count += 1;
});
inf.on('close', () => resolve(count));
});
}
async function showRead() {
var x = await doRead();
console.log('line count: ' + x);
}
showRead();
}
main();
since iojs 1.5.0 there is Buffer#indexOf()
method, using it to compare to Andrey Sidorov' answer:
ubuntu@server:~$ wc logs
7342500 27548750 427155000 logs
ubuntu@server:~$ time wc -l logs
7342500 logs
real 0m0.180s
user 0m0.088s
sys 0m0.084s
ubuntu@server:~$ nvm use node
Now using node v0.12.1
ubuntu@server:~$ time node countlines.js logs
7342500
real 0m2.559s
user 0m2.200s
sys 0m0.340s
ubuntu@server:~$ nvm use iojs
Now using node iojs-v1.6.2
ubuntu@server:~$ time iojs countlines2.js logs
7342500
real 0m1.363s
user 0m0.920s
sys 0m0.424s
ubuntu@server:~$ cat countlines.js
var i;
var count = 0;
require('fs').createReadStream(process.argv[2])
.on('data', function(chunk) {
for (i=0; i < chunk.length; ++i)
if (chunk[i] == 10) count++;
})
.on('end', function() {
console.log(count);
});
ubuntu@server:~$ cat countlines2.js
var i;
var count = 0;
require('fs').createReadStream(process.argv[2])
.on('data', function(chunk) {
var index = -1;
while((index = chunk.indexOf(10, index + 1)) > -1) count++
})
.on('end', function() {
console.log(count);
});
ubuntu@server:~$
You could do this as the comments suggest using wc
var exec = require('child_process').exec;
exec('wc /path/to/file', function (error, results) {
console.log(results);
});
solution without using wc:
var i;
var count = 0;
require('fs').createReadStream(process.argv[2])
.on('data', function(chunk) {
for (i=0; i < chunk.length; ++i)
if (chunk[i] == 10) count++;
})
.on('end', function() {
console.log(count);
});
it's slower, but not that much you might expect - 0.6s for 140M+ file including node.js loading & startup time
>time node countlines.js video.mp4
619643
real 0m0.614s
user 0m0.489s
sys 0m0.132s
>time wc -l video.mp4
619643 video.mp4
real 0m0.133s
user 0m0.108s
sys 0m0.024s
>wc -c video.mp4
144681406 video.mp4