Node.js: Count the number of lines in a file

前端 未结 10 623
一向
一向 2020-12-02 23:02

I have large text files, which range between 30MB and 10GB. How can I count the number of lines in a file using Node.js?

I hav

相关标签:
10条回答
  • 2020-12-02 23:23

    There is an npm module called count-lines-in-file. I've been using it for smallish (<1000 lines) files and it's worked great so far.

    0 讨论(0)
  • 2020-12-02 23:26

    Here is another way without so much nesting.

    const fs = require('fs');

    // Synchronously counts the newline bytes in the file named by the first
    // CLI argument and prints the total (the same number the split-based
    // approach yields: pieces - 1 === number of "\n" separators).
    //
    // The file is read through one fixed-size buffer instead of being loaded,
    // decoded and split in one go, so memory use stays constant and files in
    // the multi-gigabyte range do not run into Buffer/string size limits.
    const filePath = process.argv[2];
    const NEWLINE = 0x0a; // byte value of "\n"
    const chunk = Buffer.alloc(64 * 1024);
    const fd = fs.openSync(filePath, 'r');
    let lineCount = 0;
    try {
        let bytesRead;
        // position === null: each call continues from the current file offset.
        while ((bytesRead = fs.readSync(fd, chunk, 0, chunk.length, null)) > 0) {
            // Only the first bytesRead bytes are fresh; the rest is stale data.
            for (let i = 0; i < bytesRead; i++) {
                if (chunk[i] === NEWLINE) lineCount++;
            }
        }
    } finally {
        // Release the descriptor even if a read throws.
        fs.closeSync(fd);
    }
    console.log(lineCount);
    
    0 讨论(0)
  • 2020-12-02 23:26

    The best solution I've found uses promises, async, and await. It is also an example of how to await the fulfillment of a promise:

    #!/usr/bin/env node
    const fs = require('fs');
    const readline = require('readline');

    /**
     * Reads a file line by line, echoing every line prefixed with its
     * zero-based index, and reports how many lines were seen.
     *
     * @param {string} filePath - Path of the file to read.
     * @returns {Promise<number>} Resolves with the number of lines once the
     *   reader closes; rejects if the underlying read stream emits 'error'.
     */
    function doRead(filePath) {
        return new Promise((resolve, reject) => {
            const input = fs.createReadStream(filePath);
            const reader = readline.createInterface({
                input,
                crlfDelay: Infinity
            });
            let count = 0;
            // Without an 'error' listener an unreadable file would surface as
            // an unhandled 'error' event and the promise would never settle.
            input.on('error', (err) => {
                // Reject before closing: close() emits 'close', whose handler
                // would otherwise resolve the promise first.
                reject(err);
                reader.close();
            });
            reader.on('line', (line) => {
                console.log(count + ' ' + line);
                count += 1;
            });
            reader.on('close', () => resolve(count));
        });
    }

    /**
     * Entry point: awaits the line count and prints it.
     * Reads the file given as the first CLI argument, falling back to
     * 'async.js' when no argument is supplied.
     */
    async function main() {
        const filePath = process.argv[2] || 'async.js';
        try {
            const lineCount = await doRead(filePath);
            console.log('line count: ' + lineCount);
        } catch (err) {
            console.error(`failed to read ${filePath}: ${err.message}`);
            process.exitCode = 1;
        }
    }
    main();
    
    0 讨论(0)
  • 2020-12-02 23:29

    Since io.js 1.5.0 there is a Buffer#indexOf() method. Here is a comparison between a version that uses it and Andrey Sidorov's answer:

    ubuntu@server:~$ wc logs
      7342500  27548750 427155000 logs
    ubuntu@server:~$ time wc -l logs 
    7342500 logs
    
    real    0m0.180s
    user    0m0.088s
    sys 0m0.084s
    ubuntu@server:~$ nvm use node
    Now using node v0.12.1
    ubuntu@server:~$ time node countlines.js logs 
    7342500
    
    real    0m2.559s
    user    0m2.200s
    sys 0m0.340s
    ubuntu@server:~$ nvm use iojs
    Now using node iojs-v1.6.2
    ubuntu@server:~$ time iojs countlines2.js logs 
    7342500
    
    real    0m1.363s
    user    0m0.920s
    sys 0m0.424s
    ubuntu@server:~$ cat countlines.js 
    // Streams the file named by the first CLI argument, counts the elements
    // equal to 10 (the byte value of '\n') and prints the total at the end.
    // Loop index; declared at file scope and reused by every 'data' callback.
    var i;
    var count = 0;
    require('fs').createReadStream(process.argv[2])
      .on('data', function(chunk) {
        // Inspect every element of the chunk; each LF bumps the counter.
        for (i=0; i < chunk.length; ++i)
          if (chunk[i] == 10) count++;
      })
      .on('end', function() {
        // No more chunks will arrive: report the total.
        console.log(count);
      });
    ubuntu@server:~$ cat countlines2.js 
    // Streams the file named by the first CLI argument and counts occurrences
    // of 10 (the byte value of '\n') using chunk.indexOf() instead of an
    // element-by-element loop; prints the total at the end.
    // NOTE(review): `i` is declared but never used in this variant.
    var i;
    var count = 0;
    require('fs').createReadStream(process.argv[2])
      .on('data', function(chunk) {
        // Start before the first element; each search resumes one position
        // past the previous hit and stops once indexOf() returns -1.
        var index = -1;
        while((index = chunk.indexOf(10, index + 1)) > -1) count++
      })
      .on('end', function() {
        // No more chunks will arrive: report the total.
        console.log(count);
      });
    ubuntu@server:~$ 
    
    0 讨论(0)
  • 2020-12-02 23:32

    You could do this with wc, as the comments suggest:

    const { execFile } = require('child_process');

    // Ask `wc -l` for the line count only. execFile passes the path as an
    // argument vector, so it is never interpreted by a shell.
    execFile('wc', ['-l', '/path/to/file'], function (error, stdout) {
        if (error) {
            // wc is missing, or it could not read the file.
            console.error(error.message);
            return;
        }
        // Output looks like "<count> <path>" (possibly with leading spaces);
        // parseInt skips the leading whitespace and stops after the digits.
        console.log(Number.parseInt(stdout, 10));
    });
    
    0 讨论(0)
  • 2020-12-02 23:36

    A solution without using wc:

    // Streams the file named by the first CLI argument and prints how many
    // newline bytes it contains. Like `wc -l`, a final line that lacks a
    // trailing newline is not counted.
    const NEWLINE = 10; // byte value of '\n'
    let count = 0;
    require('fs').createReadStream(process.argv[2])
      .on('data', function(chunk) {
        // Plain index loop over the chunk's bytes.
        for (let i = 0; i < chunk.length; ++i) {
          if (chunk[i] === NEWLINE) count++;
        }
      })
      .on('error', function(err) {
        // A missing or unreadable file would otherwise crash the process
        // with an unhandled 'error' event.
        console.error(err.message);
        process.exitCode = 1;
      })
      .on('end', function() {
        console.log(count);
      });
    

    It's slower than wc, but not by as much as you might expect: 0.6s for a 140MB+ file, including Node.js loading and startup time.

    >time node countlines.js video.mp4 
    619643
    
    real    0m0.614s
    user    0m0.489s
    sys 0m0.132s
    
    >time wc -l video.mp4 
    619643 video.mp4
    real    0m0.133s
    user    0m0.108s
    sys 0m0.024s
    
    >wc -c video.mp4
    144681406  video.mp4
    
    0 讨论(0)
提交回复
热议问题