Read a file one line at a time in node.js?


I am trying to read a large file one line at a time. I found a question on Quora that dealt with the subject, but I'm missing some connections to make the whole thing fit together.

29 answers
  • 2020-11-22 05:01

    I ended up with a massive, massive memory leak using Lazy to read line by line when I then tried to process those lines and write them to another stream, due to the way drain/pause/resume works in node (see: http://elegantcode.com/2011/04/06/taking-baby-steps-with-node-js-pumping-data-between-streams/ (I love this guy, btw)). I haven't looked closely enough at Lazy to understand exactly why, but I couldn't pause my read stream to allow for a drain without Lazy exiting.

    I wrote code to process massive CSV files into XML docs; you can see it here: https://github.com/j03m/node-csv2xml

    If you run the earlier revisions, which read lines with Lazy, they leak. The latest revision doesn't leak at all, and you can probably use it as the basis for a reader/processor, though I have some custom stuff in there.

    Edit: I should also note that my code with Lazy worked fine until I found myself writing large enough XML fragments that drain/pause/resume became a necessity. For smaller chunks it was fine.
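
    For reference, the drain/pause/resume pattern the linked post discusses looks roughly like this (a sketch; the file names and the transform() helper are placeholders, not code from the project above):

    var fs = require('fs');
    var input = fs.createReadStream('big.csv');
    var output = fs.createWriteStream('out.xml');

    input.on('data', function (chunk) {
        // write() returns false when the destination buffer is full
        if (!output.write(transform(chunk))) input.pause();
    });
    output.on('drain', function () {
        input.resume(); // destination flushed; safe to keep reading
    });
    input.on('end', function () { output.end(); });

    function transform(chunk) { return chunk; } // stand-in for real processing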

  • 2020-11-22 05:01

    Edit:

    Use a transform stream.
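
    For example, a minimal line-splitting transform might look like this (a sketch using the built-in stream module; the LineSplitter name is mine, not a class from any package):

    const { Transform } = require('stream');

    class LineSplitter extends Transform {
        constructor() {
            // decodeStrings: false keeps incoming string chunks as strings;
            // readableObjectMode: true emits each pushed line as a discrete chunk.
            super({ decodeStrings: false, readableObjectMode: true });
            this.leftover = '';
        }
        _transform(chunk, encoding, callback) {
            const lines = (this.leftover + chunk).split('\n');
            this.leftover = lines.pop(); // carry the incomplete tail line
            for (const line of lines) this.push(line);
            callback();
        }
        _flush(callback) {
            if (this.leftover) this.push(this.leftover); // final unterminated line
            callback();
        }
    }

    require('fs').createReadStream('file.txt', 'utf8')
        .pipe(new LineSplitter())
        .on('data', (line) => console.log(line));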


    With a BufferedReader you can read lines.

    // Assumes the third-party "buffered-reader" npm package; older versions
    // exported BufferedReader directly (newer ones expose DataReader instead).
    var BufferedReader = require ("buffered-reader");

    new BufferedReader ("lorem ipsum", { encoding: "utf8" })
        .on ("error", function (error){
            console.log ("error: " + error);
        })
        .on ("line", function (line){
            console.log ("line: " + line);
        })
        .on ("end", function (){
            console.log ("EOF");
        })
        .read ();
    
  • 2020-11-22 05:01

    I was frustrated by the lack of a comprehensive solution for this, so I put together my own attempt (git / npm). Copy-pasted list of features:

    • Interactive line processing (callback-based, no loading the entire file into RAM)
    • Optionally, return all lines in an array (detailed or raw mode)
    • Interactively interrupt streaming, or perform map/filter like processing
    • Detect any newline convention (PC/Mac/Linux)
    • Correct eof / last line treatment
    • Correct handling of multi-byte UTF-8 characters
    • Retrieve byte offset and byte length information on per-line basis
    • Random access, using line-based or byte-based offsets
    • Automatically map line-offset information, to speed up random access
    • Zero dependencies
    • Tests

    NIH? You decide :-)
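
    One of the listed features, detecting any newline convention, comes down to splitting on a pattern instead of a fixed delimiter; a minimal illustration (not the package's actual code):

    // Matches Windows (\r\n), old-Mac (\r) and Unix (\n) line endings.
    const lines = 'a\r\nb\rc\nd'.split(/\r\n|\r|\n/);
    console.log(lines); // [ 'a', 'b', 'c', 'd' ]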

  • 2020-11-22 05:01

    While you should probably use the readline module, as the top answer suggests, readline appears to be oriented toward command-line interfaces rather than line reading. It's also a bit more opaque about buffering (anyone who needs a streaming line-oriented reader will probably want to tweak buffer sizes). The readline module is ~1000 lines, while this, with stats and tests, is a few dozen (for comparison, a minimal readline sketch appears at the end of this answer).

    const EventEmitter = require('events').EventEmitter;
    class LineReader extends EventEmitter{
        constructor(f, delim='\n'){
            super();
            this.totalChars = 0;
            this.totalLines = 0;
            this.leftover = '';

            f.on('data', (chunk)=>{
                this.totalChars += chunk.length;
                let lines = chunk.split(delim);
                if (lines.length === 1){
                    // No delimiter in this chunk; keep accumulating.
                    this.leftover += chunk;
                    return;
                }
                lines[0] = this.leftover + lines[0];
                // The last element is an incomplete line ('' if the chunk
                // ended exactly on a delimiter); carry it to the next chunk.
                this.leftover = lines.pop();
                this.totalLines += lines.length;
                for (let l of lines) this.onLine(l);
            });
            f.on('error', (e)=> this.emit('error', e));
            f.on('end', ()=>{
                // Flush a final line that has no trailing delimiter.
                if (this.leftover){
                    this.totalLines++;
                    this.onLine(this.leftover);
                }
                console.log('chars', this.totalChars, 'lines', this.totalLines);
            });
        }
        onLine(l){
            this.emit('line', l);
        }
    }
    //Command line test
    const f = require('fs').createReadStream(process.argv[2], 'utf8');
    const delim = process.argv[3];
    const lineReader = new LineReader(f, delim);
    lineReader.on('line', (line)=> console.log(line));
    

    Here's an even shorter version, without the stats:

    class LineReader extends require('events').EventEmitter{
        constructor(f, delim='\n'){
            super();
            this.leftover = '';
            f.on('data', (chunk)=>{
                let lines = chunk.split(delim);
                if (lines.length === 1){
                    this.leftover += chunk;
                    return;
                }
                lines[0] = this.leftover + lines[0];
                this.leftover = lines.pop(); // incomplete tail line
                for (let l of lines)
                    this.emit('line', l);
            });
            f.on('end', ()=>{
                // Flush the last line if the file doesn't end with a delimiter.
                if (this.leftover) this.emit('line', this.leftover);
            });
        }
    }
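
    For comparison, the built-in readline approach mentioned at the top of this answer looks like this (a minimal sketch):

    const readline = require('readline');
    const fs = require('fs');

    const rl = readline.createInterface({
        input: fs.createReadStream(process.argv[2], 'utf8'),
        crlfDelay: Infinity // treat \r\n as a single line break
    });
    rl.on('line', (line) => console.log(line));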
    
  • 2020-11-22 05:03
    function createLineReader(fileName){
        var EM = require("events").EventEmitter
        var ev = new EM()
        var stream = require("fs").createReadStream(fileName)
        var remainder = null;
        stream.on("data", function(data){
            if(remainder != null){ // prepend the leftover bytes from the previous chunk
                data = Buffer.concat([remainder, data]) // new Buffer() is deprecated
            }
            var start = 0;
            for(var i = 0; i < data.length; i++){
                if(data[i] == 10){ // 10 is the byte value of '\n'
                    var line = data.slice(start, i)
                    ev.emit("line", line)
                    start = i + 1;
                }
            }
            if(start < data.length){
                remainder = data.slice(start); // incomplete last line; keep for next chunk
            }else{
                remainder = null;
            }
        })

        stream.on("end", function(){
            // flush the final line if the file doesn't end with a newline
            if(null != remainder) ev.emit("line", remainder)
        })

        return ev
    }
    
    
    //---------main---------------
    fileName = process.argv[2]
    
    lineReader = createLineReader(fileName)
    lineReader.on("line",function(line){
        console.log(line.toString())
        //console.log("++++++++++++++++++++")
    })
    
  • 2020-11-22 05:03
    var fs = require('fs');

    function readfile(name, online, onend, encoding) {
        var bufsize = 1024;
        var buffer = Buffer.alloc(bufsize); // new Buffer() is deprecated
        var bufread = 0;
        var fd = fs.openSync(name, 'r');
        var position = 0;
        var eof = false;
        var data = "";
        var lines = 0;

        encoding = encoding || "utf8";

        // Pull the next chunk of the file into the string buffer.
        // (Note: a multi-byte character split across reads may garble.)
        function readbuf() {
            bufread = fs.readSync(fd, buffer, 0, bufsize, position);
            position += bufread;
            eof = bufread ? false : true;
            data += buffer.toString(encoding, 0, bufread);
        }

        // Emit one line per tick so other work can interleave.
        function getLine() {
            var nl = data.indexOf("\n"), hasnl = nl !== -1;
            if (!hasnl && eof) { // no newline left: emit the final line and finish
                fs.closeSync(fd);
                if (data.length) online(data, ++lines);
                return onend(lines);
            }
            if (!hasnl && !eof) { // need more data before a full line is available
                readbuf();
                nl = data.indexOf("\n");
                hasnl = nl !== -1;
            }
            if (!hasnl) return process.nextTick(getLine);
            var line = data.substr(0, nl);
            data = data.substr(nl + 1);
            if (line[line.length - 1] === "\r") line = line.slice(0, -1); // \r\n support
            online(line, ++lines);
            process.nextTick(getLine);
        }
        getLine();
    }
    

    I had the same problem and came up with the solution above. It looks similar to the others, but it's async and can read large files very quickly.
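
    Usage looks like this (a sketch; sample.txt and the callbacks are placeholders):

    readfile('sample.txt',
        function (line, lineNo) { console.log(lineNo + ': ' + line); },  // per line
        function (total) { console.log('done: ' + total + ' lines'); }); // at EOF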

    Hope this helps.
