Reading a file in real-time using Node.js

無奈伤痛 · 2020-12-05 05:30

I need to work out the best way to read data that is being written to a file, using node.js, in real time. Trouble is, Node is a fast-moving ship, which makes finding the best method for approaching a problem difficult.

4 Answers
  • 2020-12-05 06:13

    I took the answer from @hasanyasin and wrapped it up into a modular promise. The basic idea is that you pass a file and a handler function that does something with the stringified buffer read from the file. If the handler function returns true, the file stops being read. You can also set a timeout that stops reading if the handler doesn't return true fast enough.

    The promiser will resolve to true if resolve() was called due to the timeout, otherwise it will resolve to false.

    See the bottom for a usage example.

    // https://stackoverflow.com/a/11233045
    
    var fs = require('fs');
    var Promise = require('promise');
    
    class liveReaderPromiseMe {
        constructor(file, buffStringHandler, opts) {
            /*
                var opts = {
                    starting_position: 0,
                    byte_size: 256,
                    check_for_bytes_every_ms: 3000,
                    no_handler_resolution_timeout_ms: null
                };
            */
    
            if (file == null) {
                throw new Error("file arg must be present");
            } else {
                this.file = file;
            }
    
            if (buffStringHandler == null) {
                throw new Error("buffStringHandler arg must be present");
            } else {
                this.buffStringHandler = buffStringHandler;
            }
    
            if (opts == null) {
                opts = {};
            }
    
            if (opts.starting_position == null) {
                this.current_position = 0;
            } else {
                this.current_position = opts.starting_position;
            }
    
            if (opts.byte_size == null) {
                this.byte_size = 256;
            } else {
                this.byte_size = opts.byte_size;
            }
    
            if (opts.check_for_bytes_every_ms == null) {
                this.check_for_bytes_every_ms = 3000;
            } else {
                this.check_for_bytes_every_ms = opts.check_for_bytes_every_ms;
            }
    
            if (opts.no_handler_resolution_timeout_ms == null) {
                this.no_handler_resolution_timeout_ms = null;
            } else {
                this.no_handler_resolution_timeout_ms = opts.no_handler_resolution_timeout_ms;
            }
        }
    
    
        startHandlerTimeout() {
            if (this.no_handler_resolution_timeout_ms && (this._handlerTimer == null)) {
                var that = this;
                this._handlerTimer = setTimeout(
                    function() {
                        that._is_handler_timed_out = true;
                    },
                    this.no_handler_resolution_timeout_ms
                );
            }
        }
    
        clearHandlerTimeout() {
            if (this._handlerTimer != null) {
                clearTimeout(this._handlerTimer);
                this._handlerTimer = null;
            }
            this._is_handler_timed_out = false;
        }
    
        isHandlerTimedOut() {
            return !!this._is_handler_timed_out;
        }
    
    
        fsReadCallback(err, bytecount, buff) {
            try {
                if (err) {
                    throw err;
                } else {
                    this.current_position += bytecount;
                    var buff_str = buff.toString('utf-8', 0, bytecount);
    
                    var that = this;
    
                    Promise.resolve().then(function() {
                        return that.buffStringHandler(buff_str);
                    }).then(function(is_handler_resolved) {
                        if (is_handler_resolved) {
                            that.resolve(false);
                        } else {
                            process.nextTick(that.doReading.bind(that));
                        }
                    }).catch(function(err) {
                        that.reject(err);
                    });
                }
            } catch(err) {
                this.reject(err);
            }
        }
    
        fsRead(bytecount) {
            fs.read(
                this.file,
                Buffer.alloc(bytecount), // Buffer.alloc replaces the deprecated new Buffer(size)
                0,
                bytecount,
                this.current_position,
                this.fsReadCallback.bind(this)
            );
        }
    
        doReading() {
            if (this.isHandlerTimedOut()) {
                return this.resolve(true);
            } 
    
            var max_next_bytes = fs.fstatSync(this.file).size - this.current_position;
            if (max_next_bytes) {
                this.fsRead( (this.byte_size > max_next_bytes) ? max_next_bytes : this.byte_size );
            } else {
                setTimeout(this.doReading.bind(this), this.check_for_bytes_every_ms);
            }
        }
    
    
        promiser() {
            var that = this;
            return new Promise(function(resolve, reject) {
                that.resolve = resolve;
                that.reject = reject;
                that.doReading();
                that.startHandlerTimeout();
            }).then(function(was_resolved_by_timeout) {
                that.clearHandlerTimeout();
                return was_resolved_by_timeout;
            });
        }
    }
    
    
    module.exports = function(file, buffStringHandler, opts) {
        try {
            var live_reader = new liveReaderPromiseMe(file, buffStringHandler, opts);
            return live_reader.promiser();
        } catch(err) {
            return Promise.reject(err);
        }
    };
    

    Then use the above code like this:

    var fs = require('fs');
    var path = require('path');
    var Promise = require('promise');
    var liveReadAppendingFilePromiser = require('./path/to/liveReadAppendingFilePromiser');
    
    var ending_str = '_THIS_IS_THE_END_';
    var test_path = path.join('E:/tmp/test.txt');
    
    var s_list = [];
    var buffStringHandler = function(s) {
        s_list.push(s);
        var tmp = s_list.join('');
        if (-1 !== tmp.indexOf(ending_str)) {
            // if this return never occurs, then the file will be read until no_handler_resolution_timeout_ms
            // by default, no_handler_resolution_timeout_ms is null, so read will continue forever until this function returns something that evaluates to true
            return true;
            // you can also return a promise:
            //  return Promise.resolve().then(function() { return true; } );
        }
    };
    
    var appender = fs.openSync(test_path, 'a');
    try {
        var reader = fs.openSync(test_path, 'r');
        try {
            var options = {
                starting_position: 0,
                byte_size: 256,
                check_for_bytes_every_ms: 3000,
                no_handler_resolution_timeout_ms: 10000,
            };
    
            liveReadAppendingFilePromiser(reader, buffStringHandler, options)
            .then(function(did_reader_time_out) {
                console.log('reader timed out: ', did_reader_time_out);
                console.log(s_list.join(''));
            }).catch(function(err) {
                console.error('bad stuff: ', err);
            }).then(function() {
                fs.closeSync(appender);
                fs.closeSync(reader);
            });
    
            // writeSync is used here because fs.write() requires a callback argument
            fs.writeSync(appender, '\ncheck it out, I am a string');
            fs.writeSync(appender, '\nwho killed kenny');
            //fs.writeSync(appender, ending_str);
        } catch(err) {
            fs.closeSync(reader);
            console.log('err1');
            throw err;
        }
    } catch(err) {
        fs.closeSync(appender);
        console.log('err2');
        throw err;
    }
    
  • 2020-12-05 06:18

    If you want to keep the file as a persistent store of your data, to prevent losing the stream in case of a system crash or the death of one of the processes in your network, you can still continue writing to a file and reading from it.

    If you do not need this file as persistent storage for the results produced by your Java process, then going with a Unix socket is much better, both for ease of use and for performance.
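
    For illustration, here is a minimal sketch of the socket approach. This is my own illustration, not part of the original answer: the socket path '/tmp/results.sock' is made up, and both ends are shown as Node code here, even though in your setup the writer would be the Java process connecting to that same path.

    var net = require('net');

    var SOCKET_PATH = '/tmp/results.sock'; // made-up path; remove a stale socket file before re-running

    // Reader side: accept a connection and consume data as it arrives.
    var server = net.createServer(function (connection) {
        connection.setEncoding('utf-8');
        connection.on('data', function (chunk) {
            console.log('got:', chunk);
        });
    });

    server.listen(SOCKET_PATH, function () {
        console.log('reader listening on', SOCKET_PATH);

        // Writer side (in your setup this would be the Java process writing
        // to the same socket path instead of a second Node client).
        var client = net.connect(SOCKET_PATH, function () {
            setInterval(function () {
                client.write('another line of results\n');
            }, 2000);
        });
    });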

    fs.watchFile() is not what you need, because it works on file stats as the filesystem reports them, and since you want to read the file as it is being written, this is not what you want.

    SHORT UPDATE: I am sorry to realize that although I accused fs.watchFile() of relying on file stats in the previous paragraph, I did the very same thing myself in my example code below! Although I had already warned readers to "take care!" because I wrote it in just a few minutes without testing it well, it can still be done better by using fs.watch() instead of watchFile or fstatSync, if the underlying system supports it (see the sketch after the reader example below).

    For reading from and writing to a file, I have just written the snippets below for fun during my break:

    test-fs-writer.js: [You will not need this, since you write the file from your Java process]

    var fs = require('fs'),
        lineno=0;
    
    var stream = fs.createWriteStream('test-read-write.txt', {flags:'a'});
    
    stream.on('open', function() {
        console.log('Stream opened, will start writing in 2 secs');
        setInterval(function() { stream.write((++lineno)+' oi!\n'); }, 2000);
    });
    

    test-fs-reader.js: [Take care, this is just a demonstration; check the err objects!]

    var fs = require('fs'),
        bite_size = 256,
        readbytes = 0,
        file;
    
    fs.open('test-read-write.txt', 'r', function(err, fd) { file = fd; readsome(); });
    
    function readsome() {
        var stats = fs.fstatSync(file); // yes sometimes async does not make sense!
        if(stats.size<readbytes+1) {
            console.log('Hehe I am much faster than your writer..! I will sleep for a while, I deserve it!');
            setTimeout(readsome, 3000);
        }
        else {
        fs.read(file, Buffer.alloc(bite_size), 0, bite_size, readbytes, processsome); // Buffer.alloc replaces the deprecated new Buffer(size)
        }
    }
    
    function processsome(err, bytecount, buff) {
        console.log('Read', bytecount, 'and will process it now.');
    
        // Here we will process our incoming data:
            // Do whatever you need. Just be careful about not using beyond the bytecount in buff.
            console.log(buff.toString('utf-8', 0, bytecount));
    
        // So we continue reading from where we left:
        readbytes+=bytecount;
        process.nextTick(readsome);
    }
    

    You can safely skip process.nextTick and call readsome() directly instead. Since we are still working synchronously here, it is not necessary in any sense; I just like it. :p
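
    As a rough sketch of the fs.watch() variant mentioned in the update above (my addition, not part of the original answer and not heavily tested): instead of polling with fstatSync and setTimeout, let the filesystem wake the reader up. It assumes the same test-read-write.txt file as above, and that fs.watch behaves reliably on your platform, which is not guaranteed everywhere.

    var fs = require('fs'),
        bite_size = 256,
        readbytes = 0,
        reading = false,
        file;

    fs.open('test-read-write.txt', 'r', function(err, fd) {
        if (err) throw err;
        file = fd;

        // Re-check the file whenever the filesystem reports a change.
        fs.watch('test-read-write.txt', function(eventType) {
            if (eventType === 'change') readsome();
        });

        readsome(); // pick up whatever is already in the file
    });

    function readsome() {
        if (reading) return; // avoid overlapping reads from the same offset
        reading = true;
        fs.fstat(file, function(err, stats) {
            if (err) throw err;
            if (stats.size <= readbytes) { reading = false; return; } // nothing new yet
            fs.read(file, Buffer.alloc(bite_size), 0, bite_size, readbytes, function(err, bytecount, buff) {
                if (err) throw err;
                console.log(buff.toString('utf-8', 0, bytecount));
                readbytes += bytecount;
                reading = false;
                readsome(); // keep going until we catch up with the writer
            });
        });
    }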

    EDIT by Oliver Lloyd

    Taking the example above but extending it to read CSV data gives:

    var lastLineFeed,
        lineArray,
        valueArray = []; // rows collected from the CSV, for use elsewhere
    function processsome(err, bytecount, buff) {
        lastLineFeed = buff.toString('utf-8', 0, bytecount).lastIndexOf('\n');
    
        if(lastLineFeed > -1){
    
            // Split the buffer by line
            lineArray = buff.toString('utf-8', 0, bytecount).slice(0,lastLineFeed).split('\n');
    
            // Then split each line by comma
            for(var i=0;i<lineArray.length;i++){
                // Add read rows to an array for use elsewhere
                valueArray.push(lineArray[i].split(','));
            }   
    
            // Set a new position to read from
            readbytes+=lastLineFeed+1;
        } else {
            // No complete lines were read
            readbytes+=bytecount;
        }
        process.nextTick(readsome); // continue the read loop defined in readsome() above
    }
    
  • 2020-12-05 06:29

    This module is an implementation of the principle @hasanyasin suggests:

    https://github.com/felixge/node-growing-file

  • 2020-12-05 06:32

    Why do you think tail -f is a hack?

    While figuring this out I found a good example, and I would do something similar: a real-time online activity monitor built with node.js and WebSocket:
    http://blog.new-bamboo.co.uk/2009/12/7/real-time-online-activity-monitor-example-with-node-js-and-websocket

    Just to make this answer complete, I wrote you some example code that runs under 0.8.0 (the http server is maybe a bit of a hack; see the note after the code).

    A child process is spawned running tail, and since a child process is an EventEmitter with three streams (we use stdout in our case), you can just add a listener with on.

    filename: tailServer.js

    usage: node tailServer /var/log/filename.log

    var http = require("http");
    var filename = process.argv[2];
    
    
    if (!filename)
        return console.log("Usage: node tailServer filename");
    
    var spawn = require('child_process').spawn;
    var tail = spawn('tail', ['-f', filename]);
    
    http.createServer(function (request, response) {
        console.log('request starting...');
    
        response.writeHead(200, {'Content-Type': 'text/plain' });
    
        tail.stdout.on('data', function (data) {
          response.write('' + data);                
        });
    }).listen(8088);
    
    console.log('Server running at http://127.0.0.1:8088/');
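
    One caveat with the server above (my observation, not part of the original answer): every incoming request adds another 'data' listener to tail.stdout and never removes it, so listeners accumulate as clients come and go. A small variation, reusing the http and tail variables from the snippet above, that detaches the listener when the client disconnects:

    http.createServer(function (request, response) {
        response.writeHead(200, {'Content-Type': 'text/plain' });

        var forward = function (data) {
            response.write('' + data);
        };
        tail.stdout.on('data', forward);

        // Stop forwarding to this response once the client goes away.
        request.on('close', function () {
            tail.stdout.removeListener('data', forward);
        });
    }).listen(8088);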
    