Remove last n lines from file using nodejs

為{幸葍}努か 提交于 2019-12-06 00:23:52

Let's create a huge file:

$ base64 /dev/urandom | head -1000000 > /tmp/crap
$ wc -l /tmp/crap
1000000 /tmp/crap
$ du -sh /tmp/crap
74M /tmp/crap

Here is your code:

$ cat /tmp/a.js
var fs = require('fs');

var filename = '/tmp/crap1';

// Naive approach: read the entire file into memory, split it on newlines,
// drop the last three array entries, and write everything back out.
fs.readFile(filename, function(err, data) {
    if (err) throw err;
    // Declared locally so the script does not create an implicit global
    // (an undeclared assignment is a ReferenceError in strict mode).
    // Note: when the file ends with "\n", split() yields a trailing empty
    // string, and that empty entry is one of the three removed below.
    var theFile = data.toString().split("\n");
    theFile.splice(-3, 3);
    fs.writeFile(filename, theFile.join("\n"), function(err) {
        if (err) {
            return console.log(err);
        }
        console.log("Removed last 3 lines");
        // Number of array entries that were written back.
        console.log(theFile.length);
    });
});

And here is mine:

$ cat /tmp/b.js
const fs = require('fs');
const cp = require('child_process');

const filename = '/tmp/crap2';
const lines2nuke = 3;

// Ask `tail` for the last N lines, then shrink the file by exactly that many
// bytes. The file itself is never loaded into this process.
//
// execFile with an argument array is used instead of a formatted shell
// command so that filenames containing spaces or shell metacharacters are
// passed through verbatim. `encoding: 'buffer'` makes stdout a Buffer, so
// its length is a byte count rather than a count of decoded characters
// (the two differ as soon as the tail contains non-ASCII text).
cp.execFile('tail', ['-n', String(lines2nuke), filename], { encoding: 'buffer' }, (err, stdout) => {
    if (err) throw err;
    const bytesToRemove = stdout.length;
    fs.stat(filename, (err, stats) => {
        if (err) throw err;
        // New size = current size minus the bytes occupied by the last N lines.
        fs.truncate(filename, stats.size - bytesToRemove, (err) => {
            if (err) throw err;
            console.log('File truncated!');
        });
    });
});

Let's make copies of the same file:

$ cp /tmp/crap /tmp/crap1
$ cp /tmp/crap /tmp/crap2

Let's see who is faster:

$ time node a.js
Removed last 3 lines
999998
node a.js  0.53s user 0.19s system 99% cpu 0.720 total

$ time node b.js
File truncated!
node b.js  0.08s user 0.01s system 100% cpu 0.091 total

When I increased the file size 10 times, my system ran out of memory with a.js; but with b.js, it took:

$ time node b.js
File truncated!
node b.js  0.07s user 0.03s system 6% cpu 1.542 total

My code uses tail, which doesn't read the whole file: it seeks to the end, then reads blocks backwards until the expected number of lines has been reached, and then prints those lines in the proper order up to the end of the file. Now I know the number of bytes to remove. Then I use fs.stat, which tells me the total number of bytes in the file. Now I know how many bytes I actually want the file to contain after the removal of those last n lines. Finally, I use fs.truncate, which truncates the regular file to exactly the size (in bytes) specified.

Update:

OP says that the platform is Windows. In that case, we can modify this program so that it does not invoke another utility but does everything in Node itself. Fortunately, the required functionality is already available as the Node module read-last-lines. The updated, OS-agnostic code looks like:

$ npm install read-last-lines
$ cat /tmp/c.js 
const fs = require('fs');
const rll = require('read-last-lines');

const filename = '/tmp/crap2';
const lines2nuke = 3;

// Same idea as the `tail` version, but the last N lines are obtained with the
// read-last-lines module, so no external utility is needed.
rll.read(filename, lines2nuke).then((lines) => {
    // fs.truncate works in bytes, so measure the encoded size of the tail
    // rather than its character count; the two differ for non-ASCII text.
    // Buffer.byteLength accepts both strings (UTF-8 by default) and Buffers.
    const bytesToRemove = Buffer.byteLength(lines);
    fs.stat(filename, (err, stats) => {
        if (err) throw err;
        // New size = current size minus the bytes occupied by the last N lines.
        fs.truncate(filename, stats.size - bytesToRemove, (err) => {
            if (err) throw err;
            console.log('File truncated!');
        });
    });
}).catch((err) => {
    // Report a failed read explicitly instead of leaving the rejection
    // unhandled, and signal failure through the exit code.
    console.error(err);
    process.exitCode = 1;
});

And on the 10x size file, it took:

$ time node c.js
File truncated!
node c.js  0.14s user 0.04s system 8% cpu 2.022 total
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!