Question:
I tried to use Node.js to process a 500MB Apache log file, converting its syntax from
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
to
ip.ip.ip.ip - - 02/Aug/2012:05:01:17 GET /path/of/access/ HTTP/1.1 302 26
, then write to another text file.
For better memory control and performance, I used fs.createReadStream and fs.createWriteStream, but I only managed to write the first line into output.txt, because the script ends with an error:
{ [Error: EBADF, write] errno: 9, code: 'EBADF' }
Here I posted some info that may help debug.
Head of input.txt
:
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:18 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
Content of output.txt
:
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
The whole script:
// Question's script: stream ./input.txt line-by-line into ./output.txt.
// Contains two bugs (explained in the answer below) that cause the EBADF error.
var fs = require('fs');
var data ='';          // carry-over: partial line left at the end of each chunk
var n=0; //For line control
var r = fs.createReadStream('./input.txt',{
encoding: 'ascii',
start:0,
// end: 100000,
});
var w = fs.createWriteStream('./output.txt',{
encoding:'ascii'
});
function put(line){ //write into w;
++n;
w.write(line+'\n');
}
// BUG 1: this runs both from the 'end' event (when the stream is already
// closing and will destroy itself) and from the n>100 branch below, so
// r.destroy() can be called twice -> the EBADF error being printed.
// BUG 2: w.destroy() discards any queued write data instead of flushing it,
// which is why output.txt ends up with only the first line.
function end(){
r.destroy();
w.destroy();
}
// 'data' handler: split each chunk on newlines; the first piece completes
// the carry-over from the previous chunk, the last piece becomes the new
// carry-over (it may be an incomplete line).
function onData(chunk){
var hasNewline = chunk.indexOf('\n')!==-1;
if(hasNewline){
var arr = chunk.split('\n');
var first = arr.shift();
var last = arr.pop();
data+=first;
put(data); //write a complete line
arr.forEach(function(line){
put(line); //write a complete line
});
data=last;
}else{
data+=chunk;
}
if(n>100){
end();
}
}
function onErr(e){
console.log(e);
}
r.addListener( "data", onData);
r.addListener( "end", end);
r.addListener('error',onErr);
w.addListener('error',onErr);
Answer 1:
You've got two issues that I can see.
The first is that your end
function calls destroy
on the ReadStream, but in the general case this is triggered from the end
event, which means that the stream is already closing, and it is going to call destroy
automatically. That means that r.destroy
is going to be called twice, triggering an error. This is the cause of the error you are seeing printed.
The second issue is that you are calling destroy
on the WriteStream. I suggest you go read the docs for that: http://nodejs.org/api/stream.html#stream_stream_destroy_1
Specifically Any queued write data will not be sent
, which is why you are missing some of your output.
Basically, you should ONLY call destroy
on the ReadStream if you want it to close early, like in your n > 100
case. Then you want to use WriteStream's end
instead, so the stream has time to write all of the buffered data.
Here is a simplified version, which I think should work the same. I'd also not bother binding error
since errors are automatically printed to the console anyway.
// Corrected version: only destroy the ReadStream (for the early-stop case),
// and end() the WriteStream so buffered data is flushed to disk.
var fs = require('fs');
var data ='';          // carry-over: partial line left at the end of each chunk
var n=0; //For line control
var r = fs.createReadStream('./input.txt',{
encoding: 'ascii',
start:0,
// end: 100000,
});
var w = fs.createWriteStream('./output.txt',{
encoding:'ascii'
});
r.addListener( "data", function(chunk){
data += chunk;
var lines = data.split('\n');
data = lines.pop();    // last piece may be an incomplete line; keep for next chunk
lines.forEach(function(line){
if (!r.readable) return; // If already destroyed
if (n >= 100) {
// Stop any more 'data' events and close the file.
// This will also trigger 'close' below and close the writestream.
r.destroy();
return;
}
n++;
w.write(line + '\n');
});
});
r.addListener( "end", function(){
// When we hit the end of the file, close the write stream,
// and write any remaining line content
w.write(data);
});
// 'close' fires after both normal end and early destroy; w.end() (not
// destroy) lets the WriteStream flush everything it has buffered.
r.addListener("close", function(){
w.end();
});
Source: https://stackoverflow.com/questions/11945405/node-js-ebadf-error-when-writing-file-using-writable-stream