问题
In Node.js I have to read files in a folder and for each file get file handler info, this is my simplest implementation using fs.readdir
:
FileServer.prototype.listLocal = function (params) {
var self = this;
var options = {
limit: 100,
desc: 1
};
// override defaults
for (var attrname in params) { options[attrname] = params[attrname]; }
// media path is the media folder
var mediaDir = path.join(self._options.mediaDir, path.sep);
return new Promise((resolve, reject) => {
fs.readdir(mediaDir, (error, results) => {
if (error) {
self.logger.error("FileServer.list error:%s", error);
return reject(error);
} else { // list files
// cut to max files
results = results.slice(0, options.limit);
// filter default ext
results = results.filter(item => {
return (item.indexOf('.mp3') > -1);
});
// format meta data
results = results.map(file => {
var filePath = path.join(self._options.mediaDir, path.sep, file);
var item = {
name: file,
path: filePath
};
const fd = fs.openSync(filePath, 'r');
var fstat = fs.fstatSync(fd);
// file size in bytes
item.size = fstat.size;
item.sizehr = self.formatSizeUnits(fstat.size);
// "Birth Time" Time of file creation. Set once when the file is created.
item.birthtime = fstat.birthtime;
// "Modified Time" Time when file data last modified.
item.mtime = fstat.mtime;
// "Access Time" Time when file data last accessed.
item.atime = fstat.atime;
item.timestamp = new Date(item.mtime).getTime();
item.media_id = path.basename(filePath, '.mp3');
fs.closeSync(fd);//close file
return item;
});
if (options.desc) { // sort by most recent
results.sort(function (a, b) {
return b.timestamp - a.timestamp;
});
} else { // sort by older
results.sort(function (a, b) {
return a.timestamp - b.timestamp;
});
}
return resolve(results);
}
})
});
}
so that for each file I get an array of items
{
"name": "sample121.mp3",
"path": "/data/sample121.mp3",
"size": 5751405,
"sizehr": "5.4850 MB",
"birthtime": "2018-10-08T15:26:08.397Z",
"mtime": "2018-10-08T15:26:11.650Z",
"atime": "2018-10-10T09:01:48.534Z",
"timestamp": 1539012371650,
"media_id": "sample121"
}
That said, the problem is it's knonw that node.js fs.readdir
may freeze Node I/O Loop when the folder to list has a large number of files, let's say from ten thousands to hundred thousands and more.
This is a known issue - see here for more info.
There are also plans to improve fs.readdir
in a some way, like streaming - see here about this.
In the meanwhile I'm searching for like a patch to this, because my folders are pretty large.
Since the problem is the Event Loop get frozen, someone proposed a solution using process.nextTick
, that I have ensembled here
FileServer.prototype.listLocalNextTick = function (params) {
var self = this;
var options = {
limit: 100,
desc: 1
};
// override defaults
for (var attrname in params) { options[attrname] = params[attrname]; }
// media path is the media folder
var mediaDir = path.join(self._options.mediaDir, path.sep);
return new Promise((resolve, reject) => {
var AsyncArrayProcessor = function (inArray, inEntryProcessingFunction) {
var elemNum = 0;
var arrLen = inArray.length;
var ArrayIterator = function () {
inEntryProcessingFunction(inArray[elemNum]);
elemNum++;
if (elemNum < arrLen) process.nextTick(ArrayIterator);
}
if (elemNum < arrLen) process.nextTick(ArrayIterator);
}
fs.readdir(mediaDir, function (error, results) {
if (error) {
self.logger.error("FileServer.list error:%s", error);
return reject(error);
}
// cut to max files
results = results.slice(0, options.limit);
// filter default ext
results = results.filter(item => {
return (item.indexOf('.mp3') > -1);
});
var ProcessDirectoryEntry = function (file) {
// This may be as complex as you may fit in a single event loop
var filePath = path.join(self._options.mediaDir, path.sep, file);
var item = {
name: file,
path: filePath
};
const fd = fs.openSync(filePath, 'r');
var fstat = fs.fstatSync(fd);
// file size in bytes
item.size = fstat.size;
item.sizehr = self.formatSizeUnits(fstat.size);
// "Birth Time" Time of file creation. Set once when the file is created.
item.birthtime = fstat.birthtime;
// "Modified Time" Time when file data last modified.
item.mtime = fstat.mtime;
// "Access Time" Time when file data last accessed.
item.atime = fstat.atime;
item.timestamp = new Date(item.mtime).getTime();
item.media_id = path.basename(filePath, '.mp3');
// map to file item
file = item;
}//ProcessDirectoryEntry
// LP: fs.readdir() callback is finished, event loop continues...
AsyncArrayProcessor(results, ProcessDirectoryEntry);
if (options.desc) { // sort by most recent
results.sort(function (a, b) {
return b.timestamp - a.timestamp;
});
} else { // sort by older
results.sort(function (a, b) {
return a.timestamp - b.timestamp;
});
}
return resolve(results);
});
});
}//listLocalNextTick
This seems to avoid the original issue, but I cannot anymore map the files lists to the items with file handler I did before, because when running the AsyncArrayProcessor
on the files list, thus the ProcessDirectoryEntry
on each file entry the async nature of process.nextTick
causes that I cannot get back the results
array modified as in the previous listLocal
function where I just did an iterative array.map
of the results
array.
How to patch the listLocalNextTick
to behave like the listLocal
but keeping process.nextTick
approach?
[UPDATE]
According to the proposed solution, this is the best implementation so far:
/**
* Scan files in directory
* @param {String} needle
* @param {object} options
* @returns {nodeStream}
*/
scanDirStream : function(needle,params) {
var options = {
type: 'f',
name: '*'
};
for (var attrname in params) { options[attrname] = params[attrname]; }
return new Promise((resolve, reject) => {
var opt=[needle];
for (var k in options) {
var v = options[k];
if (!Util.empty(v)) {
opt.push('-' + k);
opt.push(v);
}
};
var data='';
var listing = spawn('find',opt)
listing.stdout.on('data', _data => {
var buff=Buffer.from(_data, 'utf-8').toString();
if(buff!='') data+=buff;
})
listing.stderr.on('data', error => {
return reject(Buffer.from(error, 'utf-8').toString());
});
listing.on('close', (code) => {
var res = data.split('\n');
return resolve(res);
});
});
Example of usage:
scanDirStream(mediaRoot,{
name: '*.mp3'
})
.then(results => {
console.info("files:%d", results);
})
.catch(error => {
console.error("error %s", error);
});
This can be eventually modified to add a tick callback at every stdout.on
event emitted when getting a new file in the directory listening.
回答1:
I have Created a wrapper around find for it but you could use dir or ls in the same way.
const { spawn } = require('child_process');
/**
* findNodeStream
* @param {String} dir
* @returns {nodeStream}
*/
const findNodeStream = (dir,options) => spawn('find',[dir,options].flat().filter(x=>x));
/**
* Usage Example:
let listing = findNodeStream('dir',[options])
listing.stdout.on('data', d=>console.log(d.toString()))
listing.stderr.on('data', d=>console.log(d.toString()))
listing.on('close', (code) => {
console.log(`child process exited with code ${code}`);
});
*/
this allows you to stream a directory chunked and not in a whole as fs.readdir does.
Important
NodeJS > 12.11.1 will have async readdir support Landed in cbd8d71 ( https://github.com/nodejs/node/commit/cbd8d715b2286e5726e6988921f5c870cbf74127 ) as fs{Promises}.opendir(), which returns an fs.Dir, which exposes an async iterator. tada
https://nodejs.org/api/fs.html#fs_fspromises_opendir_path_options
const fs = require('fs');
async function print(path) {
const dir = await fs.promises.opendir(path);
for await (const dirent of dir) {
console.log(dirent.name);
}
}
print('./').catch(console.error);
来源:https://stackoverflow.com/questions/53125592/node-fs-readdir-freezing-in-folders-with-too-many-files