Any ideas on an async directory search using fs.readdir? I realise that we could introduce recursion and call the read directory function with the next directory to read, bu
Here's yet another implementation. None of the above solutions have any limiters, and so if your directory structure is large, they're all going to thrash and eventually run out of resources.
var async = require('async');
var fs = require('fs');
var resolve = require('path').resolve;
var scan = function(path, concurrency, callback) {
var list = [];
var walker = async.queue(function(path, callback) {
fs.stat(path, function(err, stats) {
if (err) {
return callback(err);
} else {
if (stats.isDirectory()) {
fs.readdir(path, function(err, files) {
if (err) {
callback(err);
} else {
for (var i = 0; i < files.length; i++) {
walker.push(resolve(path, files[i]));
}
callback();
}
});
} else {
list.push(path);
callback();
}
}
});
}, concurrency);
walker.push(path);
walker.drain = function() {
callback(list);
}
};
Using a concurrency of 50 works pretty well, and is almost as fast as simpler implementations for small directory structures.
I recommend using node-glob to accomplish that task.
var glob = require( 'glob' );
glob( 'dirname/**/*.js', function( err, files ) {
console.log( files );
});
klaw and klaw-sync are worth considering for this sort of thing. These were part of node-fs-extra.
Another simple and helpful one
function walkDir(root) {
const stat = fs.statSync(root);
if (stat.isDirectory()) {
const dirs = fs.readdirSync(root).filter(item => !item.startsWith('.'));
let results = dirs.map(sub => walkDir(`${root}/${sub}`));
return [].concat(...results);
} else {
return root;
}
}
For fun, here is a flow based version that works with highland.js streams library. It was co-authored by Victor Vu.
###
directory >---m------> dirFilesStream >---------o----> out
| |
| |
+--------< returnPipe <-----------+
legend: (m)erge (o)bserve
+ directory has the initial file
+ dirListStream does a directory listing
+ out prints out the full path of the file
+ returnPipe runs stat and filters on directories
###
_ = require('highland')
fs = require('fs')
fsPath = require('path')
directory = _(['someDirectory'])
mergePoint = _()
dirFilesStream = mergePoint.merge().flatMap((parentPath) ->
_.wrapCallback(fs.readdir)(parentPath).sequence().map (path) ->
fsPath.join parentPath, path
)
out = dirFilesStream
# Create the return pipe
returnPipe = dirFilesStream.observe().flatFilter((path) ->
_.wrapCallback(fs.stat)(path).map (v) ->
v.isDirectory()
)
# Connect up the merge point now that we have all of our streams.
mergePoint.write directory
mergePoint.write returnPipe
mergePoint.end()
# Release backpressure. This will print files as they are discovered
out.each H.log
# Another way would be to queue them all up and then print them all out at once.
# out.toArray((files)-> console.log(files))
This is my answer. Hope it can help somebody.
My focus is to make the searching routine can stop at anywhere, and for a file found, tells the relative depth to the original path.
var _fs = require('fs');
var _path = require('path');
var _defer = process.nextTick;
// next() will pop the first element from an array and return it, together with
// the recursive depth and the container array of the element. i.e. If the first
// element is an array, it'll be dug into recursively. But if the first element is
// an empty array, it'll be simply popped and ignored.
// e.g. If the original array is [1,[2],3], next() will return [1,0,[[2],3]], and
// the array becomes [[2],3]. If the array is [[[],[1,2],3],4], next() will return
// [1,2,[2]], and the array becomes [[[2],3],4].
// There is an infinity loop `while(true) {...}`, because I optimized the code to
// make it a non-recursive version.
var next = function(c) {
var a = c;
var n = 0;
while (true) {
if (a.length == 0) return null;
var x = a[0];
if (x.constructor == Array) {
if (x.length > 0) {
a = x;
++n;
} else {
a.shift();
a = c;
n = 0;
}
} else {
a.shift();
return [x, n, a];
}
}
}
// cb is the callback function, it have four arguments:
// 1) an error object if any exception happens;
// 2) a path name, may be a directory or a file;
// 3) a flag, `true` means directory, and `false` means file;
// 4) a zero-based number indicates the depth relative to the original path.
// cb should return a state value to tell whether the searching routine should
// continue: `true` means it should continue; `false` means it should stop here;
// but for a directory, there is a third state `null`, means it should do not
// dig into the directory and continue searching the next file.
var ls = function(path, cb) {
// use `_path.resolve()` to correctly handle '.' and '..'.
var c = [ _path.resolve(path) ];
var f = function() {
var p = next(c);
p && s(p);
};
var s = function(p) {
_fs.stat(p[0], function(err, ss) {
if (err) {
// use `_defer()` to turn a recursive call into a non-recursive call.
cb(err, p[0], null, p[1]) && _defer(f);
} else if (ss.isDirectory()) {
var y = cb(null, p[0], true, p[1]);
if (y) r(p);
else if (y == null) _defer(f);
} else {
cb(null, p[0], false, p[1]) && _defer(f);
}
});
};
var r = function(p) {
_fs.readdir(p[0], function(err, files) {
if (err) {
cb(err, p[0], true, p[1]) && _defer(f);
} else {
// not use `Array.prototype.map()` because we can make each change on site.
for (var i = 0; i < files.length; i++) {
files[i] = _path.join(p[0], files[i]);
}
p[2].unshift(files);
_defer(f);
}
});
}
_defer(f);
};
var printfile = function(err, file, isdir, n) {
if (err) {
console.log('--> ' + ('[' + n + '] ') + file + ': ' + err);
return true;
} else {
console.log('... ' + ('[' + n + '] ') + (isdir ? 'D' : 'F') + ' ' + file);
return true;
}
};
var path = process.argv[2];
ls(path, printfile);