node.js fs.readdir recursive directory search

醉酒成梦 2020-11-22 15:55

Any ideas on an async directory search using fs.readdir? I realise that we could introduce recursion and call the read directory function with the next directory to read, but I am a little worried about it not being async...

30 Answers
  • 2020-11-22 16:24

    There are basically two ways of accomplishing this. In an async environment you'll notice that there are two kinds of loops: serial and parallel. A serial loop waits for one iteration to complete before it moves on to the next, which guarantees that the iterations finish in order. In a parallel loop, all the iterations are started at the same time and may complete in any order, but because the I/O overlaps it is typically much faster than a serial loop. So in this case a parallel loop is probably the better choice, since it doesn't matter in what order the walk completes as long as it completes and returns the results (unless you want them in order).

    A parallel loop would look like this:

    var fs = require('fs');
    var path = require('path');

    var walk = function(dir, done) {
      var results = [];
      fs.readdir(dir, function(err, list) {
        if (err) return done(err);
        var pending = list.length;
        if (!pending) return done(null, results); // empty directory
        list.forEach(function(file) {
          file = path.resolve(dir, file);
          fs.stat(file, function(err, stat) {
            if (stat && stat.isDirectory()) {
              // recurse into the sub-directory and merge its results when it finishes
              walk(file, function(err, res) {
                results = results.concat(res);
                if (!--pending) done(null, results);
              });
            } else {
              results.push(file);
              if (!--pending) done(null, results); // last entry resolved
            }
          });
        });
      });
    };
    

    A serial loop would look like this:

    var fs = require('fs');
    var path = require('path');

    var walk = function(dir, done) {
      var results = [];
      fs.readdir(dir, function(err, list) {
        if (err) return done(err);
        var i = 0;
        (function next() {
          var file = list[i++];
          if (!file) return done(null, results); // no entries left
          file = path.resolve(dir, file);
          fs.stat(file, function(err, stat) {
            if (stat && stat.isDirectory()) {
              // recurse, then move on to the next entry once the sub-walk is done
              walk(file, function(err, res) {
                results = results.concat(res);
                next();
              });
            } else {
              results.push(file);
              next();
            }
          });
        })();
      });
    };
    

    And to test it out on your home directory (WARNING: the results list will be huge if you have a lot of stuff in your home directory):

    walk(process.env.HOME, function(err, results) {
      if (err) throw err;
      console.log(results);
    });
    

    EDIT: Improved examples.
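
    For reference, a minimal sketch of the same parallel walk using the promise-based fs API (this assumes a Node.js version that ships fs.promises and Array.prototype.flat, roughly Node 12 or later):

    const fs = require('fs').promises;
    const path = require('path');

    // Parallel walk: every entry is stat'ed concurrently via Promise.all,
    // and sub-directories are walked recursively.
    async function walk(dir) {
      const entries = await fs.readdir(dir);
      const nested = await Promise.all(entries.map(async function (entry) {
        const full = path.resolve(dir, entry);
        const stat = await fs.stat(full);
        return stat.isDirectory() ? walk(full) : [full];
      }));
      return nested.flat(); // flatten the per-entry arrays into a single file list
    }

    walk(process.env.HOME).then(console.log).catch(console.error);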

  • 2020-11-22 16:24

    I loved the answer from chjj above and would not have been able to create my version of the parallel loop without that starting point.

    var fs = require("fs");
    var path = require("path");

    var tree = function(dir, done) {
      var results = {
        "path": dir,
        "children": []
      };
      fs.readdir(dir, function(err, list) {
        if (err) { return done(err); }
        var pending = list.length;
        if (!pending) { return done(null, results); } // empty directory
        list.forEach(function(file) {
          var child = path.join(dir, file);
          fs.stat(child, function(err, stat) {
            if (stat && stat.isDirectory()) {
              // recurse and attach the sub-tree as a child node
              tree(child, function(err, res) {
                results.children.push(res);
                if (!--pending) { done(null, results); }
              });
            } else {
              results.children.push({ "path": child });
              if (!--pending) { done(null, results); }
            }
          });
        });
      });
    };

    module.exports = tree;
    

    I created a Gist as well. Comments welcome. I am still starting out in the NodeJS realm so that is one way I hope to learn more.
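
    A quick usage sketch (assuming the module above is saved as tree.js next to the calling script):

    var tree = require('./tree');

    tree(process.cwd(), function(err, results) {
      if (err) throw err;
      console.log(JSON.stringify(results, null, 2)); // pretty-print the nested structure
    });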

  • 2020-11-22 16:25

    A. Have a look at the file module. It has a function called walk:

    file.walk(start, callback)

    Navigates a file tree, calling callback for each directory, passing in (null, dirPath, dirs, files).

    This may be just what you need! And yes, it is async. However, I think you would have to aggregate the full paths yourself, if you need them.
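
    A usage sketch based purely on the signature described above (assuming the package is installed and required as file):

    var file = require('file');

    file.walk('somepath/', function(err, dirPath, dirs, files) {
      if (err) throw err;
      // dirPath is the directory being visited; dirs and files list its contents
      console.log(dirPath, files);
    });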

    B. An alternative, and one of my favourites: use the Unix find command for this. Why reimplement something that has already been written? It may not be exactly what you need, but it is still worth checking out:

    var execFile = require('child_process').execFile;
    execFile('find', [ 'somepath/' ], function(err, stdout, stderr) {
      var file_list = stdout.split('\n');
      /* now you've got a list with full path file names */
    });
    

    Thanks to filesystem caching, subsequent searches are usually very fast, as long as only a few folders have changed.
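
    One caveat: execFile buffers the child process's entire output in memory, so very large trees can exceed the default maxBuffer limit. A sketch of raising it (the 10 MB figure is just an illustrative choice):

    var execFile = require('child_process').execFile;

    execFile('find', [ 'somepath/' ], { maxBuffer: 10 * 1024 * 1024 }, function(err, stdout, stderr) {
      if (err) return console.error(err);
      var file_list = stdout.trim().split('\n'); // trim() drops the trailing empty entry
      console.log(file_list.length + ' files found');
    });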

  • 2020-11-22 16:25

    Using Promises (Q) to solve this in a Functional style:

    var fs = require('fs'),
        fsPath = require('path'),
        Q = require('q');

    var walk = function (dir) {
      return Q.ninvoke(fs, 'readdir', dir).then(function (files) {

        return Q.all(files.map(function (file) {

          file = fsPath.join(dir, file);
          return Q.ninvoke(fs, 'lstat', file).then(function (stat) {

            if (stat.isDirectory()) {
              return walk(file);
            } else {
              return [file];
            }
          });
        }));
      }).then(function (files) {
        // flatten the nested arrays; the [] seed keeps this safe for empty directories
        return files.reduce(function (pre, cur) {
          return pre.concat(cur);
        }, []);
      });
    };
    

    It returns a promise of an array, so you can use it as:

    walk('/home/mypath').then(function (files) { console.log(files); });
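
    Note that because the lstat calls are all issued inside Q.all, this behaves like the parallel walk above; and since lstat (rather than stat) is used, symbolic links to directories are reported as files instead of being followed.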
    
  • 2020-11-22 16:26

    Since everyone seems to write their own, here is mine.

    walk(dir, cb, endCb) — cb(file) is called for every file found, and endCb(err | null) is called once when the walk finishes or fails.

    DIRTY

    var fs = require('fs');
    var path = require('path');

    module.exports = walk;

    function walk(dir, cb, endCb) {
      fs.readdir(dir, function(err, files) {
        if (err) {
          return endCb(err);
        }

        var pending = files.length;
        if (pending === 0) {
          return endCb(null); // empty directory, nothing to walk
        }
        files.forEach(function(file) {
          var filePath = path.join(dir, file);
          fs.stat(filePath, function(err, stats) {
            if (err) {
              return endCb(err);
            }

            if (stats.isDirectory()) {
              // recurse; this entry counts as done when its sub-walk finishes
              walk(filePath, cb, function() {
                pending--;
                if (pending === 0) {
                  endCb(null);
                }
              });
            } else {
              cb(filePath);
              pending--;
              if (pending === 0) {
                endCb(null);
              }
            }
          });
        });
      });
    }
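
    A minimal usage sketch:

    walk(process.cwd(), function(file) {
      console.log(file);    // called once for every file found
    }, function(err) {
      if (err) throw err;
      console.log('done');  // called once when the whole tree has been walked
    });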
    
  • 2020-11-22 16:28

    Using Bluebird's Promise.coroutine:

    let promise = require('bluebird'),
        path = require('path'),
        PC = promise.coroutine,
        fs = promise.promisifyAll(require('fs'));
    let getFiles = PC(function*(dir){
        let files = [];
        // filter out dot (hidden) files, e.g. the ones macOS creates
        let contents = (yield fs.readdirAsync(dir)).filter(name => !/^\./.test(name));
        for (let i = 0, l = contents.length; i < l; i ++) {
            let content = path.resolve(dir, contents[i]);
            let contentStat = yield fs.statAsync(content);
            if (contentStat && contentStat.isDirectory()) {
                let subFiles = yield getFiles(content);
                files = files.concat(subFiles);
            } else {
                files.push(content);
            }
        }
        return files;
    });
    //how to use
    //easy error handling in one place
    getFiles(your_dir).then(console.log).catch(err => console.log(err));
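
    Note that because each statAsync and recursive getFiles call is yielded inside the for loop, this version walks the tree serially (one entry at a time), similar to the serial loop in the first answer.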
    