Node.js & Amazon S3: How to iterate through all files in a bucket?

2020-12-02 14:17

Is there any Amazon S3 client library for Node.js that allows listing all of the files in an S3 bucket?

The best-known libraries, aws2js and knox, don't seem to have this functionality.

15 Answers
  • 2020-12-02 14:42

    I published knox-copy when I couldn't find a good existing solution. It wraps all the pagination details of the REST API in a familiar Node stream:

    var knoxCopy = require('knox-copy');
    
    var client = knoxCopy.createClient({
      key: '<api-key-here>',
      secret: '<secret-here>',
      bucket: 'mrbucket'
    });
    
    client.streamKeys({
      // omit the prefix to list the whole bucket
      prefix: 'buckets/of/fun' 
    }).on('data', function(key) {
      console.log(key);
    });
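
    If you need all the keys in an array rather than a stream, you can collect the 'data' events (a minimal sketch; it assumes the stream emits one key string per 'data' event and a standard 'end' event):

    var keys = [];

    client.streamKeys({
      prefix: 'buckets/of/fun'
    }).on('data', function(key) {
      keys.push(key); // accumulate each key as it arrives
    }).on('end', function() {
      console.log('Found ' + keys.length + ' keys');
    });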
    

    If you're listing fewer than 1000 files, a single page will work:

    client.listPageOfKeys({
      prefix: 'smaller/bucket/o/fun'
    }, function(err, page) {
      console.log(page.Contents); // <- Here's your list of files
    });
    
  • 2020-12-02 14:43

    I ended up building a wrapper function around listObjectsV2. It takes the same parameters, but recurses until IsTruncated is false and returns all of the keys it found as an array in the second argument of the callback:

    const AWS = require('aws-sdk')
    const s3 = new AWS.S3()

    function listAllKeys(params, cb)
    {
       // Keys collected on previous pages are carried along in params.data
       var keys = []
       if(params.data){
          keys = keys.concat(params.data)
       }
       delete params['data']

       s3.listObjectsV2(params, function(err, data){
         if(err){
           cb(err)
         } else if (data.IsTruncated) {
           // More pages to fetch: keep everything found so far and recurse
           params['ContinuationToken'] = data.NextContinuationToken
           params['data'] = keys.concat(data.Contents)
           listAllKeys(params, cb)
         } else {
           keys = keys.concat(data.Contents)
           cb(null, keys)
         }
       })
    }
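
    Usage is callback-style (a minimal sketch; the bucket name is a placeholder):

    listAllKeys({ Bucket: 'my-bucket' }, function(err, keys) {
      if (err) return console.error(err)
      console.log('Found ' + keys.length + ' keys')
    })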
    
  • 2020-12-02 14:44

    I am using this version with async/await.
    The function returns the contents as an array.
    I'm also using NextContinuationToken instead of Marker.

    const AWS = require('aws-sdk');
    const s3 = new AWS.S3();

    async function getFilesRecursivelySub(param) {

        // Fetch one page of up to 1000 items from S3.
        let result = await s3.listObjectsV2(param).promise();
    
        if(!result.IsTruncated) {
            // Recursive terminating condition.
            return result.Contents;
        } else {
            // Recurse it if results are truncated.
            param.ContinuationToken = result.NextContinuationToken;
            return result.Contents.concat(await getFilesRecursivelySub(param));
        }
    }
    
    async function getFilesRecursively() {
    
        let param = {
            Bucket: 'YOUR_BUCKET_NAME'
            // Can add more parameters here.
        };
    
        return await getFilesRecursivelySub(param);
    }
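
    It can then be awaited from any async context (a minimal usage sketch):

    (async () => {
        const contents = await getFilesRecursively();
        console.log(contents.map(item => item.Key));
    })();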
    
  • 2020-12-02 14:44

    If you want to get the list of keys only within a specific folder inside an S3 bucket, this will be useful.

    Basically, the listObjects call starts searching from the Marker we set and returns up to MaxKeys: 1000 keys. Keys come back in lexicographic order, so the first 1000 keys after the marker may span several folders in the bucket.

    Say I have many folders inside my bucket with a date prefix like prod/some date/, e.g. prod/2017/05/12/, prod/2017/05/13/, etc.

    To fetch the list of objects (file names) only within the prod/2017/05/12/ folder, I specify prod/2017/05/12/ as my start and prod/2017/05/13/ (the next folder name) as my end, and in the code I break out of the loop when I encounter the end.

    Each entry in data.Contents will look like this:

    {
      Key: 'prod/2017/05/13/4bf2c675-a417-4c1f-a0b4-22fc45f99207.jpg',
      LastModified: 2017-05-13T00:59:02.000Z,
      ETag: '"630b2sdfsdfs49ef392bcc16c833004f94ae850"',
      Size: 134236366,
      StorageClass: 'STANDARD',
      Owner: { }
    }
    

    Code:

    var list = [];

    function listAllKeys(s3bucket, start, end) {
      s3.listObjects({
        Bucket: s3bucket,
        Marker: start,
        MaxKeys: 1000,
      }, function(err, data) {
        if (err) {
          return console.log(err);
        }
        if (data.Contents) {
          for (var i = 0; i < data.Contents.length; i++) {
            var key = data.Contents[i].Key; // see above for the structure of data.Contents
            // Compare the key's leading characters against the end folder;
            // the original hard-coded substring(0, 19) never matched a 16-character prefix
            if (key.substring(0, end.length) !== end) {
              list.push(key);
            } else {
              break; // stop the loop once we reach the end folder
            }
          }
          console.log(list);
          console.log('Total - ', list.length);
        }
      });
    }

    listAllKeys('BucketName', 'prod/2017/05/12/', 'prod/2017/05/13/');
    

    Output:

    [ 'prod/2017/05/12/05/4bf2c675-a417-4c1f-a0b4-22fc45f99207.jpg',
      'prod/2017/05/12/05/a36528b9-e071-4b83-a7e6-9b32d6bce6d8.jpg',
      'prod/2017/05/12/05/bc4d6d4b-4455-48b3-a548-7a714c489060.jpg',
      'prod/2017/05/12/05/f4b8d599-80d0-46fa-a996-e73b8fd0cd6d.jpg',
      ... 689 more items ]
    Total - 692
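
    Note that if you only ever need the contents of a single folder, the listObjects Prefix parameter does this directly, without marker arithmetic (a simpler sketch, not the approach above; the bucket name is a placeholder):

    s3.listObjects({
      Bucket: 'BucketName',
      Prefix: 'prod/2017/05/12/' // only keys under this folder are returned
    }, function(err, data) {
      if (err) return console.log(err);
      console.log(data.Contents.map(function(item) { return item.Key; }));
    });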
    
  • 2020-12-02 14:44

    Here's what I came up with based on the other answers.
    You can await listAllKeys() without having to use callbacks.

    const listAllKeys = () =>
      new Promise((resolve, reject) => {
        let allKeys = [];
        const list = marker => {
          s3.listObjects({ Marker: marker }, (err, data) => {
            if (err) {
              reject(err);
            } else if (data.IsTruncated) {
              // Collect this page, then continue from the last key we saw
              allKeys = allKeys.concat(data.Contents);
              list(data.NextMarker || data.Contents[data.Contents.length - 1].Key);
            } else {
              // Last page: concat (not push) so we resolve a flat list, not an array of pages
              allKeys = allKeys.concat(data.Contents);
              resolve(allKeys);
            }
          });
        };
        list();
      });
    

    This assumes you've initialized the s3 variable like so:

    const aws = require('aws-sdk');

    const s3 = new aws.S3({
      apiVersion: API_VERSION,
      params: { Bucket: BUCKET_NAME }
    });
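
    Then, inside any async function (a minimal usage sketch; note each element is a full S3 object entry, not just the key string):

    const objects = await listAllKeys();
    console.log(objects.map(o => o.Key));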
    
  • 2020-12-02 14:44

    The cleanest way for me was to shell out to s3cmd from my Node script, like this (the example here deletes files recursively):

    var exec = require('child_process').exec;

    var bucket = "myBucket";
    var prefix = "myPrefix"; // this parameter is optional
    var command = "s3cmd del -r s3://" + bucket + "/" + prefix;

    // maxBuffer is raised to avoid node's "maxBuffer exceeded" error on large output
    var child = exec(command, { maxBuffer: 5000 * 1024 }, function(error, stdout, stderr) {
      console.log('stdout: ' + stdout);
      if (error !== null) {
        console.log('exec error: ' + error);
      }
    });
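
    The same approach can list keys by parsing the output of s3cmd ls (a rough sketch; it assumes s3cmd prints one object per line with the s3:// URI in the last column):

    var listCommand = "s3cmd ls -r s3://" + bucket + "/" + prefix;
    exec(listCommand, { maxBuffer: 5000 * 1024 }, function(error, stdout, stderr) {
      if (error !== null) return console.log('exec error: ' + error);
      var keys = stdout.split('\n')
        .filter(function(line) { return line.length > 0; })
        .map(function(line) { return line.split(/\s+/).pop(); }); // keep the last column
      console.log(keys);
    });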
    