Is there any Amazon S3 client library for Node.js that allows listing of all files in S3 bucket?
The most known aws2js and knox don\'t seem to have this functionalit
Published knox-copy when I couldn't find a good existing solution. Wraps all the pagination details of the Rest API into a familiar node stream:
var knoxCopy = require('knox-copy');
var client = knoxCopy.createClient({
key: '<api-key-here>',
secret: '<secret-here>',
bucket: 'mrbucket'
});
client.streamKeys({
// omit the prefix to list the whole bucket
prefix: 'buckets/of/fun'
}).on('data', function(key) {
console.log(key);
});
If you're listing fewer than 1000 files a single page will work:
client.listPageOfKeys({
prefix: 'smaller/bucket/o/fun'
}, function(err, page) {
console.log(page.Contents); // <- Here's your list of files
});
I ended up building a wrapper function around ListObjectsV2, works the same way and takes the same parameters but works recursively until IsTruncated=false and returns all the keys found as an array in the second parameter of the callback function
const AWS = require('aws-sdk')
const s3 = new AWS.S3()
function listAllKeys(params, cb)
{
var keys = []
if(params.data){
keys = keys.concat(params.data)
}
delete params['data']
s3.listObjectsV2(params, function(err, data){
if(err){
cb(err)
} else if (data.IsTruncated) {
params['ContinuationToken'] = data.NextContinuationToken
params['data'] = data.Contents
listAllKeys(params, cb)
} else {
keys = keys.concat(data.Contents)
cb(null,keys)
}
})
}
I am using this version with async/await
.
This function will return the content in an array.
I'm also using the NextContinuationToken
instead of the Marker.
async function getFilesRecursivelySub(param) {
// Call the function to get list of items from S3.
let result = await s3.listObjectsV2(param).promise();
if(!result.IsTruncated) {
// Recursive terminating condition.
return result.Contents;
} else {
// Recurse it if results are truncated.
param.ContinuationToken = result.NextContinuationToken;
return result.Contents.concat(await getFilesRecursivelySub(param));
}
}
async function getFilesRecursively() {
let param = {
Bucket: 'YOUR_BUCKET_NAME'
// Can add more parameters here.
};
return await getFilesRecursivelySub(param);
}
If you want to get list of keys only within specific folder inside a S3 Bucket then this will be useful.
Basically, listObjects
function will start searching from the Marker
we set and it will search until maxKeys: 1000
as limit. so it will search one by one folder and get you first 1000 keys it find from different folder in a bucket.
Consider i have many folders inside my bucket with prefix as prod/some date/, Ex: prod/2017/05/12/ ,prod/2017/05/13/,etc
.
I want to fetch list of objects (file names) only within prod/2017/05/12/
folder then i will specify prod/2017/05/12/
as my start and prod/2017/05/13/
[your next folder name] as my end and in code i'm breaking the loop when i encounter the end.
Each Key
in data.Contents
will look like this.
{ Key: 'prod/2017/05/13/4bf2c675-a417-4c1f-a0b4-22fc45f99207.jpg',
LastModified: 2017-05-13T00:59:02.000Z,
ETag: '"630b2sdfsdfs49ef392bcc16c833004f94ae850"',
Size: 134236366,
StorageClass: 'STANDARD',
Owner: { }
}
Code:
var list = [];
function listAllKeys(s3bucket, start, end) {
s3.listObjects({
Bucket: s3bucket,
Marker: start,
MaxKeys: 1000,
}, function(err, data) {
if (data.Contents) {
for (var i = 0; i < data.Contents.length; i++) {
var key = data.Contents[i].Key; //See above code for the structure of data.Contents
if (key.substring(0, 19) != end) {
list.push(key);
} else {
break; // break the loop if end arrived
}
}
console.log(list);
console.log('Total - ', list.length);
}
});
}
listAllKeys('BucketName', 'prod/2017/05/12/', 'prod/2017/05/13/');
Output:
[ 'prod/2017/05/12/05/4bf2c675-a417-4c1f-a0b4-22fc45f99207.jpg',
'prod/2017/05/12/05/a36528b9-e071-4b83-a7e6-9b32d6bce6d8.jpg',
'prod/2017/05/12/05/bc4d6d4b-4455-48b3-a548-7a714c489060.jpg',
'prod/2017/05/12/05/f4b8d599-80d0-46fa-a996-e73b8fd0cd6d.jpg',
... 689 more items ]
Total - 692
Here's what I came up with based on the other answers.
You can await listAllKeys()
without having to use callbacks.
const listAllKeys = () =>
new Promise((resolve, reject) => {
let allKeys = [];
const list = marker => {
s3.listObjects({ Marker: marker }, (err, data) => {
if (err) {
reject(err);
} else if (data.IsTruncated) {
allKeys.push(data.Contents);
list(data.NextMarker || data.Contents[data.Contents.length - 1].Key);
} else {
allKeys.push(data.Contents);
resolve(allKeys);
}
});
};
list();
});
This assumes you've initialized the s3 variable like so
const s3 = new aws.S3({
apiVersion: API_VERSION,
params: { Bucket: BUCKET_NAME }
});
The cleanest way to do it for me was through execution of s3cmd from my node script like this (The example here is to delete files recursively):
var exec = require('child_process').exec;
var child;
var bucket = "myBucket";
var prefix = "myPrefix"; // this parameter is optional
var command = "s3cmd del -r s3://" + bucket + "/" + prefix;
child = exec(command, {maxBuffer: 5000 * 1024}, function (error, stdout, stderr) { // the maxBuffer is here to avoid the maxBuffer node process error
console.log('stdout: ' + stdout);
if (error !== null) {
console.log('exec error: ' + error);
}
});