Is there any Amazon S3 client library for Node.js that allows listing of all files in S3 bucket?
The most known aws2js and knox don\'t seem to have this functionalit
In fact aws2js supports listing of objects in a bucket on a low level via s3.get()
method call. To do it one has to pass prefix
parameter which is documented on Amazon S3 REST API page:
var s3 = require('aws2js').load('s3', awsAccessKeyId, awsSecretAccessKey);
s3.setBucket(bucketName);
var folder = encodeURI('some/path/to/S3/folder');
var url = '?prefix=' + folder;
s3.get(url, 'xml', function (error, data) {
console.log(error);
console.log(data);
});
The data
variable in the above snippet contains a list of all objects in the bucketName
bucket.
Using the official aws-sdk:
var allKeys = [];
function listAllKeys(marker, cb)
{
s3.listObjects({Bucket: s3bucket, Marker: marker}, function(err, data){
allKeys.push(data.Contents);
if(data.IsTruncated)
listAllKeys(data.NextMarker, cb);
else
cb();
});
}
see s3.listObjects
Edit 2017:
Same basic idea, but listObjectsV2( ... )
is now recommended and uses a ContinuationToken
(see s3.listObjectsV2):
var allKeys = [];
function listAllKeys(token, cb)
{
var opts = { Bucket: s3bucket };
if(token) opts.ContinuationToken = token;
s3.listObjectsV2(opts, function(err, data){
allKeys = allKeys.concat(data.Contents);
if(data.IsTruncated)
listAllKeys(data.NextContinuationToken, cb);
else
cb();
});
}
const { S3 } = require("aws-sdk");
const s3 = new S3();
async function* listAllKeys(opts) {
opts = { ...opts };
do {
const data = await s3.listObjectsV2(opts).promise();
opts.ContinuationToken = data.NextContinuationToken;
yield data;
} while (opts.ContinuationToken);
}
const opts = {
Bucket: "bucket-xyz" /* required */,
// ContinuationToken: 'STRING_VALUE',
// Delimiter: 'STRING_VALUE',
// EncodingType: url,
// FetchOwner: true || false,
// MaxKeys: 'NUMBER_VALUE',
// Prefix: 'STRING_VALUE',
// RequestPayer: requester,
// StartAfter: 'STRING_VALUE'
};
async function main() {
// using for of await loop
for await (const data of listAllKeys(opts)) {
console.log(data.Contents);
}
}
main();
thats it
async function main() {
const keys = listAllKeys(opts);
console.log(await keys.next());
// {value: {…}, done: false}
console.log(await keys.next());
// {value: {…}, done: false}
console.log(await keys.next());
// {value: undefined, done: true}
}
main();
const lister = (opts) => (o) => {
let needMore = true;
(async () => {
const keys = listAllKeys(opts);
for await (const data of keys) {
o.next(data);
if (!needMore) break;
}
o.complete();
})();
return () => (needMore = false);
};
// Using Rxjs
const { Observable } = require("rxjs");
const { flatMap } = require("rxjs/operators");
function listAll() {
return Observable.create(lister(opts))
.pipe(flatMap((v) => v.Contents))
.subscribe(console.log);
}
listAll();
const EventEmitter = require("events");
const _eve = new EventEmitter();
async function onData(data) {
// will be called for each set of data
console.log(data);
}
async function onError(error) {
// will be called if any error
console.log(error);
}
async function onComplete() {
// will be called when data completely received
}
_eve.on("next", onData);
_eve.on("error", onError);
_eve.on("complete", onComplete);
const stop = lister(opts)({
next: (v) => _eve.emit("next", v),
error: (e) => _eve.emit("error", e),
complete: (v) => _eve.emit("complete", v),
});
Although @Meekohi's answer does technically work, I've had enough heartache with the S3 portion of the AWS SDK for NodeJS. After all the previous struggling with modules such as aws-sdk
, s3
, knox
, I decided to install s3cmd via the OS package manager and shell-out to it using child_process
Something like:
var s3cmd = new cmd_exec('s3cmd', ['ls', filepath, 's3://'+inputBucket],
function (me, data) {me.stdout += data.toString();},
function (me) {me.exit = 1;}
);
response.send(s3cmd.stdout);
(Using the cmd_exec
implementation from this question)
This approach just works really well - including for other problematic things like file upload.
This is an old question and I guess the AWS JS SDK has changed a lot since it was asked. Here's yet another way to do it these days:
s3.listObjects({Bucket:'mybucket', Prefix:'some-pfx'}).
on('success', function handlePage(r) {
//... handle page of contents r.data.Contents
if(r.hasNextPage()) {
// There's another page; handle it
r.nextPage().on('success', handlePage).send();
} else {
// Finished!
}
}).
on('error', function(r) {
// Error!
}).
send();
Meekohi provided a very good answer, but the (new) documentation states that NextMarker can be undefined. When this is the case, you should use the last key as the marker.
So his codesample can be changed into:
var allKeys = [];
function listAllKeys(marker, cb) {
s3.listObjects({Bucket: s3bucket, Marker: marker}, function(err, data){
allKeys.push(data.Contents);
if(data.IsTruncated)
listAllKeys(data.NextMarker || data.Contents[data.Contents.length-1].Key, cb);
else
cb();
});
}
Couldn't comment on the original answer since I don't have the required reputation. Apologies for the bad mark-up btw.