I have a Lambda function in Node.js that processes new images added to my bucket. I want to run the function for all existing objects. How can I do this? I figured the easiest w
The following Lambda function will do what you require.
It iterates over the objects in your target S3 bucket and, for each one, invokes the desired Lambda function with a simulated S3 put event.
You will probably want to give this function a long timeout, because it invokes the target function once per object, one after another.
// Bucket whose objects are replayed, and the Lambda function that receives
// one simulated event per object.
const TARGET_BUCKET = 'my-bucket-goes-here';
const TARGET_LAMBDA_FUNCTION_NAME = 'TestFunct';

// Template for a simulated S3 "ObjectCreated:Put" notification.
// `s3.bucket.name` and `s3.object.key` are placeholders that get replaced for
// each object before the event is sent; every other field is sent as written.
const S3_PUT_SIMULATION_PARAMS = {
  Records: [
    {
      eventVersion: '2.0',
      eventTime: '1970-01-01T00:00:00.000Z',
      requestParameters: {
        sourceIPAddress: '127.0.0.1'
      },
      s3: {
        configurationId: 'testConfigRule',
        object: {
          eTag: '0123456789abcdef0123456789abcdef',
          sequencer: '0A1B2C3D4E5F678901',
          key: 'HappyFace.jpg',
          size: 1024
        },
        bucket: {
          arn: 'arn:aws:s3:::mybucket',
          name: 'sourcebucket',
          ownerIdentity: {
            principalId: 'EXAMPLE'
          }
        },
        s3SchemaVersion: '1.0'
      },
      responseElements: {
        'x-amz-id-2': 'EXAMPLE123/5678abcdefghijklambdaisawesome/mnopqrstuvwxyzABCDEFGH',
        'x-amz-request-id': 'EXAMPLE123456789'
      },
      awsRegion: 'us-east-1',
      eventName: 'ObjectCreated:Put',
      userIdentity: {
        principalId: 'EXAMPLE'
      },
      eventSource: 'aws:s3'
    }
  ]
};
// AWS SDK for JavaScript; supplies the S3 and Lambda service clients created here.
var aws = require('aws-sdk');
// S3 client, used to list the objects in the target bucket.
var s3 = new aws.S3();
// Lambda client, used to invoke the target function for each listed object.
var lambda = new aws.Lambda();
exports.handler = (event, context, callback) => {
retrieveS3BucketContents(TARGET_BUCKET, function(s3Objects){
simulateS3PutOperation(TARGET_BUCKET, s3Objects, simulateS3PutOperation, function(){
console.log("complete.");
});
});
};
/**
 * Lists every object in an S3 bucket, following pagination until the listing
 * is complete (a single listObjectsV2 call returns at most one page of keys).
 *
 * @param {string} bucket - Name of the bucket to list.
 * @param {Function} callback - Called once, with a single argument: an array
 *   of the object entries returned by S3 (each entry carries a `Key`). The
 *   array is empty when the bucket has no objects.
 * @throws {Error} Re-throws the error reported by `listObjectsV2` from inside
 *   the SDK callback, so a failed listing aborts the run instead of silently
 *   processing a partial list.
 */
function retrieveS3BucketContents(bucket, callback) {
  let collected = [];

  // Fetches one page, then either requests the next page or finishes.
  function fetchPage(continuationToken) {
    const params = { Bucket: bucket };
    if (continuationToken) {
      params.ContinuationToken = continuationToken;
    }

    s3.listObjectsV2(params, function (err, data) {
      if (err) {
        console.error(`Failed to list objects in bucket ${bucket}`, err);
        throw err;
      }

      collected = collected.concat(data.Contents || []);

      if (data.IsTruncated && data.NextContinuationToken) {
        fetchPage(data.NextContinuationToken);
      } else {
        callback(collected);
      }
    });
  }

  fetchPage(undefined);
}
/**
 * Encodes an object key the way S3 event notifications present it:
 * URL-encoded, with spaces as `+` and `/` separators left intact.
 */
function encodeS3EventKey(key) {
  return String(key)
    .split("/")
    .map(encodeURIComponent)
    .join("/")
    .replace(/%20/g, "+");
}

/**
 * Pops one object off the stack, invokes the target Lambda function with a
 * simulated S3 put event for it, then passes the remaining stack on.
 *
 * @param {string} bucket - Bucket name written into the simulated event.
 * @param {Array<{Key: string}>} s3ObjectStack - Objects still to process;
 *   this array is consumed (mutated) one entry per call.
 * @param {Function} callback - Continuation invoked after the Lambda call
 *   returns, as `callback(bucket, s3ObjectStack, callback, callbackEmpty)`.
 *   Pass `simulateS3PutOperation` itself to work through the whole stack.
 * @param {Function} callbackEmpty - Called with no arguments once the stack
 *   is empty.
 */
function simulateS3PutOperation(bucket, s3ObjectStack, callback, callbackEmpty) {
  if (s3ObjectStack.length === 0) {
    callbackEmpty();
    return;
  }

  const s3Obj = s3ObjectStack.pop();

  // Work on a copy so the shared module-level template is never modified.
  const simulatedEvent = JSON.parse(JSON.stringify(S3_PUT_SIMULATION_PARAMS));
  const record = simulatedEvent.Records[0];
  record.s3.bucket.name = bucket;
  // Real S3 events carry URL-encoded keys, and consumers decode them; sending
  // the raw key would corrupt names containing '+', '%' or spaces.
  record.s3.object.key = encodeS3EventKey(s3Obj.Key);

  const params = {
    FunctionName: TARGET_LAMBDA_FUNCTION_NAME,
    Payload: JSON.stringify(simulatedEvent)
  };

  lambda.invoke(params, function (err, data) {
    if (err) {
      // Log and carry on: one failed object must not silently abandon the
      // rest of the bucket or prevent callbackEmpty from ever firing.
      console.error(`Failed to invoke ${TARGET_LAMBDA_FUNCTION_NAME} for key ${s3Obj.Key}`, err);
    } else if (data && data.FunctionError) {
      // The invoke call itself succeeded but the target function reported an error.
      console.error(`${TARGET_LAMBDA_FUNCTION_NAME} reported an error for key ${s3Obj.Key}`, data.Payload);
    }
    callback(bucket, s3ObjectStack, callback, callbackEmpty);
  });
}
Below is the full policy that your Lambda function's execution role needs in order to run this code. It allows read/write access to CloudWatch Logs and list access to the S3 bucket. Fill in your bucket details wherever you see MY-BUCKET-GOES-HERE.
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "Stmt1477382207000",
      "Effect": "Allow",
      "Action": [
        "s3:ListBucket"
      ],
      "Resource": [
        "arn:aws:s3:::MY-BUCKET-GOES-HERE"
      ]
    },
    {
      "Effect": "Allow",
      "Action": [
        "lambda:InvokeFunction"
      ],
      "Resource": [
        "arn:aws:lambda:*:*:function:MY-TARGET-FUNCTION-GOES-HERE"
      ]
    },
    {
      "Effect": "Allow",
      "Action": [
        "logs:CreateLogGroup",
        "logs:CreateLogStream",
        "logs:PutLogEvents"
      ],
      "Resource": "arn:aws:logs:*:*:*"
    }
  ]
}