Running S3-put-triggered Lambda function on existing S3 objects? - node.js

I have a Lambda function in Node.js that processes new images added to my bucket. I want to run the function for all existing objects. How can I do this? I figured the easiest way is to "re-put" each object, to trigger the function, but I'm not sure how to do this.
To be clear - I want to run, one-time, on each of the existing objects. The trigger is already working for new objects, I just need to run it on the objects that were inserted before the lambda function was created.

The following Lambda function will do what you require.
It will iterate through each file in your target S3 bucket and, for each one, execute the desired Lambda function against it, emulating a put operation.
You're probably going to want to set a very long execution time allowance for this function.
var TARGET_BUCKET = "my-bucket-goes-here";
var TARGET_LAMBDA_FUNCTION_NAME = "TestFunct";
var S3_PUT_SIMULATION_PARAMS = {
    "Records": [
        {
            "eventVersion": "2.0",
            "eventTime": "1970-01-01T00:00:00.000Z",
            "requestParameters": {
                "sourceIPAddress": "127.0.0.1"
            },
            "s3": {
                "configurationId": "testConfigRule",
                "object": {
                    "eTag": "0123456789abcdef0123456789abcdef",
                    "sequencer": "0A1B2C3D4E5F678901",
                    "key": "HappyFace.jpg",
                    "size": 1024
                },
                "bucket": {
                    "arn": "arn:aws:s3:::mybucket",
                    "name": "sourcebucket",
                    "ownerIdentity": {
                        "principalId": "EXAMPLE"
                    }
                },
                "s3SchemaVersion": "1.0"
            },
            "responseElements": {
                "x-amz-id-2": "EXAMPLE123/5678abcdefghijklambdaisawesome/mnopqrstuvwxyzABCDEFGH",
                "x-amz-request-id": "EXAMPLE123456789"
            },
            "awsRegion": "us-east-1",
            "eventName": "ObjectCreated:Put",
            "userIdentity": {
                "principalId": "EXAMPLE"
            },
            "eventSource": "aws:s3"
        }
    ]
};

var aws = require('aws-sdk');
var s3 = new aws.S3();
var lambda = new aws.Lambda();

exports.handler = (event, context, callback) => {
    retrieveS3BucketContents(TARGET_BUCKET, function(s3Objects){
        simulateS3PutOperation(TARGET_BUCKET, s3Objects, simulateS3PutOperation, function(){
            console.log("complete.");
        });
    });
};

// List the objects in the bucket (note: listObjectsV2 returns at most 1,000 keys per call).
function retrieveS3BucketContents(bucket, callback){
    s3.listObjectsV2({
        Bucket: bucket
    }, function(err, data) {
        if (err) return console.log(err, err.stack);
        callback(data.Contents);
    });
}

// Pop one object off the stack, invoke the target Lambda with a simulated put event,
// then recurse via `callback` until the stack is empty.
function simulateS3PutOperation(bucket, s3ObjectStack, callback, callbackEmpty){
    var params = {
        FunctionName: TARGET_LAMBDA_FUNCTION_NAME,
        Payload: ""
    };

    if(s3ObjectStack.length > 0){
        var s3Obj = s3ObjectStack.pop();
        var p = S3_PUT_SIMULATION_PARAMS;
        p.Records[0].s3.bucket.name = bucket;
        p.Records[0].s3.object.key = s3Obj.Key;
        params.Payload = JSON.stringify(p, null, 2);
        lambda.invoke(params, function(err, data) {
            if (err) console.log(err, err.stack); // an error occurred
            else {
                callback(bucket, s3ObjectStack, callback, callbackEmpty);
            }
        });
    }
    else {
        callbackEmpty();
    }
}
Below is the full policy that your Lambda function's execution role will need to run this method. It allows writing to CloudWatch Logs, listing the target S3 bucket, and invoking the target Lambda function. Fill in your bucket details where you see MY-BUCKET-GOES-HERE and the target function's ARN where you see MY-FUNCTION-ARN-GOES-HERE.
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "Stmt1477382207000",
            "Effect": "Allow",
            "Action": [
                "s3:ListBucket"
            ],
            "Resource": [
                "arn:aws:s3:::MY-BUCKET-GOES-HERE"
            ]
        },
        {
            "Effect": "Allow",
            "Action": [
                "lambda:InvokeFunction"
            ],
            "Resource": "MY-FUNCTION-ARN-GOES-HERE"
        },
        {
            "Effect": "Allow",
            "Action": [
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents"
            ],
            "Resource": "arn:aws:logs:*:*:*"
        }
    ]
}

This thread helped push me in the right direction, as I needed to invoke a Lambda function per file for 50k existing files across two buckets. I decided to write it in Python and limit the number of Lambda functions running simultaneously to 500 (the concurrency limit in many AWS regions is 1000).
The script creates a worker pool of 500 threads that feed off a queue of bucket keys. Each worker waits for its Lambda invocation to finish before picking up another. Since running this script against my 50k files will take a couple of hours, I'm just running it off my local machine. Hope this helps someone!
#!/usr/bin/env python
# Proper imports
import json
import time
import base64
from queue import Queue
from threading import Thread

from argh import dispatch_command

import boto3
from boto.s3.connection import S3Connection

client = boto3.client('lambda')


def invoke_lambdas():
    try:
        # replace these with your access keys
        s3 = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
        buckets = [s3.get_bucket('bucket-one'), s3.get_bucket('bucket-two')]

        queue = Queue()
        num_threads = 500

        # create a worker pool
        for i in range(num_threads):
            worker = Thread(target=invoke, args=(queue,))
            worker.setDaemon(True)
            worker.start()

        for bucket in buckets:
            for key in bucket.list():
                queue.put((bucket.name, key.key))

        queue.join()

    except Exception as e:
        print(e)


def invoke(queue):
    while True:
        bucket_key = queue.get()
        try:
            print('Invoking lambda with bucket %s key %s. Remaining to process: %d'
                  % (bucket_key[0], bucket_key[1], queue.qsize()))
            trigger_event = {
                'Records': [{
                    's3': {
                        'bucket': {
                            'name': bucket_key[0]
                        },
                        'object': {
                            'key': bucket_key[1]
                        }
                    }
                }]
            }
            # replace lambda_function_name with the actual name
            # InvocationType='RequestResponse' means it will wait until the lambda fn is complete
            response = client.invoke(
                FunctionName='lambda_function_name',
                InvocationType='RequestResponse',
                LogType='None',
                ClientContext=base64.b64encode(json.dumps({}).encode()).decode(),
                Payload=json.dumps(trigger_event).encode()
            )
            if response['StatusCode'] != 200:
                print(response)
        except Exception as e:
            print(e)
            print('Exception during invoke_lambda')
        queue.task_done()


if __name__ == '__main__':
    dispatch_command(invoke_lambdas)

As I had to do this on a very large bucket, and Lambda functions have a capped maximum execution time (currently 15 minutes), I ended up writing a script with the Ruby AWS SDK.
require 'aws-sdk'
require 'json'

class LambdaS3Invoker
  BUCKET_NAME = "HERE_YOUR_BUCKET"
  FUNCTION_NAME = "HERE_YOUR_FUNCTION_NAME"
  AWS_KEY = "HERE_YOUR_AWS_KEY"
  AWS_SECRET = "HERE_YOUR_AWS_SECRET"
  REGION = "HERE_YOUR_REGION"

  def execute
    bucket.objects({ prefix: 'products' }).each do |o|
      lambda_invoke(o.key)
    end
  end

  private

  def lambda_invoke(key)
    lambda.invoke({
      function_name: FUNCTION_NAME,
      invocation_type: 'Event',
      payload: JSON.generate({
        Records: [{
          s3: {
            object: {
              key: key
            },
            bucket: {
              name: BUCKET_NAME
            }
          }
        }]
      })
    })
  end

  def lambda
    @lambda ||= Aws::Lambda::Client.new(
      region: REGION,
      access_key_id: AWS_KEY,
      secret_access_key: AWS_SECRET
    )
  end

  def resource
    @resource ||= Aws::S3::Resource.new(
      region: REGION,
      access_key_id: AWS_KEY,
      secret_access_key: AWS_SECRET
    )
  end

  def bucket
    @bucket ||= resource.bucket(BUCKET_NAME)
  end
end
And then you can call it like:
LambdaS3Invoker.new.execute

What you need to do is create a one-time script which uses the AWS SDK to invoke your Lambda function. This solution doesn't require you to "re-put" each object.
I am going to base my answer on the AWS JS SDK.

To be clear - I want to run, one-time, on each of the existing objects. The trigger is already working for new objects, I just need to run it on the objects that were inserted before the lambda function was created.

As you have a working Lambda function that accepts S3 put events, what you need to do is find all the unprocessed objects in S3. If you have DB entries for each S3 object this should be easy; if not, you might find the S3 list-objects function handy: http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#listObjectsV2-property
Then, for each unprocessed S3 object, create a JSON object that looks like an S3 put event message (shown below) and call the Lambda invoke function with that JSON object as the payload.
You can find the Lambda invoke function docs at http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Lambda.html#invoke-property
When creating the fake S3 put event message object for your Lambda function, you can ignore most of the properties, depending on what your function actually uses. At a minimum you will probably have to set the bucket name and the object key.
S3 Put Event Message Structure: http://docs.aws.amazon.com/AmazonS3/latest/dev/notification-content-structure.html
{
    "Records": [
        {
            "eventVersion": "2.0",
            "eventSource": "aws:s3",
            "awsRegion": "us-east-1",
            "eventTime": "1970-01-01T00:00:00.000Z",
            "eventName": "ObjectCreated:Put",
            "userIdentity": {
                "principalId": "AIDAJDPLRKLG7UEXAMPLE"
            },
            "requestParameters": {
                "sourceIPAddress": "127.0.0.1"
            },
            "responseElements": {
                "x-amz-request-id": "C3D13FE58DE4C810",
                "x-amz-id-2": "FMyUVURIY8/IgAtTv8xRjskZQpcIZ9KG4V5Wp6S7S/JRWeUWerMUE5JgHvANOjpD"
            },
            "s3": {
                "s3SchemaVersion": "1.0",
                "configurationId": "testConfigRule",
                "bucket": {
                    "name": "mybucket",
                    "ownerIdentity": {
                        "principalId": "A3NL1KOZZKExample"
                    },
                    "arn": "arn:aws:s3:::mybucket"
                },
                "object": {
                    "key": "HappyFace.jpg",
                    "size": 1024,
                    "eTag": "d41d8cd98f00b204e9800998ecf8427e",
                    "versionId": "096fKKXTRTtl3on89fVO.nfljtsv6qko",
                    "sequencer": "0055AED6DCD90281E5"
                }
            }
        }
    ]
}

Well, basically what you need is to use some API calls (boto, for example, if you use Python) to list all new objects or all objects in your S3 bucket and then process those objects.
Here is a snippet:
from boto.s3.connection import S3Connection

conn = S3Connection()
source = conn.get_bucket(src_bucket)
src_list = set([key.name for key in source.get_all_keys(headers=None, prefix=prefix)])

# and then you can go over this src list
for entry in src_list:
    # do something with each key here
    ...
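
Note that boto (above) is the legacy Python SDK. A rough sketch of the whole approach with boto3 - list every object, then invoke the processing function with a minimal fake put event, as the other answers describe - might look like this (the bucket and function names are placeholders, not part of the original answer):
import json
import boto3

s3 = boto3.client('s3')
lambda_client = boto3.client('lambda')

# list_objects_v2 returns at most 1,000 keys per call, so paginate.
paginator = s3.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket='my-bucket'):
    for obj in page.get('Contents', []):
        # Minimal fake S3 put event: just the bucket name and object key.
        event = {'Records': [{'s3': {'bucket': {'name': 'my-bucket'},
                                     'object': {'key': obj['Key']}}}]}
        lambda_client.invoke(FunctionName='my-function',
                             InvocationType='Event',  # async, fire-and-forget
                             Payload=json.dumps(event))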

Related

How to POST an object using presigned POST with KMS (customer managed key)

When I use presigned POST to generate the URL and other attributes, and I try to upload my image with server-side encryption using a customer managed key (a KMS key created by me), I can upload with {"x-amz-server-side-encryption": "aws:kms"}. How can I upload with my customer managed key?
If I want to upload an image with a customer managed key, should I be using x-amz-server-side-encryption-customer-key and x-amz-server-side-encryption-customer-key-MD5?
Here is my sample code:
import logging

import boto3
from botocore.config import Config
from botocore.exceptions import ClientError

s3_client = boto3.client("s3", config=Config(signature_version="s3v4"))

try:
    bucket_name = "s3-bucket"
    fields = {
        "x-amz-server-side-encryption": "aws:kms",
        # "x-amz-server-side-encryption-customer-algorithm": "AES256",
        # "x-amz-server-side-encryption-customer-key": "<customer-managed-key>",
        # "x-amz-server-side-encryption-customer-key-MD5": "<customer-managed-key>"
    }
    conditions = [
        # 1 Byte - 25 MB
        ["content-length-range", 1, 26214400],
        {"x-amz-server-side-encryption": "aws:kms"},
        # {"x-amz-server-side-encryption-customer-algorithm": "AES256"},
        # {"x-amz-server-side-encryption-customer-key": "<customer-managed-key>"},
        # {"x-amz-server-side-encryption-customer-key-MD5": "<customer-managed-key>"}
    ]
    file_name = "test.png"
    response = s3_client.generate_presigned_post(bucket_name,
                                                 Key=file_name,
                                                 Fields=fields,
                                                 Conditions=conditions,
                                                 ExpiresIn=3000)
    print(response)
except ClientError as e:
    print(logging.error(e))
After I use "x-amz-server-side-encryption-aws-kms-key-id": "<KEY ID>", I got Access Denied.
This is the new sample code:
import logging

import boto3
from botocore.config import Config
from botocore.exceptions import ClientError

s3_client = boto3.client("s3", config=Config(signature_version="s3v4"))

try:
    bucket_name = "s3-bucket"
    fields = {
        "x-amz-server-side-encryption": "aws:kms",
        "x-amz-server-side-encryption-aws-kms-key-id": "<KEY ID>"
    }
    conditions = [
        # 1 Byte - 25 MB
        ["content-length-range", 1, 26214400],
        {"x-amz-server-side-encryption": "aws:kms"},
        {"x-amz-server-side-encryption-aws-kms-key-id": "<KEY ID>"}
    ]
    file_name = "test.png"
    response = s3_client.generate_presigned_post(bucket_name,
                                                 Key=file_name,
                                                 Fields=fields,
                                                 Conditions=conditions,
                                                 ExpiresIn=300)
    print(response)
except ClientError as e:
    print(logging.error(e))
{
    "code": 2000,
    "messages": [],
    "payload": {
        "url": "https://s3-bucket.s3.amazonaws.com/",
        "fields": {
            "Content-Type": "image/png",
            "x-amz-server-side-encryption": "aws:kms",
            "x-amz-server-side-encryption-aws-kms-key-id": "12345678-01s1-abba-abcd-fb9f6e5bf13d",
            "key": "kms005.png",
            "x-amz-algorithm": "AWS4-HMAC-SHA256",
            "x-amz-credential": "AKIAXHC4C5L2YWPYEWHO/20210223/us-east-1/s3/aws4_request",
            "x-amz-date": "20210223T073640Z",
            "policy": "eyJleHBpcmF0aW9uIjogIjIwMjEtMDItMjNUMDc6NDE6NDBaIiwgImNvbmRpdGlvbnMiOiBbWyJjb250ZW50LWxlbmd0aC1yYW5nZSIsIDEsIDI2MjE0NDAwXSwgeyJ4LWFtei1zZXJ2ZXItc2lkZS1lbmNyeXB0aW9uIjogImF3czprbXMifSwgeyJidWNrZXQiOiAiczMtYWRyaWFuLXRlc3QtYnVja2V0In0sIHsia2V5IjogImttczAwNS5wbmcifSwgeyJ4LWFtei1hbGdvcml0aG0iOiAiQVdTNC1ITUFDLVNIQTI1NiJ9LCB7IngtYW16LWNyZWRlbnRpYWwiOiAiQUtJQVhIQzRDNUwyWVdQWUVXSE8vMjAyMTAyMjMvdXMtZWFzdC0xL3MzL2F3czRfcmVxdWVzdCJ9LCB7IngtYW16LWRhdGUiOiAiMjAyMTAyMjNUMDczNjQwWiJ9XX0=",
            "x-amz-signature": "e0c40e744d1989578517168341fa17a21c297ffa0e1be6c84e448dea373b7d16"
        }
    },
    "request_id": "1234567890"
}
Customer managed key, am I using the x-amz-server-side-encryption-customer-key and x-amz-server-side-encryption-customer-key-MD5?

There is no such header as x-amz-server-side-encryption-customer-key for SSE-KMS (it is for SSE-C, see below). Instead, if you are going to use "x-amz-server-side-encryption": "aws:kms" and want to use your own CMK (not the AWS managed CMK), then you have to use:
x-amz-server-side-encryption-aws-kms-key-id - to specify the ID of the customer managed CMK used to protect the data.
The header x-amz-server-side-encryption-customer-key-MD5 is for SSE-C (customer-provided keys), not for SSE-KMS.
The KMS key policy must allow kms:Encrypt, kms:Decrypt, kms:ReEncrypt*, kms:GenerateDataKey* and kms:DescribeKey. After adding these actions to the KMS key policy, the upload succeeds:
"Statement": [
"Action": [
"kms:Encrypt",
"kms:Decrypt",
"kms:ReEncrypt*",
"kms:GenerateDataKey*",
"kms:DescribeKey"
],
"Resource": "*"
}
]
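
For contrast, the customer-key headers mentioned above belong to SSE-C, where the client supplies the raw key itself. A minimal boto3 sketch of the two modes (the bucket name, object keys and KMS key ID are placeholders, not part of the original answer) might look like this:
import os
import boto3

s3 = boto3.client("s3")

# SSE-C: the client supplies the raw 256-bit key; S3 does not store it.
# boto3 base64-encodes the key and computes the MD5 header automatically.
sse_c_key = os.urandom(32)
s3.put_object(
    Bucket="s3-bucket",
    Key="ssec-example.png",
    Body=b"...",
    SSECustomerAlgorithm="AES256",
    SSECustomerKey=sse_c_key,
)

# SSE-KMS: S3 encrypts the object with the given customer managed KMS key.
s3.put_object(
    Bucket="s3-bucket",
    Key="kms-example.png",
    Body=b"...",
    ServerSideEncryption="aws:kms",
    SSEKMSKeyId="<KEY ID>",
)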

How to create s3 bucket with logging enabled and make it private using boto3?

I want to create a bucket with
Logging
Encryption
Private access, and
an Alert
when it is accessed without HTTPS. How can I achieve this?
I have tried a few lines using boto3, but I am getting an error in the logging step:
def create_S3_Bucket(env, filepath):
    s3_client = AWSresourceconnect(filepath, 's3')
    bucket_name = "s3bucket123"
    print(bucket_name)
    try:
        s3_bucket = s3_client.create_bucket(Bucket=bucket_name)
        print('bucket created')
        print(s3_bucket)

        response = s3_client.put_bucket_encryption(
            Bucket=bucket_name,
            ServerSideEncryptionConfiguration={
                'Rules': [
                    {
                        'ApplyServerSideEncryptionByDefault': {
                            'SSEAlgorithm': 'AES256'
                        }
                    },
                ]
            }
        )
        print("response of encryption")
        print(response)  # prints metadata successfully

        responselogging = s3_client.put_bucket_logging(
            Bucket=bucket_name,
            BucketLoggingStatus={
                'LoggingEnabled': {
                    'TargetBucket': bucket_name,
                    'TargetGrants': [
                        {
                            'Grantee': {
                                'Type': 'Group',
                                'URI': 'http://acs.amazonaws.com/groups/global/AllUsers',
                            },
                            'Permission': 'READ',
                        },
                    ],
                    'TargetPrefix': 'test/',
                },
            },
        )
        print("response of logging")
        print(responselogging)
        Output = bucket_name
    except Exception as e:
        Output = "error:" + str(e)
        print(e)  # error: An error occurred (InvalidTargetBucketForLogging) when calling the PutBucketLogging operation: You must give the log-delivery group WRITE and READ_ACP permissions to the target bucket
        bucket_name = ''
    return Output
I want to enable
Logging
Private bucket and objects
Encryption
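
The InvalidTargetBucketForLogging error above says the target bucket must first grant the S3 log-delivery group WRITE and READ_ACP. A minimal sketch of that grant (assuming ACLs are enabled on the bucket; the client and bucket name mirror the code above) could be:
import boto3

s3_client = boto3.client("s3")
bucket_name = "s3bucket123"

# put_bucket_acl replaces the whole ACL, so re-grant the owner's full control too.
log_delivery_group = 'uri="http://acs.amazonaws.com/groups/s3/LogDelivery"'
owner_id = s3_client.get_bucket_acl(Bucket=bucket_name)["Owner"]["ID"]

s3_client.put_bucket_acl(
    Bucket=bucket_name,
    GrantWrite=log_delivery_group,    # the log-delivery group can write log objects
    GrantReadACP=log_delivery_group,  # and read the bucket ACL
    GrantFullControl='id="{}"'.format(owner_id),
)

# After this, the put_bucket_logging call from the question should no longer
# raise InvalidTargetBucketForLogging.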

About Amazon SageMaker Ground Truth save in S3 after label

I am setting up an automatic data labelling pipeline for my colleague.
First, I define the Ground Truth request based on the API (bucket, manifests, etc.).
Second, I create the labelling job, and all files are uploaded to S3 immediately.
After that my colleague receives an email saying it is ready to label; he then labels the data and submits.
Until now, everything is quick and works well. Then I check the SageMaker labelling job dashboard: it shows the task is in progress, and it takes a very long time to find out whether it completed or failed. I don't know the reason. Yesterday it saved the results at 4 am, which took around 6 hours. But if I create the labelling job on the website instead of sending API requests, it saves the results quickly.
Can anyone explain this? Or do I maybe need to set up a time sync or some other configuration?
This is my config:
{
"InputConfig": {
"DataSource": {
"S3DataSource": {
"ManifestS3Uri": ""s3://{bucket_name}/{JOB_ID}/{manifest_name}-{JOB_ID}.manifest""
}
},
"DataAttributes": {
"ContentClassifiers": [
"FreeOfPersonallyIdentifiableInformation",
"FreeOfAdultContent"
]
}
},
"OutputConfig": {
"S3OutputPath": "s3://{bucket_name}/{JOB_ID}/output-{manifest_name}/"
},
"HumanTaskConfig": {
"AnnotationConsolidationConfig": {
"AnnotationConsolidationLambdaArn": "arn:aws:lambda:us-east-2:266458841044:function:PRE-TextMultiClass"
},
"PreHumanTaskLambdaArn": "arn:aws:lambda:us-east-2:266458841044:function:PRE-TextMultiClass",
"NumberOfHumanWorkersPerDataObject": 2,
"TaskDescription": "Dear Annotator, please label it according to instructions. Thank you!",
"TaskKeywords": [
"text",
"label"
],
"TaskTimeLimitInSeconds": 600,
"TaskTitle": "Label Text",
"UiConfig": {
"UiTemplateS3Uri": "s3://{bucket_name}/instructions.template"
},
"WorkteamArn": "work team arn"
},
"LabelingJobName": "Label",
"RoleArn": "my role arn",
"LabelAttributeName": "category",
"LabelCategoryConfigS3Uri": ""s3://{bucket_name}/labels.json""
}
I think my Lambda functions are the problem; when I change to the AWS-provided ARNs (pre-human and annotation consolidation), everything works fine.
This is my afterLabeling (annotation consolidation) Lambda:
import json
import boto3
from urllib.parse import urlparse


def lambda_handler(event, context):
    consolidated_labels = []

    parsed_url = urlparse(event['payload']['s3Uri'])
    s3 = boto3.client('s3')
    textFile = s3.get_object(Bucket=parsed_url.netloc, Key=parsed_url.path[1:])
    filecont = textFile['Body'].read()
    annotations = json.loads(filecont)

    for dataset in annotations:
        for annotation in dataset['annotations']:
            new_annotation = json.loads(annotation['annotationData']['content'])
            label = {
                'datasetObjectId': dataset['datasetObjectId'],
                'consolidatedAnnotation': {
                    'content': {
                        event['labelAttributeName']: {
                            'workerId': annotation['workerId'],
                            'result': new_annotation,
                            'labeledContent': dataset['dataObject']
                        }
                    }
                }
            }
            consolidated_labels.append(label)

    return consolidated_labels
Are there any reasons?

Micronaut AWS API Gateway Authorizer JSON output issue

I've written a simple Lambda function in Micronaut/Groovy to return Allow/Deny policies as an AWS API Gateway authorizer. When used as the API Gateway authorizer, the JSON cannot be parsed:
Execution failed due to configuration error: Could not parse policy
When testing locally, the response has the correct property case in the JSON, e.g.:
{
    "principalId": "user",
    "PolicyDocument": {
        "Context": {
            "stringKey": "1551172564541"
        },
        "Version": "2012-10-17",
        "Statement": [
            {
                "Action": "execute-api:Invoke",
                "Effect": "Allow",
                "Resource": "arn:aws:execute-api:eu-west-1:<account>:<ref>/*/GET/"
            }
        ]
    }
}
When this is run in AWS the JSON response has the properties all in lowercase:
{
    "principalId": "user",
    "policyDocument": {
        "context": {
            "stringKey": "1551172664327"
        },
        "version": "2012-10-17",
        "statement": [
            {
                "resource": "arn:aws:execute-api:eu-west-1:<account>:<ref>/*/GET/",
                "action": "execute-api:Invoke",
                "effect": "Allow"
            }
        ]
    }
}
Not sure if the case is the issue, but I cannot see what else might be wrong (I have tried many variations of the output).
I've tried various Jackson annotations (@JsonNaming(PropertyNamingStrategy.UpperCamelCaseStrategy.class), etc.) but they do not seem to have an effect on the output in AWS.
Any idea how to sort this? Thanks.
Example code, trying to get the output to look like the first example above.
Running the example locally using:
runtime "io.micronaut:micronaut-function-web"
runtime "io.micronaut:micronaut-http-server-netty"
Lambda function handler:
AuthResponse sessionAuth(APIGatewayProxyRequestEvent event) {
    AuthResponse authResponse = new AuthResponse()
    authResponse.principalId = 'user'
    authResponse.policyDocument = new PolicyDocument()
    authResponse.policyDocument.version = "2012-10-17"
    authResponse.policyDocument.setStatement([new session.auth.Statement(
        Effect: Statement.Effect.Allow,
        Action: "execute-api:Invoke",
        Resource: "arn:aws:execute-api:eu-west-1:<account>:<ref>/*/GET/"
    )])
    return authResponse
}
AuthResponse looks like:
@CompileStatic
class AuthResponse {
    String principalId
    PolicyDocument policyDocument
}

@JsonNaming(PropertyNamingStrategy.UpperCamelCaseStrategy.class)
@CompileStatic
class PolicyDocument {
    String Version
    List<Statement> Statement = []
}

@JsonNaming(PropertyNamingStrategy.UpperCamelCaseStrategy.class)
@CompileStatic
class Statement {
    String Action
    String Effect
    String Resource
}
It looks like you cannot rely on the AWS Lambda Java serializer not to change your JSON response if you are relying on some kind of annotation or mapper. If you want the response to be untouched, you'll need to use the raw output stream type of handler.
See the end of this AWS doc: Handler Input/Output Types (Java).

Amazon S3 waitFor() inside AWS Lambda

I'm having an issue when calling the S3.waitFor() function from inside a Lambda function (Serverless, Node.js). I'm trying to asynchronously write a file into Amazon S3 using S3.putObject() from one REST API, and poll for the result file from another REST API using S3.waitFor(), to see if the write is finished.
Please see the following snippet:
...
S3.waitFor('objectExists', {
    Bucket: bucketName,
    Key: fileName,
    $waiter: {
        maxAttempts: 5,
        delay: 3
    }
}, (error, data) => {
    if (error) {
        console.log("error:" + JSON.stringify(error))
    } else {
        console.log("Success")
    }
});
...
Given a valid bucketName and an invalid fileName, when the code runs in my local test script, it returns an error after 15 seconds (3 seconds x 5 retries) and produces the following result:
error: {
    "message": "Resource is not in the state objectExists",
    "code": "ResourceNotReady",
    "region": null,
    "time": "2018-08-03T06:08:12.276Z",
    "requestId": "AD621033DCEA7670",
    "extendedRequestId": "JNkxddWX3IZfauJJ63SgVwyv5nShQ+Mworb8pgCmb1f/cQbTu3+52aFuEi8XGro72mJ4ik6ZMGA=",
    "retryable": true,
    "statusCode": 404,
    "retryDelay": 3000
}
Meanwhile, when it runs inside the AWS Lambda function, it returns a result immediately, as follows:
error: {
    "message": "Resource is not in the state objectExists",
    "code": "ResourceNotReady",
    "region": null,
    "time": "2018-08-03T05:49:43.178Z",
    "requestId": "E34D731777472663",
    "extendedRequestId": "ONMGnQkd14gvCfE/FWk54uYRG6Uas/hvV6OYeiax5BTOCVwbxGGvmHxMlOHuHPzxL5gZOahPUGM=",
    "retryable": false,
    "statusCode": 403,
    "retryDelay": 3000
}
As you can see, the retryable and statusCode values differ between the two.
On Lambda, it seems it always gets statusCode 403 when the file doesn't exist, while on my local machine everything works as expected (retried 5 times every 3 seconds and received statusCode 404).
I wonder if it has anything to do with the IAM role. Here are my IAM role statements from my serverless.yml:
iamRoleStatements:
  - Effect: "Allow"
    Action:
      - "logs:CreateLogGroup"
      - "logs:CreateLogStream"
      - "logs:PutLogEvents"
      - "ec2:CreateNetworkInterface"
      - "ec2:DescribeNetworkInterfaces"
      - "ec2:DeleteNetworkInterface"
      - "sns:Publish"
      - "sns:Subscribe"
      - "s3:*"
    Resource: "*"
How can I make it work from the Lambda function?
Thank you in advance!
It turned out that the key is how you set up the IAM role for the bucket and all the objects under it.
Based on the AWS docs here, S3.waitFor() relies on the underlying S3.headObject():

Waits for the objectExists state by periodically calling the underlying S3.headObject() operation every 5 seconds (at most 20 times).

Meanwhile, S3.headObject() itself relies on the HEAD Object API, which has the following rule, as stated in the AWS docs here:

You need the s3:GetObject permission for this operation. For more information, go to Specifying Permissions in a Policy in the Amazon Simple Storage Service Developer Guide. If the object you request does not exist, the error Amazon S3 returns depends on whether you also have the s3:ListBucket permission.
If you have the s3:ListBucket permission on the bucket, Amazon S3 will return an HTTP status code 404 ("no such key") error.
If you don't have the s3:ListBucket permission, Amazon S3 will return an HTTP status code 403 ("access denied") error.

It means that I needed to add the s3:ListBucket action on the bucket resource containing the objects, in order to get a 404 response when an object doesn't exist.
Therefore, I configured the CloudFormation AWS::IAM::Policy resource as below, where I added the s3:Get* and s3:List* actions specifically on the bucket itself (i.e. S3FileStorageBucket).
"IamPolicyLambdaExecution": {
"Type": "AWS::IAM::Policy",
"DependsOn": [
"IamRoleLambdaExecution",
"S3FileStorageBucket"
],
"Properties": {
"PolicyName": { "Fn::Join": ["-", ["Live-RolePolicy", { "Ref": "environment"}]]},
"PolicyDocument": {
"Version": "2012-10-17",
"Statement": [
{
"Effect":"Allow",
"Action": [
"s3:Get*",
"s3:List*"
],
"Resource": {
"Fn::Join": [
"",
[
"arn:aws:s3:::",
{
"Ref": "S3FileStorageBucket"
}
]
]
}
},
{
"Effect":"Allow",
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject"
],
"Resource": {
"Fn::Join": [
"",
[
"arn:aws:s3:::",
{
"Ref": "S3FileStorageBucket"
},
"/*"
]
]
}
},
...
Now I'm able to use S3.waitFor() to poll for a file/object under the bucket with only a single API call, and get the result only when it's ready, or an error when the resource is not ready after the specified timeout.
That way, the client implementation is much simpler, as it doesn't have to implement the polling itself.
Hope someone finds this useful. Thank you.
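
For what it's worth, the same waiter pattern exists in boto3 and is subject to the same s3:ListBucket requirement (404 vs 403 for a missing key). A minimal Python sketch, with placeholder bucket and key names:
import boto3
from botocore.exceptions import WaiterError

s3 = boto3.client("s3")
waiter = s3.get_waiter("object_exists")

try:
    # Polls HeadObject; without s3:ListBucket on the bucket, a missing key
    # surfaces as 403 instead of 404, just like with S3.waitFor() above.
    waiter.wait(
        Bucket="bucketName",
        Key="fileName",
        WaiterConfig={"Delay": 3, "MaxAttempts": 5},
    )
    print("Success")
except WaiterError as error:
    print("error:", error)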
