Trying to upload a file from the Lambda tmp folder to an S3 bucket - node.js

I am new to Lambda functions and I am trying to upload files to an S3 bucket from a Lambda function.
I create multiple zip files in the /tmp folder and then want to upload those zip files to the S3 bucket. I have given the Lambda function permission to access the S3 bucket, and it is not showing any error.
I tried different approaches to resolve this but was not able to fix it.
import fs from 'fs';
import AWS from 'aws-sdk';
import delay from "delay";

const s3 = new AWS.S3({
  accessKeyId: "***",
  secretAccessKey: "***",
  region: "***"
});

const uploadFullfile = () => {
  // reg ex to match
  var re = /\.zip$/;
  // ensure that this file is in the directory of the files you want to run the cronjob on
  fs.readdir("/tmp/", function(err: any, files: any) {
    if (err) {
      console.log("Could not list the directory.", err);
      process.exit(1);
    }
    var matches = files.filter(function(text: any) { return re.test(text) });
    var numFiles = matches.length;
    if (numFiles) {
      // Read in the file, convert it to base64, store to S3
      for (let i = 0; i < numFiles; i++) {
        uploadCandidate(matches[i]);
      }
    }
  });

  const uploadCandidate = (fileName: any) => {
    fs.readFile('/tmp/' + fileName, async (err: any, data: any) => {
      console.log("entry", fileName);
      if (err) throw err;
      console.log("params");
      await s3.putObject({
        Bucket: 'testbucket', // pass your bucket name
        Key: fileName, // file will be saved as testBucket/contacts.csv
        ContentType: 'application/zip',
        Body: data,
      }, function (resp) {
        console.log('Done');
      });
      //delay(1000);
      //fs.unlink('/tmp/'+fileName, function(){
      //  console.log("deleting file");
      //  console.log('/tmp/'+fileName);
      //});
    });
  }
}

export default uploadFullfile;
I am not getting any error and I have given the Lambda permission to access the S3 bucket.
This is the output I am getting:
2021-01-14T17:22:38.354+05:30 2021-01-14T11:52:38.354Z *** INFO entry state_fullfile_2021-01-14-11:01:03_part0.zip
2021-01-14T17:22:38.354+05:30 2021-01-14T11:52:38.354Z *** INFO params
2021-01-14T17:22:38.375+05:30 2021-01-14T11:52:38.374Z *** INFO entry association_fullfile_2021-01-14-11:01:03_part5.zip
2021-01-14T17:22:38.375+05:30 2021-01-14T11:52:38.375Z *** INFO params
2021-01-14T17:22:38.378+05:30 2021-01-14T11:52:38.378Z *** INFO entry remark_table_fullfile_2021-01-14-11:01:03_part1.zip
2021-01-14T17:22:38.378+05:30 2021-01-14T11:52:38.378Z **** INFO params
2021-01-14T17:22:38.394+05:30 END RequestId: ****
2021-01-14T17:22:38.394+05:30 REPORT RequestId: *** Duration: 83.91 ms Billed Duration: 84 ms Memory Size: 1024 MB Max Memory Used: 322 MB

Did you try increasing the Lambda function timeout?

This problem occurs due to a permission issue with the VPC endpoint. Here is the solution:
new-vpc-endpoint-for-amazon-s3
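Independent of the endpoint fix, note that the handler above returns before any of the putObject callbacks fire: the log shows the invocation ending after about 84 ms and "Done" is never printed. Below is a minimal promise-based sketch (plain JavaScript, bucket name taken from the question) that resolves only when every upload has finished, so S3 or VPC errors actually show up in the logs; the caller must await it from an async handler.

const fs = require('fs');
const path = require('path');
const AWS = require('aws-sdk');

const s3 = new AWS.S3();

// Sketch: upload every *.zip found in /tmp and wait for all uploads to finish.
const uploadFullfile = async () => {
  const zips = fs.readdirSync('/tmp/').filter((name) => /\.zip$/.test(name));
  await Promise.all(
    zips.map((fileName) =>
      s3.putObject({
        Bucket: 'testbucket', // bucket name from the question
        Key: fileName,
        ContentType: 'application/zip',
        Body: fs.readFileSync(path.join('/tmp', fileName)),
      }).promise()
    )
  );
  console.log('Done');
};

module.exports = uploadFullfile;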

Related

AWS lambda never completes but doesn't appear to timeout either

I'm attempting to create a simple application. A user emails an attachment to a special inbox, AWS SES gets the email and stores it in S3, a lambda function is triggered and the lambda finds the email in S3, parses out the attachment (in this case a jpg) and then stores it in a different S3 bucket. Finally, the application creates a new row in an Airtable with the image as an attachment.
When I invoke this function locally using Serverless, everything works fine. The email with the image is already stored in an S3 bucket, so I've created a mock which passes the key explicitly to my Lambda. However, when I deploy the application and send a test email, the following happens:
Email is stored in S3 bucket 'to-airtable-temp'
Lambda function is called
Before the email can be found and the attachment stripped off and stored in a separate S3 bucket, the function just stops. No error message or timeout. It just stops. The CloudWatch logs look like the following:
START RequestId: 6a0364ae-0879-4ee1-9dcd-c8747de1a650 Version: $LATEST
2020-02-17T07:39:55.465Z 6a0364ae-0879-4ee1-9dcd-c8747de1a650 INFO {
  s3SchemaVersion: '1.0',
  configurationId: 'emailtoairtable-dev-parse-224c3963d3a3f9c35018ae93a9fffea4',
  bucket: {
    name: 'to-airtable-temp',
    ownerIdentity: { principalId: 'AI5RGHFD5AFHE' },
    arn: 'arn:aws:s3:::to-airtable-temp'
  },
  object: {
    key: 'mtvuuiokqj55l2a8b0qser7tn9dhfignoh9c1vg1',
    size: 3804230,
    eTag: 'c821fb0a2a9c3b060e20e7d177f8b972',
    sequencer: '005E4A434810147365'
  }
}
2020-02-17T07:39:55.465Z 6a0364ae-0879-4ee1-9dcd-c8747de1a650 INFO key mtvuuiokqj55l2a8b0qser7tn9dhfignoh9c1vg1
2020-02-17T07:39:55.465Z 6a0364ae-0879-4ee1-9dcd-c8747de1a650 INFO Key pushed to list. mtvuuiokqj55l2a8b0qser7tn9dhfignoh9c1vg1
END RequestId: 6a0364ae-0879-4ee1-9dcd-c8747de1a650
REPORT RequestId: 6a0364ae-0879-4ee1-9dcd-c8747de1a650 Duration: 1113.17 ms Billed Duration: 1200 ms Memory Size: 1024 MB Max Memory Used: 114 MB Init Duration: 119.56 ms
Here is my handler.js file:
'use strict';
module.exports.parse = async event => {
  try {
    const aws = require('aws-sdk');
    const s3 = new aws.S3();
    const simpleParser = require('mailparser').simpleParser;
    const Airtable = require('airtable');
    const dateformat = require('dateformat');
    var base = new Airtable({ apiKey: process.env.airtableApiKey }).base(process.env.airtableBaseId);
    var data = [];
    var keys = [];

    event["Records"].forEach(async record => {
      console.log(record["s3"]);
      console.log('key', record["s3"]["object"]["key"]);
      keys.push(record["s3"]["object"]["key"]);
      console.log('Key pushed to list. ', record["s3"]["object"]["key"]); // <-- this is the last line that I am sure processes because I see it in the CloudWatch logs.
      var temp_data = await s3.getObject(
        {
          Bucket: 'to-airtable-temp',
          Key: record["s3"]["object"]["key"]
        }).promise();
      console.log('temp_data', temp_data);
      data.push(temp_data);
    });

    setTimeout(async function() {
      // console.log('data', data[0].Body.toString());
      let parsed = await simpleParser(data[0].Body.toString());
      console.log(parsed);
      // save the file to a public S3 bucket so it can be uploaded to airtable
      parsed["attachments"].forEach(function(attachment) {
        let now = new Date();
        s3.upload({
          Bucket: 'to-airtable-images',
          Key: keys[0] + dateformat(now, "yyyy-mm-dd") + '.jpg',
          Body: attachment.content,
          ContentType: "image/jpeg"
        },
        function(error, data) {
          if (error) {
            throw error;
          }
          console.log('File uploaded successfully. ' + data.Location);
          // Now upload to airtable
          base('Input').create([
            {
              "fields": { "Attachments": [
                {
                  "url": data.Location
                }
              ]}
            }
          ], function(err, records) {
            if (err) {
              console.error(err);
              return;
            }
            records.forEach(function (record) {
              console.log(record.getId());
            });
          });
        });
      });
      return {
        statusCode: 200,
        body: JSON.stringify(
          {
            message: 'Go Serverless v1.0! Your function executed successfully!',
            input: event,
            data: JSON.stringify(data),
          },
          null,
          2
        ),
      };
    }, 500); // I've tried increasing this time but it still hangs.
  } catch (error) {
    console.error(error);
  }
};
You shouldn't use async/await with the forEach function (see "Using async/await with a forEach loop"). Instead, use the more modern for...of syntax:
for (let record of event["Records"]) {
  // you can include await calls in this block
}
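Applied to the handler above, a minimal sketch of the same flow with for...of (bucket names from the question; the Airtable call and dateformat formatting are left out for brevity). Each getObject and upload is awaited, so the setTimeout workaround is no longer needed:

'use strict';
const aws = require('aws-sdk');
const simpleParser = require('mailparser').simpleParser;

const s3 = new aws.S3();

module.exports.parse = async event => {
  for (const record of event.Records) {
    const key = record.s3.object.key;

    // Suspends the handler until the email object is actually downloaded.
    const email = await s3.getObject({ Bucket: 'to-airtable-temp', Key: key }).promise();
    const parsed = await simpleParser(email.Body.toString());

    for (const attachment of parsed.attachments) {
      const uploaded = await s3.upload({
        Bucket: 'to-airtable-images',
        Key: `${key}${new Date().toISOString().slice(0, 10)}.jpg`,
        Body: attachment.content,
        ContentType: 'image/jpeg'
      }).promise();
      console.log('File uploaded successfully.', uploaded.Location);
      // The Airtable base('Input').create(...) call would go here.
    }
  }

  return { statusCode: 200 };
};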

Reading a ZIP archive from S3, and writing uncompressed version to new bucket

I have an app where a user can upload a ZIP archive of resources. My app handles the upload and saves it to S3. At some point I want to run a transformation that will read this object from S3, unzip it, and write the contents to a new S3 bucket. This is all happening in a Node service.
I am using the unzipper library to handle unzipping. Here is my initial code:
async function downloadFromS3() {
  let s3 = new AWS.S3();
  try {
    const object = s3
      .getObject({
        Bucket: "zip-bucket",
        Key: "Archive.zip"
      })
      .createReadStream();
    object.on("error", err => {
      console.log(err);
    });
    await streaming_unzipper(object, s3);
  } catch (e) {
    console.log(e);
  }
}

async function streaming_unzipper(s3ObjectStream, s3) {
  await s3.createBucket({ Bucket: "unzip-bucket" }).promise();
  const unzipStream = s3ObjectStream.pipe(unzipper.Parse());
  unzipStream.pipe(
    stream.Transform({
      objectMode: true,
      transform: function(entry, e, next) {
        const fileName = entry.path;
        const type = entry.type; // 'Directory' or 'File'
        const size = entry.vars.uncompressedSize; // There is also compressedSize;
        if (type === "File") {
          s3.upload(
            { Bucket: "unzip-bucket", Body: entry, Key: entry.path },
            {},
            function(err, data) {
              if (err) console.error(err);
              console.log(data);
              entry.autodrain();
            }
          );
          next();
        } else {
          entry.autodrain();
          next();
        }
      }
    })
  );
}
This code works, but I feel like it could be optimized. Ideally I would like to pipe the download stream -> unzipper stream -> uploader stream, so that chunks are uploaded to S3 as they get unzipped, instead of waiting for the full file to unzip before uploading.
The problem I am running into is that I need the file name (to set as the S3 key) before I can start the upload, and I only have it after unzipping.
Is there any good way to create a streaming upload to S3 that is initiated with a temporary ID and rewritten with the final name after the full stream is finished?
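For what it's worth, the entry name is available earlier than it might seem: unzipper emits each entry with entry.path set as soon as that entry's header has been read, and s3.upload accepts a stream Body (it performs a multipart upload of unknown length), so every file can be streamed into the destination bucket while it is still being inflated. A rough sketch under those assumptions, reusing the bucket names from the snippet above:

const AWS = require('aws-sdk');
const unzipper = require('unzipper');

const s3 = new AWS.S3();

async function unzipBucketObject(srcBucket, srcKey, destBucket) {
  const zipStream = s3
    .getObject({ Bucket: srcBucket, Key: srcKey })
    .createReadStream();

  const parseStream = zipStream.pipe(unzipper.Parse());
  const uploads = [];

  parseStream.on('entry', (entry) => {
    if (entry.type === 'File') {
      // The upload starts immediately; bytes flow into S3 as they are inflated.
      uploads.push(
        s3.upload({ Bucket: destBucket, Key: entry.path, Body: entry }).promise()
      );
    } else {
      entry.autodrain();
    }
  });

  // Wait until the whole archive has been read through the parser ...
  await new Promise((resolve, reject) => {
    parseStream.on('finish', resolve).on('error', reject);
  });
  // ... and until every per-entry upload has completed.
  await Promise.all(uploads);
}

// Example call with the buckets from the question:
// unzipBucketObject('zip-bucket', 'Archive.zip', 'unzip-bucket').catch(console.error);

If a placeholder key really is required, keep in mind that S3 has no rename operation; the usual workaround is to upload under a temporary key, then copyObject to the final key and deleteObject on the temporary one once the real name is known.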

Serverless lambda trigger read json file

I have a Lambda (Node) with a trigger that fires when a new JSON file is added to our S3 bucket. Here is my Lambda code:
// Note: s3 is assumed to be an AWS.S3 client created outside this snippet,
// e.g. const s3 = new (require('aws-sdk')).S3();
module.exports.bookInfo = (event, context) => {
  console.log('Events ', JSON.stringify(event));
  event.Records.forEach((record) => {
    const filename = record.s3.object.key;
    const bucketname = record.s3.bucket.name;
    let logMsg = [];
    const s3File = `BucketName: [${bucketname}] FileName: [${filename}]`;
    console.log(s3File);
    logMsg.push(`Lambda execution started for ${s3File}, Trying to download file from S3`);
    try {
      s3.getObject({
        Bucket: bucketname,
        Key: filename
      }, function(err, data) {
        logMsg.push('Data is ', JSON.stringify(data.Body));
        if (err) {
          logMsg.push('Generate Error :', err);
          console.log(logMsg);
          return null;
        }
        logMsg.push(`File downloaded successfully. Processing started for ${s3File}`);
        logMsg.push('Data is ', JSON.stringify(data.Body));
      });
    } catch (e) { console.log(e); }
  });
};
When I run this, I don't get the file content, and I suspect that the Lambda finishes execution before the file read operation completes. I tried async/await without success. What am I missing here? I was able to read a small file of 1 kB, but when my file grows to something like 100 MB, it causes issues.
Thanks in advance.
I was able to do it with async/await. Here is my code:
module.exports.bookInfo = (event, context) => {
  event.Records.forEach(async (record) => {
    const filename = record.s3.object.key;
    const bucketname = record.s3.bucket.name;
    const s3File = `BucketName: [${bucketname}] FileName: [${filename}]`;
    logMsg.push(`Lambda execution started for ${s3File}, Trying to download file from S3`);
    let response = await s3.getObject({
      Bucket: bucketname,
      Key: filename
    }).promise();
  })
}
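One caveat, echoing the for...of advice earlier on this page: forEach(async ...) does not make the handler wait for the awaits inside it, so with larger files the invocation can still end before getObject resolves. A hedged variant with an async handler and for...of (the SDK client setup is assumed, since the snippets above do not show it):

const AWS = require('aws-sdk'); // assumed setup; not shown in the original snippet
const s3 = new AWS.S3();

module.exports.bookInfo = async (event, context) => {
  for (const record of event.Records) {
    const filename = record.s3.object.key;
    const bucketname = record.s3.bucket.name;
    console.log(`BucketName: [${bucketname}] FileName: [${filename}]`);

    // The handler only returns once every object has been downloaded.
    const response = await s3.getObject({ Bucket: bucketname, Key: filename }).promise();
    console.log('Downloaded', response.Body.length, 'bytes');
  }
};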

Lambda function unable to create file on S3

I have written the following code to be used in an AWS Lambda function. The objective is to:
Fetch data from MSSQL DB
Create a file from this data in S3 bucket.
Create a file on FTP Server.
This whole thing is working just fine, but as soon as I push this code to AWS Lambda, it fails. It is able to fetch the data but not able to create the file on S3. Here is the complete code:
"use strict";
var express = require('express');
var app = express();
var config = require('./config/dev');
var sql = require("mssql");
var AWS = require('aws-sdk');
var PromiseFtp = require('promise-ftp');
var fs = require('fs');
exports.handler = (event, context, callback) => {
CreateFileOnS3("This is a sample text file created by lambda function", event, context, callback);
};
function CreateFileOnS3(fileData, event, context, callback) {
const fileName = generateFileName();
console.log('Sending file to S3...');
const s3 = new AWS.S3(config.awsCredentials);
const params = {
Bucket: config.app.s3Bucket,
Key: fileName,
Body: fileData
};
s3.upload(params, function (s3Err, data) {
if (s3Err) {
console.log('There was an error creating file on S3');
callback(true, 'There was an error creating file on S3');
}
else {
console.log(`File uploaded successfully at ${data.Location}`);
callback(null, 'File uploaded successfully at ${data.Location}');
}
});
}
function generateFileName() {
var _d = new Date(),
y = _d.getFullYear(),
m = _d.getMonth() + 1,
d = _d.getDate();
return y + '-' + (m < 10 ? '0' + m : m) + '-' + (d < 10 ? '0' + d : d) + '.txt';
}
Here is what the CloudWatch log says:
START RequestId: 0965c2ef-94a2-439e-b61b-83f17b5ad3d3 Version: $LATEST
2019-04-22T10:38:51.737Z 0965c2ef-94a2-439e-b61b-83f17b5ad3d3 Fetching data from database...
2019-04-22T10:38:53.364Z 0965c2ef-94a2-439e-b61b-83f17b5ad3d3 Sending file to S3...
END RequestId: 0965c2ef-94a2-439e-b61b-83f17b5ad3d3
REPORT RequestId: 0965c2ef-94a2-439e-b61b-83f17b5ad3d3 Duration: 30030.33 ms Billed Duration: 30000 ms Memory Size: 512 MB Max Memory Used: 95 MB
2019-04-22T10:39:21.760Z 0965c2ef-94a2-439e-b61b-83f17b5ad3d3 Task timed out after 30.03 seconds
Memory: 2048 MB, Timeout: 60 sec
EDIT: The updated code above only sends a plain string to be created as a text file on S3, which is also not working.
Check the timeout that's set on the Lambda function. Increase it to 2 minutes.
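Besides raising the timeout, it can help to make the handler wait on the upload explicitly so that a failure is logged instead of the invocation silently running out of time. A minimal sketch using the SDK's .promise() with an async handler, reusing config and generateFileName from the code above:

exports.handler = async (event, context) => {
  const fileName = generateFileName();
  console.log('Sending file to S3...');
  const s3 = new AWS.S3(config.awsCredentials);
  try {
    const data = await s3.upload({
      Bucket: config.app.s3Bucket,
      Key: fileName,
      Body: 'This is a sample text file created by lambda function'
    }).promise();
    console.log(`File uploaded successfully at ${data.Location}`);
    return `File uploaded successfully at ${data.Location}`;
  } catch (s3Err) {
    // If the Lambda sits in a VPC without S3 access, this is where the error
    // (or a hang until the timeout) becomes visible.
    console.log('There was an error creating the file on S3', s3Err);
    throw s3Err;
  }
};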

AWS SDK in node js - Get object from sub directory

I'm using the aws-sdk for Node.js and I'm trying to get an object that's located in a sub-directory, but I don't get any files back.
The file path is: mybucket/subfoldername/file.txt
getObject = function getListObjects(bucketName, bucketPath, fileName) {
  return new Promise((resolve, reject) => {
    let params = { Key: fileName, Bucket: bucketName + bucketPath };
    s3.getObject(params, function (error, data) {
      if (error) {
        console.log(`Can't connect to S3. S3 Path = [${fileName}] to Bucket = [${params.Bucket}]`, error);
        return reject(error);
      } else {
        console.log(`Found files in S3. S3 Path =[${fileName}] to Bucket = [${params.Bucket}]`);
        return resolve(data.Contents);
      }
    });
  });
};
The arguments I pass are:
bucketName: "mybucket"
bucketPath: "/subfoldername"
fileName: "file.txt"
Please advise.
The Key argument should include the "path" string too, as S3 objects are referenced by their complete path:
let params = { Key: `${bucketPath}/${fileName}`, Bucket: bucketName };
I've taken the liberty of using template literals there (``). Also, notice that I've added a "/" separator; please check that it's not already included in bucketPath.
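A small usage note on top of that fix: with bucketPath set to "/subfoldername", the template above would produce a key with a leading slash, which S3 treats as part of the object name. Also, s3.getObject returns the file contents in data.Body (data.Contents only exists on listObjects responses). A hedged version of the params and callback:

let params = {
  Key: `${bucketPath}/${fileName}`.replace(/^\/+/, ''), // "subfoldername/file.txt"
  Bucket: bucketName
};
s3.getObject(params, function (error, data) {
  if (error) {
    return console.error(`Can't read [${params.Key}] from [${params.Bucket}]`, error);
  }
  console.log(data.Body.toString()); // the actual file contents
});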
