Serverless Lambda trigger to read a JSON file - Node.js

I have a Lambda (Node) with a trigger that fires when a new JSON file is added to our S3 bucket. Here is my Lambda code:
const AWS = require('aws-sdk');
const s3 = new AWS.S3();

module.exports.bookInfo = (event, context) => {
  console.log('Events ', JSON.stringify(event));

  event.Records.forEach((record) => {
    const filename = record.s3.object.key;
    const bucketname = record.s3.bucket.name;
    let logMsg = [];
    const s3File = `BucketName: [${bucketname}] FileName: [${filename}]`;
    console.log(s3File);
    logMsg.push(`Lambda execution started for ${s3File}, Trying to download file from S3`);
    try {
      s3.getObject({
        Bucket: bucketname,
        Key: filename
      }, function(err, data) {
        if (err) {
          logMsg.push('Generate Error :', err);
          console.log(logMsg);
          return null;
        }
        logMsg.push(`File downloaded successfully. Processing started for ${s3File}`);
        logMsg.push('Data is ', JSON.stringify(data.Body));
      });
    } catch (e) {
      console.log(e);
    }
  });
}
When I run this, I don't get the file content, and I suspect the Lambda finishes execution before the file read completes. I tried async/await without success. What am I missing here? I was able to read a small file of 1 KB, but when the file grows to something like 100 MB, it causes issues.
Thanks in advance.

I was able to do it with async/await. Here is my code:
module.exports.bookInfo = (event, context) => {
  event.Records.forEach(async (record) => {
    const filename = record.s3.object.key;
    const bucketname = record.s3.bucket.name;
    const s3File = `BucketName: [${bucketname}] FileName: [${filename}]`;
    let logMsg = [];
    logMsg.push(`Lambda execution started for ${s3File}, Trying to download file from S3`);
    let response = await s3.getObject({
      Bucket: bucketname,
      Key: filename
    }).promise();
  })
}
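Note that forEach(async ...) does not make the Lambda wait for the downloads to finish, so with larger files the function can still end before the reads complete. A minimal sketch of a handler that returns a promise covering every record (the processJson helper is hypothetical; the decodeURIComponent call is there because S3 event keys arrive URL-encoded):

const AWS = require('aws-sdk');
const s3 = new AWS.S3();

module.exports.bookInfo = async (event) => {
  // Returning the Promise.all result keeps the Lambda alive until every record is read.
  return Promise.all(event.Records.map(async (record) => {
    const bucketname = record.s3.bucket.name;
    const filename = decodeURIComponent(record.s3.object.key.replace(/\+/g, ' '));
    const response = await s3.getObject({ Bucket: bucketname, Key: filename }).promise();
    // response.Body is a Buffer containing the whole object.
    const json = JSON.parse(response.Body.toString('utf-8'));
    return processJson(json); // hypothetical downstream processing
  }));
};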

Related

Use AWS Lambda to get CloudWatch Alarm States

I have updated my original post as I have got a bit further: the part of my code that queries my CW Alarms is now working. The code below outputs the state of my CW Alarms to the console, in the format I want. What I'm now trying to do is take that output and upload it as a text file to an S3 bucket. Is this possible?
CW Alarm Code
import { CloudWatchClient, DescribeAlarmsCommand } from "@aws-sdk/client-cloudwatch";

const REGION = "eu-west-2";
const cwclient = new CloudWatchClient({ region: REGION });

export const handler = async () => {
  const cwparams = {};
  const cw = new DescribeAlarmsCommand(cwparams);
  try {
    const cwdata = await cwclient.send(cw);
    cwdata.MetricAlarms.forEach(function (item) {
      console.log('\n%j', { alarmname: item.AlarmName, alarmstate: item.StateValue });
    });
  } catch (error) {
    console.error(error);
  }
};
Output
Function Logs
START RequestId: xxxxxxxxxxxxxxxxxxx Version: $LATEST
2022-11-30T09:48:34.655Z xxxxxxxxxxxxxxxxxxx INFO
{"alarmname":"my-alarm-1","alarmstate":"OK"}
2022-11-30T09:48:34.655Z xxxxxxxxxxxxxxxxxxx INFO
{"alarmname":"my-alarm-2","alarmstate":"OK"}
END RequestId: xxxxxxxxxxxxxxxxxxx
I have looked at the SDK docs for the S3 PutObjectCommand and have tested the code below, which allows me to upload a file with some text content, but I'm not sure how I can combine my CW Alarm data with this code, so that the "Body" of the text file is my CW Alarm data.
S3 Code
import { S3Client, PutObjectCommand } from "@aws-sdk/client-s3";

export const handler = async () => {
  const bucketName = "mybucket";
  const keyName = "test.json";
  const s3 = new S3Client({});
  const s3putCommand = new PutObjectCommand({
    Bucket: bucketName,
    Key: keyName,
    Body: "Hello" // I would like this to be my CW Alarm data
  });
  try {
    await s3.send(s3putCommand);
    console.log('Successfully uploaded data to ' + bucketName + '/' + keyName);
  } catch (error) {
    console.error(error);
  }
};
Output
Function Logs
START RequestId: xxxxxxxxxxxxxxxxxxx Version: $LATEST
2022-11-30T09:56:45.585Z xxxxxxxxxxxxxxxxxxx INFO Successfully uploaded data to mybucket/test.json
END RequestId: xxxxxxxxxxxxxxxxxxx
My goal is to end up with the test.json file looking like this:
{"alarmname":"my-alarm-1","alarmstate":"OK"} {"alarmname":"my-alarm-2","alarmstate":"OK"}
Thanks.
You are using an outdated AWS SDK for JavaScript. Refer to the AWS Code Library for the latest recommended SDK examples:
https://docs.aws.amazon.com/code-library/latest/ug/javascript_3_cloudwatch_code_examples.html
With help from a colleague I have found the answer to this. It works as long as the index file in your Lambda function is named "index.mjs" (so that ES module imports are supported):
import { S3Client, PutObjectCommand } from "@aws-sdk/client-s3";
import { CloudWatchClient, DescribeAlarmsCommand } from "@aws-sdk/client-cloudwatch";
import { Upload } from "@aws-sdk/lib-storage";

const REGION = "eu-west-2";
const cwclient = new CloudWatchClient({ region: REGION });

export const handler = async () => {
  const cwparams = {};
  const cw = new DescribeAlarmsCommand(cwparams);
  const alarmData = [];
  const bucketName = "mybucket";
  const keyName = "test.json";
  const s3 = new S3Client({});
  try {
    const cwdata = await cwclient.send(cw);
    cwdata.MetricAlarms.forEach(function (item) {
      alarmData.push({
        alarmname: item.AlarmName,
        alarmstate: item.StateValue,
      });
    });
  } catch (error) {
    console.error(error);
  }
  const s3putCommand = new Upload({
    client: s3,
    params: {
      Bucket: bucketName,
      Key: keyName,
      Body: JSON.stringify(alarmData),
    },
  });
  try {
    const data = await s3putCommand.done();
    console.log(data);
  } catch (error) {
    console.log(error);
  }
};
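Since the body here is just a small JSON string, the Upload block in the handler could also be replaced with a plain PutObjectCommand, as in the question. A sketch reusing the s3 client, bucketName, keyName and alarmData from the code above (an alternative, not the accepted approach):

import { PutObjectCommand } from "@aws-sdk/client-s3";

// Sketch: inside the handler, after alarmData has been filled.
try {
  const data = await s3.send(new PutObjectCommand({
    Bucket: bucketName,
    Key: keyName,
    Body: JSON.stringify(alarmData),
    ContentType: "application/json",
  }));
  console.log(data);
} catch (error) {
  console.log(error);
}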

How do I upload a large audio file, longer than 30 sec, directly from the browser to AWS S3?

I would like to save an audio recording to S3. I am using the functions below to upload directly to AWS S3 from the browser. It works for short audio recordings of up to around 25 seconds but fails for larger files.
The flow is as follows: I speak into the microphone using recorder.js. Once the recording is complete, I press stop, which then saves the file to AWS.
From the browser:
getSignedRequest(file, fileLoc);

function getFetchSignedRequest(file, fileLoc){
  const fetchUrl = `/xxxxxxxxx?file-name=${file.name}&file-type=${file.type}&fileLoc=${fileLoc}`;
  fetch(fetchUrl)
    .then((response) => {
      console.log('response', response)
      if (!response.ok) {
        console.log('Network response was not OK', response.ok)
      } else {
        putAudioFetchFile(file, response.signedRequest, response.url)
      }
    })
    .catch((error) => {
      console.error('Could not get signed URL:', error);
    })
}
This sends a GET request to the Node.js server, which calls this:
const aws = require('aws-sdk');
const fs = require('fs');

aws.config.region = 'xxxxxx';
const S3_BUCKET = process.env.AWS_S3_BUCKET

this.uploadToAWSDrive =
  async function uploadToAWSDrive(req, res){
    const s3 = new aws.S3();
    const URL_EXPIRATION_SECONDS = 3000;
    const subFolderName = req.query['fileLoc'];
    const fileName = req.query['file-name'];
    const fileType = req.query['file-type'];
    const fileLocName = subFolderName + fileName;
    const s3Params = {
      Bucket: S3_BUCKET,
      Key: fileLocName,
      Expires: URL_EXPIRATION_SECONDS,
      ContentType: fileType,
      ACL: 'public-read'
    };
    await s3.getSignedUrl('putObject', s3Params, (err, data) => {
      if (err) {
        console.log(err);
        return res.end();
      }
      const returnData = {
        signedRequest: data,
        url: `https://${S3_BUCKET}.s3.amazonaws.com/${fileLocName}`
      };
      console.log('audio uploaded', returnData)
      res.write(JSON.stringify(returnData));
      res.end();
    });
  }
Which then calls this:
function uploadFile(file, signedRequest, url){
  const xhr = new XMLHttpRequest();
  xhr.open('PUT', signedRequest);
  xhr.onreadystatechange = () => {
    if (xhr.readyState === 4) {
      if (xhr.status === 200) {
        console.log('destination url= ', url, xhr.readyState, xhr.status)
      }
      else {
        alert('Could not upload file.');
      }
    }
  };
  xhr.send(file);
}
This then sends the file to the AWS S3 server. It is OK for audio of less than 30 seconds, but fails for longer audio files.
What do I need to do to make this work with audio files longer than 20 seconds and up to 3 minutes?
Any help is most appreciated.
Not very elegant, but the issue was resolved by adding a timer around the original function call. A function that followed also needed to be delayed, I think to allow processing time. I am sure there will be better ways to do this.
setTimeout(() => getSignedRequest(myAudioFile, fileLoc), processTime);
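Rather than a fixed delay, one option is to wait for recorder.js to hand back the finished blob before requesting the signed URL. A rough sketch, assuming Matt Diamond's recorder.js API (stop/exportWAV) and the existing getSignedRequest function from the question:

function stopAndUpload(recorder, fileLoc) {
  recorder.stop();
  // exportWAV invokes the callback only once the full recording has been encoded,
  // so the upload never starts on a partially written file.
  recorder.exportWAV((blob) => {
    const file = new File([blob], 'recording.wav', { type: 'audio/wav' });
    getSignedRequest(file, fileLoc);
  });
}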

Read data from a .xlsx file on S3 using a Node.js Lambda

I'm still new to Node.js and AWS, so forgive me if this is a noob question.
I am trying to read the data from an Excel file (.xlsx). The Lambda function receives the extension of the file type.
Here is my code:
exports.handler = async (event, context, callback) => {
  console.log('Received event:', JSON.stringify(event, null, 2));
  if (event.fileExt === undefined) {
    callback("400 Invalid Input");
  }

  let returnData = "";
  const S3 = require('aws-sdk/clients/s3');
  const s3 = new S3();

  switch (event.fileExt) {
    case "plain":
    case "txt":
      // Extract text
      const params = { Bucket: 'filestation', Key: 'MyTXT.' + event.fileExt };
      try {
        await s3.getObject(params, function(err, data) {
          if (err) console.log(err, err.stack); // an error occurred
          else { // successful response
            returnData = data.Body.toString('utf-8');
            context.done(null, returnData);
          }
        }).promise();
      } catch (error) {
        console.log(error);
        return;
      }
      break;

    case "xls":
    case "xlsx":
      returnData = "Excel";
      // Extract text
      const params2 = { Bucket: 'filestation', Key: 'MyExcel.' + event.fileExt };
      const readXlsxFile = require("read-excel-file/node");
      try {
        const doc = await s3.getObject(params2);
        const parsedDoc = await readXlsxFile(doc);
        console.log(parsedDoc)
      } catch (err) {
        console.log(err);
        const message = `Error getting object.`;
        console.log(message);
        throw new Error(message);
      }
      break;

    case "docx":
      returnData = "Word doc";
      // Extract text
      break;

    default:
      callback("400 Invalid Operator");
      break;
  }
  callback(null, returnData);
};
The text file part works, but the xlsx part makes the function time out.
I did install the read-excel-file dependency and uploaded the zip so that I have access to it, but the function times out with this message:
"errorMessage": "2020-11-02T13:06:50.948Z 120bfb48-f29c-4e3f-9507-fc88125515fd Task timed out after 3.01 seconds"
Any help would be appreciated! Thanks for your time.
Using the xlsx npm library, here's how we did it, assuming the file is under the root project path.
const xlsx = require('xlsx');
// read your excel file
let readFile = xlsx.readFile('file_example_XLSX_5000.xlsx')
// get first-sheet's name
let sheetName = readFile.SheetNames[0];
// convert sheets to JSON. Best if sheet has a headers specified.
console.log(xlsx.utils.sheet_to_json(readFile.Sheets[sheetName]));
You need to install the xlsx (SheetJS) library into the project:
npm install xlsx
and then import the "read" function into the Lambda, get the S3 object's body and pass it to xlsx like this:
const { read } = require('xlsx');
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });

exports.handler = async (event) => {
  const bucketName = 'excel-files';
  const fileKey = 'Demo Data.xlsx';

  // Simple GetObject
  let file = await s3.getObject({ Bucket: bucketName, Key: fileKey }).promise();
  const wb = read(file.Body);

  const response = {
    statusCode: 200,
    body: JSON.stringify({
      read: wb.Sheets,
    }),
  };
  return response;
};
(Of course, you can receive the bucket and file key from parameters if you send them.)
Very important: use the read (not the readFile) function and pass the Body property (with a capital "B") as the parameter.
I changed the timeout to 20 seconds and it works. Only one issue remains: const parsedDoc = await readXlsxFile(doc); expects a string (file path), not a file.
Solved by using the xlsx npm library, using a stream and giving it buffers.
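For reference, once the workbook has been read from the S3 Body buffer as in the answer above, the rows can be pulled out with sheet_to_json. A small sketch reusing the file object returned by s3.getObject().promise() (the sheet name is whatever your workbook contains):

const { read, utils } = require('xlsx');

// Sketch: convert the first sheet of the workbook read from S3 into JSON rows.
const wb = read(file.Body);            // file.Body is the Buffer returned by getObject
const firstSheetName = wb.SheetNames[0];
const rows = utils.sheet_to_json(wb.Sheets[firstSheetName]);
console.log(rows);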

Multiple images are not fully uploaded to S3 upon first Lambda call

I have an issue with uploading multiple files to S3.
What my Lambda is doing:
1. Upload a single file to S3 (this always works).
2. Resize the file to 4 new sizes (using sharp).
3. Upload the resized files to S3.
The problem: sometimes only 2 or 3 out of the 4 resized files are uploaded.
The surprising thing is that on the next upload, the missing files from the previous upload are added.
There are no errors. I was thinking this could be an async issue, so I awaited in the right places to make it synchronous.
I will appreciate any help.
async function uploadImageArrToS3(resizeImagesResponse) {
  return new Promise(async function (resolve, reject) {
    var params = {
      Bucket: bucketName,
      ACL: 'public-read'
    };
    let uploadImgArr = resizeImagesResponse.map(async (buffer) => {
      params.Key = buffer.imgParamsArray.Key;
      params.Body = buffer.imgParamsArray.Body;
      params.ContentType = buffer.imgParamsArray.ContentType;
      let filenamePath = await s3.putObject(params, (e, d) => {
        if (e) {
          reject(e);
        } else {
          d.name = params.ContentType;
          return (d.name);
        }
      }).params.Key
      let parts = filenamePath.split("/");
      let fileName = parts[parts.length - 1];
      return {
        fileName: fileName,
        width: buffer.width
      };
    });
    await Promise.all(uploadImgArr).then(function (resizedFiles) {
      console.log('succesfully resized the image!');
      resolve(resizedFiles);
    });
  })
}
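Two things in this code look like likely culprits: the single shared params object is mutated by every iteration of the map, and awaiting s3.putObject(...) with a callback does not wait for the upload to finish (only .promise() returns something awaitable). Not a verified fix, just a sketch of the same function with a fresh params object per file and real awaiting, reusing the s3 client and bucketName from above:

async function uploadImageArrToS3(resizeImagesResponse) {
  // Build one independent params object per resized image and wait for every putObject to finish.
  const uploads = resizeImagesResponse.map(async (buffer) => {
    const params = {
      Bucket: bucketName,
      ACL: 'public-read',
      Key: buffer.imgParamsArray.Key,
      Body: buffer.imgParamsArray.Body,
      ContentType: buffer.imgParamsArray.ContentType
    };
    await s3.putObject(params).promise();
    const parts = params.Key.split("/");
    return {
      fileName: parts[parts.length - 1],
      width: buffer.width
    };
  });
  return Promise.all(uploads);
}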

JavaScript AWS SDK S3 upload method with Body stream generating an empty file

I'm trying to use the upload method from S3 with a ReadableStream from the fs module.
The documentation says that a ReadableStream can be used as the Body param:
Body — (Buffer, Typed Array, Blob, String, ReadableStream) Object data.
Also, the upload method description is:
Uploads an arbitrarily sized buffer, blob, or stream, using intelligent concurrent handling of parts if the payload is large enough.
Also, here: Upload pdf generated to AWS S3 using nodejs aws sdk, @shivendra says he can use a ReadableStream and it works.
This is my code:
const fs = require('fs')
const S3 = require('aws-sdk/clients/s3')

const s3 = new S3()

const send = async () => {
  const rs = fs.createReadStream('/home/osman/Downloads/input.txt')
  rs.on('open', () => {
    console.log('OPEN')
  })
  rs.on('end', () => {
    console.log('END')
  })
  rs.on('close', () => {
    console.log('CLOSE')
  })
  rs.on('data', (chunk) => {
    console.log('DATA: ', chunk)
  })

  console.log('START UPLOAD')

  const response = await s3.upload({
    Bucket: 'test-bucket',
    Key: 'output.txt',
    Body: rs,
  }).promise()

  console.log('response:')
  console.log(response)
}

send().catch(err => { console.log(err) })
I'm getting this output:
START UPLOAD
OPEN
DATA: <Buffer 73 6f 6d 65 74 68 69 6e 67>
END
CLOSE
response:
{ ETag: '"d41d8cd98f00b204e9800998ecf8427e"',
Location: 'https://test-bucket.s3.amazonaws.com/output.txt',
key: 'output.txt',
Key: 'output.txt',
Bucket: 'test-bucket' }
The problem is that the file generated on S3 (output.txt) has 0 bytes.
Does anyone know what I am doing wrong?
If I pass a Buffer as Body it works:
Body: Buffer.alloc(8 * 1024 * 1024, 'something'),
But that's not what I want to do. I'd like to use a stream to generate a file and pipe that stream to S3 as I generate it.
It's an API interface issue with Node.js ReadableStreams.
Just commenting out the code that listens to the 'data' event solves the problem.
const fs = require('fs')
const S3 = require('aws-sdk/clients/s3')

const s3 = new S3()

const send = async () => {
  const rs = fs.createReadStream('/home/osman/Downloads/input.txt')
  rs.on('open', () => {
    console.log('OPEN')
  })
  rs.on('end', () => {
    console.log('END')
  })
  rs.on('close', () => {
    console.log('CLOSE')
  })
  // rs.on('data', (chunk) => {
  //   console.log('DATA: ', chunk)
  // })

  console.log('START UPLOAD')

  const response = await s3.upload({
    Bucket: 'test-bucket',
    Key: 'output.txt',
    Body: rs,
  }).promise()

  console.log('response:')
  console.log(response)
}

send().catch(err => { console.log(err) })
Though it's a strange API, when we listen to the 'data' event, the ReadableStream switches into flowing mode (listening to an event changes the publisher/EventEmitter state? Yes, very error prone...). For some reason S3 needs a paused ReadableStream. If we put rs.on('data', ...) after await s3.upload(...), it works. If we put rs.pause() after rs.on('data', ...) and before await s3.upload(...), it works too.
Now, why does that happen? I don't know yet...
But the problem was solved, even if it isn't completely explained.
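A minimal sketch of the second workaround described above, inside the same async send() function as the code: pause the stream after attaching the 'data' listener, so s3.upload still receives a paused stream.

const rs = fs.createReadStream('/home/osman/Downloads/input.txt')
rs.on('data', (chunk) => {
  console.log('DATA: ', chunk)
})
// Putting the stream back into paused mode before handing it to s3.upload
// avoids the empty upload described above.
rs.pause()

const response = await s3.upload({
  Bucket: 'test-bucket',
  Key: 'output.txt',
  Body: rs,
}).promise()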
Check if the file /home/osman/Downloads/input.txt actually exists and is accessible by the Node.js process.
Consider using the putObject method instead.
Example:
const fs = require('fs');
const S3 = require('aws-sdk/clients/s3');

const s3 = new S3();

s3.putObject({
  Bucket: 'test-bucket',
  Key: 'output.txt',
  Body: fs.createReadStream('/home/osman/Downloads/input.txt'),
}, (err, response) => {
  if (err) {
    throw err;
  }
  console.log('response:')
  console.log(response)
});
Not sure how this will work with async/await; better to make the upload to AWS S3 work first, then change the flow.
UPDATE:
Try to implement the upload directly via ManagedUpload:
const fs = require('fs');
const S3 = require('aws-sdk/clients/s3');

const s3 = new S3();

const upload = new S3.ManagedUpload({
  service: s3,
  params: {
    Bucket: 'test-bucket',
    Key: 'output.txt',
    Body: fs.createReadStream('/home/osman/Downloads/input.txt')
  }
});

upload.send((err, response) => {
  if (err) {
    throw err;
  }
  console.log('response:')
  console.log(response)
});
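The asker's original goal, generating data and piping it to S3 while it is being produced, can be sketched with a PassThrough stream. The writes below are just an illustrative producer, not code from either answer:

const { PassThrough } = require('stream');
const S3 = require('aws-sdk/clients/s3');

const s3 = new S3();

// Sketch: whatever produces the data writes into the PassThrough,
// while s3.upload consumes it concurrently as the Body.
const pass = new PassThrough();
const uploadPromise = s3.upload({
  Bucket: 'test-bucket',
  Key: 'output.txt',
  Body: pass,
}).promise();

pass.write('something generated on the fly\n');
pass.end(); // signals s3.upload that the stream is complete

uploadPromise
  .then((response) => console.log('response:', response))
  .catch((err) => console.log(err));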
