Read a file line by line using Lambda / S3 - node.js

I want to read a file located on S3 line by line. I tried the following code, which I found searching online, but the Lambda function exits without invoking any of the readline callbacks. What am I doing wrong?
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });
const readline = require('readline');

exports.handler = async (event, context, callback) => {
    const bucket = event.Records[0].s3.bucket.name;
    const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
    const params = {
        Bucket: bucket,
        Key: key,
    };
    const s3ReadStream = s3.getObject(params).createReadStream();
    const rl = readline.createInterface({
        input: s3ReadStream,
        terminal: false
    });
    rl.on('line', (line) => {
        console.log(`Line from file: ${line}`);
    });
    rl.on('error', () => {
        console.log('error');
    });
    rl.on('close', function () {
        console.log('closed');
        context.succeed();
    });
    console.log('done');
};

I've found the problem. It's been a while since I coded on Lambda, and I thought it would only exit when context was called. I'm now waiting for the promise to be resolved (or rejected, which I'll implement later).
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });
const readline = require('readline');

exports.handler = async (event, context, callback) => {
    const bucket = event.Records[0].s3.bucket.name;
    const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
    const params = {
        Bucket: bucket,
        Key: key,
    };
    const s3ReadStream = s3.getObject(params).createReadStream();
    const rl = readline.createInterface({
        input: s3ReadStream,
        terminal: false
    });
    let myReadPromise = new Promise((resolve, reject) => {
        rl.on('line', (line) => {
            console.log(`Line from file: ${line}`);
        });
        rl.on('error', () => {
            console.log('error');
        });
        rl.on('close', function () {
            console.log('closed');
            resolve();
        });
    });
    try { await myReadPromise; }
    catch (err) {
        console.log('an error has occurred');
    }
    console.log('done reading!');
};
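One caveat for the rejection path that was deferred: readline's Interface doesn't emit 'error' events of its own, so a failure on the underlying S3 stream would leave this promise pending forever. A minimal sketch of wiring the rejection through the source stream instead (same s3ReadStream and rl as above):

let myReadPromise = new Promise((resolve, reject) => {
    rl.on('line', (line) => {
        console.log(`Line from file: ${line}`);
    });
    // readline doesn't forward stream errors, so listen on the S3 stream itself
    s3ReadStream.on('error', (err) => reject(err));
    rl.on('close', () => resolve());
});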

getObject doesn't just return the object that was stored in S3. It returns a JSON object whose Body field holds the blob of the object stored in S3. See also the Response part of the getObject documentation.
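For instance, with the v2 SDK a buffered read looks like this (a minimal sketch; readTextObject is a hypothetical helper name, and a UTF-8 text object is assumed):

const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });

// data.Body is a Buffer for a buffered getObject; decode it before use.
async function readTextObject(bucket, key) {
    const data = await s3.getObject({ Bucket: bucket, Key: key }).promise();
    return data.Body.toString('utf-8');
}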

Related

How to read JSON from S3 by AWS Lambda Node.js 18.x runtime?

@TBA gave the solution.
The root cause was not the runtime; it came from SDK v3.
Lesson: don't update the code with several things mixed together (like both the runtime and the SDK version 🥲).
Thanks again, TBA.
I was using a Lambda on the Node.js 14.x runtime to read a JSON file from S3.
The brief code is below:
const AWS = require("aws-sdk");
const s3 = new AWS.S3();

exports.handler = (event) => {
    const { bucketName, objKey } = event;
    const params = {
        Bucket: bucketName,
        Key: objKey
    };
    return new Promise((resolve) => {
        s3.getObject(params, async (err, data) => {
            if (err) console.log(err, err.stack);
            else {
                const contents = JSON.parse(data.Body);
                resolve(contents);
            }
        });
    });
};
and it returned the JSON data as I expected.
Today I tried to create a new Lambda on the Node.js 18.x runtime, but it returned null or various errors...
Q) Could you give me some advice to solve this 🥲?
+) I used the same JSON file for each Lambda.
+) Not sure why, but in my case data.Body.toString() didn't work (I saw some answers on Stack Overflow suggesting that and tried it, but no luck).
Thanks in advance!
Case A (returns null)
import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";

const s3Client = new S3Client({ region: "ap-northeast-2" });

export const handler = (event) => {
    const { objKey, bucketName } = event;
    const params = {
        Bucket: bucketName,
        Key: objKey
    };
    const getObjCommand = new GetObjectCommand(params);
    return new Promise((resolve) => {
        s3Client.send(getObjCommand, async (err, data) => {
            if (err) console.log(err, err.stack);
            else {
                const list = JSON.parse(data.Body);
                resolve(list);
            }
        });
    });
};
Case B (returns "Unexpected token o in JSON at position 1")
export const handler = async (event) => {
    const { objKey, bucketName } = event;
    const params = {
        Bucket: bucketName,
        Key: objKey
    };
    const getObjCommand = new GetObjectCommand(params);
    const response = await s3Client.send(getObjCommand);
    console.log("JSON.parse(response.Body)", JSON.parse(response.Body));
};
Case C (returns "TypeError: Converting circular structure to JSON")
export const handler = async (event) => {
    const { objKey, bucketName } = event;
    const params = {
        Bucket: bucketName,
        Key: objKey
    };
    const getObjCommand = new GetObjectCommand(params);
    try {
        const response = await s3Client.send(getObjCommand);
        return JSON.stringify(response.Body);
    } catch (err) {
        console.log("error", err);
        return err;
    }
};
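The common thread, per @TBA's diagnosis above, is that in SDK v3 response.Body is a readable stream rather than a Buffer: JSON.parse(response.Body) coerces the stream to the string "[object Object]" (Case B's "Unexpected token o"), JSON.stringify(response.Body) walks a circular stream object (Case C), and in Case A the parse throws inside the callback so the promise never resolves. A sketch of a working read on the Node.js 18.x runtime, using the transformToString() helper that recent v3 releases attach to Body:

import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";

const s3Client = new S3Client({ region: "ap-northeast-2" });

export const handler = async (event) => {
    const { objKey, bucketName } = event;
    const response = await s3Client.send(
        new GetObjectCommand({ Bucket: bucketName, Key: objKey })
    );
    // Body is a stream in SDK v3; drain it to a string before parsing.
    const bodyText = await response.Body.transformToString();
    return JSON.parse(bodyText);
};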

Why doesn't this CSV parsing Node Lambda log anything?

The runtime env is Node 14 (AWS Lambda function). The S3 bucket and Lambda function are in the same region, and I have confirmed that the Lambda function is able to get the object from S3 (i.e. permissions do not seem to be an issue). The Lambda triggers upon PUT of an object (a very simple CSV file) into the S3 bucket. No errors or exceptions appear in the CloudWatch log stream.
package.json
{
  "dependencies": {
    "@fast-csv/parse": "4.3.6"
  }
}
index.js
const aws = require('aws-sdk');
const s3 = new aws.S3({ region: 'us-east-2' });
const fs = require('fs');
const csv = require('@fast-csv/parse');

exports.handler = async (event, context) => {
    const bucket = event.Records[0].s3.bucket.name;
    const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
    const params = {
        Bucket: bucket,
        Key: key,
    };
    const file = await s3.getObject(params).promise();
    fs.createReadStream(file.Body).pipe(csv.parse())
        .on('error', (error) => console.error(error))
        .on('data', (row) => console.log(row))
        .on('end', (rowCount) => console.log(`Parsed ${rowCount} rows`));
};
I also tried the following variation, which had the same outcome:
index.js (variant)
const aws = require('aws-sdk');
const s3 = new aws.S3({ region: 'us-east-2' });
const fs = require('fs');
const csv = require('fast-csv');

exports.handler = async (event, context) => {
    const bucket = event.Records[0].s3.bucket.name;
    const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
    const params = {
        Bucket: bucket,
        Key: key,
    };
    const file = await s3.getObject(params).promise();
    const stream = fs.createReadStream(file.Body);
    csv.parseStream(stream)
        .on('data', (data) => {
            console.info('Data: ' + JSON.stringify(data));
        })
        .on('data-invalid', (data) => {
            console.error('Invalid batch row ' + data);
        })
        .on('end', () => {
            console.info('End of Stream');
        })
        .on('error', (error) => {
            let message = "Error in csv stream processing";
            console.error(message, ":", error);
        });
};
Note: I already tried simply doing await s3.getObject(params).createReadStream(), but that results in undefined, whereas getting the object via promise() first does return the object data.
I've been wrestling with this for hours, so any help is appreciated. Thanks!
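Two details look suspect (a guess, not a confirmed diagnosis): fs.createReadStream(file.Body) expects a file path rather than the Buffer that promise() returns, and the async handler returns before the parser emits anything, so the invocation is frozen first. A sketch that pipes the S3 read stream straight into the parser and awaits completion:

const aws = require('aws-sdk');
const s3 = new aws.S3({ region: 'us-east-2' });
const csv = require('@fast-csv/parse');

exports.handler = async (event) => {
    const bucket = event.Records[0].s3.bucket.name;
    const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
    // Wrap the stream lifecycle in a promise so the async handler waits for it.
    await new Promise((resolve, reject) => {
        s3.getObject({ Bucket: bucket, Key: key })
            .createReadStream()
            .pipe(csv.parse())
            .on('error', reject)
            .on('data', (row) => console.log(row))
            .on('end', (rowCount) => {
                console.log(`Parsed ${rowCount} rows`);
                resolve();
            });
    });
};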

AWS Lambda Custom Nodejs Container Shows Runtime Error

I have built an AWS Lambda function with a custom container image. I am trying to convert an Excel file to PDF with LibreOffice: getting the file from S3, saving it locally, converting it to PDF, and then uploading it back to S3.
Here is the code.
const fs = require('fs');
const getStream = require('get-stream');
const { Readable } = require('stream');
const { S3Client, GetObjectCommand, PutObjectCommand } = require("@aws-sdk/client-s3");
const libre = require('libreoffice-convert');
const path = require('path');

exports.handler = async (event) => {
    const bucket = event.queryStringParameters.bucket;
    const file = event.queryStringParameters.file;
    const convertedFile = event.queryStringParameters.convertedFile;
    if (event.queryStringParameters['warmup'] !== undefined) {
        return {
            result: true,
            message: 'warmed up'
        };
    }
    const client = new S3Client({ region: "ap-south-1" });
    const command = new GetObjectCommand({ Bucket: bucket, Key: file });
    const response = await client.send(command);
    const objectData = response.Body;
    const writeStream = fs.createWriteStream("/tmp/sample.xlsx");
    objectData.pipe(writeStream);
    var end = new Promise((resolve, reject) => {
        objectData.on('close', resolve(true));
        objectData.on('end', resolve(true));
        objectData.on('error', reject(false));
    });
    let completed = await end;
    if (completed) {
        const extend = '.pdf';
        const outputPath = `/tmp/sample${extend}`;
        const enterPath = '/tmp/sample.xlsx';
        var readingFile = new Promise((resolve, reject) => {
            fs.readFile(enterPath, (err, data) => {
                if (err) {
                    reject(false);
                }
                resolve(data);
            });
        });
        var fileData = await readingFile;
        var converting = new Promise((resolve, reject) => {
            libre.convert(fileData, extend, undefined, (err, done) => {
                if (err) {
                    reject(false);
                }
                fs.writeFileSync(outputPath, done);
                resolve(true);
            });
        });
        var converted = await converting;
        if (converted) {
            var convertedFileStream = fs.createReadStream(outputPath);
            const uploadCommand = new PutObjectCommand({ Bucket: bucket, Key: convertedFile, Body: convertedFileStream });
            const lastResponse = await client.send(uploadCommand);
            const returnResponse = {
                result: true,
                message: 'success',
                bucket: event.queryStringParameters.bucket,
                file: event.queryStringParameters.file,
                convertedFile: event.queryStringParameters.convertedFile
            };
            if (event.queryStringParameters['returnEvent'] !== undefined) {
                returnResponse['returnEvent'] = event;
            }
            return returnResponse;
        }
    }
    return completed;
};
However, I am getting this error at times. Sometimes it succeeds, but sometimes it throws this error:
{
    "errorType": "Error",
    "errorMessage": "false",
    "stack": [
        "Error: false",
        "    at _homogeneousError (/function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:56:16)",
        "    at postError (/function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:72:34)",
        "    at done (/function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:99:13)",
        "    at fail (/function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:113:13)",
        "    at /function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:148:24",
        "    at processTicksAndRejections (internal/process/task_queues.js:97:5)"
    ]
}
I don't know Node.js in great depth, so I suspect the code is not written the correct way. Any ideas what I am doing wrong here?
Like @hoangdv, when I logged errors I came to know that saving the file to disk was not happening correctly. In the original code, objectData.on('close', resolve(true)) calls resolve(true) immediately during promise construction (its return value is what gets registered as the listener), so completed becomes true before the write stream has finished, and the download raced the conversion. So I changed that part of the code to the following, and then it worked.
const client = new S3Client({ region: "ap-south-1" });
const command = new GetObjectCommand({ Bucket: bucket, Key: file });
const { Body } = await client.send(command);
await new Promise((resolve, reject) => {
    Body.pipe(fs.createWriteStream(filePath))
        .on('error', err => reject(err))
        .on('close', () => resolve());
});
const excelFile = fs.readFileSync(filePath);

Unable to read the data from csv file in aws Lambda

I am trying to read a CSV file from S3 in Lambda using Node.js.
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const csv = require('csv-parser');

const bucket = '';
const objectkey = '';
const params = { Bucket: bucket, Key: objectkey };
const results = [];

exports.handler = async function (event) {
    try {
        const file = s3.getObject(params).createReadStream();
        file
            .pipe(csv())
            .on('data', function (data) {
                results.push(data);
            })
            .on('end', () => {
                console.log(results);
            });
    } catch (err) {
        console.log(err);
    }
};
I am getting the result as null instead of the CSV data.
You did not return the data at any point in your code; you need to signal completion (here, by invoking the callback) once done.
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const csv = require('csv-parser');

const bucket = '';
const objectkey = '';
const params = { Bucket: bucket, Key: objectkey };
const results = [];

exports.handler = async function (event, ctx, callback) {
    try {
        const file = s3.getObject(params).createReadStream();
        file
            .pipe(csv())
            .on('data', function (data) {
                results.push(data);
            })
            .on('end', () => {
                console.log(results);
                callback(null, results);
            });
    } catch (err) {
        console.log(err);
        callback(Error(err));
    }
};
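One caveat with the snippet above: because the handler is async, Lambda settles on the promise the handler returns, which here resolves to undefined immediately, so the callback fired later from 'end' may come too late. A variant that returns a promise tied to the stream avoids that race (a sketch, reusing s3, csv, and params from above):

exports.handler = async (event) => {
    const results = [];
    return new Promise((resolve, reject) => {
        s3.getObject(params).createReadStream()
            .pipe(csv())
            .on('data', (data) => results.push(data))
            .on('end', () => resolve(results))
            .on('error', (err) => reject(err));
    });
};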

unable to read lines of log file

I have an AWS S3 bucket with a bunch of log files in it. I have a Lambda function using the Node.js 8.10 runtime that reads each line of a log file. This is what I have:
const AWS = require('aws-sdk');
const S3 = new AWS.S3();
const readline = require('readline');

exports.handler = async (event) => {
    try {
        let bucket = event.Records[0].s3.bucket.name;
        let key = event.Records[0].s3.object.key;
        // documentation for this method:
        // https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#getObject-property
        let readStream = S3.getObject({ Bucket: bucket, Key: key }).createReadStream(); // 1
        const rl = readline.createInterface({
            input: readStream
        });
        rl.on('line', async (line) => {
            console.log(line);
            // process line as needed
        });
    } catch (err) {
        console.log(err);
        return err;
    }
};
In the snippet above I'm printing each line of the log file to the console just for testing, but I don't see any output.
But if I refactor the code like this it works:
const stream = require('stream');
const bufferStream = new stream.PassThrough();
const readline = require('readline');

exports.handler = async (event) => {
    try {
        // retrieving first record only just for testing
        let bucket = event.Records[0].s3.bucket.name;
        let key = event.Records[0].s3.object.key;
        // data.Body is a Buffer
        let data = await S3.getObject({ Bucket: bucket, Key: key }).promise();
        bufferStream.end(data.Body); // 2
        const rl = readline.createInterface({
            input: bufferStream
        });
        rl.on('line', (line) => {
            console.log(line);
            // process line as needed
        });
    } catch (err) {
        console.log(err);
        return err;
    }
};
For the line marked 2, getObject returns a Buffer, which is then converted to a stream.
Is it possible to do this without using a buffer? My thinking is that if the log file is very large, it is inefficient to load it into a buffer and then convert that to a stream. I'm wondering if I can use a stream directly, as in the line marked 1.
EDIT:
I did some more testing and got it to work but without an async lambda function. Here it is:
exports.handler = function (event, context, callback) {
    // for testing I'm looking at the first record
    let bucket = event.Records[0].s3.bucket.name;
    let key = event.Records[0].s3.object.key;
    const readStream = S3.getObject({ Bucket: bucket, Key: key }).createReadStream();
    const rl = readline.createInterface({
        input: readStream,
        crlfDelay: Infinity
    });
    rl.on('line', (line) => {
        console.log(line);
    });
};
Does anyone know why this refactored code works, but the async version does not?
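A plausible explanation (the question was left open): an async handler ends the invocation as soon as its returned promise settles, and the first version's promise settles right after the listeners are attached, before any 'line' events arrive. The non-async version works because, with no callback invoked, the runtime waits for the event loop to drain first. To keep the async style, awaiting a promise tied to readline's 'close' event should behave the same way (a sketch; S3 and readline as required earlier):

exports.handler = async (event) => {
    const bucket = event.Records[0].s3.bucket.name;
    const key = event.Records[0].s3.object.key;
    const readStream = S3.getObject({ Bucket: bucket, Key: key }).createReadStream();
    const rl = readline.createInterface({ input: readStream, crlfDelay: Infinity });
    await new Promise((resolve, reject) => {
        rl.on('line', (line) => console.log(line));
        rl.on('close', resolve);
        readStream.on('error', reject); // readline itself doesn't emit 'error'
    });
};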
