Why doesn't this CSV parsing Node Lambda log anything?

Runtime env is Node 14 (AWS Lambda function). The S3 bucket and Lambda function are in the same region, and I have confirmed that the Lambda function is able to get the object from S3 (i.e. permissions do not seem to be an issue). The Lambda triggers upon PUT of an object (a very simple CSV file) into the S3 bucket. No errors or exceptions appear in the CloudWatch log stream.
package.json
{
  "dependencies": {
    "@fast-csv/parse": "4.3.6"
  }
}
index.js
const aws = require('aws-sdk');
const s3 = new aws.S3({region: 'us-east-2'});
const fs = require('fs');
const csv = require('@fast-csv/parse');
exports.handler = async (event, context) => {
const bucket = event.Records[0].s3.bucket.name;
const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
const params = {
Bucket: bucket,
Key: key,
};
const file = await s3.getObject(params).promise();
fs.createReadStream(file.Body).pipe(csv.parse())
.on('error', (error) => console.error(error))
.on('data', (row) => console.log(row))
.on('end', (rowCount) => console.log(`Parsed ${rowCount} rows`));
};
I also tried the following variation, which had the same outcome:
index.js (variant)
const aws = require('aws-sdk');
const s3 = new aws.S3({region: 'us-east-2'});
const fs = require('fs');
const csv = require('fast-csv');
exports.handler = async (event, context) => {
const bucket = event.Records[0].s3.bucket.name;
const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
const params = {
Bucket: bucket,
Key: key,
};
const file = await s3.getObject(params).promise();
const stream = fs.createReadStream(file.Body);
csv.parseStream(stream)
.on('data', (data) => {
console.info('Data: ' + JSON.stringify(data));
})
.on('data-invalid', (data) => {
console.error('Invalid batch row ' + data);
})
.on('end', () => {
console.info('End of Stream');
})
.on('error', (error) => {
let message = "Error in csv stream processing";
console.error(message, ":", error);
}
);
};
Note: I already tried simply doing await s3.getObject(params).createReadStream(), but that results in undefined whereas getting the object via promise() first gets the object data.
I've been wrestling with this for hours, so any help is appreciated. Thanks!
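Two things stand out in the code above, offered as a hedged diagnosis rather than a definitive answer: fs.createReadStream() expects a filesystem path, so handing it file.Body (a Buffer) makes it try to open a file that doesn't exist, and because the async handler never awaits the stream events, the invocation can end before even that error is emitted. A minimal sketch of one way to address both, assuming the same event shape and the @fast-csv/parse API used above (parseString is part of that package):
const aws = require('aws-sdk');
const csv = require('@fast-csv/parse');

const s3 = new aws.S3({ region: 'us-east-2' });

exports.handler = async (event) => {
  const bucket = event.Records[0].s3.bucket.name;
  const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));

  // The object is already in memory as a Buffer, so parse it directly
  // instead of going through fs.createReadStream().
  const file = await s3.getObject({ Bucket: bucket, Key: key }).promise();

  // Wrap the stream events in a Promise so the async handler does not
  // return before parsing has finished.
  const rowCount = await new Promise((resolve, reject) => {
    csv.parseString(file.Body.toString('utf-8'))
      .on('error', reject)
      .on('data', (row) => console.log(row))
      .on('end', (count) => resolve(count));
  });

  console.log(`Parsed ${rowCount} rows`);
};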

Related

How to read JSON from S3 by AWS Lambda Node.js 18.x runtime?

@TBA gave the solution.
The root cause was not the runtime; it came from SDK v3.
Point: don't update the code with several changes mixed together (like both the runtime and the SDK version at once 🥲)
Thanks again, TBA.
I was using a Lambda on the Node.js 14.x runtime to read a JSON file from S3.
Brief code is below
const AWS = require("aws-sdk");
const s3 = new AWS.S3();
exports.handler = (event) => {
const { bucketName, objKey } = event
const params = {
Bucket: bucketName,
Key: objKey
};
return new Promise((resolve) => {
s3.getObject(params, async (err, data) =>{
if (err) console.log(err, err.stack);
else {
const contents = JSON.parse(data.Body)
resolve(contents);
}
});
})
};
and it returned the json data as I expected.
Today I tried to create a new Lambda with the Node.js 18.x runtime, but it returns null or errors...
Q) Could you give me some advice to solve this 🥲 ?
+) I used the same JSON file for each Lambda
+) Not sure why, but in my case data.Body.toString() didn't work (I saw some answers on Stack Overflow suggesting that and tried it, but no luck)
Thanks in advance!
Case A (returns null)
import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";
const s3Client = new S3Client({ region: "ap-northeast-2" });
export const handler = (event) => {
const { objKey, bucketName } = event;
const params={
Bucket: bucketName,
Key: objKey
};
const getObjCommand = new GetObjectCommand(params);
return new Promise((resolve) => {
s3Client.send(getObjCommand, async (err, data) =>{
if (err) console.log(err, err.stack);
else {
const list = JSON.parse(data.Body)
resolve(list);
}
});
})
};
Case B (returns "Unexpected token o in JSON at position 1")
export const handler = async (event) => {
const { objKey, bucketName } = event;
const params={
Bucket: bucketName,
Key: objKey
};
const getObjCommand = new GetObjectCommand(params);
const response = await s3Client.send(getObjCommand)
console.log("JSON.parse(response.Body)", JSON.parse(response.Body))
};
Case C (returns "TypeError: Converting circular structure to JSON")
export const handler = async (event) => {
const { objKey, bucketName } = event;
const params={
Bucket: bucketName,
Key: objKey
};
const getObjCommand = new GetObjectCommand(params);
try {
const response = await s3Client.send(getObjCommand)
return JSON.stringify(response.Body)
} catch(err) {
console.log("error", err)
return err
}
};
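A note on the SDK v3 behavior behind these three cases: in @aws-sdk/client-s3 the response Body is a readable stream rather than a Buffer, so JSON.parse(response.Body) and JSON.stringify(response.Body) both misbehave. A minimal sketch, assuming an SDK v3 version recent enough that the Body stream exposes transformToString():
import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";

const s3Client = new S3Client({ region: "ap-northeast-2" });

export const handler = async (event) => {
  const { objKey, bucketName } = event;

  const response = await s3Client.send(
    new GetObjectCommand({ Bucket: bucketName, Key: objKey })
  );

  // In SDK v3 the Body is a stream; read it into a string before parsing.
  const bodyText = await response.Body.transformToString();
  return JSON.parse(bodyText);
};
For older v3 releases without transformToString(), the stream has to be collected into a Buffer manually before parsing.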

NodeJS piping data to AWS S3 TypeError: dest.on is not a function

Can I please get some help as to why my code is throwing a pipe error when using a PassThrough: TypeError: dest.on is not a function
I originally thought it was because I was not returning the PassThrough, as outlined in the other post
Now I'm not so sure? Thanks in advance
const { google } = require('googleapis')
const auth = require('./googleOAuth')
const aws = require('aws-sdk')
const fs = require('fs')
const stream = require('stream')
// AWS S3 bucket name to upload to
const awsBucketName = 'my-backup'
// get AWS keys stored in local file and pass through to AWS auth
const getAWSKeys = async () => {
const awsKeys = await auth.read('./cred/awskeys.json').then(result => {return result})
aws.config.update({
accessKeyId: awsKeys.keys.aws_access_key_id,
secretAccessKey: awsKeys.keys.aws_secret_access_key
})
}
// upload a file to AWS S3 by passing the file stream from getGFileContent into the 'body' parameter of the upload
const s3Upload = async () => {
await getAWSKeys()
let pass = new stream.PassThrough()
let params = {
Bucket: awsBucketName, // bucket-name
Key: 'filePath.jpg', // file will be saved as bucket-name/[uniquekey.csv]
Body: pass // file data passed through stream
}
new aws.S3().upload(params).promise()
.then(() => console.log(`Successfully uploaded data to bucket`))
.catch( err => console.log(`Error, unable to upload to S3: ${err}`))
return pass
}
// download gFile (non Google Docs files). Downloaded as a stream of data and piped into the s3Upload function
const getGFileContent = async () => {
const gKeys = await auth.get()
const drive = google.drive({version: 'v3', auth: gKeys})
let params = {fileId: '1bNr_ZM90fM0EnPcFPfdd2LnB7Z2Tts3LiQ', mimeType: "image/jpeg", alt: 'media'}
return drive.files.get(params, {responseType: 'stream'})
.then(res => {
return new Promise((resolve, reject) => {
res.data
.on('end', () => {resolve()})
.on('error', err => {reject(`Error downloading Google docs file: ${err}`)})
.pipe(s3Upload())
})
})
}
getGFileContent()
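The error is consistent with piping into a Promise rather than a stream: s3Upload is declared async, so s3Upload() returns a Promise<PassThrough>, and a Promise has no .on method. A sketch of one way around it, reusing the helpers defined above (auth, google, s3Upload) rather than a definitive fix; getGFileContentFixed is just a hypothetical name:
// Hypothetical rework of getGFileContent: resolve the PassThrough before piping.
const getGFileContentFixed = async () => {
  const gKeys = await auth.get()
  const drive = google.drive({version: 'v3', auth: gKeys})
  const params = {fileId: '1bNr_ZM90fM0EnPcFPfdd2LnB7Z2Tts3LiQ', mimeType: "image/jpeg", alt: 'media'}

  // s3Upload is async, so await it to get the actual PassThrough stream.
  const pass = await s3Upload()

  const res = await drive.files.get(params, {responseType: 'stream'})
  return new Promise((resolve, reject) => {
    res.data
      .on('end', () => resolve())
      .on('error', err => reject(`Error downloading Google docs file: ${err}`))
      .pipe(pass)
  })
}

getGFileContentFixed().catch(console.error)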

Unable to read the data from csv file in aws Lambda

I am trying to read the csv file from S3 in Lambda using NodeJS.
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const csv = require('csv-parser');
const bucket = '';
const objectkey = '';
const params = { Bucket: bucket, Key: objectkey }
const results = [];
exports.handler = async function (event) {
try {
const file = s3.getObject(params).createReadStream();
file
.pipe(csv())
.on('data', function (data) {
results.push(data)
})
.on('end', () => {
console.log(results);
})
} catch (err) {
console.log(err);
}
}
I am getting the result as null instead of csv data
You did not return the data at any point in your code; you need to signal completion (via the callback or context.succeed()) once done.
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const csv = require('csv-parser');
const bucket = '';
const objectkey = '';
const params = { Bucket: bucket, Key: objectkey }
const results = [];
exports.handler = async function (event, ctx, callback) {
try {
const file = s3.getObject(params).createReadStream();
file
.pipe(csv())
.on('data', function (data) {
results.push(data)
})
.on('end', () => {
console.log(results);
callback(null, results);
})
} catch (err) {
console.log(err);
callback(Error(err))
}
}
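One caveat about mixing an async handler with the callback: because nothing in the handler is awaited, the returned promise resolves immediately and the invocation can end before the 'end' event fires. A minimal sketch of an alternative that awaits the stream instead, assuming the same s3, csv, and params as above:
exports.handler = async function (event) {
  const results = [];
  const file = s3.getObject(params).createReadStream();

  // Wrap the stream in a Promise so the handler waits for parsing to finish.
  await new Promise((resolve, reject) => {
    file
      .pipe(csv())
      .on('data', (data) => results.push(data))
      .on('end', resolve)
      .on('error', reject);
  });

  console.log(results);
  return results;
};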

Read a file line by line using Lambda / S3

I want to read, line by line, a file located on S3. I tried the following code, which I found searching online, but the Lambda function exits without invoking any of the readline callbacks. What am I doing wrong?
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });
const readline = require('readline');
exports.handler = async (event, context, callback) => {
const bucket = event.Records[0].s3.bucket.name;
const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
const params = {
Bucket: bucket,
Key: key,
};
const s3ReadStream = s3.getObject(params).createReadStream();
const rl = readline.createInterface({
input: s3ReadStream,
terminal: false
});
rl.on('line', (line) => {
console.log(`Line from file: ${line}`);
});
rl.on('error', () => {
console.log('error');
});
rl.on('close', function () {
console.log('closed');
context.succeed();
});
console.log('done');
};
I've found the problem. It's been a while since I last coded on Lambda, and I thought it would only exit when the context was called. I'm now waiting for the promise to be resolved (or rejected, which I'll implement later).
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });
const readline = require('readline');
exports.handler = async (event, context, callback) => {
const bucket = event.Records[0].s3.bucket.name;
const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
const params = {
Bucket: bucket,
Key: key,
};
const s3ReadStream = s3.getObject(params).createReadStream();
const rl = readline.createInterface({
input: s3ReadStream,
terminal: false
});
let myReadPromise = new Promise((resolve, reject) => {
rl.on('line', (line) => {
console.log(`Line from file: ${line}`);
});
rl.on('error', () => {
console.log('error');
});
rl.on('close', function () {
console.log('closed');
resolve();
});
});
try { await myReadPromise; }
catch(err) {
console.log('an error has occurred');
}
console.log('done reading!');
};
getObject doesn't just return the object that was stored in S3. It returns a response object whose Body field holds the contents of the object stored in S3. See also the Response section of the documentation here.
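To illustrate that, a small sketch of reading the whole object in one go (readWholeObject is just a hypothetical helper name; params as in the snippets above):
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });

// The response is an envelope; the object's bytes live in its Body field.
async function readWholeObject(params) {
  const response = await s3.getObject(params).promise();
  return response.Body.toString('utf-8');
}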

stream the contents of an S3 object into hash algorithm node.js

I'm new to Node.js and I'm trying to write an AWS Lambda function that streams the content of an S3 object into Node's crypto module to create an MD5 checksum of the S3 object. Not sure why, but every time I run the code it generates a different hash value in the console.log. Can anyone point me in the right direction to fix my code? Appreciate the help!
var crypto = require('crypto');
var fs = require('fs');
var AWS = require('aws-sdk');
var s3 = new AWS.S3();
exports.handler = (event, context, callback) => {
var params = {
Bucket: 'bucket_name',
Key: 'key',
};
var hash = crypto.createHash('md5');
var stream = s3.getObject(params, function(err, data) {
if (err){
console.log(err);
return;
}
}).createReadStream();
stream.on('data', function (data) {
hash.update(data, 'utf-8')
})
stream.on('end', function () {
console.log(hash.digest('hex'))
})
};
You were close. You are mixing the "callback" style method signature with a "createReadStream" signature. Try this:
const crypto = require('crypto');
const fs = require('fs');
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
exports.handler = (event, context, callback) => {
let params = {
Bucket: 'bucket_name',
Key: 'key',
};
let hash = crypto.createHash('md5');
let stream = s3.getObject(params).createReadStream();
stream.on('data', (data) => {
hash.update(data);
});
stream.on('end', () => {
let digest = hash.digest('hex');
console.log(digest);
callback(null, digest);
});
};
Not directly an answer, but you can also get the MD5 hash stored as the ETag when uploading a file to S3.
const crypt = require('crypto');
const fs = require('fs').promises;
const aws = require('aws-sdk');
async function uploadFileToS3WithMd5Hash(bucket, filename, s3Key = null) {
const data = await fs.readFile(filename);
const md5Base64 = crypt.createHash("md5").update(data).digest('base64');
if (!s3Key) {
s3Key = filename;
}
/** Should you want to get the MD5 in hex format: */
// const md5Hex = Buffer.from(md5Base64, 'base64').toString('hex');
return new Promise((res, rej) => {
const s3 = new aws.S3();
s3.putObject({
Bucket: bucket,
Key: s3Key,
Body: data,
ContentMD5: md5Base64,
}, (err, resp) => err ? rej(err) : res(resp));
})
}
uploadFileToS3WithMd5Hash('your-own-bucket', 'file.txt')
.then(console.log)
.catch(console.error);
So by checking the ETag for an object on S3, you get the hex string of the file's MD5 hash.
In some cases (see this post by Dennis), the MD5 checksum is computed automatically upon upload.
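To illustrate the ETag check, a rough sketch of comparing a local file's MD5 against the object's ETag (this only holds for single-part uploads without SSE-KMS; md5MatchesETag is a hypothetical helper):
const crypto = require('crypto');
const fs = require('fs').promises;
const aws = require('aws-sdk');

// Hypothetical helper: true if the local file's MD5 matches the S3 object's ETag.
async function md5MatchesETag(bucket, key, filename) {
  const s3 = new aws.S3();
  const data = await fs.readFile(filename);
  const localMd5Hex = crypto.createHash('md5').update(data).digest('hex');

  const head = await s3.headObject({ Bucket: bucket, Key: key }).promise();
  const etag = head.ETag.replace(/"/g, ''); // the ETag comes back wrapped in quotes

  return etag === localMd5Hex;
}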
