Read data from .xlsx file on S3 using Node.js Lambda

I'm still new to Node.js and AWS, so forgive me if this is a noob question.
I am trying to read the data from an Excel file (.xlsx). The Lambda function receives the extension of the file type.
Here is my code:
exports.handler = async (event, context, callback) => {
    console.log('Received event:', JSON.stringify(event, null, 2));
    if (event.fileExt === undefined) {
        callback("400 Invalid Input");
    }
    let returnData = "";
    const S3 = require('aws-sdk/clients/s3');
    const s3 = new S3();
    switch (event.fileExt) {
        case "plain":
        case "txt":
            // Extract text
            const params = { Bucket: 'filestation', Key: 'MyTXT.' + event.fileExt };
            try {
                await s3.getObject(params, function(err, data) {
                    if (err) console.log(err, err.stack); // an error occurred
                    else { // successful response
                        returnData = data.Body.toString('utf-8');
                        context.done(null, returnData);
                    }
                }).promise();
            } catch (error) {
                console.log(error);
                return;
            }
            break;
        case "xls":
        case "xlsx":
            returnData = "Excel";
            // Extract text
            const params2 = { Bucket: 'filestation', Key: 'MyExcel.' + event.fileExt };
            const readXlsxFile = require("read-excel-file/node");
            try {
                const doc = await s3.getObject(params2);
                const parsedDoc = await readXlsxFile(doc);
                console.log(parsedDoc);
            } catch (err) {
                console.log(err);
                const message = `Error getting object.`;
                console.log(message);
                throw new Error(message);
            }
            break;
        case "docx":
            returnData = "Word doc";
            // Extract text
            break;
        default:
            callback("400 Invalid Operator");
            break;
    }
    callback(null, returnData);
};
The text file part works, but the xlsx part makes the function time out.
I did install the read-excel-file dependency and uploaded the zip so that I have access to it.
The function times out with this message:
"errorMessage": "2020-11-02T13:06:50.948Z 120bfb48-f29c-4e3f-9507-fc88125515fd Task timed out after 3.01 seconds"
Any help would be appreciated! Thanks for your time.

Using the xlsx npm library, here's how we did it.
This assumes the file is under the root project path.
const xlsx = require('xlsx');
// read your excel file
let readFile = xlsx.readFile('file_example_XLSX_5000.xlsx');
// get the first sheet's name
let sheetName = readFile.SheetNames[0];
// convert the sheet to JSON; best if the sheet has headers specified
console.log(xlsx.utils.sheet_to_json(readFile.Sheets[sheetName]));

You need to install the xlsx (SheetJS) library into the project:
npm install xlsx
and then import the read function into the Lambda, get the S3 object's body, and pass it to xlsx like this:
const { read } = require('xlsx');
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });

exports.handler = async (event) => {
    const bucketName = 'excel-files';
    const fileKey = 'Demo Data.xlsx';
    // Simple GetObject
    let file = await s3.getObject({ Bucket: bucketName, Key: fileKey }).promise();
    const wb = read(file.Body);
    const response = {
        statusCode: 200,
        body: JSON.stringify({
            read: wb.Sheets,
        }),
    };
    return response;
};
(Of course, you can receive the bucket and file key from parameters if you send them.)
Very important: use the read function (not readFile) and pass the Body property (with a capital "B") as the parameter.

I changed the timeout to 20 seconds and it works. Only one issue remains: const parsedDoc = await readXlsxFile(doc); wants to receive a string (a file path) and not a file.
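read-excel-file's Node entry point is documented to accept a Buffer or Stream as well as a file path, so one way around that (a minimal sketch, assuming aws-sdk v2 and the bucket/key from the question) is to resolve the getObject call first and hand over its Body buffer:
// Hedged sketch: resolve the getObject request, then pass the Body buffer
// to read-excel-file instead of a file path.
const readXlsxFile = require('read-excel-file/node');
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3();

async function parseXlsxFromS3() {
    // .promise() resolves with the object; in aws-sdk v2, Body is a Buffer
    const { Body } = await s3.getObject({ Bucket: 'filestation', Key: 'MyExcel.xlsx' }).promise();
    // read-excel-file/node also accepts a Buffer (or Stream), not only a path
    const rows = await readXlsxFile(Body);
    return rows; // array of row arrays
}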

Solved by using the xlsx npm library, using a stream and giving it buffers.
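A minimal sketch of that stream-and-buffer approach with xlsx (the bucket, key, and helper names here are illustrative):
// Hedged sketch: stream the S3 object, collect it into a buffer, and parse with xlsx.
const XLSX = require('xlsx');
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3();

function streamToBuffer(stream) {
    return new Promise((resolve, reject) => {
        const chunks = [];
        stream.on('data', (chunk) => chunks.push(chunk));
        stream.on('error', reject);
        stream.on('end', () => resolve(Buffer.concat(chunks)));
    });
}

async function readWorkbookFromS3(bucket, key) {
    // createReadStream streams the object body instead of buffering it via .promise()
    const stream = s3.getObject({ Bucket: bucket, Key: key }).createReadStream();
    const buffer = await streamToBuffer(stream);
    // XLSX.read parses an in-memory buffer; the 'buffer' type is explicit for Node
    const workbook = XLSX.read(buffer, { type: 'buffer' });
    const firstSheet = workbook.Sheets[workbook.SheetNames[0]];
    return XLSX.utils.sheet_to_json(firstSheet);
}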

Related

How to upload a string of JSON data into GCS from NodeJS?

I am getting a result set from BigQuery and looping through it; one of the columns contains a string (JSON data) that needs to be uploaded to a GCS bucket as a file.
The file content would be something like:
{
    "name": "sharath",
    "country": "India"
}
I tried using the file.save() method, and also a PassThrough stream, but nothing happened (it doesn't even error out).
file.save():
for (row of rows) {
    const contents = row.JSON_Content;
    const file = storage.bucket(gcsBucket).file("/" + process.env.FILE_TMP_PATH + fileName + '*.json');
    file.save(contents).then(() => console.messages.push(`file uploaded`));
}
passthroughStream:
for (row of rows) {
    const passthroughStream = new stream.PassThrough();
    passthroughStream.write(contents);
    passthroughStream.end();
    passthroughStream.pipe(file.createWriteStream())
        .on('error', (err) => {
            throw new Error(`File upload failed with error: ${err.message}`);
        })
        .on('finish', () => {
            // The file upload is complete
        });
}
Nothing is working out; these didn't create any file in the GCS bucket. I referred to this document.
My overall code looks like:
// import libraries...
const xxx = {
    myFunction: async () => {
        try {
            // ...get data from BigQuery...
            // ...loop through result set...
            // ...code not working is illustrated above...
        } catch (err) {
            throw new Error('error occurred');
        }
    }
};
module.exports = xxx;
To save data to a file, try streaming it (createWriteStream):
const fs = require('fs');
const stream = fs.createWriteStream("/" + process.env.FILE_TMP_PATH + fileName + '*.json', { flags: 'a' });
for (row of rows) {
    stream.write(row.JSON_Content);
}
stream.end();
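If the goal is still to write straight to GCS rather than local disk, note that file.save() returns a promise; if the surrounding async function returns before those promises settle, nothing gets written and no error surfaces. A hedged sketch that awaits each upload (assuming a current @google-cloud/storage client; the object name is illustrative):
// Hedged sketch: await file.save so the function doesn't finish before the upload does.
const { Storage } = require('@google-cloud/storage');
const storage = new Storage();

async function uploadRows(rows, gcsBucket) {
    for (const row of rows) {
        const contents = row.JSON_Content;
        // Illustrative object name; the question's FILE_TMP_PATH prefix is assumed
        const file = storage.bucket(gcsBucket).file(process.env.FILE_TMP_PATH + Date.now() + '.json');
        // Awaiting the promise surfaces errors instead of silently dropping them
        await file.save(contents);
    }
}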

How do I upload a large Audio file longer than 30sec direct from the browser to AwS S3?

I would like to save an audio recording to S3. I am using the functions below to upload directly to AWS S3 from the browser. It works for short audio recordings of up to around 25 seconds but fails for larger files.
Currently the flow is as follows: I speak into the microphone using recorder.js. Once the recording is complete I press stop, which then saves the file to AWS.
From the browser:
getSignedRequest(file, fileLoc);

function getFetchSignedRequest(file, fileLoc) {
    const fetchUrl = `/xxxxxxxxx?file-name=${file.name}&file-type=${file.type}&fileLoc=${fileLoc}`;
    fetch(fetchUrl)
        .then((response) => {
            console.log('response', response);
            if (!response.ok) {
                console.log('Network response was not OK', response.ok);
            } else {
                putAudioFetchFile(file, response.signedRequest, response.url);
            }
        })
        .catch((error) => {
            console.error('Could not get signed URL:', error);
        });
}
This sends a GET request to the Node.js server, which calls this:
const aws = require('aws-sdk');
const fs = require('fs');
aws.config.region = 'xxxxxx';
const S3_BUCKET = process.env.AWS_S3_BUCKET;

this.uploadToAWSDrive =
    async function uploadToAWSDrive(req, res) {
        const s3 = new aws.S3();
        const URL_EXPIRATION_SECONDS = 3000;
        const subFolderName = req.query['fileLoc'];
        const fileName = req.query['file-name'];
        const fileType = req.query['file-type'];
        const fileLocName = subFolderName + fileName;
        const s3Params = {
            Bucket: S3_BUCKET,
            Key: fileLocName,
            Expires: URL_EXPIRATION_SECONDS,
            ContentType: fileType,
            ACL: 'public-read'
        };
        await s3.getSignedUrl('putObject', s3Params, (err, data) => {
            if (err) {
                console.log(err);
                return res.end();
            }
            const returnData = {
                signedRequest: data,
                url: `https://${S3_BUCKET}.s3.amazonaws.com/${fileLocName}`
            };
            console.log('audio uploaded', returnData);
            res.write(JSON.stringify(returnData));
            res.end();
        });
    }
Which then calls this:
function uploadFile(file, signedRequest, url) {
    const xhr = new XMLHttpRequest();
    xhr.open('PUT', signedRequest);
    xhr.onreadystatechange = () => {
        if (xhr.readyState === 4) {
            if (xhr.status === 200) {
                console.log('destination url= ', url, xhr.readyState, xhr.status);
            } else {
                alert('Could not upload file.');
            }
        }
    };
    xhr.send(file);
}
This then sends the file to the AWS S3 bucket. It's OK for audio of less than 30 seconds, but fails for longer audio files.
What do I need to do to enable this to work with audio files longer than 20 seconds and up to 3 minutes?
Any help most appreciated.
Not very elegant, but the issue was resolved by adding a timer to the original function call. A function that followed also needed to be delayed, I think to allow processor time. I am sure there will be better ways to do this.
setTimeout(() => getSignedRequest(myAudioFile, fileLoc), processTime);
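One of those better ways (a hedged sketch reusing the question's endpoint and flow; the function name is illustrative) is to chain the two steps on promises instead of a timer, so the PUT only starts once the signed-URL response has been parsed:
// Hedged sketch: await the signed-URL response, parse its JSON body, then PUT the file.
async function uploadRecording(file, fileLoc) {
    const fetchUrl = `/xxxxxxxxx?file-name=${file.name}&file-type=${file.type}&fileLoc=${fileLoc}`;
    const response = await fetch(fetchUrl);
    if (!response.ok) throw new Error(`Signed URL request failed: ${response.status}`);
    // The server writes JSON, so the body must be parsed before signedRequest is available
    const { signedRequest, url } = await response.json();
    const putResponse = await fetch(signedRequest, {
        method: 'PUT',
        headers: { 'Content-Type': file.type },
        body: file,
    });
    if (!putResponse.ok) throw new Error(`Upload failed: ${putResponse.status}`);
    return url; // the public object URL built by the server
}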

The first argument must be of type string or an instance of Buffer, ArrayBuffer, or Array or an Array-like Object. Received undefined

I'm trying to upload a file (PDF/JPG) using a Lambda function written in Node.js, triggering the request from Postman, but I'm getting the following error:
2022-02-02T15:09:51.135Z 743939db-7511-4003-8e49-40c95ada47b4 ERROR Invoke Error
{
    "errorType": "TypeError",
    "errorMessage": "The first argument must be of type string or an instance of Buffer, ArrayBuffer, or Array or an Array-like Object. Received undefined",
    "code": "ERR_INVALID_ARG_TYPE",
    "stack": [
        "TypeError [ERR_INVALID_ARG_TYPE]: The first argument must be of type string or an instance of Buffer, ArrayBuffer, or Array or an Array-like Object. Received undefined",
        "    at new NodeError (internal/errors.js:322:7)",
        "    at Function.from (buffer.js:334:9)",
        "    at Runtime.exports.lambdaHandler [as handler] (/var/task/app.js:68:23)"
    ]
}
The following is a chunk of the event object getting logged to CloudWatch:
2022-02-02T20:39:52.136+05:30
info: Event:: {"body":"{\n \"base64String\": \"/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAcHBwcIBwgJCQgMDAsMDBEQDg4QERoSFBIUEhonGB0YGB0YJyMqIiAiKiM+MSsrMT5IPDk8SFdOTldtaG2Pj8ABBwcHBwgHCAkJCAwMCwwMERAODhARGhIUEhQSGicYHRgYHRgnIyoiICIqIz4xKysxPkg8OTxIV05OV21obY+PwP/CABEICHAPAAMBIgACEQEDEQH/xAAcAAEAAgMBAQEAAAAAAAAAAAAABwgBBQYEAwL/2gAIAQEAAAAAsiAA
Lambda (Node.js code):
'use-strict'
const AWS = require("aws-sdk");
const logger = require('./logger').logger;
const moment = require('moment');
const fileType = ('file-type');
const { Buffer } = require('buffer');
//const { fileTypeFromFile } = 'file-type';
const ddbTable = process.env.RUNTIME_DDB_TABLE_FREE_USER_DOCUMENT;
const s3TempBucket = process.env.RUNTIME_S3_TEMP_BUCKET;
const s3 = new AWS.S3();

const getFile = (fileMime, buffer, userId) => {
    let fileExt = fileMime.ext;
    let hash = sha1(new Buffer(new Date().toString()));
    let now = moment().format('YYYY-MM-DD HH:mm:ss');
    let filePath = hash + '/';
    let fileName = unixTime(now) + '.' + fileExt;
    let fileFullName = filePath + fileName;
    let fileFullPath = s3TempBucket + userId + fileFullName;
    const params = {
        Body: buffer,
        Bucket: s3TempBucket,
        Key: fileName
    };
    let uploadFile = {
        size: buffer.toString('ascii').length,
        type: fileMime.mime,
        name: fileName,
        fullPath: fileFullPath
    };
    return {
        'params': params,
        'uploadFile': uploadFile
    };
}

exports.lambdaHandler = async (event, context) => {
    logger.info("Event::", event);
    logger.info('Uploading file to bucket::', s3TempBucket);
    let body, data;
    let statusCode = 200;
    const headers = {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Headers': 'Content-Type',
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': '*'
    };
    let request = JSON.parse(event.body);
    let base64String = await request.base64String;
    logger.info("base64String::", base64String);
    let buffer = Buffer.from(base64String, 'base64');
    //let buffer = new Buffer(base64String, 'base64');
    let fileMime = fileType(buffer);
    logger.info(fileMime);
    if (fileMime === null) {
        return context.fail('String supplied is not file type');
    }
    //let file = getFile(fileMime, buffer, user.id);
    let file = getFile(fileMime, buffer, 'b06eb6f4-0ff0-5cb5-a41c-e000af66c8e9');
    let params = file.params;
    try {
        //await new Promise((resolve, reject) => {
        s3.putObject(params, (err, results) => {
            if (err) reject(err);
            else {
                console.log(results);
                body = results;
                resolve(results);
            }
        });
        // });
    } catch (err) {
        logger.info(err);
        statusCode = 400;
        body = err.message;
        return err;
    } finally {
        body = JSON.stringify(data);
    }
    return {
        statusCode,
        body,
        headers
    };
}
The base64String is coming out as undefined, and I'm not sure why, as I can clearly see it in the event object:
let buffer = Buffer.from(base64String, 'base64');
Please assist, thanks.
Postman request:
If you use API Gateway, you don't need to base64-encode the file, because API Gateway does it automatically.
A sample is provided by AWS:
Select Create function
Select Browse serverless app repository
Find "uploader: Serverless web application for uploading files to S3"
Deploy
uploader's GitHub
This creates an API Gateway and a Node.js Lambda. (You need to provide an S3 bucket.)
The instructions say that to upload a file, you open the InvokeURL in a browser and drag and drop a file. You can do this with Postman too, as follows.
Input POST InvokeURL/api/file/text.pdf.
Set the body KEY to File and input text.pdf. Select the pdf file as VALUE.
Send.
You can find the code in index.js and extract what you need.
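To illustrate the first point: with proxy integration and binary media types enabled, API Gateway delivers the raw upload in event.body, base64-encoded, and sets isBase64Encoded, so the handler can decode it without the client wrapping it in JSON. A hedged sketch (handler shape only; the bucket variable and the key are assumptions, not from the sample app):
// Hedged sketch: decode the API Gateway proxy body and upload it as-is.
const AWS = require('aws-sdk');
const s3 = new AWS.S3();

exports.lambdaHandler = async (event) => {
    // With binary media types enabled, API Gateway base64-encodes the raw body
    const buffer = event.isBase64Encoded
        ? Buffer.from(event.body, 'base64')
        : Buffer.from(event.body);
    await s3.putObject({
        Bucket: process.env.RUNTIME_S3_TEMP_BUCKET, // assumed from the question's env vars
        Key: 'upload-' + Date.now() + '.pdf',       // illustrative key
        Body: buffer
    }).promise();
    return { statusCode: 200, body: JSON.stringify({ size: buffer.length }) };
};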

Download pdf files from external URLs - Heroku, NodeJS, Angular 7

I am trying to download multiple PDF files from external sources to my Node.js server (on Heroku) temporarily, then upload them to an AWS S3 bucket.
I have tried multiple methods, all of which work fine on my local machine but not on the Heroku dyno Node.js server. I am unable to even create a folder in Heroku, I guess due to limited permissions.
In Node:
1) using var download = require('download-file') (used in the code below)
2) axios
3) res.download()
Download Files Code
const downloadFiles = async (unique_files) => {
    for (let index = 0; index < unique_files.length; index++) {
        let file_ext = unique_files[index].substr(unique_files[index].length - 4);
        if (file_ext == ".pdf") {
            await downloadzz(unique_files[index]);
        }
    }
}

function downloadzz(link) {
    download(link, function(err) {
        if (err) throw err;
        console.log("DOWNLOAD Complete");
    });
}
Upload Files Code
const uploadFiles = async (unique_files) => {
    for (let index = 0; index < unique_files.length; index++) {
        let file_ext = unique_files[index].substr(unique_files[index].length - 4);
        if (file_ext == ".pdf") {
            await uploadzz(unique_files[index]);
        }
    }
}

function uploadzz(link) {
    fs.readFile(require('path').resolve(__dirname + '/../external-pdfs/', link.slice(link.lastIndexOf('/') + 1)), function (err, data) {
        params = { Bucket: pdfBucket, Key: link.slice(link.lastIndexOf('/') + 1), Body: data, ACL: "public-read" };
        s3.putObject(params, function(err, data) {
            if (err) {
                console.log("Failed Upload", err);
            } else {
                console.log("Successfully uploaded data to bucket", data);
            }
        });
    });
}
I don't get any error, but no folder named external-pdfs seems to exist on the Heroku server.
I am open to better solutions: for example, directly uploading the file from the external URL to S3.
How can I read a file from an external URL and directly upload it to an AWS S3 bucket?
You can use axios. Setting the responseType to stream, you can get the file data and pass it as the body. Here is example code that gets the PDF from a URL and uploads its contents directly to S3:
const AWS = require('aws-sdk');
const axios = require('axios');

AWS.config.loadFromPath('./config.json');
const s3 = new AWS.S3({ apiVersion: '2006-03-01' });

const URL = "<YOUR_URL>";

const uploadPdfToS3 = async () => {
    try {
        const { data, headers } = await axios.get(URL, { responseType: 'stream' });
        // Create params for putObject call
        const objectParams = {
            Bucket: "<YOUR_BUCKET>",
            Key: "<YOUR_KEY>",
            ContentLength: headers['content-length'],
            Body: data
        };
        // Create object upload promise
        await s3.putObject(objectParams).promise();
    } catch (err) {
        console.log("ERROR --->" + err);
    }
}
In Angular, the FileSaver library can be used to save the PDF file on the client side.
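A minimal hedged sketch of that approach with the file-saver package (the function name and fetch-based flow are illustrative, not from the original answer):
// Hedged sketch: fetch the PDF in the browser and hand the blob to file-saver.
import { saveAs } from 'file-saver';

async function downloadPdf(url, fileName) {
    const response = await fetch(url);
    const blob = await response.blob();
    saveAs(blob, fileName); // triggers the browser's download/save dialog
}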

minizip-asm extract function takes forever to execute

I am trying to fetch an AES-encrypted, password-protected zip file from Google Cloud Storage and extract a CSV file from it. I am using Google Cloud Functions with Node 6.
I've tried using the minizip-asm.js library to extract the file. It works intermittently. I am a newbie when it comes to Node, so I would really appreciate some help :).
Here's the relevant piece of code which might help. Could someone help me figure out what's going wrong here?
exports.processFile = (event, callback) => {
    const file = event.data;
    const filename = file.name;
    const projectId = "abc1234";
    const bucketName = "abc_reports";
    const Storage = require('@google-cloud/storage');
    const storage = Storage({
        projectId: projectId
    });
    const folder = storage.bucket(bucketName);
    const minizip = require('minizip-asm.js');
    if (file.metageneration === '1' && filename.match(".zip") != null) {
        // metageneration attribute is updated on metadata changes.
        // on create value is 1
        console.log(`File ${file.name} uploaded.`);
        folder.file(filename).download().then(function(data) {
            console.log('Download of file complete');
            // create csv file
            var csvName = filename.split(".zip")[0] + ".csv";
            var mz = new minizip(data[0]);
            console.log(data[0]);
            console.log(mz.list());
            var extract = mz.extract(mz.list()[0].filepath, {
                password: 'ABC#123'
            });
            console.log("extracted");
            // write unzipped contents to file
            folder.file(csvName).save(extract, function(err) {
                if (!err) {
                    console.log("unzipped csv");
                } else console.log("Error in saving csv : " + err);
            });
        });
    }
    callback(null, 'Success!');
};
Thanks for the help.
