minizip-asm extract function takes forever to execute - node.js

I am trying to fetch an AES encrypted password protected zip file from a google storage and extract a csv file from it. I am using google cloud functions for it with node 6.
I've tried using minizip-asm.js library to extract the file. It works intermittently. I am a newbie when it comes to node so would really appreciate some help :).
Here's the relevant piece of code which might help. Could someone help me figure out what's going wrong here.
exports.processFile = (event, callback) => {
const file = event.data;
const filename = file.name;
const projectId = "abc1234";
const bucketName = "abc_reports";
const Storage = require('#google-cloud/storage');
const storage = Storage({
projectId: projectId
});
const folder = storage.bucket(bucketName);
const minizip = require('minizip-asm.js');
if (file.metageneration === '1' && filename.match(".zip") != null) {
// metageneration attribute is updated on metadata changes.
// on create value is 1
console.log(`File ${file.name} uploaded.`);
folder.file(filename).download().then(function(data) {
console.log('Download of file complete');
//create csv file
var csvName = filename.split(".zip")[0] + ".csv"
var mz = new minizip(data[0]);
console.log(data[0]);
console.log(mz.list());
var extract = mz.extract(mz.list()[0].filepath,{
password: 'ABC#123'
})
console.log("extracted");
//write unzipped contents to file
folder.file(csvName).save(extract, function(err) {
if (!err) {
console.log("unzipped csv");
}
else console.log("Error in saving csv : "+err);
});
});
});
}
callback(null, 'Success!');
};
Thanks for the help.

Related

How to upload a string of JSON data into GCS from NodeJS?

I am getting result set from BigQuery and looping through it so I have string (JSON data) in one of the column that needs to be uploaded to GCS bucket as a file.
File content would be something like
{
"name":"sharath",
"country":"India"
}
I tried using file.save() method, also passthroughStream but nothing happened (not even erroring out)
file.save() :
// Upload one JSON file per BigQuery row.
// Fixes vs. original:
//  - `console.messages.push(...)` threw at runtime (console has no
//    `messages` array); use console.log instead,
//  - the save() promises were never awaited, so the surrounding async
//    function could return before any upload finished — the likely reason no
//    files (and no errors) ever appeared,
//  - the loop variable was an implicit global; declare it.
// NOTE(review): the object name keeps the original leading "/" and literal
// "*" — confirm these are really intended (they produce odd GCS key names).
for (const row of rows) {
  const contents = row.JSON_Content;
  const file = storage.bucket(gcsBucket).file("/" + process.env.FILE_TMP_PATH + fileName + '*.json');
  await file.save(contents);
  console.log(`file uploaded`);
}
passthroughStream :
// Stream each row's JSON into GCS via a PassThrough stream.
// Fixes vs. original:
//  - `contents` was never assigned inside this loop (it referenced a stale
//    outer value) — read it from the current row,
//  - the pipe was never awaited, so the function returned before anything
//    was flushed and both handlers were lost,
//  - `throw` inside an event handler cannot be caught by a surrounding
//    try/catch — reject a promise instead so the caller sees the failure.
// NOTE(review): `file` must still be created per-row, as in the file.save()
// variant — confirm it is in scope here.
for (const row of rows) {
  const contents = row.JSON_Content;
  await new Promise((resolve, reject) => {
    const passthroughStream = new stream.PassThrough();
    passthroughStream.write(contents);
    passthroughStream.end();
    passthroughStream.pipe(file.createWriteStream())
      .on('error', (err) => {
        reject(new Error(`File upload failed with error: ${err.message}`));
      })
      .on('finish', () => {
        // The file upload is complete.
        resolve();
      });
  });
}
Nothing is working out. Neither approach created any file in the GCS bucket. I referred to this document
My overall code looks like:
//import libraries...
const xxx = {
myFunction: async () => {
try{
...get data from BigQuery...
...loop through resultset...
...code not working is illustrated above...
}catch(err){
throw new Error('error occured');
}
}
module.exports = xxx;
For save data to file, try to stream it (createWriteStream):
const fs = require('fs');
// Append every row's JSON content to one local file via a write stream.
// Fixes vs. original: the loop variable was an implicit global (a crash in
// strict mode / ES modules), and stream errors were silently dropped.
// NOTE(review): the path keeps the original leading "/" and literal "*" —
// confirm both are intended.
const stream = fs.createWriteStream("/" + process.env.FILE_TMP_PATH + fileName + '*.json', {flags: 'a'});
stream.on('error', (err) => console.error('write stream error:', err));
for (const row of rows) {
  stream.write(row.JSON_Content);
}
stream.end();

Firebase Storage + docxtemplater in nodejs

I am having trouble loading a firebase storage document in node js (preferably in binary) so that I can generate a docxtemplater document on it. I'm quite new to docxtemplater and would really like to use it for my webapp
Is this something that can be done?
Below is the code I get but I dont think it's loading the document from my firebase storage properly:
const functions = require('firebase-functions');
const admin = require('firebase-admin');
// Fixed package name: '#google-cloud/storage' is not a real module — the
// scoped package is '@google-cloud/storage'.
const {Storage} = require('@google-cloud/storage');
var PizZip = require('pizzip');
var Docxtemplater = require('docxtemplater');
admin.initializeApp();
const BUCKET = 'gs://mpcwapp.appspot.com';
const https = require('https');
const storage = new Storage({
  projectId: 'myapp' });
const cors = require('cors')({origin: true});
exports.test2 = functions.https.onCall((data, context) => {
// The error object contains additional information when logged with JSON.stringify (it contains a properties object containing all suberrors).
function replaceErrors(key, value) {
if (value instanceof Error) {
return Object.getOwnPropertyNames(value).reduce(function(error, key) {
error[key] = value[key];
return error;
}, {});
}
return value;
}
function errorHandler(error) {
console.log(JSON.stringify({error: error}, replaceErrors));
if (error.properties && error.properties.errors instanceof Array) {
const errorMessages = error.properties.errors.map(function (error) {
return error.properties.explanation;
}).join("\n");
console.log('errorMessages', errorMessages);
// errorMessages is a humanly readable message looking like this :
// 'The tag beginning with "foobar" is unopened'
}
throw error;
}
//Load the docx file as a binary
let file_name = 'input.docx';
const myFile =storage.bucket(BUCKET).file(file_name);
var content = myFile.createReadStream();
var zip = new PizZip(content);
var doc;
try {
doc = new Docxtemplater(zip);
} catch(error) {
// Catch compilation errors (errors caused by the compilation of the template : misplaced tags)
errorHandler(error);
}
//set the templateVariables
doc.setData({
first_name: 'John',
last_name: 'Doe',
phone: '0652455478',
description: 'New Website'
});
try {
// render the document (replace all occurences of {first_name} by John, {last_name} by Doe, ...)
doc.render();
}
catch (error) {
// Catch rendering errors (errors relating to the rendering of the template : angularParser throws an error)
errorHandler(error);
}
var buf = doc.getZip()
.generate({type: 'nodebuffer'});
buf.pipe(myFile.createWriteStream());
});
Any help will be appreciated I am really stuck.
First, if you're deploying your code logic in Firebase Functions, make sure it's inside your list of exports.
I tried to reproduce the behavior of your code and noticed that the root cause of the error is because of this part in your code:
var content = myFile.createReadStream();
var zip = new PizZip(content);
pizzip appears to accept a Buffer input according to this documentation. However, createReadStream() returns ReadableStream so there's a mismatch between the required parameters.
There are two solutions I can think of:
First, download and store the file (in /tmp). Then read the file using fs.readFileSync().
Skip saving the file to the file system and get the buffer of the file object.
For the 2nd option, you need to understand how streams work. This answer can give you a good head start. As example, you can get the buffer from ReadableStream like this:
const remoteFile = storage.bucket("bucket-name").file("file-name");
const readable = remoteFile.createReadStream();
// Collect the stream into one Buffer for PizZip (it needs a Buffer, not a
// ReadableStream).
var buffers = [];
readable.on('error', (error) => {
  // Fix vs. original: without this handler a download failure was silently
  // dropped — 'end' never fired and the function hung until timeout.
  errorHandler(error);
});
readable.on('data', (buffer) => {
  buffers.push(buffer);
});
readable.on('end', () => {
  var buffer = Buffer.concat(buffers);
  var zip = new PizZip(buffer);
  var doc;
  try {
    doc = new Docxtemplater(zip);
  } catch (error) {
    errorHandler(error);
  }
  // ... rest of your code
});

Read data from .xlsx file on S3 using Nodejs Lambda

I'm still new in NodeJs and AWS, so forgive me if this is a noob question.
I am trying to read the data from an excel file (.xlsx). The lambda function receives the extension of the file type.
Here is my code:
exports.handler = async (event, context, callback) => {
console.log('Received event:', JSON.stringify(event, null, 2));
if (event.fileExt === undefined) {
callback("400 Invalid Input");
}
let returnData = "";
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3();
switch(event.fileExt)
{
case "plain":
case "txt":
// Extract text
const params = {Bucket: 'filestation', Key: 'MyTXT.'+event.fileExt};
try {
await s3.getObject(params, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
else{ // successful response
returnData = data.Body.toString('utf-8');
context.done(null, returnData);
}
}).promise();
} catch (error) {
console.log(error);
return;
}
break;
case "xls":
case "xlsx":
returnData = "Excel";
// Extract text
const params2 = {Bucket: 'filestation', Key: 'MyExcel.'+event.fileExt};
const readXlsxFile = require("read-excel-file/node");
try {
const doc = await s3.getObject(params2);
const parsedDoc = await readXlsxFile(doc);
console.log(parsedDoc)
} catch (err) {
console.log(err);
const message = `Error getting object.`;
console.log(message);
throw new Error(message);
}
break;
case "docx":
returnData = "Word doc";
// Extract text
break;
default:
callback("400 Invalid Operator");
break;
}
callback(null, returnData);
};
The textfile part works. But the xlsx part makes the function time out.
I did install the read-excel-file dependency and uploaded the zip so that I have access to it.
But the function times out with this message:
"errorMessage": "2020-11-02T13:06:50.948Z 120bfb48-f29c-4e3f-9507-fc88125515fd Task timed out after 3.01 seconds"
Any help would be appreciated! Thanks for your time.
using the xlsx npm library. here's how we did it.
assuming the file is under the root project path.
const xlsx = require('xlsx');

// Load the workbook from disk (file sits at the project root).
let workbook = xlsx.readFile('file_example_XLSX_5000.xlsx');

// Pick the first sheet by name.
let firstSheetName = workbook.SheetNames[0];

// Dump the sheet as an array of row objects — works best when the sheet has
// a header row.
console.log(xlsx.utils.sheet_to_json(workbook.Sheets[firstSheetName]));
You need to install xlsx (SheetJs) library into the project:
npm install xlsx
and then import the "read" function into the lambda, get the s3 object's body and send to xlsx like this:
const { read } = require('sheetjs-style');
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });
exports.handler = async (event) => {
const bucketName = 'excel-files';
const fileKey = 'Demo Data.xlsx';
// Simple GetObject
let file = await s3.getObject({Bucket: bucketName, Key: fileKey}).promise();
const wb = read(file.Body);
const response = {
statusCode: 200,
body: JSON.stringify({
read: wb.Sheets,
}),
};
return response;
};
(of course, you can receive the bucket and filekey from parameters if you send them...)
Very Important: Use the READ (not the readFile) function and send the Body property (with capital "B") as a parameter
I changed the timeout to 20 seconds and it works. Only one issue remains: const parsedDoc = await readXlsxFile(doc); wants to receive a string (filepath) and not a file.
Solved by using xlsx NPM library. Using a stream and giving it buffers.

Retrieve the attributes from a file using nodejs?

Hi I'm looking for an nodejs code which would probably return the attributes of each file in a folder. I developed the code to retrieve all the file name in a folder and another code to list data's of filename provide by us. But actually I need to return all the files names in a folder with its column name. I'm new to nodejs so someone help me please.
LISTING DATA CODE:
const AWS = require('aws-sdk');
const neatCsv = require('neat-csv');
var s3 = new AWS.S3({});
exports.handler = (event,context,callback)=>{
const params = {
Bucket:'ml-framework-api',
Key: wavicle.csv
};
s3.getObject(params,async(err, result) => {
if (err){
return console.error(err);
}
neatCsv(result.Body).then((parsedData) => {
callback(null,parsedData);
})
})
}
LISTING FILE IN S3:
const AWS = require('aws-sdk')
// NOTE(review): prefer IAM roles / environment variables over credentials in
// source (the values here are redacted placeholders).
const s3 = new AWS.S3({
  accessKeyId: '-------------',
  secretAccessKey: '-------------------',
  region: 'ap-south-1'
})

// List every object in the 'wavicle' bucket and print the listing.
const params = {
  Bucket: 'wavicle'
}

s3.listObjects(params, (err, data) => {
  if (err) {
    console.log(err)
    return
  }
  console.log(data)
})
It's best to start with node's file system api documentation.
Here is a simple example of how to get information about files of a folder (there are many ways, this is quickly from the example in the documentation above):
const fsp = require("fs/promises");
const nodePath = require("path");

/**
 * Logs the name and fs.Stats of every entry in the directory at dirPath.
 *
 * Fix vs. original: entries were stat'ed as "./" + name, i.e. relative to the
 * process working directory rather than the directory being listed — for any
 * dirPath other than "./" this threw ENOENT or stat'ed the wrong file.
 * path.join(dirPath, name) resolves each entry inside the listed directory.
 *
 * @param {string} dirPath - directory to enumerate
 * @returns {Promise<void>}
 */
async function dirFilesInfo(dirPath) {
  const dir = await fsp.opendir(dirPath);
  for await (const dirEntry of dir) {
    const fileInfo = await fsp.stat(nodePath.join(dirPath, dirEntry.name));
    console.log(dirEntry.name, fileInfo);
  }
}
dirFilesInfo("./").catch(console.error);

How to read a txt file, stored in a google storage bucket, from a cloud function, in node

I've written some node.js code which is sitting in the source of a Cloud Function. When it runs I want to read a text file from a google storage bucket, and process it.
The Code runs fine when running locally, but for some reason doesn't work when running in the Cloud Function. I would expect something written out from the console logs.
I can't see any errors, as I thought it might be a permissions problem (might be looking in the wrong place though).
Any ideas?
The awaits and async's were just because I wanted it to wait for the response before continuing, but that seems to have no effect on it either.
const fileName = 'testData.txt';
// Fixed package name: '@google-cloud/storage' (the '#' variant doesn't exist).
const {Storage} = require('@google-cloud/storage');
const storage = new Storage();
const bucket = storage.bucket('my_bucket_name');
const remoteFile = bucket.file(fileName);

// Fix vs. original: download() was given a node-style callback AND awaited.
// With a callback supplied it does not return a useful promise, so the await
// resolved immediately and the function could finish (and the runtime freeze
// the instance) before the callback ever ran — which is why nothing showed
// up in the logs. Use the promise form and await the real result.
try {
  const [contents] = await remoteFile.download();
  console.log("file data: " + contents);
} catch (err) {
  console.log("file err: " + err);
}
What you can do is to verify that the runtime account for the function has the necessary permissions to access the bucket. In general the runtime account is PROJECT_ID#appspot.gserviceaccount.com and add at least the Storage Object Viewer (you can check more roles here).
Then, test the function again. If something goes wrong, please check the logs of the function.
EDIT
Not sure, but maybe seems to be something with the code. I've used the following to test the function and works perfect:
index.js:
const {Storage} = require('#google-cloud/storage');
const storage = new Storage();
const bucket = storage.bucket('bucket_name');
const fileName = 'test.txt';
const remoteFile = bucket.file(fileName);
exports.helloWorld = (req, res) => {
console.log('Reading File');
var archivo = remoteFile.createReadStream();
console.log('Concat Data');
var buf = '';
archivo.on('data', function(d) {
buf += d;
}).on('end', function() {
console.log(buf);
console.log("End");
res.send(buf);
});
};
package.json:
{
"name": "sample-http",
"version": "0.0.1",
"dependencies": {
"#google-cloud/storage": "^4.7.0"
}
}
async function readStorageFile(obj){
try{
obj.Result = ''
if (obj.filename===undefined) return
bucket = gcs.bucket('gs:yourinfo');
//Get File
await bucket.file(obj.filename).download()
.then(async data => {
obj.data = data
obj.Result = 'OK'
return
})
.catch(err => {
return
})
return
}
catch (err){
return
}
}
obj = {filename:'TEST1.json'}
await readStorageFile(obj,'testing')
if (obj.Result==='OK') { console.log('obj.data='+obj.data) }
else { console.log('Not Found')}
return

Resources