Firebase Storage + docxtemplater in Node.js

I am having trouble loading a Firebase Storage document in Node.js (preferably as binary) so that I can run docxtemplater on it. I'm quite new to docxtemplater and would really like to use it for my web app.
Is this something that can be done?
Below is the code I have, but I don't think it's loading the document from my Firebase Storage properly:
const functions = require('firebase-functions');
const admin = require('firebase-admin');
const {Storage} = require('@google-cloud/storage');
var PizZip = require('pizzip');
var Docxtemplater = require('docxtemplater');
admin.initializeApp();
const BUCKET = 'gs://mpcwapp.appspot.com';
const https = require('https');
const storage = new Storage({
projectId: 'myapp' });
const cors = require('cors')({origin: true});
exports.test2 = functions.https.onCall((data, context) => {
// The error object contains additional information when logged with JSON.stringify (it contains a properties object containing all suberrors).
function replaceErrors(key, value) {
if (value instanceof Error) {
return Object.getOwnPropertyNames(value).reduce(function(error, key) {
error[key] = value[key];
return error;
}, {});
}
return value;
}
function errorHandler(error) {
console.log(JSON.stringify({error: error}, replaceErrors));
if (error.properties && error.properties.errors instanceof Array) {
const errorMessages = error.properties.errors.map(function (error) {
return error.properties.explanation;
}).join("\n");
console.log('errorMessages', errorMessages);
// errorMessages is a humanly readable message looking like this :
// 'The tag beginning with "foobar" is unopened'
}
throw error;
}
//Load the docx file as a binary
let file_name = 'input.docx';
const myFile = storage.bucket(BUCKET).file(file_name);
var content = myFile.createReadStream();
var zip = new PizZip(content);
var doc;
try {
doc = new Docxtemplater(zip);
} catch(error) {
// Catch compilation errors (errors caused by the compilation of the template : misplaced tags)
errorHandler(error);
}
//set the templateVariables
doc.setData({
first_name: 'John',
last_name: 'Doe',
phone: '0652455478',
description: 'New Website'
});
try {
// render the document (replace all occurences of {first_name} by John, {last_name} by Doe, ...)
doc.render();
}
catch (error) {
// Catch rendering errors (errors relating to the rendering of the template : angularParser throws an error)
errorHandler(error);
}
var buf = doc.getZip()
.generate({type: 'nodebuffer'});
buf.pipe(myFile.createWriteStream());
});
Any help will be appreciated; I am really stuck.

First, if you're deploying your code logic in Firebase Functions, make sure it's inside your list of exports.
I tried to reproduce the behavior of your code and noticed that the root cause of the error is this part of your code:
var content = myFile.createReadStream();
var zip = new PizZip(content);
PizZip appears to accept a Buffer input according to its documentation. However, createReadStream() returns a ReadableStream, so there is a mismatch between what you pass in and what the constructor expects.
There are two solutions I can think of:
1. Download and store the file in /tmp, then read it with fs.readFileSync() (a sketch of this approach follows this list).
2. Skip saving the file to the file system and read the file's contents into a buffer.
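A minimal sketch of the first option, assuming it runs inside an async handler and reuses the storage, BUCKET, and PizZip objects from your code (in Cloud Functions, /tmp is the only writable directory):
const os = require('os');
const path = require('path');
const fs = require('fs');
// Download the template into /tmp, then read it back as a Buffer for PizZip.
const tempPath = path.join(os.tmpdir(), 'input.docx');
await storage.bucket(BUCKET).file('input.docx').download({ destination: tempPath });
const content = fs.readFileSync(tempPath);
const zip = new PizZip(content);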
For the second option, you need to understand how streams work. This answer can give you a good head start. As an example, you can get the buffer from a ReadableStream like this:
const remoteFile = storage.bucket("bucket-name").file("file-name")
const readable = remoteFile.createReadStream()
var buffers = [];
readable.on('data', (buffer) => {
buffers.push(buffer)
});
readable.on('end', () => {
var buffer = Buffer.concat(buffers);
var zip = new PizZip(buffer);
var doc;
try {
doc = new Docxtemplater(zip);
} catch(error) {
errorHandler(error);
}
// ... rest of your code
});

Related

How to parse XML feed URL and store items in Firestore using cloud functions?

I have been given an assignment to fetch a JSON API, and also parse an XML feed URL and store their responses inside separate Firestore collections. I am not really good at cloud functions, but after lots of research, I have written the cloud function code below for the JSON API and it works well.
const functions = require("firebase-functions");
const axios = require("axios");
const admin = require("firebase-admin");
const api_token = "XXXXXXX";
const includes = "XXXXXX";
const url = "https://XXXXXXXXXXXXXX.com/?api_token=" + api_token + includes;
exports.allLeagues = functions.region('europe-west1').https.onRequest(async (req, res) => {
try {
let response = await axios.get(url);
var data = response.data.data;
for (let leagueData of data) {
await admin.firestore().collection("leagues").doc(leagueData.id.toString()).collection("all_data").doc(leagueData.id.toString()).set({
id : leagueData.id,
name : leagueData.name,
logo_path : leagueData.logo_path,
is_cup : leagueData.is_cup
});
}
console.log("Table complete...");
console.log("successful");
return res.status(200).json({ message: "successful" });
} catch(error) {
console.log("Error encountered: "+error);
return res.status(500).json({ error });
}
});
I am through with the JSON API, but for the XML feed I don't know where to start. I have done lots of research to no avail. I found this on Stack Overflow but it doesn't address my need. Assuming this is my feed: https://www.feedforall.com/sample.xml , how do I parse it and save the items inside Firestore?
Kindly help.
Thank you.
You can use rss-parser, which can fetch data from RSS feeds or parse XML strings, as shown below:
// npm install rss-parser
const Parser = require("rss-parser");
const parser = new Parser();
exports.rssFeedParser = functions.https.onRequest(
async (request, response) => {
const rssUrl = "https://www.feedforall.com/sample.xml";
const feed = await parser.parseURL(rssUrl);
const { items } = feed;
const db = admin.firestore(); // assumes firebase-admin is initialized, as in your JSON function above
const batch = db.batch();
items.forEach((item) => {
const docRef = db.collection("rss").doc();
// restructure item if needed
batch.set(docRef, item);
});
await batch.commit();
response.send("Done");
}
);
Do note that a single batched write can contain at most 500 operations. If your feed can return more items than that, split them into multiple batches of up to 500 writes each (as sketched below) or add the documents individually.
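A minimal sketch of the chunked approach, reusing the db and items variables from the code above and assuming it runs inside the async handler; the chunk size of 500 matches the batched-write limit:
const BATCH_LIMIT = 500;
// Commit the items in chunks so that no single batch exceeds the 500-write limit.
for (let i = 0; i < items.length; i += BATCH_LIMIT) {
  const batch = db.batch();
  items.slice(i, i + BATCH_LIMIT).forEach((item) => {
    const docRef = db.collection("rss").doc();
    batch.set(docRef, item);
  });
  await batch.commit();
}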

How to load docx files on firebase storage and then save to firebase storage nodejs

I am working with docxtemplater with nodejs and have read the documentation from the link below:
https://docxtemplater.readthedocs.io/en/latest/generate.html#node
Unlike the provided documentation, I am trying to load a document from my Firebase Storage called
'tag-example.docx'
and have docxtemplater run on the tags in it. The generated document is then saved back to my Firebase Storage. Simply put:
Load 'tag-example.docx' from Firebase Storage;
docxtemplater does its thing on the document;
the revised output is saved back to Firebase Storage.
My issue is that I keep getting the error message below:
Unhandled error TypeError: Cannot read property 'toLowerCase' of undefined
at Object.exports.checkSupport (/workspace/node_modules/pizzip/js/utils.js:293:32)
at ZipEntries.prepareReader (/workspace/node_modules/pizzip/js/zipEntries.js:275:11)
at ZipEntries.load (/workspace/node_modules/pizzip/js/zipEntries.js:295:10)
at new ZipEntries (/workspace/node_modules/pizzip/js/zipEntries.js:32:10)
at PizZip.module.exports [as load] (/workspace/node_modules/pizzip/js/load.js:25:20)
at new PizZip (/workspace/node_modules/pizzip/js/index.js:41:10)
at /workspace/index.js:66:11
at func (/workspace/node_modules/firebase-functions/lib/providers/https.js:336:32)
at processTicksAndRejections (internal/process/task_queues.js:95:5)
Is there a way to solve this issue? Is this because I am not loading the document as a binary like in the example? Can this even be done with firebase storage?
const functions = require('firebase-functions');
const admin = require('firebase-admin');
const {Storage} = require('@google-cloud/storage');
var PizZip = require('pizzip');
var Docxtemplater = require('docxtemplater');
admin.initializeApp();
const BUCKET = 'gs://myapp.appspot.com';
const https = require('https');
const storage = new Storage({
projectId: 'myapp' });
const cors = require('cors')({origin: true});
exports.test2 = functions.https.onCall((data, context) => {
// The error object contains additional information when logged with JSON.stringify (it contains a properties object containing all suberrors).
function replaceErrors(key, value) {
if (value instanceof Error) {
return Object.getOwnPropertyNames(value).reduce(function(error, key) {
error[key] = value[key];
return error;
}, {});
}
return value;
}
function errorHandler(error) {
console.log(JSON.stringify({error: error}, replaceErrors));
if (error.properties && error.properties.errors instanceof Array) {
const errorMessages = error.properties.errors.map(function (error) {
return error.properties.explanation;
}).join("\n");
console.log('errorMessages', errorMessages);
// errorMessages is a humanly readable message looking like this :
// 'The tag beginning with "foobar" is unopened'
}
throw error;
}
//Load the docx file as a binary
let file_name = 'tag-example.docx';
const myFile = storage.bucket(BUCKET).file(file_name);
var content = myFile.createReadStream();
var zip = new PizZip(content);
var doc;
try {
doc = new Docxtemplater(zip);
} catch(error) {
// Catch compilation errors (errors caused by the compilation of the template : misplaced tags)
errorHandler(error);
}
//set the templateVariables
doc.setData({
first_name: 'John',
last_name: 'Doe',
phone: '0652455478',
description: 'New Website'
});
try {
// render the document (replace all occurences of {first_name} by John, {last_name} by Doe, ...)
doc.render();
}
catch (error) {
// Catch rendering errors (errors relating to the rendering of the template : angularParser throws an error)
errorHandler(error);
}
var buf = doc.getZip()
.generate({type: 'nodebuffer'});
// buf and then save to firebase storage.
buf.pipe(myFile.createWriteStream());
});
This error message comes from PizZip, not directly from docxtemplater.
It happens when the argument given to PizZip is invalid.
In your case, you did:
var content = myFile.createReadStream();
var zip = new PizZip(content);
The problem is that content is, I think, a stream object, not data that has finished being read.
You first need to resolve the content to a string or a buffer, and then you can do:
var zip = new PizZip(buffer);
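A minimal sketch of one way to do that, assuming the myFile handle from your code and an async context; File#download() resolves with the full contents as a Buffer:
// download() buffers the whole object in memory and resolves with [contents].
const [contents] = await myFile.download();
var zip = new PizZip(contents);
var doc = new Docxtemplater(zip);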

How to get an docx file generated in node saved to firebase storage

Hi, I am quite new to docxtemplater but I absolutely love how it works. Right now I seem to be able to generate a new docx document as follows:
const functions = require('firebase-functions');
const admin = require('firebase-admin');
const {Storage} = require('@google-cloud/storage');
var PizZip = require('pizzip');
var Docxtemplater = require('docxtemplater');
admin.initializeApp();
const BUCKET = 'gs://myapp.appspot.com';
exports.test2 = functions.https.onCall((data, context) => {
// The error object contains additional information when logged with JSON.stringify (it contains a properties object containing all suberrors).
function replaceErrors(key, value) {
if (value instanceof Error) {
return Object.getOwnPropertyNames(value).reduce(function(error, key) {
error[key] = value[key];
return error;
}, {});
}
return value;
}
function errorHandler(error) {
console.log(JSON.stringify({error: error}, replaceErrors));
if (error.properties && error.properties.errors instanceof Array) {
const errorMessages = error.properties.errors.map(function (error) {
return error.properties.explanation;
}).join("\n");
console.log('errorMessages', errorMessages);
// errorMessages is a humanly readable message looking like this :
// 'The tag beginning with "foobar" is unopened'
}
throw error;
}
let file_name = 'example.docx';// this is the file saved in my firebase storage
const File = storage.bucket(BUCKET).file(file_name);
const read = File.createReadStream();
var buffers = [];
readable.on('data', (buffer) => {
buffers.push(buffer);
});
readable.on('end', () => {
var buffer = Buffer.concat(buffers);
var zip = new PizZip(buffer);
var doc;
try {
doc = new Docxtemplater(zip);
doc.setData({
first_name: 'Fred',
last_name: 'Flinstone',
phone: '0652455478',
description: 'Web app'
});
try {
doc.render();
doc.pipe(remoteFile2.createReadStream());
}
catch (error) {
errorHandler(error);
}
} catch(error) {
errorHandler(error);
}
});
});
My issue is that I keep getting an error that doc.pipe is not a function. I am quite new to Node.js, but is there a way to have the newly generated document, after doc.render(), saved directly to Firebase Storage?
Taking a look at the type of doc, we find that it is a Docxtemplater object, and doc.pipe is not a function of that class. To get the file out of Docxtemplater, we need to use doc.getZip() to return the underlying zip (this will be either a JSZip v2 or a PizZip instance, depending on what was passed to the constructor). Now that we have the zip object, we need to generate the binary data of the zip, which is done using generate({ type: 'nodebuffer' }) (to get a Node.js Buffer containing the data). Unfortunately, because the docxtemplater library doesn't support JSZip v3+, we can't make use of the generateNodeStream() method to get a stream to use with pipe().
With this buffer, we can either reupload it to Cloud Storage or send it back to the client that is calling the function.
The first option is relatively simple to implement:
import { v4 as uuidv4 } from 'uuid';
/* ... */
const contentBuffer = doc.getZip()
.generate({type: 'nodebuffer'});
const targetName = "compiled.docx";
const targetStorageRef = admin.storage().bucket()
.file(targetName);
await targetStorageRef.save(contentBuffer);
// send back the bucket-name pair to the caller
return { bucket: targetStorageRef.bucket.name, name: targetName };
However, sending the file itself back to the client isn't as easy, because a Callable Cloud Function can only return JSON-compatible data, so it involves switching to an HTTP Event Function (functions.https.onRequest). Below is a middleware function that wraps a callable-style handler but supports returning binary data to the client.
import * as functions from "firebase-functions";
import * as admin from "firebase-admin";
import corsInit from "cors";
admin.initializeApp();
const cors = corsInit({ origin: true }); // TODO: Tighten
function callableRequest(handler) {
  if (!handler) {
    throw new TypeError("handler is required");
  }
  return (req, res) => {
    cors(req, res, (corsErr) => {
      if (corsErr) {
        console.error("Request rejected by CORS", corsErr);
        res.status(412).json({ error: "cors", message: "origin rejected" });
        return;
      }
      // for validateFirebaseIdToken, see https://github.com/firebase/functions-samples/blob/main/authorized-https-endpoint/functions/index.js
      validateFirebaseIdToken(req, res, async () => { // validateFirebaseIdToken won't pass errors to `next()`
        try {
          const data = req.body;
          const context = {
            auth: req.user ? { token: req.user, uid: req.user.uid } : null,
            instanceIdToken: req.get("Firebase-Instance-ID-Token"), // this is used with FCM
            rawRequest: req
          };
          let result = await handler(data, context);
          if (result && typeof result === "object" && "buffer" in result) {
            res.writeHead(200, [
              ["Content-Type", result.contentType],
              ["Content-Disposition", "attachment; filename=" + result.filename]
            ]);
            res.end(result.buffer);
          } else {
            result = functions.https.encode(result);
            res.status(200).send({ result });
          }
        } catch (err) {
          if (!(err instanceof functions.https.HttpsError)) {
            // This doesn't count as an 'explicit' error.
            console.error("Unhandled error", err);
            err = new functions.https.HttpsError("internal", "INTERNAL");
          }
          const { status } = err.httpErrorCode;
          const body = { error: err.toJSON() };
          res.status(status).send(body);
        }
      });
    });
  };
}
exports.test2 = functions.https.onRequest(callableRequest(async (data, context) => {
/* ... */
const contentBuffer = doc.getZip()
.generate({type: "nodebuffer"});
const targetName = "compiled.docx";
return {
buffer: contentBuffer,
contentType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
filename: targetName
}
}));
In your current code there are a number of odd segments where you have nested try-catch blocks and variables in different scopes. To help combat this, we can make use of File#download(), which returns a Promise that resolves with the file contents in a Node.js Buffer, and File#save(), which returns a Promise that resolves when the given Buffer has been uploaded.
Rolling this together for reuploading to Cloud Storage gives:
// This code is based off the examples provided for docxtemplater
// Copyright (c) Edgar HIPP [Dual License: MIT/GPLv3]
import * as functions from "firebase-functions";
import * as admin from "firebase-admin";
import PizZip from "pizzip";
import Docxtemplater from "docxtemplater";
admin.initializeApp();
// The error object contains additional information when logged with JSON.stringify (it contains a properties object containing all suberrors).
function replaceErrors(key, value) {
if (value instanceof Error) {
return Object.getOwnPropertyNames(value).reduce(
function (error, key) {
error[key] = value[key];
return error;
},
{}
);
}
return value;
}
function errorHandler(error) {
console.log(JSON.stringify({ error: error }, replaceErrors));
if (error.properties && error.properties.errors instanceof Array) {
const errorMessages = error.properties.errors
.map(function (error) {
return error.properties.explanation;
})
.join("\n");
console.log("errorMessages", errorMessages);
// errorMessages is a humanly readable message looking like this :
// 'The tag beginning with "foobar" is unopened'
}
throw error;
}
exports.test2 = functions.https.onCall(async (data, context) => {
const file_name = "example.docx"; // this is the file saved in my firebase storage
const templateRef = admin.storage().bucket()
.file(file_name);
const template_content = (await templateRef.download())[0];
const zip = new PizZip(template_content);
let doc;
try {
doc = new Docxtemplater(zip);
} catch (error) {
// Catch compilation errors (errors caused by the compilation of the template : misplaced tags)
errorHandler(error);
}
doc.setData({
first_name: "Fred",
last_name: "Flinstone",
phone: "0652455478",
description: "Web app",
});
try {
doc.render();
} catch (error) {
errorHandler(error);
}
const contentBuffer = doc.getZip().generate({ type: "nodebuffer" });
// do something with contentBuffer
// e.g. reupload to Cloud Storage
const targetName = "compiled.docx";
const targetStorageRef = admin.storage().bucket().file(targetName);
await targetStorageRef.save(contentBuffer);
return { bucket: targetStorageRef.bucket.name, name: targetName };
});
In addition to returning a bucket-name pair to the caller, you may also consider returning an access URL. This could be a signed URL that can last for up to 7 days, a download token URL (like getDownloadURL(), process described here) that lasts until the token is revoked, a Google Storage URI (gs://BUCKET_NAME/FILE_NAME) (not an access URL, but it can be passed to a client SDK, which can access the file if the client passes the storage security rules), or the file's public URL (after the file has been marked public).
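For example, a minimal sketch of the signed-URL option, reusing targetStorageRef and targetName from the code above (7 days is the maximum expiry for V4 signed URLs):
// Generate a V4 signed URL that grants read access for 7 days.
const [signedUrl] = await targetStorageRef.getSignedUrl({
  version: 'v4',
  action: 'read',
  expires: Date.now() + 7 * 24 * 60 * 60 * 1000,
});
return { bucket: targetStorageRef.bucket.name, name: targetName, url: signedUrl };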
Based on the above code, you should be able to merge in returning the file directly yourself.

Read data from .xlsx file on S3 using Nodejs Lambda

I'm still new to Node.js and AWS, so forgive me if this is a noob question.
I am trying to read the data from an Excel file (.xlsx). The Lambda function receives the extension of the file type.
Here is my code:
exports.handler = async (event, context, callback) => {
console.log('Received event:', JSON.stringify(event, null, 2));
if (event.fileExt === undefined) {
callback("400 Invalid Input");
}
let returnData = "";
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3();
switch(event.fileExt)
{
case "plain":
case "txt":
// Extract text
const params = {Bucket: 'filestation', Key: 'MyTXT.'+event.fileExt};
try {
await s3.getObject(params, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
else{ // successful response
returnData = data.Body.toString('utf-8');
context.done(null, returnData);
}
}).promise();
} catch (error) {
console.log(error);
return;
}
break;
case "xls":
case "xlsx":
returnData = "Excel";
// Extract text
const params2 = {Bucket: 'filestation', Key: 'MyExcel.'+event.fileExt};
const readXlsxFile = require("read-excel-file/node");
try {
const doc = await s3.getObject(params2);
const parsedDoc = await readXlsxFile(doc);
console.log(parsedDoc)
} catch (err) {
console.log(err);
const message = `Error getting object.`;
console.log(message);
throw new Error(message);
}
break;
case "docx":
returnData = "Word doc";
// Extract text
break;
default:
callback("400 Invalid Operator");
break;
}
callback(null, returnData);
};
The text file part works, but the xlsx part makes the function time out.
I did install the read-excel-file dependency and uploaded the zip so that I have access to it.
But the function times out with this message:
"errorMessage": "2020-11-02T13:06:50.948Z 120bfb48-f29c-4e3f-9507-fc88125515fd Task timed out after 3.01 seconds"
Any help would be appreciated! Thanks for your time.
Using the xlsx npm library, here's how we did it, assuming the file is under the root project path:
const xlsx = require('xlsx');
// read your excel file
let readFile = xlsx.readFile('file_example_XLSX_5000.xlsx')
// get first-sheet's name
let sheetName = readFile.SheetNames[0];
// convert sheets to JSON. Best if sheet has a headers specified.
console.log(xlsx.utils.sheet_to_json(readFile.Sheets[sheetName]));
You need to install the xlsx (SheetJS) library into the project:
npm install xlsx
and then import the read function into the Lambda, get the S3 object's body, and pass it to xlsx like this:
const { read } = require('sheetjs-style');
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });
exports.handler = async (event) => {
const bucketName = 'excel-files';
const fileKey = 'Demo Data.xlsx';
// Simple GetObject
let file = await s3.getObject({Bucket: bucketName, Key: fileKey}).promise();
const wb = read(file.Body);
const response = {
statusCode: 200,
body: JSON.stringify({
read: wb.Sheets,
}),
};
return response;
};
(of course, you can receive the bucket and filekey from parameters if you send them...)
Very important: use the read (not the readFile) function and pass the Body property (with a capital "B") as the parameter.
I changed the timeout to 20 seconds and it works. Only one issue remains: const parsedDoc = await readXlsxFile(doc); wants to receive a string (a file path), not a file.
Solved by using the xlsx npm library, using a stream and giving it buffers (see the sketch below).
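A minimal sketch of that approach, assuming the aws-sdk v2 S3 client and the same bucket and key used earlier in the question:
const xlsx = require('xlsx');
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3();
// Stream the S3 object, collect the chunks into a single Buffer, then parse it with xlsx.read.
const stream = s3.getObject({ Bucket: 'filestation', Key: 'MyExcel.xlsx' }).createReadStream();
const chunks = [];
stream.on('data', (chunk) => chunks.push(chunk));
stream.on('end', () => {
  const workbook = xlsx.read(Buffer.concat(chunks));
  const sheetName = workbook.SheetNames[0];
  console.log(xlsx.utils.sheet_to_json(workbook.Sheets[sheetName]));
});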

minizip-asm extract function takes forever to execute

I am trying to fetch an AES-encrypted, password-protected zip file from Google Cloud Storage and extract a CSV file from it. I am using Google Cloud Functions with Node 6.
I've tried using the minizip-asm.js library to extract the file, but it only works intermittently. I am a newbie when it comes to Node, so I would really appreciate some help :).
Here's the relevant piece of code. Could someone help me figure out what's going wrong here?
exports.processFile = (event, callback) => {
const file = event.data;
const filename = file.name;
const projectId = "abc1234";
const bucketName = "abc_reports";
const Storage = require('@google-cloud/storage');
const storage = Storage({
projectId: projectId
});
const folder = storage.bucket(bucketName);
const minizip = require('minizip-asm.js');
if (file.metageneration === '1' && filename.match(".zip") != null) {
// metageneration attribute is updated on metadata changes.
// on create value is 1
console.log(`File ${file.name} uploaded.`);
folder.file(filename).download().then(function(data) {
console.log('Download of file complete');
//create csv file
var csvName = filename.split(".zip")[0] + ".csv"
var mz = new minizip(data[0]);
console.log(data[0]);
console.log(mz.list());
var extract = mz.extract(mz.list()[0].filepath,{
password: 'ABC#123'
})
console.log("extracted");
//write unzipped contents to file
folder.file(csvName).save(extract, function(err) {
if (!err) {
console.log("unzipped csv");
}
else console.log("Error in saving csv : "+err);
});
});
}
callback(null, 'Success!');
};
Thanks for the help.
