Delete a Document with all Subcollections and Nested Subcollections in Firestore - node.js

How can you delete a Document with all it's collections and nested subcollections? (inside the functions environment)
In the RTDB you can ref.child('../someNode).setValue(null) and that completes the desired behavior.
I can think of two ways you could achieve the desired delete behavior, both with tremendously ghastly drawbacks.
Create a 'Super' function that will spider every document and delete them in a batch.
This function would be complicated, brittle to changes, and might take a lengthy execution time.
Add 'onDelete' triggers for each Document type, and make it delete any direct subcollections. You'll call delete on the root document, and the deletion calls will propagate down the 'tree'. This is sluggish, scales atrociously and is costly due to the colossal load of function executions.
Imagine you would have to delete a 'GROUP' and all it's children. It would be deeply chaotic with #1 and pricey with #2 (1 function call per doc)
groups > GROUP > projects > PROJECT > files > FILE > assets > ASSET
> urls > URL
> members > MEMBER
> questions > QUESTION > answers > ANSWER > replies > REPLY
> comments > COMMENT
> resources > RESOURCE > submissions > SUBMISSION
> requests > REQUEST
Is there a superior/favored/cleaner way to delete a document and all it's nested subcollections?
It ought to be possible considering you can do it from the console.

according to firebase documentation:
https://firebase.google.com/docs/firestore/solutions/delete-collections
Deleting collection with nested subcollections might be done easy and neat with node-JS on the server side.
const client = require('firebase-tools');
await client.firestore
.delete(collectionPath, {
project: process.env.GCLOUD_PROJECT,
recursive: true,
yes: true
});

Unfortunately, your analysis is spot on and indeed this use case does require a lot of ceremony. According to official documentation, there is no support for deep deletes in a single shot in firestore neither via client libraries nor rest-api nor the cli tool.
The cli is open sourced and its implementation lives here: https://github.com/firebase/firebase-tools/blob/master/src/firestore/delete.js. They basically implemented option 1. you described in your question, so you can take some inspiration from there.
Both options 1. and 2. are far from ideal situation and to make your solution 100% reliable you will need to keep a persistent queue with deletion tasks, as any error in the long running procedure will leave your system in some ill-defined state.
I would discourage to go with raw option 2. as recursive cloud function calls may very easily went wrong - for example, hitting max. limits.
In case the link changed, below the full source of https://github.com/firebase/firebase-tools/blob/master/src/firestore/delete.js:
"use strict";
var clc = require("cli-color");
var ProgressBar = require("progress");
var api = require("../api");
var firestore = require("../gcp/firestore");
var FirebaseError = require("../error");
var logger = require("../logger");
var utils = require("../utils");
/**
* Construct a new Firestore delete operation.
*
* #constructor
* #param {string} project the Firestore project ID.
* #param {string} path path to a document or collection.
* #param {boolean} options.recursive true if the delete should be recursive.
* #param {boolean} options.shallow true if the delete should be shallow (non-recursive).
* #param {boolean} options.allCollections true if the delete should universally remove all collections and docs.
*/
function FirestoreDelete(project, path, options) {
this.project = project;
this.path = path;
this.recursive = Boolean(options.recursive);
this.shallow = Boolean(options.shallow);
this.allCollections = Boolean(options.allCollections);
// Remove any leading or trailing slashes from the path
if (this.path) {
this.path = this.path.replace(/(^\/+|\/+$)/g, "");
}
this.isDocumentPath = this._isDocumentPath(this.path);
this.isCollectionPath = this._isCollectionPath(this.path);
this.allDescendants = this.recursive;
this.parent = "projects/" + project + "/databases/(default)/documents";
// When --all-collections is passed any other flags or arguments are ignored
if (!options.allCollections) {
this._validateOptions();
}
}
/**
* Validate all options, throwing an exception for any fatal errors.
*/
FirestoreDelete.prototype._validateOptions = function() {
if (this.recursive && this.shallow) {
throw new FirebaseError("Cannot pass recursive and shallow options together.");
}
if (this.isCollectionPath && !this.recursive && !this.shallow) {
throw new FirebaseError("Must pass recursive or shallow option when deleting a collection.");
}
var pieces = this.path.split("/");
if (pieces.length === 0) {
throw new FirebaseError("Path length must be greater than zero.");
}
var hasEmptySegment = pieces.some(function(piece) {
return piece.length === 0;
});
if (hasEmptySegment) {
throw new FirebaseError("Path must not have any empty segments.");
}
};
/**
* Determine if a path points to a document.
*
* #param {string} path a path to a Firestore document or collection.
* #return {boolean} true if the path points to a document, false
* if it points to a collection.
*/
FirestoreDelete.prototype._isDocumentPath = function(path) {
if (!path) {
return false;
}
var pieces = path.split("/");
return pieces.length % 2 === 0;
};
/**
* Determine if a path points to a collection.
*
* #param {string} path a path to a Firestore document or collection.
* #return {boolean} true if the path points to a collection, false
* if it points to a document.
*/
FirestoreDelete.prototype._isCollectionPath = function(path) {
if (!path) {
return false;
}
return !this._isDocumentPath(path);
};
/**
* Construct a StructuredQuery to find descendant documents of a collection.
*
* See:
* https://firebase.google.com/docs/firestore/reference/rest/v1beta1/StructuredQuery
*
* #param {boolean} allDescendants true if subcollections should be included.
* #param {number} batchSize maximum number of documents to target (limit).
* #param {string=} startAfter document name to start after (optional).
* #return {object} a StructuredQuery.
*/
FirestoreDelete.prototype._collectionDescendantsQuery = function(
allDescendants,
batchSize,
startAfter
) {
var nullChar = String.fromCharCode(0);
var startAt = this.parent + "/" + this.path + "/" + nullChar;
var endAt = this.parent + "/" + this.path + nullChar + "/" + nullChar;
var where = {
compositeFilter: {
op: "AND",
filters: [
{
fieldFilter: {
field: {
fieldPath: "__name__",
},
op: "GREATER_THAN_OR_EQUAL",
value: {
referenceValue: startAt,
},
},
},
{
fieldFilter: {
field: {
fieldPath: "__name__",
},
op: "LESS_THAN",
value: {
referenceValue: endAt,
},
},
},
],
},
};
var query = {
structuredQuery: {
where: where,
limit: batchSize,
from: [
{
allDescendants: allDescendants,
},
],
select: {
fields: [{ fieldPath: "__name__" }],
},
orderBy: [{ field: { fieldPath: "__name__" } }],
},
};
if (startAfter) {
query.structuredQuery.startAt = {
values: [{ referenceValue: startAfter }],
before: false,
};
}
return query;
};
/**
* Construct a StructuredQuery to find descendant documents of a document.
* The document itself will not be included
* among the results.
*
* See:
* https://firebase.google.com/docs/firestore/reference/rest/v1beta1/StructuredQuery
*
* #param {boolean} allDescendants true if subcollections should be included.
* #param {number} batchSize maximum number of documents to target (limit).
* #param {string=} startAfter document name to start after (optional).
* #return {object} a StructuredQuery.
*/
FirestoreDelete.prototype._docDescendantsQuery = function(allDescendants, batchSize, startAfter) {
var query = {
structuredQuery: {
limit: batchSize,
from: [
{
allDescendants: allDescendants,
},
],
select: {
fields: [{ fieldPath: "__name__" }],
},
orderBy: [{ field: { fieldPath: "__name__" } }],
},
};
if (startAfter) {
query.structuredQuery.startAt = {
values: [{ referenceValue: startAfter }],
before: false,
};
}
return query;
};
/**
* Query for a batch of 'descendants' of a given path.
*
* For document format see:
* https://firebase.google.com/docs/firestore/reference/rest/v1beta1/Document
*
* #param {boolean} allDescendants true if subcollections should be included,
* #param {number} batchSize the maximum size of the batch.
* #param {string=} startAfter the name of the document to start after (optional).
* #return {Promise<object[]>} a promise for an array of documents.
*/
FirestoreDelete.prototype._getDescendantBatch = function(allDescendants, batchSize, startAfter) {
var url;
var body;
if (this.isDocumentPath) {
url = this.parent + "/" + this.path + ":runQuery";
body = this._docDescendantsQuery(allDescendants, batchSize, startAfter);
} else {
url = this.parent + ":runQuery";
body = this._collectionDescendantsQuery(allDescendants, batchSize, startAfter);
}
return api
.request("POST", "/v1beta1/" + url, {
auth: true,
data: body,
origin: api.firestoreOrigin,
})
.then(function(res) {
// Return the 'document' property for each element in the response,
// where it exists.
return res.body
.filter(function(x) {
return x.document;
})
.map(function(x) {
return x.document;
});
});
};
/**
* Progress bar shared by the class.
*/
FirestoreDelete.progressBar = new ProgressBar("Deleted :current docs (:rate docs/s)", {
total: Number.MAX_SAFE_INTEGER,
});
/**
* Repeatedly query for descendants of a path and delete them in batches
* until no documents remain.
*
* #return {Promise} a promise for the entire operation.
*/
FirestoreDelete.prototype._recursiveBatchDelete = function() {
var self = this;
// Tunable deletion parameters
var readBatchSize = 7500;
var deleteBatchSize = 250;
var maxPendingDeletes = 15;
var maxQueueSize = deleteBatchSize * maxPendingDeletes * 2;
// All temporary variables for the deletion queue.
var queue = [];
var numPendingDeletes = 0;
var pagesRemaining = true;
var pageIncoming = false;
var lastDocName;
var failures = [];
var retried = {};
var queueLoop = function() {
if (queue.length == 0 && numPendingDeletes == 0 && !pagesRemaining) {
return true;
}
if (failures.length > 0) {
logger.debug("Found " + failures.length + " failed deletes, failing.");
return true;
}
if (queue.length <= maxQueueSize && pagesRemaining && !pageIncoming) {
pageIncoming = true;
self
._getDescendantBatch(self.allDescendants, readBatchSize, lastDocName)
.then(function(docs) {
pageIncoming = false;
if (docs.length == 0) {
pagesRemaining = false;
return;
}
queue = queue.concat(docs);
lastDocName = docs[docs.length - 1].name;
})
.catch(function(e) {
logger.debug("Failed to fetch page after " + lastDocName, e);
pageIncoming = false;
});
}
if (numPendingDeletes > maxPendingDeletes) {
return false;
}
if (queue.length == 0) {
return false;
}
var toDelete = [];
var numToDelete = Math.min(deleteBatchSize, queue.length);
for (var i = 0; i < numToDelete; i++) {
toDelete.push(queue.shift());
}
numPendingDeletes++;
firestore
.deleteDocuments(self.project, toDelete)
.then(function(numDeleted) {
FirestoreDelete.progressBar.tick(numDeleted);
numPendingDeletes--;
})
.catch(function(e) {
// For server errors, retry if the document has not yet been retried.
if (e.status >= 500 && e.status < 600) {
logger.debug("Server error deleting doc batch", e);
// Retry each doc up to one time
toDelete.forEach(function(doc) {
if (retried[doc.name]) {
logger.debug("Failed to delete doc " + doc.name + " multiple times.");
failures.push(doc.name);
} else {
retried[doc.name] = true;
queue.push(doc);
}
});
} else {
logger.debug("Fatal error deleting docs ", e);
failures = failures.concat(toDelete);
}
numPendingDeletes--;
});
return false;
};
return new Promise(function(resolve, reject) {
var intervalId = setInterval(function() {
if (queueLoop()) {
clearInterval(intervalId);
if (failures.length == 0) {
resolve();
} else {
reject("Failed to delete documents " + failures);
}
}
}, 0);
});
};
/**
* Delete everything under a given path. If the path represents
* a document the document is deleted and then all descendants
* are deleted.
*
* #return {Promise} a promise for the entire operation.
*/
FirestoreDelete.prototype._deletePath = function() {
var self = this;
var initialDelete;
if (this.isDocumentPath) {
var doc = { name: this.parent + "/" + this.path };
initialDelete = firestore.deleteDocument(doc).catch(function(err) {
logger.debug("deletePath:initialDelete:error", err);
if (self.allDescendants) {
// On a recursive delete, we are insensitive to
// failures of the initial delete
return Promise.resolve();
}
// For a shallow delete, this error is fatal.
return utils.reject("Unable to delete " + clc.cyan(this.path));
});
} else {
initialDelete = Promise.resolve();
}
return initialDelete.then(function() {
return self._recursiveBatchDelete();
});
};
/**
* Delete an entire database by finding and deleting each collection.
*
* #return {Promise} a promise for all of the operations combined.
*/
FirestoreDelete.prototype.deleteDatabase = function() {
var self = this;
return firestore
.listCollectionIds(this.project)
.catch(function(err) {
logger.debug("deleteDatabase:listCollectionIds:error", err);
return utils.reject("Unable to list collection IDs");
})
.then(function(collectionIds) {
var promises = [];
logger.info("Deleting the following collections: " + clc.cyan(collectionIds.join(", ")));
for (var i = 0; i < collectionIds.length; i++) {
var collectionId = collectionIds[i];
var deleteOp = new FirestoreDelete(self.project, collectionId, {
recursive: true,
});
promises.push(deleteOp.execute());
}
return Promise.all(promises);
});
};
/**
* Check if a path has any children. Useful for determining
* if deleting a path will affect more than one document.
*
* #return {Promise<boolean>} a promise that retruns true if the path has
* children and false otherwise.
*/
FirestoreDelete.prototype.checkHasChildren = function() {
return this._getDescendantBatch(true, 1).then(function(docs) {
return docs.length > 0;
});
};
/**
* Run the delete operation.
*/
FirestoreDelete.prototype.execute = function() {
var verifyRecurseSafe;
if (this.isDocumentPath && !this.recursive && !this.shallow) {
verifyRecurseSafe = this.checkHasChildren().then(function(multiple) {
if (multiple) {
return utils.reject("Document has children, must specify -r or --shallow.", { exit: 1 });
}
});
} else {
verifyRecurseSafe = Promise.resolve();
}
var self = this;
return verifyRecurseSafe.then(function() {
return self._deletePath();
});
};
module.exports = FirestoreDelete;

For those who don't want or can't use cloud functions, I found a recursiveDelete function in the admin sdk:
https://googleapis.dev/nodejs/firestore/latest/Firestore.html#recursiveDelete
// Recursively delete a reference and log the references of failures.
const bulkWriter = firestore.bulkWriter();
bulkWriter
.onWriteError((error) => {
if (error.failedAttempts < MAX_RETRY_ATTEMPTS) {
return true;
} else {
console.log('Failed write at document: ', error.documentRef.path);
return false;
}
});
await firestore.recursiveDelete(docRef, bulkWriter);

i don't know how much helpful for you but test it and compare the execution time which i get use it from fire store doc
/** Delete a collection in batches to avoid out-of-memory errors.
* Batch size may be tuned based on document size (atmost 1MB) and application requirements.
*/
void deleteCollection(CollectionReference collection, int batchSize) {
try {
// retrieve a small batch of documents to avoid out-of-memory errors
ApiFuture<QuerySnapshot> future = collection.limit(batchSize).get();
int deleted = 0;
// future.get() blocks on document retrieval
List<QueryDocumentSnapshot> documents = future.get().getDocuments();
for (QueryDocumentSnapshot document : documents) {
document.getReference().delete();
++deleted;
}
if (deleted >= batchSize) {
// retrieve and delete another batch
deleteCollection(collection, batchSize);
}
} catch (Exception e) {
System.err.println("Error deleting collection : " + e.getMessage());
}
}

As mentioned above, you need to write good bit of code for this. For each document that is to be deleted you need to check if it has one or more collections. If it does, then you need to queue those up for deletion too. I wrote the code below to do this. It's not tested to be scalable to large data sets, which is fine for me as I'm using it to clean up after small scale integration tests. If you need something more scalable, feel free to take this as a starting point and play around with batching more.
class FirebaseDeleter {
constructor(database, collections) {
this._database = database;
this._pendingCollections = [];
}
run() {
return new Promise((resolve, reject) => {
this._callback = resolve;
this._database.getCollections().then(collections => {
this._pendingCollections = collections;
this._processNext();
});
});
}
_processNext() {
const collections = this._pendingCollections;
this._pendingCollections = [];
const promises = collections.map(collection => {
return this.deleteCollection(collection, 10000);
});
Promise.all(promises).then(() => {
if (this._pendingCollections.length == 0) {
this._callback();
} else {
process.nextTick(() => {
this._processNext();
});
}
});
}
deleteCollection(collectionRef, batchSize) {
var query = collectionRef;
return new Promise((resolve, reject) => {
this.deleteQueryBatch(query, batchSize, resolve, reject);
});
}
deleteQueryBatch(query, batchSize, resolve, reject) {
query
.get()
.then(snapshot => {
// When there are no documents left, we are done
if (snapshot.size == 0) {
return 0;
}
// Delete documents in a batch
var batch = this._database.batch();
const collectionPromises = [];
snapshot.docs.forEach(doc => {
collectionPromises.push(
doc.ref.getCollections().then(collections => {
collections.forEach(collection => {
this._pendingCollections.push(collection);
});
})
);
batch.delete(doc.ref);
});
// Wait until we know if all the documents have collections before deleting them.
return Promise.all(collectionPromises).then(() => {
return batch.commit().then(() => {
return snapshot.size;
});
});
})
.then(numDeleted => {
if (numDeleted === 0) {
resolve();
return;
}
// Recurse on the next process tick, to avoid
// exploding the stack.
process.nextTick(() => {
this.deleteQueryBatch(query, batchSize, resolve, reject);
});
})
.catch(reject);
}
}

Solution using Node.js Admin SDK
export const deleteDocument = async (doc: FirebaseFirestore.DocumentReference) => {
const collections = await doc.listCollections()
await Promise.all(collections.map(collection => deleteCollection(collection)))
await doc.delete()
}
export const deleteCollection = async (collection: FirebaseFirestore.CollectionReference) => {
const query = collection.limit(100)
while (true) {
const snap = await query.get()
if (snap.empty) {
return
}
await Promise.all(snap.docs.map(doc => deleteDocument(doc.ref)))
}
}

There is now a simple way delete a document and all of its subcollections using NodeJS.
This was made available in nodejs-firestore version v4.11.0.
From the docs:
recursiveDelete()
Recursively deletes all documents and subcollections at and under the specified level.
import * as admin from 'firebase-admin'
const ref = admin.firestore().doc('my_document')
admin.firestore().recursiveDelete(ref)

You can write a handler which will recursive delete all nested descendants when triggers onDelete Firestore event.
Example of handler:
const deleteDocumentWithDescendants = async (documentSnap: FirebaseFirestore.QueryDocumentSnapshot) => {
return documentSnap.ref.listCollections().then((subCollections) => {
subCollections.forEach((subCollection) => {
return subCollection.get().then((snap) => {
snap.forEach((doc) => {
doc.ref.delete();
deleteDocumentWithDescendants(doc);
});
});
});
});
};
// On any document delete
export const onDocumentDelete = async (documentSnap: FirebaseFirestore.QueryDocumentSnapshot) => {
await deleteDocumentWithDescendants(documentSnap);
};
Tie it up with firestore event:
exports.onDeleteDocument = functions.firestore.document('{collectionId}/{docId}')
.onDelete(onDocumentDelete);

// You can add all the collection hierarchy to object
private collectionsHierarchy = {
groups: [
[
'groups',
'projects',
'files',
'assets',
'urls',
'members'
]
]
};
async deleteDocument(rootDocument: string) {
// if (!rootDocument.startsWith(`groups/${this.groupId()}`)) {
// rootDocument = `groups/${this.groupId()}/${rootDocument}`;
// }
const batchSize: number = 100;
let root = await this.db
.doc(rootDocument)
.get()
.toPromise();
if (!root.exists) {
return;
}
const segments = rootDocument.split('/');
const documentCollection = segments[segments.length - 2];
const allHierarchies = this.collectionsHierarchy[documentCollection];
for (let i = 0; i < allHierarchies.length; i = i + 1) {
const hierarchy = allHierarchies[i];
const collectionIndex = hierarchy.indexOf(documentCollection) + 1;
const nextCollections: [] = hierarchy.slice(collectionIndex);
const stack = [`${root.ref.path}/${nextCollections.shift()}`];
while (stack.length) {
const path = stack.pop();
const collectionRef = this.db.firestore.collection(path);
const query = collectionRef.orderBy('__name__').limit(batchSize);
let deletedIems = await this.deleteQueryBatch(query, batchSize);
const nextCollection = nextCollections.shift();
deletedIems = deletedIems.map(di => `${di}/${nextCollection}`);
stack.push(...deletedIems);
}
}
await root.ref.delete();
}
private async deleteQueryBatch(
query: firebase.firestore.Query,
batchSize: number
) {
let deletedItems: string[] = [];
let snapshot = await query.get();
if (snapshot.size === 0) {
return deletedItems;
}
const batch = this.db.firestore.batch();
snapshot.docs.forEach(doc => {
deletedItems.push(doc.ref.path);
batch.delete(doc.ref);
});
await batch.commit();
if (snapshot.size === 0) {
return deletedItems;
}
const result = await this.deleteQueryBatch(query, batchSize);
return [...deletedItems, ...result];
}

Another solution using Node.js Admin SDK with Batch.
const traverseDocumentRecursively = async (
docRef: FirebaseFirestore.DocumentReference<FirebaseFirestore.DocumentData>,
accumulatedRefs: FirebaseFirestore.DocumentReference<FirebaseFirestore.DocumentData>[],
) => {
const collections = await docRef.listCollections();
if (collections.length > 0) {
for (const collection of collections) {
const snapshot = await collection.get();
for (const doc of snapshot.docs) {
accumulatedRefs.push(doc.ref);
await traverseDocumentRecursively(doc.ref, accumulatedRefs);
}
}
}
};
import { chunk } from 'lodash';
const doc = admin.firestore().collection('users').doc('001');
const accumulatedRefs: FirebaseFirestore.DocumentReference<FirebaseFirestore.DocumentData>[] = [];
await traverseDocumentRecursively(doc, accumulatedRefs);
await Promise.all(
// Each transaction or batch of writes can write to a maximum of 500 documents
chunk(accumulatedRefs, 500).map((chunkedRefs) => {
const batch = admin.firestore().batch();
for (const ref of chunkedRefs) {
batch.delete(ref);
}
return batch.commit();
}),
);

Not sure if this is helpful for anyone here, but I am frequently facing the error "Fatal error deleting docs <list of docs>" when using firebase-tools.firestore.delete method (firebase-tools version 9.22.0).
I am currently handling these deletion failures using the returned error message in order to avoid rewriting the code cited at Oleg Bondarenko's answer. It uses admin.firestore to effectively delete the failed docs.
It's a poor solution since it relies on the error message, but at least it doesn't force us to copy the whole FirestoreDelete code to modify just a few lines of it:
firebase_tools.firestore
.delete(path, {
project: JSON.parse(process.env.FIREBASE_CONFIG!).projectId,
recursive: true,
yes: true,
token: getToken(),
})
.catch((err: Error) => {
if (err.name == "FirebaseError") {
// If recursive delete fails to delete some of the documents,
// parse the failures from the error message and delete it manually
const failedDeletingDocs = err.message.match(
/.*Fatal error deleting docs ([^\.]+)/
);
if (failedDeletingDocs) {
const docs = failedDeletingDocs[1].split(", ");
const docRefs = docs.map((doc) =>
firestore.doc(doc.slice(doc.search(/\(default\)\/documents/) + 19))
);
firestore
.runTransaction(async (t) => {
docRefs.forEach((doc) => t.delete(doc));
return docs;
})
.then((docs) =>
console.log(
"Succesfully deleted docs after failing: " + docs.join(", ")
)
)
.catch((err) => console.error(err));
}
}
});

If you are looking to delete user data, a solution to consider in 2022 is the Delete User Data Firebase Extension.
Once this is active, you can simply delete the user from Firebase Auth to trigger the recursive deletion of the user documents:
import admin from "firebase-admin";
admin.auth().deleteUser(userId);

You can call firebase.firestore().doc("whatever").set() and that will delete everything in that document.
The only way .set does not erase everything is if you set the merge flag to true.
See Firestore Documentation on Add Data
var cityRef = db.collection('cities').doc('BJ');
var setWithMerge = cityRef.set({
capital: true
}, { merge: true });

Related

Node ExcelJS writing file and saving to S3 heap out of memory issue

I have a download center module where I am generating multiple excel files S3 links at the same against the passed filters from frontend using mongodb aggregations. It all works fine however it is taking in a lot of memory and sometimes report generation crashes due to javascript heap out of memory error. I need to know how I can resolve this heap out of memory error. I am using latest version of exceljs(4.3.0) and using LoopbackJS 3(deprecated) as Node framework. Here are some code snippets which might help you guys with figuring out the issue.
const excelReport = new ExcelReport(report.name);
excelReport.setHeaders(headers);
const aggregation = executeEventBaseAggregation(
"RiderDeliveryBatch",
pipeline
);
aggregation.event.on("data", function (x) {
let rows = {};
rows["Date Created At"] = utils.localDateTime(x.parcel.createdAt);
rows["Pickup Date"] = utils.localDateTime(x.parcelStatuses.find(
(y) =>
y.statusRepositoryId.toString() ===
statuses.find((val) => val.key === "parcel-picked-up").id.toString()
).createdAt);
rows["CN"] = x.parcel._id;
rows["Vendor Order ID"] = x.parcel.vendorParcelId;
rows["Vendor Name"] = x.vendor.name;
rows["Consignee First Name"] = x.customerData.firstName;
rows["Consignee Last Name"] = x.customerData.lastName;
rows["Address"] = x.customerData.address;
rows["Origin City"] = getCities(x.parcel.originCityId);
rows["Destination City"] = cities.find(
(y) => y.id.toString() === x.customerData.cityId.toString()
).name;
rows["Destination subCity"] = x.customerData["subCity"] ? x.customerData["subCity"] : "";
rows["COD"] = x.parcel.amount;
rows["Attempts"] = confirmDispatched.length;
excelReport.addRow(rows);
});
aggregation.event.on("error", function (e) {
console.log("aggregation error");
updateDownloadCenterReport(downloadCenterReport, {
status: "error",
e,
});
});
aggregation.event.on("done", function () {
console.log("aggregation done");
excelReport
.upload()
.then(async (link) => {
await updateDownloadCenterReport(downloadCenterReport, {
excelLink: link,
status: "success",
pdfLink: "",
});
longRunningTaskEnd();
})
.catch(async (error) => {
await updateDownloadCenterReport(downloadCenterReport, {
status: "error",
error,
});
longRunningTaskEnd();
});
});
ExcelJS util module
class ExcelReport {
/**
* Initializes excel sheet with provided worksheet name
* #param {string} worksheetName
*/
constructor(worksheetName) {
this.workbook = new Excel.Workbook();
this.worksheet = this.workbook.addWorksheet(worksheetName);
}
/**
*
* #param {{header:string;key:string;width:number}[]} headers
*/
setHeaders(headers) {
this.worksheet.columns = headers;
return this;
}
/**
*
* #param {{}} row
*/
addRow(row) {
this.worksheet.addRow(row);
return this;
}
/**
*
* #param {{}[]} rows
*/
addRows(rows) {
this.worksheet.addRows(rows);
return this;
}
/**
* uploads the file to AWS
* #returns {Promise<string>}
*/
async upload() {
const aws = new FileUploadService(
awsConfig.accessKeyId,
awsConfig.secretAccessKey,
awsConfig.bucketName
);
const bufferFile = await this.workbook.xlsx.writeBuffer();
const link = await aws.uploadBuffer(
"download-center",
bufferFile,
"xlsx",
this.worksheet.name
);
return link;
}
}

Puppeteer create PDF files from HTML data hangs Windows 10 system

I created an App that processes students results by extracting data from multiple excel workbooks. The problem is that using Puppeteer to generate the PDF files, throws the system into a loop till it hangs the system.
Actually, I have tested same codes below using PhantomJs which is bundled as pdf-creator-node, and was able to generate 150 PDF files comfortably in 3 minutes. The only challenge I dumped PhantomJs is that all the styling in the CSS file was not included, even when I inserted it as an inline style in the header, suing replace function of JS. Another, is that PhantomJs is no longer in active development. I searched the web, and found out that only Puppeteer is the valid solution with active development and support too.
I tried using page.close() at the end of pdfCreator() which is in a loop, and browser.close() at the end of pdfGenerator(). What I am doing wrong?
Here below are the codes in the server.js and PdfGenerator.js files, with a sample of the ERROR, and screenshot of my Task Manager after the system crawled out of hanging state. For HTML generation, I used Mustache. I excluded some lines of codes in server.js because the total character count was over 60k.
server.js
// [codes were removed here]
if(getCode == 'compute-result') {
// declare variable
let setData = null;
let setTitle = 'Results Computation...';
let setArgs = getArgs;
// dataFromFile = ReadFile(pathCodeTextFile);
// setArgs = Number(dataFromFile);
setCode = 'compute-result';
let setView = [];
let setNext = true;
let countTerms = [];
// if(getArg > 0) {
// Final Result computation
const getJson = ReadFile(pathJsonResults);
// const getCtrl = ReadFile(pathJsonCtrl);
const getResultObject = JSON.parse(getJson);
getResult = getResultObject;
const totalResults = getResult.firstTerm.length + getResult.secondTerm.length + getResult.thirdTerm.length;
if(setView.length < 1 && getResult != null) {
setData = 'PDFs for Students Results initiating...';
setView.unshift('Reading saved data...');
client.emit('query', {data: setData, title: setTitle, code: setCode, next: setNext, args: null, view: JSON.stringify(setView)});
}
Sleep(2000).then(() => {
if(getResult != null) {
setData = 'Students Results will be ready in a moment';
client.emit('query', {data: setData, title: setTitle, code: setCode, next: setNext, args: setArgs, view: JSON.stringify(setView)});
}
const wacthFiles = (file, className, termName, sessionName, completed, pdfList) => {
try {
if(typeof file == 'string' && !FileExists(pathJsonPdfList)) {
if(pdfList.length < 2){
setData = 'Saving PDFs to downladable files...';
}
if(className != null && termName != null && sessionName != null) {
setTitle = `${pdfList.length} Result PDF${pdfList.length > 1?'s':''}...`;
setView.unshift(file);
if(!countTerms.includes(termName)) {
countTerms.push(termName)
}
// setCode = -1000 - pdfList.length;
// console.log('PDF PROGRESS: ', `${pdfList.length} Result PDF${pdfList.length > 1?'s':''}... ${setCode}`);
// when all PDFs are created
if(completed) {
setTitle = setTitle.replace('...', ' [completed]');
setData = 'Result Download button is Active. You may click it now.';
setView.unshift('=== PDF GENERATION COMPLETED ===');
setView.unshift(`A total of ${pdfList.length} students' Results were generated`);
WriteFile(pathJsonPdfList, JSON.stringify(pdfList));
// set donwload button active
setCode = Number(codeTextFilePdfCompleted);
setNext = false;
getResult = null;
let termString = countTerms.toString();
termString = ReplaceAll(termString, '-term', '');
termString = ReplaceAll(termString, ',', '-');
const addTxt = `${className} _${termString} Term${countTerms.length>1?'s':''} (${sessionName})`;
WriteFile(pathCodeTextFile, addTxt);
// console.log('======== PDF GENERATION ENDS ================');
} else {
setCode = -1 * pdfList.length;
}
client.emit('query', {data: setData, title: setTitle, code: setCode, next: setNext, args: setArgs, view: JSON.stringify(setView)});
}
}
} catch (error) {
console.log('ERROR ON WATCHER: ', error);
}
}
if(!FileExists(pathJsonPdfList) && getResult !== null) {
PdfGenerator(getResult, wacthFiles);
}
// Watcher(pathWatchResults, setCode, wacthDir, 10000);
});
// }
}
}
} catch (error) {
})
client.on('disconnect', () => {
console.log('SERVER: Disconnected');
});
server.listen(portApi, () =>{
console.log('Server listens on port 8881')
});
// serve static files
app.use(express.static(pathPublic));
// [codes were removed here]
PdfGenerator.js
The problem lies in these functions: PdfGenerator & createPdf
'use strict';
process.setMaxListeners(Infinity) // fix for Puppeteer MaxListenerExceededWarning
const Puppeteer = require('puppeteer')
const {HtmlGenerator} = require('../components/HtmlGenerator')
const {WriteFile, FileExists, RandomNumber, RoundNumber, IsNumberFraction, ReadFile} = require('../components/Functions')
if (process.env.NODE_ENV !== 'production') {
require('dotenv').config();
}
const pathFirstTermResults = process.env.DIR_FIRST_TERM_RESULTS;
const pathSecondTermResults = process.env.DIR_SECOND_TERM_RESULTS;
const pathThirdTermResults = process.env.DIR_THIRD_TERM_RESULTS;
const publicDir = process.env.DIR_PUBLIC;
const cssFile = process.env.PATH_CSS_FILENAME;
const pathCssRaw = __dirname + '\\' + publicDir + '\\' + cssFile;
const pathCss = pathCssRaw.replace(`\\uploads`, '');
const tagCssReplace = process.env.TAG_CSS_REPLACE;
let jsonDir = process.env.PATH_JSON;
jsonDir = jsonDir.split('/').pop();
let htmlDir = process.env.DIR_HTML;
htmlDir = __dirname + '\\' + htmlDir.split('/').pop();
const htmlType1 = htmlDir + '\\' + process.env.HTML_TYPE1;
const htmlType2 = htmlDir + '\\' + process.env.HTML_TYPE2;
const htmlType3 = htmlDir + '\\' + process.env.HTML_TYPE3;
const pathJsonPdfList = './' + jsonDir + '/' + process.env.JSON_PDF_LIST_FILENAME;
const pathJsonPdfContent = __dirname + '\\' + jsonDir + '\\' + process.env.JSON_PDF_CONTENT;
const firstTermDir = 'first-term';
const secondTermDir = 'second-term';
const thirdTermDir = 'third-term';
let cumulativeFirstTermTotalList = {};
let cumulativeSecondTermTotalList = {};
let firstTermOnce = true;
let secondTermOnce = true;
let thirdTermOnce = true;
let isActive = false;
const getPath = (p, f) => {
let dir = pathFirstTermResults;
switch (p) {
case firstTermDir:
dir = pathFirstTermResults;
break;
case secondTermDir:
dir = pathSecondTermResults;
break;
case thirdTermDir:
dir = pathThirdTermResults;
break;
default:
break;
}
return dir + f
}
const resolution = {
x: 1920,
y: 1080
}
const args = [
'--disable-gpu',
`--window-size=${resolution.x},${resolution.y}`,
'--no-sandbox',
]
const createPdf = (page, content, templateType, filename, className, term, sessionName, isProcessActive, pdfFileList, cb) => {
let path, document, options;
path = getPath(term, filename);
if(path != null) {
let options = {
path: path,
format: 'A4',
printBackground: true,
margin: {
left: '0px',
top: '0px',
right: '0px',
bottom: '0px'
}
}
let templateData = '';
switch (templateType) {
case '1':
templateData = ReadFile(htmlType1);
break;
case '2':
templateData = ReadFile(htmlType2);
break;
case '3':
templateData = ReadFile(htmlType3);
break;
default:
templateData = ReadFile(htmlType1);
break;
}
(async() => {
const html = HtmlGenerator(content, templateData);
if(html != undefined && html !== '' && html != null) {
// create PDF file
cb(filename, className, term, sessionName, isProcessActive, pdfFileList);
// get style from .css & replace
const css = ReadFile(pathCss);
await page.setContent(html, { waitUntil: 'networkidle0'});
await page.addStyleTag(css);
await page.pdf(options);
page.close();
}
})()
}
}
const pdfGenerator = (json, cb) => {
let data = {};
let pdfFileList = [];
if(typeof json == 'string') {
data = JSON.parse(json)
} else {
data = json;
}
try {
// declare defaults
let filename = 'Student' + '.pdf';
let termName = firstTermDir;
const templateType = data.keys.templateType;
const session = data.classInfo.Session;
const sessionName = session.replace('/', '-');
const students = data.students;
const className = data.classInfo.Class_Name;
const recordFirstTerm = data.firstTerm;
const recordSecondTerm = data.secondTerm;
const recordThirdTerm = data.thirdTerm;
let pdfCreatedList = [];
let isReset = false;
let totalResultsExpected = Object.keys(recordFirstTerm).length + Object.keys(recordSecondTerm).length + Object.keys(recordThirdTerm).length;
let totalResultsCount = 0;
let jsonForPdf = {};
let record = {};
let sRecord, path, id, fName, lName;
// get each student
let logEndOnce = true;
let logBeforeOnce = true;
logBeforeOnce && console.log('============== *** ================');
logBeforeOnce && console.log('======== PDF GENERATION BEGINS ================');
const computeResult = (page, setTerm, setRecord, setReset) => {
const termName = setTerm;
const record = setRecord;
let isReset = setReset;
logBeforeOnce && console.log(`====== ${termName} RESULTS BEGINS ======`);
for(let elem of students){
id = elem.id;
fName = elem.firstName;
lName = elem.lastName;
filename = `${lName} ${fName} _${termName} ${sessionName}.pdf`;
// sRecord = record.filter(function (entry) { return entry[id] !== undefined; });
sRecord = record[id];
path = getPath(termName, filename);
// create pdf
if(!FileExists(path) && !FileExists(pathJsonPdfList)){
// generate final JSON for the student
// isReset = (pdfCreatedList.includes(id))? false: true;
jsonForPdf = finalJson(elem, sRecord, data, termName);
(pdfFileList.length < 1) && WriteFile(pathJsonPdfContent, JSON.stringify(jsonForPdf));
pdfFileList.push({
'term': termName,
'file': filename
});
totalResultsCount = pdfFileList.length;
const pdfDate = new Date();
console.log(`${filename} (${totalResultsCount}/${totalResultsExpected}) at ${pdfDate.getHours()}hr${pdfDate.getHours()>1?'s':''} - ${pdfDate.getMinutes()}min${pdfDate.getMinutes()>1?'s':''} - ${pdfDate.getSeconds()}sec${pdfDate.getSeconds()>1?'s':''}`);
isActive = (totalResultsExpected === totalResultsCount)? true: false;
logEndOnce = false;
// cb(filename, className, termName, sessionName, isActive, pdfFileList);
// WriteFile(path, null);
isReset = true;
createPdf(page, jsonForPdf, templateType, filename, className, termName, sessionName, isActive, pdfFileList, cb);
}
}
logBeforeOnce && console.log(`====== ${termName} RESULTS ENDS ======`);
}
// get each student result for First Term
const computeFirstTerm = (p) => {
return new Promise((resolve) => {
if(data.keys.firstTerm === '1') {
termName = firstTermDir;
record = recordFirstTerm;
pdfCreatedList = [];
isReset = false;
computeResult(p, termName, record, isReset)
}
resolve()
})
}
// get each student result for Second Term
const computeSecondTerm = (p) => {
return new Promise((resolve) => {
if(data.keys.secondTerm === '1') {
termName = secondTermDir;
record = recordSecondTerm;
pdfCreatedList = [];
isReset = false;
computeResult(p, termName, record, isReset)
}
resolve()
})
}
// get each student result for Third Term
const computeThirdTerm = (p) => {
return new Promise((resolve) => {
if(data.keys.thirdTerm === '1') {
termName = thirdTermDir;
record = recordThirdTerm;
pdfCreatedList = [];
isReset = false;
computeResult(p, termName, record, isReset)
}
resolve()
})
}
(async () => {
browser = await Puppeteer.launch({
headless: true,
handleSIGINT: false,
args: args,
});
const page = await browser.newPage();
await page.setViewport({
width: resolution.x,
height: resolution.y,
})
await computeFirstTerm(page);
await computeSecondTerm(page);
await computeThirdTerm(page);
browser.close()
})()
if(totalResultsExpected === totalResultsCount && totalResultsCount !== 0 && !logEndOnce) {
logEndOnce = true;
logBeforeOnce = false;
console.log('======== PDF GENERATION ENDS ================');
}
} catch (error) {
console.log('==== ERROR IN PDF GENERATION: ', error)
}
}
module.exports = {
PdfGenerator: pdfGenerator
}
ERROR
info Visit https://yarnpkg.com/en/docs/cli/run for documentation about this command.
lerna ERR! yarn run start stderr:
<--- Last few GCs --->
[9884:000002D68A73C6B0] 1665171 ms: Scavenge 44.1 (45.8) -> 43.2 (45.8) MB, 223.9 / 0.0 ms (average mu = 0.956, current mu = 0.952) allocation failure
[9884:000002D68A73C6B0] 1684089 ms: Scavenge 44.1 (45.8) -> 43.3 (45.8) MB, 587.3 / 0.0 ms (average mu = 0.956, current mu = 0.952) allocation failure
[9884:000002D68A73C6B0] 1749901 ms: Scavenge 44.2 (45.8) -> 43.3 (45.8) MB, 5099.0 / 0.0 ms (average mu = 0.956, current mu = 0.952) allocation failure
<--- JS stacktrace --->
FATAL ERROR: Committing semi space failed. Allocation failed - JavaScript heap out of memory
1: 00007FF6ED61013F
2: 00007FF6ED59F396
3: 00007FF6ED5A024D
4: 00007FF6EDED19EE
5: 00007FF6EDEBBECD
6: 00007FF6EDD5F61C
7: 00007FF6EDD6933F
8: 00007FF6EDD5BF19
9: 00007FF6EDD5A0D0
10: 00007FF6EDD7EA06
11: 00007FF6EDAB1CD5
12: 00007FF6EDF5F3E1
13: 00007FF6EDF602E9
14: 000002D68C4EF69E
error Command failed with exit code 134.
Screenshot of Task Manager, Chromium running multiple instances of over 50.
I appreciate any help. I hope this can be resolved to give me a smooth PDF generation.
Thank you.
Example solution (limiting parallel browsers)
I created you a PdfPrinter class which you can integrate into your setup. It allows you to limit the amount of parallel pdf generation jobs and allows setting a limit and manages opening/closing the browser for you. The PdfPrinter class is also highly coupled and needed some modification for using it as a general queue. Logicwise this can be modified to be a general queue.
You can try to integrate that into your code. This is a fully working test example with simplified pdfs (without the part of getting the actual data from the excel..)
As far as I understood your code, you do not need to pass the page around all your functions. First create your html + css and then use the pdfPrinter and let it handle page creation + browser launching..
(I like to code stuff like this so I went straight ahead..)
var puppeteer = require('puppeteer')
const defaultPrinterOptions = {
format: 'A4',
printBackground: true,
margin: {
left: '0px',
top: '0px',
right: '0px',
bottom: '0px'
}
}
class PdfPrinter {
maxBrowsers = 2
enqueuedPrintJobs = []
failedJobs = []
browserInstances = 0
// max browser instances in parallel
constructor(maxBrowsers) {
this.maxBrowsers = maxBrowsers
}
/**
*
* #param {*} html the html content to print
* #param {*} css to apply to the page
* #param {*} printOptions options passed to puppeteer
*/
// enqueues a print but the exact end moment cannot be known..
enqueuePrint = (html, css, path, done) => {
// merge custom options with defaultOptions..
const printOptions = {
...defaultPrinterOptions,
// add the path to the options.
path: path
}
// create a function which can be stored in an array
// it will later be grabbed by startPrinter() OR at the time any
// brwoser freed up..
// the function needs to be passed the actual used browser instance!
this.enqueuedPrintJobs.push(async(browser) => {
// catch the error which may be produced when printing something..
try {
// print the document
await this.print(browser, html, css, printOptions)
} catch (err) {
console.error('error when printing document..CLosing browser and starting a new job!!', printOptions.path)
console.error(err)
// store someting so you now what failed and coudl be retried or something..
this.failedJobs.push({ html, css, path: printOptions.path })
// puppeteer can run into erros too!!
// so close the browser and launch a new one!
await this.closeBrowser(browser)
browser = await this.launchBrowser()
}
// after the print, call done() so the promise is resovled in the right moment when
// this particular print has ended.!
done()
// start the next job right now if there are any left.
const job = this.enqueuedPrintJobs.shift()
if (!job) {
console.log('No print jobs available anymore. CLosing this browser instance.. Remaining browsers now:', this.maxBrowsers - this.browserInstances + 1)
await this.closeBrowser(browser)
return
}
// job is actually this function itself! It will be executed
// and automatically grab a new job after completion :)
// we pass the same browser instance to the next job!.
await job(browser)
})
// whenever a print job added make sure to start the printer
// this starts new browser instances if the limit is not exceeded resp. if no browser is instantiated yet,
// and does nothing if maximum browser count is reached..
this.tryStartPrinter()
}
// same as enqueuePrint except it wraps it in a promise so we can now the
// exact end moment and await it..
enqueuePrintPromise(html, css, path) {
return new Promise((resolve, reject) => {
try {
this.enqueuePrint(html, css, path, resolve)
} catch (err) {
console.error('unexpected error when setting up print job..', err)
reject(err)
}
})
}
// If browser instance limit is not reached will isntantiate a new one and run a print job with it.
// a print job will automatically grab a next job with the created browser if there are any left.
tryStartPrinter = async() => {
// Max browser count in use OR no jobs left.
if (this.browserInstances >= this.maxBrowsers || this.enqueuedPrintJobs.length === 0) {
return
}
// browser instances available!
// create a new one
console.log('launching new browser. Available after launch:', this.maxBrowsers - this.browserInstances - 1)
const browser = await this.launchBrowser()
// run job
const job = this.enqueuedPrintJobs.shift()
await job(browser)
}
closeBrowser = async(browser) => {
// decrement browsers in use!
// important to call before closing browser!!
this.browserInstances--
await browser.close()
}
launchBrowser = async() => {
// increment browsers in use!
// important to increase before actualy launching (async stuff..)
this.browserInstances++
// this code you have to adjust according your enviromnemt..
const browser = await puppeteer.launch({ headless: true })
return browser
}
// The actual print function which creates a pdf.
print = async(browser, html, css, printOptions) => {
console.log('Converting page to pdf. path:', printOptions.path)
// Run pdf creation in seperate page.
const page = await browser.newPage()
await page.setContent(html, { waitUntil: 'networkidle0' });
await page.addStyleTag({ content: css });
await page.pdf(printOptions);
await page.close();
}
}
// testing the PDFPrinter with some jobs.
// make sure to run the printer in an `async` function so u can
// use await...
const testPrinterQueue = async() => {
// config
const maxOpenedBrowsers = 5 // amount of browser instances which are allowed to be opened in parallel
const testJobCount = 100 // amount of test pdf jobs to be created
const destDir = 'C:\\somepath' // the directory to store the pdfs in..
// create sample jobs for testing...
const jobs = []
for (let i = 0; i < testJobCount; i++) {
jobs.push({
html: `<h1>job number [${i}]</h1>`,
css: 'h1 { background-color: red; }',
path: require('path').join(destDir, `pdf_${i}.pdf`)
})
}
// track time
const label = 'printed a total of ' + testJobCount + ' pdfs!'
console.time(label)
// run the actual pdf generation..
const printer = new PdfPrinter(maxOpenedBrowsers)
const jobProms = []
for (let job of jobs) {
// run jobs in parallel. Each job wil be runned async and return a Promise therefor
jobProms.push(
printer.enqueuePrintPromise(job.html, job.css, job.path)
)
}
console.log('All jobs enqueued!! Wating for finish now.')
// helper function which awaits all the print jobs, resp. an array of promises.
await Promise.all(jobProms)
console.timeEnd(label)
// failed jobs::
console.log('jobs failed:', printer.failedJobs)
// as file:
await require('fs').promises.writeFile('failed-jobs.json', JSON.stringify(printer.failedJobs))
}
testPrinterQueue().then(() => {
console.log('done with everyting..')
}).catch(err => {
console.error('unexpected error occured while printing all pages...', err)
})
You only need to adjust the destDir / openedBrowsers and testJobCount vars in the beginning of testPrinterQueue() for getting this to work.
What caused the problem in your code
Let's have a look at this piece
(async () => {
browser = await Puppeteer.launch({
headless: true,
handleSIGINT: false,
args: args,
});
const page = await browser.newPage();
await page.setViewport({
width: resolution.x,
height: resolution.y,
})
await computeFirstTerm(page);
await computeSecondTerm(page);
await computeThirdTerm(page);
browser.close()
})()
You created an anonymous function which is executed immediatly. Within the function all the statements are correctly awaited using await. But if you run this whole piece within a synchronious part of your application, the whole function will start immediatly but NOT been awaited before running next code.
Checkout this example:
//utility
function wait(ms) {
return new Promise(resolve => {
setTimeout(resolve, ms)
})
}
const AsyncFunction = async() => {
console.log('Async named function started')
// simulate execution time of 2 seconds
await wait(2000)
console.log('Async named function ended')
};
function SyncFunction() {
console.log('sync function started')
// example of async function execution within a sync function..
AsyncFunction();
// what you have done in your code:
(async() => {
console.log('Async anonymus function started')
await wait(3000)
console.log('Async anonymus function ended')
})()
// what
console.log('sync function ended.')
}
SyncFunction()
console.log('done')
Note the output:
Async named function started
Async anonymus function started
sync function ended. // => sync function already ended
done // sync function ended and code continues execution.
Async named function ended
Async anonymus function ended
To correctly await your async stuff you need to put your whole application in async scope:
//utility
function wait(ms) {
return new Promise(resolve => {
setTimeout(resolve, ms)
})
}
const AsyncFunction = async() => {
console.log('Async named function started')
// simulate execution time of 2 seconds
await wait(2000)
console.log('Async named function ended')
};
// this is now async!!
async function SyncFunction() {
console.log('sync function started')
// example of async function execution within a sync function..
await AsyncFunction();
// what you have done in your code:
await (async() => {
console.log('Async anonymus function started')
await wait(3000)
console.log('Async anonymus function ended')
})()
// what
console.log('sync function ended.')
}
SyncFunction().then(() => {
console.log('done')
}).catch(err => {
console.error('unexpected error occured..')
})
This output is what we want
sync function started
Async named function started
Async named function ended
Async anonymus function started
Async anonymus function ended
sync function ended.
done
Hope this helps you understand.
Feel free to leave a comment.

ESOCKETTIMEDOUT Cloud Functions for Firebase

I am using Cloud Functions for Firebase together with my Firebase Realtime Database in order to do some data management for my app.
One of my functions though seems to get terminated since it takes about 100-150 seconds to complete. This happens with error : ESOCKETTIMEDOUT.
Is there a way to prevent this?
Here is my function:
function getTopCarsForUserWithPreferences(userId, genres) {
const pathToCars = admin.database().ref('cars');
pathTocars.orderByChild("IsTop").equalTo(true).once("value").then(function(snapshot) {
return writeSuggestedCars(userId, genres, snapshot);
}).catch(reason => {
console.log(reason)
})
}
function writeSuggestedCars(userId, genres, snapshot) {
const carsToWrite = {};
var snapCount = 0
snapshot.forEach(function(carSnapshot) {
snapCount += 1
const carDict = carSnapshot.val();
const carGenres = carDict.taCarGenre;
const genre_one = genres[0];
const genre_two = genres[1];
if (carGenres[genre_one] === true ||carGenres[genre_two] == true) {
carsToWrite[carSnapshot.key] = carDict
}
if (snapshot.numChildren() - 1 == snapCount) {
const pathToSuggest = admin.database().ref('carsSuggested').child(userId);
pathToSuggest.set(carsToWrite).then(snap => {
}).catch(reason => {
console.log(reason)
});
}
});
}
The getTopCarsForUserWithPreferences gets called when a user adds preferences. Also the cars table has about 50k entries.
Well you need to return everytime you use a async task.
Edit: you return 'writeSuggestedCars' but I think it never returns a value. I do not have a compiler, but I thought it was return Promise.resolved(). Can you insert it where I putted 'HERE'?
Maybe this will work:
function getTopCarsForUserWithPreferences(userId, genres) {
const pathToCars = admin.database().ref('cars');
return pathTocars.orderByChild("IsTop").equalTo(true).once("value").then(function(snapshot) {
return writeSuggestedCars(userId, genres, snapshot);
}).catch(reason => {
console.log(reason)
})
}
function writeSuggestedCars(userId, genres, snapshot) {
const carsToWrite = {};
var snapCount = 0
snapshot.forEach(function(carSnapshot) {
snapCount += 1
const carDict = carSnapshot.val();
const carGenres = carDict.taCarGenre;
const genre_one = genres[0];
const genre_two = genres[1];
if (carGenres[genre_one] === true ||carGenres[genre_two] == true) {
carsToWrite[carSnapshot.key] = carDict
}
if (snapshot.numChildren() - 1 == snapCount) {
const pathToSuggest = admin.database().ref('carsSuggested').child(userId);
return pathToSuggest.set(carsToWrite).then(snap => {
// 'HERE' I think return promise/Promise.resolve() will work
}).catch(reason => {
console.log(reason)
});
}
});
}

nodejs process.exit() prevents function from executing properly

I am writing my first NodeJs script. Below is some code I have set up to test a database connection.
When I include process.exit() at the very end of the script, nothing is logged to the console - however, when I simply remove that line of code, the function logs the query results appropriately (output included also).
I am wondering why the process.exit() at the very end of the code prevents the function from functioning and if I am using the exit method wrong.
Code:
/*
* Runs every minute on an uptime agent
* Checks list of sites to see if they're up
*/
// strict mode (see http://stackoverflow.com/questions/8651415/what-is-strict-mode-and-how-is-it-used for information)
'use strict';
// cuz im lazy
String.prototype.lc = function() { return this.toLowerCase(); }
String.prototype.uc = function() { return this.toUpperCase(); }
/** EXCEPTIONS **/
var DbConnectError = function(m) {
this.name = 'DbConnectError';
this.message = m;
}
var DbConfigError = function(m) {
this.name = 'DbConfigError';
this.message = m;
}
/** DATABSE **/
/*
* change log
* 08/07/2015 Tyler J Barnes
* -- init dev
*/
var db = function() {
// error messages
this._errors = [];
// connection state
this._isConnected = false;
// db configuration
this._config = {
host: '',
user: '',
password: '',
database: ''
};
// reference
this._db = null;
// connection obj ref
this._con = null;
// sql statement
this._sqlStmt = '';
// is prepared statement -- needs data binded
this._isPrepared = false;
// data to bind to prepared stmts
this._sqlData = null;
// query result set
this._result = null;
/*
* initialize
* #param (object) : cofig prop and values
* #return void
*/
this.ini = function(config) {
// make sure config was passed
if(!config || (typeof config).lc() != 'object') {
throw new DbConnectError('Invalid DB Configuration');
}
// check for appropriate properties
// if exist, store value
for(var p in this._config) {
if(!(p in config)) {
this._errors.push('Missing database config: '+p+'\n');
} else {
this._config[p] = config[p];
}
}
// throw any errors before continue
if(this._errors.length > 0) {
var tmp = '';
for(var i = 0; i < this._errors.length; i++) {
tmp+=this._errors[i];
}
throw new DbConfigError(tmp);
}
this._db = require('mysql');
};
// create connection -- returns thread id
this.con = function() {
this._con = this._db.createConnection(this._config);
this._con.connect();
this._isConnected = true;
};
// sets sql statement
this.setSqlStmt = function(str, prepared, bindData) {
this._sqlStmt = str;
if(prepared) {
this._isPrepared = true;
this._sqlData = bindData;
} else {
this._isPrepared = false;
this._sqlData = null;
}
};
// kills connection
this.die = function() {
if(this._isConnected) {
this._con.end();
}
};
}
var c = {
host: 'asdfasdf',
user: 'asdfasdf',
password: 'asdfasdf',
database: 'asdfasdf'
};
var d = new db();
d.ini(c);
d.con();
d._con.query('SELECT * FROM Agents', function(err,rows,fields) {
if(err) console.log(err);
console.log(rows);
});
d._con.end();
// this is what upsets me
process.exit();
Output when process.exit() is removed:
[{agentid:1, host: 'asdfasdfadf'....etc}]
The database query operation is asynchronous. This means that it will only be concluded after the program executes all of the main module, including the exit system call.
Instead, you should only terminate the process when the results are obtained from the database:
var d = new db();
d.ini(c);
d.con();
d._con.query('SELECT * FROM Agents', function(err,rows,fields) {
if(err) console.log(err);
console.log(rows);
process.exit();
});
d._con.end();
This is one of the typical misunderstandings of how JavaScript's asynchronous function calls work. I would advise you to read on the subject. These two questions may have some useful content:
How does Asynchronous Javascript Execution happen? and when not to use return statement?
How does JavaScript handle AJAX responses in the background?

Is it possible to build a dynamic task list in nodejs Async (waterfall, series, etc...)

I am pulling information from some collections in mongo that contain node and edge data. First i must get the node so i can grab its edges. Once i have a list of edges i then go back out and grab more nodes (etc.. based on a depth value). The following code is an loose example of how i am attempting to use async.waterfall and the task list.
Initially i have only a single task but once i make my first query i add to the task array. Unfortunately this does not seem to register with async and it does not continue to process the tasks i am adding.
Is there a better way to do this?
var async = require('async')
var mongoose = require('mongoose')
var _ = requrie('underscore')
var Client = this.Mongo.connect(/*path to mongo*/)
var Node = mongoose.Schema({
id : String,
graph_id : String
})
var Edge = mongoose.Schema({
id : String,
source_id : String,
destination_id : String
})
var Nodes = Client.model('myNode', Node)
var Edges = Client.model('myEdge', Edge)
var funcs = []
var round = 1
var depth = 2
var query = {
node : {
id : '12345'
},
edge : {
id : '12345'
}
}
var addTask = function(Nodes, Edges, query, round, depth) {
return function(callback) {
queryData(Nodes, Edges, query, function(err, node_list) {
if(depth > round) {
round++
function_array.push(addTask(Nodes, Edges, query, round, depth))
}
})
}
}
var queryData = function(Nodes, Edges, query, cb) {
async.waterfall([
function(callback) {
Nodes.find(query.node, function(err, nodes) {
var node_keys = _.map(nodes, function(node) {
return node.id
})
callback(null, nodes, node_keys)
})
},
function(nodes, node_keys, callback) {
query.edge.$or = [ {'source_id' : {$in:node_keys}}, {'destination_id' : {$in:node_keys}} ]
Edges.find(query.edge, function(err, edges) {
var edge_keys = _.map(edges, function(edge) {
if(edge['_doc']['source_id'] != query.node.id) {
return edge['_doc']['source_id']
} else {
return edge['_doc']['destination_id']
}
callback(null, nodes, edges, node_keys, edge_keys)
})
})
}
], function(err, nodes, edges, node_keys, edge_keys) {
// update the results object then...
cb(null, _.uniq(edge_keys)
})
}
var function_array = []
function_array.push(addTask(Nodes, Edges, query, round, depth))
async.waterfall(function_array, function(err) {
Client.disconnect()
//this should have run more than just the initial task but does not
})
--------------------- UPDATE ---------------------------
So after playing around with trying to get Async waterfall or series to do this by adding trailing functions I decided to switch to using async.whilst and am now happy with the solution.
function GraphObject() {
this.function_array = []
}
GraphObject.prototype.doStuff = function() {
this.function_array.push(this.buildFunction(100))
this.runTasks(function(err) {
console.log('done with all tasks')
}
}
GraphObject.prototype.buildFunction = function(times) {
return function(cb) {
if(times != 0) {
this.function_array.push(this.buildFunction(times - 1))
}
cb(null)
}
}
GraphObject.prototype.runTasks = function(cb) {
var tasks_run = 0
async.whilst(
function(){
return this.function_array.length > 0
}.bind(this),
function(callback) {
var func = this.function_array.shift()
func.call(this, function(err) {
tasks_run++
callback(err)
})
}.bind(this),
function(err) {
console.log('runTasks ran '+tasks_run+' tasks')
if(err) {
cb(500)
}
cb(null)
}.bind(this)
)
}
A task in your function_array can only add a new task to the array provided it is NOT the last task in the array.
In your case, your function_array contained only 1 task. That task itself cannot add additional tasks since it's the last task.
The solution is to have 2 tasks in the array. A startTask to bootstrap the process, and a finalTask that is more of a dummy task. In that case,
function_array = [startTask, finalTask];
Then startTask would add taskA, taskB will add task C and eventually
function_array = [startTask, taskA, taskB, taskC, finalTask];
The sample code below that illustrates the concepts.
var async = require('async');
var max = 6;
var nodeTask = function(taskId, value, callback){
var r = Math.floor(Math.random() * 20) + 1;
console.log("From Node Task %d: %d", taskId, r);
// add an edge task
if (taskId < max) {
function_array.splice(function_array.length-1, 0, edgeTask);
}
callback(null, taskId + 1, value + r);
};
var edgeTask = function(taskId, value, callback){
var r = Math.floor(Math.random() * 20) + 1;
console.log("From Edge Task %d: %d", taskId, r);
// add a node task
if (taskId < max) {
function_array.splice(function_array.length-1, 0, nodeTask);
}
callback(null, taskId + 1, value + r);
};
var startTask = function(callback) {
function_array.splice(function_array.length-1, 0, nodeTask);
callback(null, 1, 0);
};
var finalTask = function(taskId, value, callback) {
callback(null, value);
};
var function_array = [startTask, finalTask];
async.waterfall(function_array, function (err, result) {
console.log("Sum is ", result);
});

Resources