Get all documents from Firestore Collection using Firebase Functions - node.js

I am trying to get all of the documents in one collection named 'repeated_tasks' in my Firebase Function.
I tried using the following code:
Is it possible to get all documents in a Firestore Cloud Function?, but this does not seem to work for me. I am trying to get the information, so that I can update every document in the collection, to set one field, to false. I have the following code:
exports.finishedUpdate = functions.pubsub.schedule('0 3 * * *').timeZone('Europe/Amsterdam').onRun((context) => {
// This is part of the above mentioned stack question
var citiesRef = database.collection('repeated_tasks');
const snapshot = citiesRef.get();
snapshot.forEach(doc => {
console.log(doc.id, '=>', doc.data());
});
// A way to update all of the documents in the repeated_tasks collection has to be found
// This part works, for only the two given document ids
var list = ['qfrxHTZAJZTJDQpA83fjsM03159438695', 'qfrxHTZAJZQTpM3pA83fjsM0315217389'];
for (var i = 0; i < list.length; i++) {
database.doc('repeated_tasks/' + list[i]).update({'finished': false});
}
return console.log("Done");
})
Help is much appreciated, as I can't seem to find any related information anywhere, except for the stack overflow page, which didn't help. I am using Node JS (Javascript) to set the functions.

By using the syntax of getting the information from Firestore, I was able to also update it in Firebase Functions and could update all, using the following code:
const reference = database.collection('repeated_tasks/');
const snapshot = await reference.where('finished', '==', true).get();
if (snapshot.empty) {
console.log('no matching documents');
return;
}
snapshot.forEach(doc => {
database.doc('repeated_tasks/' + doc.id).update({'finished': false});
});

Related

Correct way to organise this process in Node

I need some advice on how to structure this function as at the moment it is not happening in the correct order due to node being asynchronous.
This is the flow I want to achieve; I don't need help with the code itself but with the order to achieve the end results and any suggestions on how to make it efficient
Node routes a GET request to my controller.
Controller reads a .csv file on local system and opens a read stream using fs module
Then use csv-parse module to convert that to an array line by line (many 100,000's of lines)
Start a try/catch block
With the current row from the csv, take a value and try to find it in a MongoDB
If found, take the ID and store the line from the CSV and this id as a foreign ID in a separate database
If not found, create an entry into the DB and take the new ID and then do 6.
Print out to terminal the row number being worked on (ideally at some point I would like to be able to send this value to the page and have it update like a progress bar as the rows are completed)
Here is a small part of the code structure that I am currently using;
const fs = require('fs');
const parse = require('csv-parse');
function addDataOne(req, id) {
const modelOneInstance = new InstanceOne({ ...code });
const resultOne = modelOneInstance.save();
return resultOne;
}
function addDataTwo(req, id) {
const modelTwoInstance = new InstanceTwo({ ...code });
const resultTwo = modelTwoInstance.save();
return resultTwo;
}
exports.add_data = (req, res) => {
const fileSys = 'public/data/';
const parsedData = [];
let i = 0;
fs.createReadStream(`${fileSys}${req.query.file}`)
.pipe(parse({}))
.on('data', (dataRow) => {
let RowObj = {
one: dataRow[0],
two: dataRow[1],
three: dataRow[2],
etc,
etc
};
try {
ModelOne.find(
{ propertyone: RowObj.one, propertytwo: RowObj.two },
'_id, foreign_id'
).exec((err, searchProp) => {
if (err) {
console.log(err);
} else {
if (searchProp.length > 1) {
console.log('too many returned from find function');
}
if (searchProp.length === 1) {
addDataOne(RowObj, searchProp[0]).then((result) => {
searchProp[0].foreign_id.push(result._id);
searchProp[0].save();
});
}
if (searchProp.length === 0) {
let resultAddProp = null;
addDataTwo(RowObj).then((result) => {
resultAddProp = result;
addDataOne(req, resultAddProp._id).then((result) => {
resultAddProp.foreign_id.push(result._id);
resultAddProp.save();
});
});
}
}
});
} catch (error) {
console.log(error);
}
i++;
let iString = i.toString();
process.stdout.clearLine();
process.stdout.cursorTo(0);
process.stdout.write(iString);
})
.on('end', () => {
res.send('added');
});
};
I have tried to make the functions use async/await but it seems to conflict with the fs.openReadStream or csv parse functionality, probably due to my inexperience and lack of correct use of code...
I appreciate that this is a long question about the fundamentals of the code but just some tips/advice/pointers on how to get this going would be appreciated. I had it working when the data was sent one at a time via a post request from postman but can't implement the next stage which is to read from the csv file which contains many records
First of all you can make the following checks into one query:
if (searchProp.length === 1) {
if (searchProp.length === 0) {
Use upsert option in mongodb findOneAndUpdate query to update or upsert.
Secondly don't do this in main thread. Use a queue mechanism it will be much more efficient.
Queue which I personally use is Bull Queue.
https://github.com/OptimalBits/bull#basic-usage
This also provides the functionality you need of showing progress.
Also regarding using Async Await with ReadStream, a lot of example can be found on net such as : https://humanwhocodes.com/snippets/2019/05/nodejs-read-stream-promise/

Using batch to recursively update documents only works on small collection

I have a collection of teams containing around 80 000 documents. Every Monday I would like to reset the scores of every team using firebase cloud functions. This is my function:
exports.resetOrgScore = functions.runWith(runtimeOpts).pubsub.schedule("every monday 00:00").timeZone("Europe/Oslo").onRun(async (context) => {
let batch = admin.firestore().batch();
let count = 0;
let overallCount = 0;
const orgDocs = await admin.firestore().collection("teams").get();
orgDocs.forEach(async(doc) => {
batch.update(doc.ref, {score:0.0});
if (++count >= 500 || ++overallCount >= orgDocs.docs.length) {
await batch.commit();
batch = admin.firestore().batch();
count = 0;
}
});
});
I tried running the function in a smaller collection of 10 documents and it's working fine, but when running the function in the "teams" collection it returns "Cannot modify a WriteBatch that has been committed". I tried returning the promise like this(code below) but that doesn't fix the problem. Thanks in advance :)
return await batch.commit().then(function () {
batch = admin.firestore().batch();
count = 0;
return null;
});
There are three problems in your code:
You use async/await with forEach() which is not recommended: The problem is that the callback passed to forEach() is not being awaited, see more explanations here or here.
As detailed in the error you "Cannot modify a WriteBatch that has been committed". With await batch.commit(); batch = admin.firestore().batch(); it's exactly what you are doing.
As important, you don't return the promise returned by the asynchronous methods. See here for more details.
You'll find in the doc (see Node.js tab) a code which allows to delete, by recursively using a batch, all the docs of a collection. It's easy to adapt it to update the docs, as follows. Note that we use a dateUpdated flag to select the docs for each new batch: with the original code, the docs were deleted so no need for a flag...
const runtimeOpts = {
timeoutSeconds: 540,
memory: '1GB',
};
exports.resetOrgScore = functions
.runWith(runtimeOpts)
.pubsub
.schedule("every monday 00:00")
.timeZone("Europe/Oslo")
.onRun((context) => {
return new Promise((resolve, reject) => {
deleteQueryBatch(resolve).catch(reject);
});
});
async function deleteQueryBatch(resolve) {
const db = admin.firestore();
const snapshot = await db
.collection('teams')
.where('dateUpdated', '==', "20210302")
.orderBy('__name__')
.limit(499)
.get();
const batchSize = snapshot.size;
if (batchSize === 0) {
// When there are no documents left, we are done
resolve();
return;
}
// Delete documents in a batch
const batch = db.batch();
snapshot.docs.forEach((doc) => {
batch.update(doc.ref, { score:0.0, dateUpdated: "20210303" });
});
await batch.commit();
// Recurse on the next process tick, to avoid
// exploding the stack.
process.nextTick(() => {
deleteQueryBatch(resolve);
});
}
Note that the above Cloud Function is configured with the maximum value for the time out, i.e. 9 minutes.
If it appears that all your docs cannot be updated within 9 minutes, you will need to find another approach, for example using the Admin SDK from one of your server, or cutting the work into pieces and run the CF several times.

Firebase Cloud Function Increment Counter

Per Firebase cloud functions docs, "Events are delivered at least once, but a single event may result in multiple function invocations. Avoid depending on exactly-once mechanics, and write idempotent functions."
When looking at a firestore cloud function doc example below of a restaurant rating, they are using an increment counter to calculate the total number of ratings. What are some of the best ways to maintain the accuracy of this counter by using an idempotent function?
Is it reasonable to store the context.eventId in a subcollection document field and only execute the function if the new context.eventId is different?
function addRating(restaurantRef, rating) {
// Create a reference for a new rating, for use inside the transaction
var ratingRef = restaurantRef.collection('ratings').doc();
// In a transaction, add the new rating and update the aggregate totals
return db.runTransaction((transaction) => {
return transaction.get(restaurantRef).then((res) => {
if (!res.exists) {
throw "Document does not exist!";
}
// Compute new number of ratings
var newNumRatings = res.data().numRatings + 1;
// Compute new average rating
var oldRatingTotal = res.data().avgRating * res.data().numRatings;
var newAvgRating = (oldRatingTotal + rating) / newNumRatings;
// Commit to Firestore
transaction.update(restaurantRef, {
numRatings: newNumRatings,
avgRating: newAvgRating
});
transaction.set(ratingRef, { rating: rating });
});
});
}
Is it reasonable to store the context.eventId in a subcollection
document field and only execute the function if the new
context.eventId is different?
Yes, for your use case, using the Cloud Function eventId is the best solution to make you Cloud Function idempotent. I guess you have already watched this Firebase video.
In the Firebase doc from which you have taken the code in your question, you will find at the bottom, the similar code for a Cloud Function. I've adapted this code as follows, in order to check if a doc with ID = eventId exists in a dedicated ratingUpdateIds subcollection:
exports.aggregateRatings = functions.firestore
.document('restaurants/{restId}/ratings/{ratingId}')
.onWrite(async (change, context) => {
try {
// Get value of the newly added rating
const ratingVal = change.after.data().rating;
const ratingUpdateId = context.eventId;
// Get a reference to the restaurant
const restRef = db.collection('restaurants').doc(context.params.restId);
// Get a reference to the ratingUpdateId doc
const ratingUpdateIdRef = restRef.collection("ratingUpdateIds").doc(ratingUpdateId);
// Update aggregations in a transaction
await db.runTransaction(async (transaction) => {
const ratingUpdateIdDoc = await transaction.get(ratingUpdateIdRef);
if (ratingUpdateIdDoc.exists) {
// The CF is retried
throw "The CF is being retried";
}
const restDoc = await transaction.get(restRef);
// Compute new number of ratings
const newNumRatings = restDoc.data().numRatings + 1;
// Compute new average rating
const oldRatingTotal = restDoc.data().avgRating * restDoc.data().numRatings;
const newAvgRating = (oldRatingTotal + ratingVal) / newNumRatings;
// Update restaurant info and set ratingUpdateIdDoc
transaction
.update(restRef, {
avgRating: newAvgRating,
numRatings: newNumRatings
})
.set(ratingUpdateIdRef, { ratingUpdateId })
});
return null;
} catch (error) {
console.log(error);
return null;
}
});
PS: I made the assumption that the Cloud Function eventId can be used as a Firestore document ID. I didn't find any doc or info stating the opposite. In case using the eventId as an ID would be a problem, since you execute the transaction in a Cloud Function (and therefore use the Admin SDK), you could query the document based on a field value (where you store the eventId) instead of getting it through a Reference based on its ID.

Copy single document in Firestore and edit fields in new copy

I need a utility function that can copy a single document in Firestore from one collection to another.
This excellent answer provides a way to copy a collection. But I can't work out how to modify it to copy a single document.
For instance I have this current structure:
collection1/abc123000000
The document abc123000000 has fields name and email with content Joe Bloggs and joe#bloggs.com respectively.
I wish to copy the xyz123000001 document from collection1 and all its fields and data to a new document in collection2:
collection2/xyz910110000
I would happily just run the command from the terminal to achieve this.
Ideally of course, it would be useful to have a function that looped through and copied all documents from one collection to the other dependent on the content of a field!
Many thanks in advance for any help.
[Original question title edited to assist in future searches as extra info added into the answer.]
Yo can do this by reading the collection, iterating on it and for each element of the collection 1 write it in collection 2.
Here is a quick example:
function copy(db){
db.collection('collection1').get()
.then((snapshot) => {
snapshot.forEach((doc) => {
// We are iterating on the documents of the collection
let data = doc.data();
console.log(doc.id, '=>', doc.data());
if(<PUT_CONDITIONS_HERE>){
//we have read the document till here
let setDoc = db.collection('collection2').doc(doc.id).set(data);
setDoc.then(res => {
console.log('Set: ', res);
});
}
});
})
.catch((err) => {
console.log('Error getting documents', err);
});
}
For more examples on how to read and write using the nodejs CLI you can go to the Github repository here
Also this can be done with a query from collection one to filter at that level, and iterate over less files. However this depends on the conditions you have to determine if it needs to be copied or not.
Many thanks to José Soní and to Lahiru Chandima on this post about copying collections for giving me the key bits of information which allowed me to solve this - outstandingly helpful!
I've found it really frustrating not having all the bits of the puzzle to solve this issue...so I am posting a heavily commented version below which I hope will be of use to anyone coming after. Apologies to anyone who already knows all this stuff...this answer is not for you ;-)
// Create a file with this code.
// In your Firestore DB, create the destination collection.
// const firebaseUrl refers to your databaseUrl which you can find in the project settings of your Firebase console.
// Save as filename.js within the directory where you have initialised Firebase.
// Ensure Node.js is installed and that node is available, try node --version
// Then run node filename.js from the terminal.
const firebaseAdmin = require('firebase-admin');
const serviceAccount = '../../firebase-service-account-key.json';
const firebaseUrl = 'https://my-app.firebaseio.com';
firebaseAdmin.initializeApp({
credential: firebaseAdmin.credential.cert(require(serviceAccount)),
databaseURL: firebaseUrl
});
const db = firebaseAdmin.firestore();
function copy(db){
db.collection('collectionName').get()
.then((snapshot) => {
snapshot.forEach((doc) => {
// We are iterating on the documents of the collection
let data = doc.data();
console.log(doc.id, '=>', doc.data());
if(doc.id == 'randomDocIdAssignedByFirestore'){
// We have read the document till here
//From here: we create a new document in the collection
// Change some of the data in the fields in the new document
let id = 'newMeaningfulDocId'; // Instead of allowing Firestore to create a random ID
data.title = 'Meaningful Title'; // Write new data into the field called title
data.description = 'Meaningful Description'; // Write new data into the field called description
/*
We are using a Firestore Reference type field in our DB to reference a parent doc.
If you just used data.parent = '/category/OS_EnDept'; you would end up writing a string field type.
However, you want to use a Reference Type AND you want the parent to be collectionName
do it like this:
data.parent = doc;
We, however, want to be able to write in different parent collection names - hence the next line.
*/
data.parent = db.collection('collectionName').doc('desiredParentDocId')
let setDoc = db.collection('collectionName').doc(id).set(data);
setDoc.then(res => {
console.log('Set: ', res);
});
}
});
})
.catch((err) => {
console.log('Error getting documents', err);
});
}
// Call the function when we run this file...
copy(db);

Index messed up if I upload more than one file at once

I've got the following firebase function to run once a file is uploaded to firebase storage.
It basically gets its URL and saves a reference to it in firestore. I need to save them in a way so that I can query them randomly from my client. Indexes seem to be to best fit this requirement.
for the firestore reference I need the following things:
doc ids must go from 0 to n (n beeing the index of the last
document)
have a --stats-- doc keeping track of n (gets
incremented every time a document is uploaded)
To achieve this I've written the following node.js script:
const incrementIndex = admin.firestore.FieldValue.increment(1);
export const image_from_storage_to_firestore = functions.storage
.object()
.onFinalize(async object => {
const bucket = gcs.bucket(object.bucket);
const filePath = object.name;
const splittedPath = filePath!.split("/");
// se siamo nelle immagini
// path = emotions/$emotion/photos/$photographer/file.jpeg
if (splittedPath[0] === "emotions" && splittedPath[2] === "photos") {
const emotion = splittedPath[1];
const photographer = splittedPath[3];
const file = bucket.file(filePath!);
const indexRef = admin.firestore().collection("images")
.doc("emotions").collection(emotion).doc("--stats--");
const index = await indexRef.get().then((doc) => {
if (!doc.exists) {
return 0;
} else {
return doc.data()!.index;
}
});
if (index === 0) {
await admin.firestore().collection("images")
.doc("emotions")
.collection(emotion)
.doc("--stats--")
.set({index: 0});
}
console.log("(GOT INDEX): " + index);
let imageURL;
await file
.getSignedUrl({
action: "read",
expires: "03-09-2491"
})
.then(signedUrls => {
imageURL = signedUrls[0];
});
console.log("(GOT URL): " + imageURL);
var docRef = admin.firestore()
.collection("images")
.doc("emotions")
.collection(emotion)
.doc(String(index));
console.log("uploading...");
await indexRef.update({index: incrementIndex});
await docRef.set({ imageURL: imageURL, photographer: photographer });
console.log("finished");
return true;
}
return false;
});
Getting to the problem:
It works perfectly if I upload the files one by one.
It messes up the index if I upload more than one file at once, because two concurrent uploads will read the same index value from --stats-- and one will overwrite the other.
How would you solve this problem? would you use another approach instead of the indexed one?
You should use a Transaction in which you:
read the value of the index (from "--stats--" document),
write the new index and
write the value of the imageURL in the "emotion" doc.
See also the reference docs about transactions.
This way, if the index value is changed in the "--stats--" document while the Transaction is being executed, the Cloud Function can catch the Transaction failure and generates an error which finishes it.
In parallel, you will need to enable retries for this background Cloud Function, in order it is retried if the Transaction failed in a previous run.
See this documentation item https://firebase.google.com/docs/functions/retries, including the video from Doug Stevenson which is embedded in the doc.

Resources