How to workaround firestore batch limit in cloud functions [duplicate] - node.js

This question already has answers here:
How can I update more than 500 docs in Firestore using Batch?
(8 answers)
Closed 3 years ago.
I am trying to delete a user's entire set of records from Firestore using Cloud Functions, but I encounter the following error:
INVALID_ARGUMENT: maximum 500 writes allowed per request
How can I work around this?
const functions = require('firebase-functions');
const admin = require('firebase-admin');
admin.initializeApp();
exports.deleteUserContacts = functions
.runWith({
timeoutSeconds: 540,
memory: '2GB'
})
.https.onCall((data,context) => {
// ...
return admin.firestore().collection('contacts').where('uid','==',context.auth.uid).get()
.then(snap => {
if (snap.size === 0) {
console.log(`User ${context.auth.uid} has no contacts to delete`);
return 'user has no contacts to delete';
}
let batch = admin.firestore().batch();
snap.forEach(doc => {
batch.delete(doc.ref)
});
return batch.commit(); //INVALID_ARGUMENT: maximum 500 writes allowed per request
})
.then(() => {
console.log(`Transaction success on user ${context.auth.uid}`);
return 'Transaction success';
})
.catch(error => {
console.log(`Transaction failure on user ${context.auth.uid}`,error);
throw new functions.https.HttpsError(
'unknown',
'Transaction failure'
);
});
});

To summarize the final solution according to Stefan, as it worked for me.
If you spot any error, please comment.
const functions = require('firebase-functions');
const admin = require('firebase-admin');
admin.initializeApp();
exports.deleteUserContacts = functions
.runWith({
timeoutSeconds: 540,
memory: '2GB'
})
.https.onCall((data,context) => {
//...
return admin.firestore().collection('contacts').where('uid','==',context.auth.uid).get()
.then(snap => {
if (snap.size === 0) {
console.log(`User ${context.auth.uid} has no contacts to delete`);
return 'user has no contacts to delete';
}
const batchArray = [admin.firestore().batch()];
let operationCounter = 0;
let batchIndex = 0;
snap.forEach(doc => {
batchArray[batchIndex].delete(doc.ref);
operationCounter++;
if (operationCounter === 499) {
batchArray.push(admin.firestore().batch());
batchIndex++;
operationCounter = 0;
}
});
// commit all batches and wait for them to finish
return Promise.all(batchArray.map(batch => batch.commit()));
})
.then(() => {
console.log(`Transaction success on user ${context.auth.uid}`);
return 'Transaction success';
})
.catch(error => {
console.log(`Transaction failure on user ${context.auth.uid}`,error);
throw new functions.https.HttpsError(
'unknown',
'Transaction failure'
);
});
});

As you might know, this is due to the limit on transactions and batched writes. In order to circumvent that limitation, I found this SO post from someone who had a similar issue to yours.
Most of the recommendations boil down to splitting the writes into batches of fewer than 500 operations and committing each one.
Have a look at the solution there.
Hope this helps.
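A minimal sketch of that idea, assuming a contacts collection keyed by uid as in the question (the chunk size and helper name are illustrative, not taken from the linked post):
// Sketch: delete the documents matched by a query in chunks of at most 500,
// committing one batch per chunk so no single commit exceeds Firestore's limit.
const admin = require('firebase-admin');

async function deleteInChunks(query, chunkSize = 500) {
  const snap = await query.get();
  const docs = snap.docs;
  const db = admin.firestore();
  for (let i = 0; i < docs.length; i += chunkSize) {
    const batch = db.batch();
    docs.slice(i, i + chunkSize).forEach((doc) => batch.delete(doc.ref));
    await batch.commit(); // wait for this chunk before starting the next one
  }
  return docs.length;
}

// Hypothetical usage: delete all contacts belonging to one user.
// await deleteInChunks(admin.firestore().collection('contacts').where('uid', '==', uid));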

Related

Update large number of documents

I am trying to update the content of a large number of documents in Firebase. I have tried the following:
First, reading all documents on the client side, looping over them, and updating each one by reference.
The problem here is that I am doing an intensive operation on the client side, which would be unpredictable, so I switched to Firebase Functions.
Second, reading all documents in a Firebase Function and then updating them using BulkWriter.
Here's the code:
exports.testingFunction1 = functions.runWith({
timeoutSeconds: 540,
memory: "8GB",
}).https.onCall(async (data, context) => {
const storeId = data.text;
if (!(typeof storeId === 'string') || storeId.length === 0) {
throw new functions.https.HttpsError('invalid-argument', 'The function must be called with ' +
'one arguments containing the storeId.');
}
if (!context.auth) {
throw new functions.https.HttpsError('failed-precondition', 'The function must be called ' +
'while authenticated.');
}
const uid = context.auth.uid;
const name = context.auth.token.name || null;
const picture = context.auth.token.picture || null;
const email = context.auth.token.email || null;
let bulk1 = admin.firestore().bulkWriter();
let products = await admin.firestore().collection("Products").get(); // here's the source of the problem
products.forEach((document) => {
bulk1.update(document.ref, { "IsStorePublished": true });
});
await bulk1.flush().then(() => {
return { "result": "Success!" };
})
.catch((error) => {
throw new functions.https.HttpsError('unknown', error.message, error);
});
return { "Result": "Success" }
});
The problem appears when I try to read more than about 8,000 documents at once; I get the following error, even though I have raised the function's memory limit to the maximum:
FATAL ERROR: Reached heap limit Allocation failed - JavaScript heap
out of memory
Is there a good way to achieve this task?
For anyone interested, I solved the issue as follows:
The problem with reading a large amount of data to update it in Firebase Functions is that the memory fills up, so I wrote a recursive function that reads 500 items at a time and applies the BulkWriter to only those 500 documents, using the startAfter pagination method.
This is the main Firebase v1 function: it reads the first 500 items, applies the "operationToDo" function to them, and then calls the recursive function to continue the process.
exports.testingFunction1 = functions.runWith({
timeoutSeconds: 540,
memory: "8GB",
}).https.onCall(async (data, context) => {
const storeId = data.text;
if (!(typeof storeId === 'string') || storeId.length === 0) {
throw new functions.https.HttpsError('invalid-argument', 'The function must be called with ' +
'one arguments "storeId" containing the storeId.');
}
if (!context.auth) {
throw new functions.https.HttpsError('failed-precondition', 'The function must be called ' +
'while authenticated.');
}
const uid = context.auth.uid;
const name = context.auth.token.name || null;
const picture = context.auth.token.picture || null;
const email = context.auth.token.email || null;
let first = admin.firestore()
.collection("Products")
.orderBy("Name").where("Store", '==', storeId)
.limit(500);
await first.get().then(
async (documentSnapshots) => {
if (documentSnapshots.docs.length == 0) {
return;
} else {
await operationToDo(documentSnapshots, "update", { "IsStorePublished": false })
}
let lastVisible =
documentSnapshots.docs[documentSnapshots.size - 1];
await recursivePublishingTheStore(lastVisible, storeId);
},
);
return { "Result": "Success" }
});
The recursive function:
async function recursivePublishingTheStore(lastVisible, storeId) {
let next = admin.firestore()
.collection("Products")
.orderBy("Name").where("Store", '==', storeId)
.startAfter(lastVisible)
.limit(500);
await next.get().then(
async (documentSnapshots) => {
if (documentSnapshots.docs.length == 0) {
return;
} else {
await operationToDo(documentSnapshots, "update", { "IsStorePublished": false })
let lastVisible =
documentSnapshots.docs[documentSnapshots.size - 1];
await recursivePublishingTheStore(lastVisible, storeId);
}
}
);
}
The operation can be anything but in my case it would be "update":
async function operationToDo(documents, operation, value) {
let bulk1 = admin.firestore().bulkWriter();
documents.forEach((document) => {
if (operation == 'update')
bulk1.update(document.ref, value);
});
await bulk1.flush().then(() => {
})
.catch((error) => {
throw new functions.https.HttpsError('unknown', error.message, error);
});
}
The performance of the code above is pretty good: updating about 15k documents takes about 2 minutes.
Note: I chose the number 500 arbitrarily; a different page size might work and perform better, and I will be experimenting with it this week.
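For anyone who prefers to avoid recursion, here is a minimal sketch of the same idea written as a loop; the collection and field names mirror the code above, and the page size of 500 is the same assumption:
// Sketch: page through the query with startAfter and flush one BulkWriter per page,
// so only ~500 documents are held in memory at any time.
async function updateInPages(storeId, pageSize = 500) {
  const db = admin.firestore();
  let last = null;
  for (;;) {
    let query = db.collection('Products')
      .orderBy('Name')
      .where('Store', '==', storeId)
      .limit(pageSize);
    if (last) query = query.startAfter(last);
    const snap = await query.get();
    if (snap.empty) break;
    const writer = db.bulkWriter();
    snap.docs.forEach((doc) => writer.update(doc.ref, { IsStorePublished: false }));
    await writer.close(); // flush this page's writes and wait for them
    last = snap.docs[snap.docs.length - 1];
  }
}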

Batch commit is being called before the batch finishes

I'm getting "Cannot modify a WriteBatch that has been committed." in this snippet of code, and I'm not sure why batch.commit() is not waiting for the forEach to finish.
const db = admin.firestore();
const batch = db.batch();
const channelIds = [];
const messages = data
.map((item) => {
if (!item || !item.phone_number)
return null;
const msg = pupa(message, item);
if (!channelIds.includes(item.channel.id))
channelIds.push(item.channel.id);
return {
...item,
message: msg
};
})
.filter((msg) => msg);
logger.info(`Creating messages/${messageId}/sms entries. [Count = ${messages.length}]`);
// For each channel included in the messages array, fetch its remaining SMS credits.
channelIds.forEach(async (channelId) => {
const subscriptionDetails = (await admin.firestore()
.collection('channels')
.doc(channelId)
.collection('subscription')
.doc('details')
.get()).data();
const creditsRemaining = subscriptionDetails.limits.snapshot.sms_notifications - subscriptionDetails.limits.used.sms_notifications;
// Sends messages according to its respective channel ID and channel remaining credits.
messages
.filter((item) => item.channel.id === channelId)
.slice(0, creditsRemaining)
.forEach((msg) => {
batch.set(db.collection('messages')
.doc(messageId)
.collection('sms')
.doc(), {
phone_number: msg.phone_number,
message: msg.message
});
});
});
await batch.commit();
EDIT: I fixed this issue by wrapping the forEach in a Promise. Thanks!
The callback you pass to channelIds.forEach is async, but forEach does not wait for it, so your await batch.commit() runs before the awaited reads (and the batch.set() calls that follow them) have completed. Collect those per-channel promises and await them, for example with Promise.all, before committing.
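A rough sketch of that approach, reusing the variables from the question (db, batch, messages, channelIds, and messageId are assumed to be in scope exactly as above):
// Sketch: collect the per-channel async work into promises and wait for all of
// them, so every batch.set() has been queued before the batch is committed.
await Promise.all(channelIds.map(async (channelId) => {
  const subscriptionDetails = (await admin.firestore()
    .collection('channels').doc(channelId)
    .collection('subscription').doc('details')
    .get()).data();
  const creditsRemaining =
    subscriptionDetails.limits.snapshot.sms_notifications -
    subscriptionDetails.limits.used.sms_notifications;
  messages
    .filter((item) => item.channel.id === channelId)
    .slice(0, creditsRemaining)
    .forEach((msg) => {
      batch.set(db.collection('messages').doc(messageId).collection('sms').doc(), {
        phone_number: msg.phone_number,
        message: msg.message
      });
    });
}));
await batch.commit(); // now runs only after all batch.set() calls above have been queued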

Firebase Stripe (Error) Promises must be handled appropriately

I am trying to process my payment with Firebase and Stripe and have come across a problem when deploying my function to the cloud: it says 'Promises must be handled appropriately'. I know this is a TSLint compilation error, but I can't figure out why it is being triggered.
Here is my code:
import * as functions from 'firebase-functions';
import * as admin from 'firebase-admin';
admin.initializeApp(functions.config().firebase);
const stripe = require('stripe')(functions.config().stripe.testkey);
exports.stripeCharge = functions.firestore
.document('/payments/{userId}/mypayments/{paymentId}')
.onCreate((snap,event) => {
const payment = snap.data()
const userId = event.params.userId
const paymentId = event.params.paymentId
// checks if payment exists or if it has already been charged
if (!payment || payment.charge) return null;
return admin.firestore()
.doc(`/users/${userId}`)
.get()
.then(snapshot => {
return snapshot
})
.then(customer => {
const amount = payment.price * 100 // amount must be in cents
const idempotency_key = paymentId // prevent duplicate charges
const source = payment.token.id
const currency = 'usd'
const charge = {amount, currency, source}
return stripe.charges.create(charge, { idempotency_key })
})
.then((charge) => {
admin.firestore()//The error keeps referring me to this line
.collection('/payments').doc(userId).collection('mypayments').doc(paymentId)
.set({
charge: charge
}, { merge: true })
})
})
The line generating the error is indicated above.
Actually, with the latest version(s) of Cloud Functions you are not obliged to include a catch() in your promise chain; the platform where the Cloud Function runs will handle the error itself.
Based on this post, What could this be about? [TsLint Error: "Promises must be handled appropriately"], it is apparently an error generated by TSLint (ESLint?).
However, independently of this "error" detected by TSLint, I think you may encounter problems with your Cloud Function because you don't return the last promise of your chain:
return admin.firestore() //HERE YOU RETURN CORRECTLY
.doc(`/users/${userId}`)
.get()
.then(snapshot => {
return snapshot //HERE YOU RETURN CORRECTLY
})
.then(customer => {
const amount = payment.price * 100 // amount must be in cents
const idempotency_key = paymentId // prevent duplicate charges
const source = payment.token.id
const currency = 'usd'
const charge = {amount, currency, source}
return stripe.charges.create(charge, { idempotency_key }) //HERE YOU RETURN CORRECTLY
})
.then((charge) => {
return admin.firestore() //HERE, IN YOUR CODE, YOU DON'T RETURN
.collection('/payments').doc(userId).collection('mypayments').doc(paymentId)
.set({
charge: charge
}, { merge: true })
})
})
Finally figured it out.
Whenever you make a promise chain, it has to end with an error handler, so I fixed this by adding a simple catch:
.then((charge) => {
admin.firestore()
.collection('/payments').doc(userId).collection('mypayments').doc(paymentId)
.set({
charge: charge
}, { merge: true })
.catch(er=>{
console.log(er);
return er
}
)
})

Is this the proper way to write a multi-statement transaction with Neo4j?

I am having a hard time interpreting Neo4j's documentation on transactions. The documentation seems to prefer this approach over explicitly calling tx.commit() and tx.rollback().
Does this look like best practice with respect to multi-statement transactions and neo4j-driver?
const register = async (container, user) => {
const session = driver.session()
const timestamp = Date.now()
const saltRounds = 10
const pwd = await utils.bcrypt.hash(user.password, saltRounds)
try {
//Start registration transaction
const registerUser = session.writeTransaction(async (transaction) => {
const initialCommit = await transaction
.run(`
CREATE (p:Person {
email: '${user.email}',
tel: '${user.tel}',
pwd: '${pwd}',
created: '${timestamp}'
})
RETURN p AS Person
`)
const initialResult = initialCommit.records
.map((x) => {
return {
id: x.get('Person').identity.low,
created: x.get('Person').properties.created
}
})
.shift()
//Generate serial
const data = `${initialResult.id}${initialResult.created}`
const serial = crypto.sha256(data)
const finalCommit = await transaction
.run(`
MATCH (p:Person)
WHERE p.email = '${user.email}'
SET p.serialNumber = '${serial}'
RETURN p AS Person
`)
const finalResult = finalCommit.records
.map((x) => {
return {
serialNumber: x.get('Person').properties.serialNumber,
email: x.get('Person').properties.email,
tel: x.get('Person').properties.tel
}
})
.shift()
//Merge both results for complete person data
return Object.assign({}, initialResult, finalResult)
})
//Commit or rollback transaction
return registerUser
.then((commit) => {
session.close()
return commit
})
.catch((rollback) => {
console.log(`Transaction problem: ${JSON.stringify(rollback, null, 2)}`)
throw [`reg1`]
})
} catch (error) {
session.close()
throw error
}
}
Here is the reduced version of the logic:
const register = (user) => {
const session = driver.session()
const performTransaction = session.writeTransaction(async (tx) => {
const statementOne = await tx.run(queryOne)
const resultOne = statementOne.records.map((x) => x.get('node')).slice()
// Do some work that uses data from statementOne
const statementTwo = await tx.run(queryTwo)
const resultTwo = statementTwo.records.map((x) => x.get('node')).slice()
// Do final processing
return finalResult
})
return performTransaction.then((commit) => {
session.close()
return commit
}).catch((rollback) => {
throw rollback
})
}
Neo4j experts, is the above code the correct use of neo4j-driver?
I would rather do this, because it's more linear and synchronous:
const register = async (user) => {
const session = driver.session()
const tx = session.beginTransaction()
const statementOne = await tx.run(queryOne)
const resultOne = statementOne.records.map((x) => x.get('node')).slice()
// Do some work that uses data from statementOne
const statementTwo = await tx.run(queryTwo)
const resultTwo = statementTwo.records.map((x) => x.get('node')).slice()
// Do final processing
const finalResult = { obj1, ...obj2 }
let success = true
if (success) {
tx.commit()
session.close()
return finalResult
} else {
tx.rollback()
session.close()
return false
}
}
I'm sorry for the long post, but I cannot find any references anywhere, so the community needs this data.
After much more work, this is the syntax we have settled on for multi-statement transactions:
Start session
Start transaction
Use try/catch block after (to enable proper scope in catch block)
Perform queries in the try block
Rollback in the catch block
const someQuery = async () => {
const session = Neo4J.session()
const tx = session.beginTransaction()
try {
const props = {
one: 'Bob',
two: 'Alice'
}
const tx1 = await tx
.run(`
MATCH (n:Node)-[r:REL]-(o:Other)
WHERE n.one = $props.one
AND n.two = $props.two
RETURN n AS One, o AS Two
`, { props })
.then((result) => {
return {
data: '...'
}
})
.catch((err) => {
throw 'Problem in first query. ' + err
})
// Do some work using tx1
const updatedProps = {
_id: 3,
four: 'excellent'
}
const tx2 = await tx
.run(`
MATCH (n:Node)
WHERE id(n) = toInteger($updatedProps._id)
SET n.four = $updatedProps.four
RETURN n AS One
`, { updatedProps })
.then((result) => {
return {
data: '...'
}
})
.catch((err) => {
throw 'Problem in second query. ' + err
})
// Do some work using tx2
if (problem) throw 'Rollback ASAP.'
await tx.commit()
session.close()
return Object.assign({}, tx1, { tx2 })
} catch (e) {
tx.rollback()
session.close()
throw 'someQuery# ' + e
}
}
I will just note that if you are passing numbers into Neo4j, you should wrap them inside the Cypher Query with toInteger() so that they are parsed correctly.
I also included examples of query parameters and how to use them; I found it cleans up the code a little.
Besides that, you can basically chain as many queries inside the transaction as you want, but keep in mind 2 things:
Neo4j write-locks all involved nodes during a transaction, so if you have several processes all performing operations on the same node, you will see that only one process can complete a transaction at a time. We made our own business logic to handle write issues and opted not to use transactions at all. It is working very well so far: writing 100,000 nodes and creating 100,000 relationships takes about 30 seconds spread over 10 processes, whereas doing it in a transaction took 10 times longer. We experience no deadlocking or race conditions using UNWIND (see the sketch at the end of this answer).
You have to await the tx.commit() or it won't commit before it nukes the session.
My opinion is that this type of transaction works great if you are using Polyglot (multiple databases) and need to create a node, and then write a document to MongoDB and then set the Mongo ID on the node.
It's very easy to reason about, and extend as needed.
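For reference, here is a minimal sketch of the UNWIND-style batched write mentioned above; the node label, property names, and the shape of the people array are assumptions for illustration, not taken from the original code:
// Sketch: create many nodes in a single statement with UNWIND instead of one
// query per node. Assumes a configured neo4j-driver `driver` instance.
async function createPeople(driver, people) {
  const session = driver.session()
  try {
    const result = await session.run(
      `UNWIND $people AS person
       CREATE (p:Person {email: person.email, tel: person.tel})
       RETURN count(p) AS created`,
      { people }
    )
    return result.records[0].get('created')
  } finally {
    await session.close()
  }
}

// Hypothetical usage:
// await createPeople(driver, [{ email: 'a@b.c', tel: '123' }, { email: 'd@e.f', tel: '456' }])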

Why is Cloud Functions for Firebase taking 25 seconds?

For clarity, I have other cloud functions that all run intermittently (i.e. from 'cold') in around 2-6 seconds, and they all use the same boilerplate setup of importing an admin instance and exporting the function as a module.
I've seen other similar posts but this is really bugging me. I have a cloud function like so:
const admin = require('../AdminConfig');
const { reportError } = require('../ReportError');
module.exports = (event) => {
const uid = event.params.uid;
const snapshot = event.data;
if (snapshot._newData === null ) {
return null;
}
console.log('Create org begin running: ', Date.now());
const organisation = event.data.val();
const rootRef = admin.database().ref();
const ref = rootRef.child('/organisations').push();
const oid = ref.key;
const userData = {
level: 'owner',
name: organisation.name,
};
const orgShiftInfo = {
name: organisation.name,
startDay: organisation.startDay || 'Monday',
};
const updatedData = {};
updatedData[`/users/${uid}/currentOrg`] = oid;
updatedData[`/users/${uid}/organisations/${oid}`] = userData;
updatedData[`/organisations/${oid}`] = organisation;
updatedData[`/org_shift_info/${oid}`] = orgShiftInfo;
rootRef.update(updatedData, (err) => {
if (err) {
return rootRef.child(`/users/${uid}/addOrgStatus`).set({ error: true })
.then(() => {
console.log(`error adding organisation for ${uid}: `, err);
return reportError(err, { uid });
});
}
console.log('Create org wrote succesfully: ', Date.now());
return rootRef.child(`/users/${uid}/addOrgStatus`).set({ success: true });
});
}
I understand the 'cold start' thing, but I think something is seriously wrong for it to take 25 seconds. The logs don't show any errors.
Is there some deeper way I can debug this to try and figure out why it's taking so long? It's unusable at the moment. Thanks a lot.
Solved:
Sorry,
I misunderstood the API a bit. I should have watched the promise video first!
I needed to put
return rootRef.update...
instead of
rootRef.update...
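For completeness, a sketch of what the corrected ending of that function could look like, returning the promise chain so the function instance stays alive until the write finishes (based on the code above, not a verbatim fix from the answer):
// Sketch: return the update promise so Cloud Functions waits for the writes to complete.
return rootRef.update(updatedData)
  .then(() => {
    console.log('Create org wrote successfully: ', Date.now());
    return rootRef.child(`/users/${uid}/addOrgStatus`).set({ success: true });
  })
  .catch((err) => {
    console.log(`error adding organisation for ${uid}: `, err);
    return rootRef.child(`/users/${uid}/addOrgStatus`).set({ error: true })
      .then(() => reportError(err, { uid }));
  });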
