Modifying current reference causes maximum call stack size exceeded crash - Node.js

In Node.js, using version 4.1.0 of the firebase-admin SDK, I have a listener that watches a message-queue reference in my database, processes messages, and thereafter tries to remove them from the queue reference.
When I have more than a certain number of records (1354 on my machine) in the queue prior to starting the script, the script crashes with a maximum call stack size exceeded error.
The strange thing is that this only occurs when I have 1354+ values in the queue prior to script start. Any fewer than this and the problem vanishes.
I don't know why this is happening, but I do know that it only occurs when I try to modify/remove the object at the snapshot reference.
Here is a self-contained MCVE with the problem area marked in the comments:
var admin = require("firebase-admin");

var serviceAccount = require("<ADMIN JSON FILE PATH GOES HERE>");

admin.initializeApp({
  credential: admin.credential.cert(serviceAccount),
  databaseURL: "<FIREBASE URL GOES HERE>"
});

var ref = admin.database().ref();

// the number of messages to generate for the queue. when this is >= 1354 (on my machine) the program crashes;
// if it's less than that, it works perfectly fine. your tipping point may vary
var amount = 1354;

// message payload to deliver to the queue <amount> times
var payload = {};

// message generation loop
for (var i = 0; i < amount; i++) {
  var message = {msg: "hello"};
  payload['message-queue/' + ref.push().key] = message;
}

// add the generated messages simultaneously to message-queue
ref.update(payload).then(function () {
  // 'child_added' listener that causes the crash when there are 1354+ pre-existing messages in the queue prior to application start
  ref.child('message-queue').on('child_added', function (snapshot) {
    var msgKey = snapshot.key;
    var msgContents = snapshot.val().msg;
    // do something with msgContents (e.g. sanitize message and deliver to some user's message-received node in the firebase)

    // ***THIS*** is what causes the crash. if you remove this line of code, the program does not crash. it seems that any
    // modification/removal to/of the current <msgKey> node does the same
    ref.child('message-queue').child(msgKey).remove();
  });
});
And here is the stack trace of the crash:
FIREBASE WARNING: Exception was thrown by user callback. RangeError: Maximum call stack size exceeded
at RegExp.exec (native)
at RegExp.test (native)
at tc (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:63:86)
at ub (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:60:136)
at vb (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:43:1228)
at Xb.h.remove (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:52:44)
at Xb.h.remove (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:52:136)
at Xb.h.remove (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:52:136)
at Xb.h.remove (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:52:136)
at Xb.h.remove (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:52:136)
<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:63
[... minified firebase-admin source snippet elided ...]
RangeError: Maximum call stack size exceeded
at RegExp.exec (native)
at RegExp.test (native)
at tc (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:63:86)
at ub (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:60:136)
at vb (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:43:1228)
at Xb.h.remove (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:52:44)
at Xb.h.remove (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:52:136)
at Xb.h.remove (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:52:136)
at Xb.h.remove (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:52:136)
at Xb.h.remove (<MY_PROJECT_PATH>\node_modules\firebase-admin\lib\database\database.js:52:136)

Even though you aren't processing its result, the call to remove() is still async/promise-based and allocates a context to run in. Promise contexts are fairly big, and it's no surprise you're running out of stack here. If you really need a pattern like this to work properly, you could batch the updates: have child_added insert the values into a "to be deleted" array, then process that array a batch of entries at a time as a separate task until it is empty, as in the sketch below. There are plenty of helper methods for working with arrays and Promises in the BlueBird (http://bluebirdjs.com/) library that could help with this (e.g. map/mapSeries).
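A rough sketch of that batching idea with plain promises rather than BlueBird (the batch size of 100, the 500 ms idle delay, and the drainDeletes name are all arbitrary choices of mine): in the Realtime Database API, setting a key to null inside a multi-path update() removes that node, so each batch costs a single write.
var pendingDeletes = [];

ref.child('message-queue').on('child_added', function (snapshot) {
  // process snapshot.val() here, then queue the key instead of removing it inline
  pendingDeletes.push(snapshot.key);
});

// separate task that drains the queue one batch at a time
function drainDeletes() {
  if (pendingDeletes.length === 0) {
    setTimeout(drainDeletes, 500); // nothing queued; check again shortly
    return;
  }
  var keys = pendingDeletes.splice(0, 100); // take up to 100 queued keys
  var updates = {};
  keys.forEach(function (key) {
    updates['message-queue/' + key] = null; // null deletes the node
  });
  ref.update(updates).then(drainDeletes); // start the next batch only after this one commits
}
drainDeletes();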
This isn't really a Firebase problem - every other VM (PHP, Java, etc.) has stack size limits to deal with as well. Like most others, V8's is tunable, and if you need to, you can query (and adjust) it using a command like:
node --v8-options | grep -B0 -A1 stack_size
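To actually raise the limit, the same V8 flag can be passed when launching the script; for example (the value is in kilobytes, and 8192 and app.js are placeholders):
node --stack-size=8192 app.js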
But I believe your best approach is to structure your program to minimize your stack usage for this deletion pattern. Increasing stack size is always going to leave you open to the "is it big enough now?" question.

Minimize the memory allocated on the stack. For example, instead of a static array inside a function, use a dynamically allocated array.

Related

Optimizing file parse and SNS publish of large record set

I have an 85mb data file with 110k text records in it. I need to parse each of these records, and publish an SNS message to a topic for each record. I am doing this successfully, but the Lambda function requires a lot of time to run, as well as a large amount of memory. Consider the following:
const parse = async (key) => {
  // get the 85mb file from S3. this takes 3 seconds
  // I could probably do this via a stream to cut down on memory...
  let file = await getFile(key);

  // parse the data by new line
  const rows = file.split("\n");

  // free some memory now; this free'd up ~300mb of memory in my tests
  file = null;

  const requests = []; // collect the publish promises
  for (let i = 0; i < rows.length; i++) {
    // ... parse the row and build a small JS object from it
    // publish to SNS. assume publishMsg returns a promise after a successful SNS push
    requests.push(publishMsg(data));
  }

  // wait for all to finish
  await Promise.all(requests);
  return 1;
};
The Lambda function times out with this code at 90 seconds (the current limit I have set). I could raise this limit, as well as the memory (currently at 1024mb), and likely solve my issue. But none of the SNS publish calls take place before the function hits the timeout. Why?
Let's say 10k rows process before the function hits the timeout. Since I am submitting the publishes asynchronously, shouldn't several of them complete regardless of the timeout? It seems they only run if the entire function completes.
I have run a test where I cut the data down to 15k rows, and it runs without any issue, in roughly 15 seconds.
So the question: why are the async calls not firing prior to the function timeout, and does anyone have input on how I can optimize this without moving away from Lambda?
Lambda config: Node.js 10.x, 1024 MB, 90-second timeout
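(An aside on the stream idea from the comment in the code above: a rough sketch using the AWS SDK v2's getObject().createReadStream() with the readline module. The bucket name is a placeholder, and publishMsg and the row parsing stand in for the question's own helpers.)
const AWS = require('aws-sdk');
const readline = require('readline');

const s3 = new AWS.S3();

// stream the object line by line instead of holding all 85mb in memory at once
const parseViaStream = (key) => new Promise((resolve, reject) => {
  const input = s3.getObject({ Bucket: 'my-bucket', Key: key }).createReadStream();
  const rl = readline.createInterface({ input });
  const requests = [];
  rl.on('line', (row) => {
    const data = { raw: row }; // ...parse the row and build a small JS object from it
    requests.push(publishMsg(data));
  });
  rl.on('close', () => resolve(Promise.all(requests)));
  rl.on('error', reject);
});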

Querying DB2 every 15 seconds causing memory leak in NodeJS

I have an application which checks for new entries in DB2 every 15 seconds on the iSeries, using IBM's idb-connector. I have async functions which return the result of the query to socket.io, which emits an event with the data included to the front end. I've narrowed the memory leak down to the async functions. I've read multiple articles on common memory leak causes and how to diagnose them.
MDN: memory management
Rising Stack: garbage collection explained
Marmelab: Finding And Fixing Node.js Memory Leaks: A Practical Guide
But I'm still not seeing where the problem is. Also, I'm unable to get permission to install node-gyp on the system, which means most memory management tools are off limits, as memwatch, heapdump, and the like need node-gyp to install. Here's an example of the functions' basic structure:
const { dbconn, dbstmt } = require('idb-connector'); // require idb-connector

async function queryDB() {
  const sSql = `SELECT * FROM LIBNAME.TABLE LIMIT 500`;
  // create new promise
  let promise = new Promise(function (resolve, reject) {
    // create new connection
    const connection = new dbconn();
    connection.conn("*LOCAL");
    const statement = new dbstmt(connection);
    statement.exec(sSql, (rows, err) => {
      if (err) {
        throw err;
      }
      let ticks = rows;
      statement.close();
      connection.disconn();
      connection.close();
      resolve(ticks.length); // resolve promise with varying data
    });
  });
  let result = await promise; // await promise
  return result;
}

async function getNewData() {
  const data = await queryDB(); // get new data
  io.emit('newData', data); // push to front end
  setTimeout(getNewData, 2000); // check again in 2 seconds
}
Any ideas on where the leak is? Am I using async/await incorrectly? Or am I creating/destroying DB connections improperly? Any help figuring out why this code is leaky would be much appreciated!!
Edit: Forgot to mention that I have limited control over the backend processes, as they are handled by another team. I'm only retrieving the data they populate the DB with and adding it to a web page.
Edit 2: I think I've narrowed it down to the DB connections not being cleaned up properly. But as far as I can tell, I've followed the instructions suggested on their GitHub repo.
I don't know the answer to your specific question, but instead of issuing a query every 15 seconds, I might go about this in a different way. Reason being that I don't generally like fishing expeditions when the environment can tell me an event occurred.
So in that vein, you might want to try a database trigger that loads the key of the row into a data queue on add, or even on change or delete if necessary. Then you can just put in an async call to wait for a record on the data queue. This is more real-time, and the event handler is only called when a record shows up. The handler can get the specific record from the database, since you know its key. Data queues are much faster than database I/O, and place little overhead on the trigger.
I see a couple of potential advantages with this method:
You aren't issuing dozens of queries that may or may not return data.
The event would fire the instant a record is added to the table, rather than 15 seconds later.
You don't have to code for the possibility of one or more new records; it will always be one, the record identified in the data queue.
Yes, you have to close the connection.
Don't make const data; you don't need the promise, since by default statement.exec is async and handles it via return result;.
Keep the setTimeout(getNewData, 2000); // check again in 2 seconds
line outside getNewData, otherwise it becomes a recursive infinite loop.
Sample code
const { dbconn, dbstmt } = require('idb-connector');

const sql = 'SELECT * FROM QIWS.QCUSTCDT';
const connection = new dbconn(); // Create a connection object.
connection.conn('*LOCAL'); // Connect to a database.
const statement = new dbstmt(connection); // Create a statement object on the connection.
statement.exec(sql, (result, error) => {
  if (error) {
    throw error;
  }
  console.log(`Result Set: ${JSON.stringify(result)}`);
  statement.close(); // Clean up the statement object.
  connection.disconn(); // Disconnect from the database.
  connection.close(); // Clean up the connection object.
  return result;
});

Change

async function getNewData() {
  const data = await queryDB(); // get new data
  io.emit('newData', data); // push to front end
  setTimeout(getNewData, 2000); // check again in 2 seconds
}

to

async function getNewData() {
  const data = await queryDB(); // get new data
  io.emit('newData', data); // push to front end
}
setTimeout(getNewData, 2000); // check again in 2 seconds
The first thing to notice is a possibly open database connection in case of an error:
if (err) {
  throw err;
}
Also, on success, connection.disconn() and connection.close() return boolean values that tell whether the operation succeeded (according to the documentation).
A pile-up of connection objects in the 3rd-party library is always a possible scenario.
I would check those.
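A minimal sketch of checking those return values (the warning messages are mine; the boolean-on-success behaviour is the documented one cited above):
statement.close();
if (!connection.disconn()) {
  console.warn('disconn() reported failure; the connection may still be open');
}
if (!connection.close()) {
  console.warn('close() reported failure; the connection object was not released');
}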
This was confirmed to be a memory leak in the idb-connector library that I was using. Link to the GitHub issue here. Basically, there was a C++ array that never had its memory deallocated. A new version was released, and the commit can be viewed here.

Maximum call stack size exceeded crash - Firebase on child added listener

I've been getting the maximum call stack size exceeded crash when starting my Node.js server (using the firebase-admin package) if I have 1255 or more children at a reference that I'm listening to using 'on child added'.
FIREBASE WARNING: Exception was thrown by user callback. RangeError: Maximum call stack size exceeded
If I don't add limitToLast(1255) (or anything less than 1255) to my 'child added' listener, the crash keeps occurring.
I want to listen to all previous values that exist or are added regardless of magnitude without the program crashing, but I'm not sure how to do this.
on('value') works perfectly with >10k nodes in the database reference, but not 'on child added' when initially looking up all the values.
Same issue with 'on child removed'.
Here is my 'child added' code:
messageRef.on('child_added', function (snapshot) {
  var payload = {};
  var msgKey = snapshot.key;
  var msgName = snapshot.child('name').val();
  var msgContent = snapshot.child('msg').val();
  var msgTime = snapshot.child('time').val();
  var message = {msg: msgContent, name: msgName, time: msgTime};
  payload['messages/' + msgKey] = message;
  ref.update(payload);
});
Thanks very much for any help!
set is a simpler operation that might avoid some limitations. Also I'd prefix the path passed to ref with / to ensure referencing from the root node.
Replace your last 6 statements with:
const msgKey = snapshot.key;
const message = snapshot.val();
const name = message.name;
const msg = message.msg;
const time = message.time;
firebase.database().ref(`/messages/${msgKey}`).set({name, msg, time});

Access of global variables in setImmediate in node.js

Below is a piece of code:
var buffer = new Buffer(0, 'hex'); // Global buffer

socket.on('data', function (data) {
  // Concatenate the received data to buffer
  buffer = Buffer.concat([buffer, new Buffer(data, 'hex')]);
  setImmediate(function () { // Executed asynchronously
    /* Process messages received in buffer */
    var messageLength = getMessageLength(buffer);
    while (buffer.length >= messageLength) {
      /* Process message and send response */
    }
    // Remove message from buffer after processing is done
    // (Buffers have no splice; slice off the consumed bytes instead)
    buffer = buffer.slice(messageLength);
  }); // End of setImmediate
}); // End of socket.on
I am using a global variable, buffer, inside the setImmediate block (executed asynchronously). Is there a guarantee that the global buffer variable does not change (either due to addition or deletion of data) during the execution of the code in the setImmediate block? If not, how do I handle this so that the buffer is accessed safely?
The oft-repeated saying "NodeJS is single-threaded" means there is no question of "safety" here. Simultaneous accesses to a variable are not possible because simultaneous operations do not occur. Even though the setImmediate code is executed asynchronously, that does not mean it is executed at the SAME TIME. It just means it is executed "soon". The parent function can return before this happens - but the parent function is not running when the anonymous setImmediate callback is triggered. At that time, the callback is the only thing running.
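A tiny illustration of that ordering guarantee (the log text is mine):
setImmediate(function () {
  console.log('second: runs only after the current call stack has fully unwound');
});
console.log('first: the enclosing code always finishes before the callback starts');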
These operations are thus safe - but for what it's worth, this isn't very efficient. NodeJS buffers are fixed-length, which is why you need to keep re-allocating a new one to append data. They're suitable for one-time loads but not really ideal for constant append operations. Consider using a readable stream instead. This allows you to pull out and process any length of data you want at a time, and it can return a buffer. But internally it does not constantly re-allocate its storage block for the data read.
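A rough sketch of the stream-based alternative (the 4-byte length prefix is an assumed framing, standing in for whatever getMessageLength inspects): a socket is itself a readable stream, so you can leave the buffering to it and pull out exactly one message's worth of bytes at a time with read(n), which returns null until enough data has arrived.
socket.on('readable', function () {
  while (true) {
    var header = socket.read(4); // assume a 4-byte length prefix
    if (header === null) return; // not enough data buffered yet
    var messageLength = header.readUInt32BE(0);
    var body = socket.read(messageLength);
    if (body === null) {
      socket.unshift(header); // put the header back until the rest arrives
      return;
    }
    // process message and send response
  }
});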

Debug a stack overflow exception with nodejs

I'm parsing a large number of files using Node.js. In my process, I'm parsing audio files, video files, and then the rest.
The function to parse files looks like this:
/**
 * @param arr : array of file objects (path, ext, previous directory)
 * @param cb : the callback for when every object is parsed;
 *             objects are then thrown into a database
 * @param others : the array being populated with matching objects
 **/
var parseOthers = function (arr, cb, others) {
  others = others === undefined ? [] : others;

  if (arr.length == 0)
    return cb(others); // should this be a nextTick?

  var e = arr.shift();

  // do some tests on the element and add it
  others.push(e);

  // Then recurse. I also tested setImmediate and nextTick here, as suggested
  // by another Stack Overflow question, with no success
  return parseOthers(arr, cb, others);
};
Full code here (careful, it's a mess).
Now, with about 3565 files (not that many), the script crashes with a "RangeError: Maximum call stack size exceeded" exception, with no trace.
What have I tried:
I've tried to debug it with node-inspector and node debug script, but it never hangs under the debugger, as if debugging increases the stack (does it?).
I've tried process.on('uncaughtException') to catch the exception, with no success.
I've got no memory leak.
How can I find the exception trace?
Edit 1
Increasing --stack_size seems to work pretty well. Isn't there another way of preventing this?
(a value of about 1300 works there)
Edit 2
According to:
$ node --v8-options | grep -B0 -A1 stack_size
The default stack size (in kBytes) is 984.
Edit 3
A few more explanations:
I'm never reading the files themselves
I'm working here on an array of paths; I don't parse folders recursively
I'm looking at each path and checking whether it's already stored in the database
My guess is that the populated array becomes too big for Node.js, but memory looks fine, and that's weird...
Most stack overflow situations are not easy, or even possible, to debug. Even if you debug the problem, you may not find the trigger.
But I can suggest a way to share the task load easily (including the queue management):
JXcore (a multithreaded fork of Node.JS) would suit your case better. Simply create a task pool and define a task method handling one file at a time. It will manage your queue one by one, multithreaded.
var myTask = function ( /* args here */ ) {
  // logic here
};

for (var i = 0; i < LIST_OF_THE_FILES; i++)
  jxcore.tasks.addTask(myTask, /* params here, optional callback ... */);

Or, in case the logic definition is out of the scope of a single method:

var myTask = function ( /* args here */ ) {
  require('mytasketc.js').handleTask( /* args here */ );
};

for (var i = 0; i < LIST_OF_THE_FILES; i++)
  jxcore.tasks.addTask(myTask, /* params here, optional callback ... */);
Remarks
Every single thread has its own V8 memory limit.
The contexts of the threads are separated.
Make sure the task method closes the file in the end.
Link
You can find more on multithreaded JavaScript tasks here.
You are getting this error because of recursion. Restructure your code so it does not use recursion, especially because this piece of code really doesn't need it. Here is just an APPROXIMATE example, to show you a better way to do it:
var parseElems = function (arr, cb) {
  var result = [];
  arr.forEach(function (el) {
    // do some tests on the element (el)
    result.push(el);
  });
  cb(result);
};
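If each element did need truly asynchronous handling, the recursive shape from the question can still be made stack-safe by deferring each step through setImmediate, so every call starts from a fresh stack (a sketch of the idea the question was already reaching for; parseOthersAsync is a name invented here):
var parseOthersAsync = function (arr, cb, others) {
  others = others || [];
  if (arr.length === 0) return cb(others);
  var e = arr.shift();
  // do some tests on the element and add it
  others.push(e);
  // defer the next step: this frame returns before the next one starts,
  // so the stack stays flat no matter how long the array is
  setImmediate(function () {
    parseOthersAsync(arr, cb, others);
  });
};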
