Looping in JavaScript - callback happening before the first loop iteration is fully over - Node.js

I need some help correcting a loop/closure issue in JavaScript.
Required flow: the MasterGet function runs, data is fetched from a MySQL table, and for each record fetched a set of 'rules' is run.
Issue faced: if two records are fetched, the 'rules' for record 2 also get triggered while the 'rules' are still being run for record 1. I need to modify the code so that record 2 is checked only after the 'rules' action has completed for record 1.
function MasterGet() {
    var countCheckRule = 0;
    connection.query(
        'SELECT * FROM MASTER',
        function selectCb(error, rows, fields) {
            if (error) {
                console.log('Log 045 - GetData Error: ' + error.message);
                return;
            }
            for (var i = 0; i < rows.length; i++) {
                console.log(+ rows[i].INDEX1);
                var firstResult = rows[i];
                rules.checRules(firstResult, myhttp, function (rules_res) {
                    firstResult.rules = rules_res;
                })
            }
        });
    countCheckRule++;
    setTimeout(funcL, 4000);
};
Any guidance will help. Thanks
Added to the issue:
The rules.checRules code is as below:
exports.checRules = function (A, myhttp, _callback) {
    var objrules = { 'rule12': false };
    function rule11() {
        if (A.NQ > 0 && A.PSQ > 0) {
            objrules.rule11 = true;
            if (config.execute) {
                modifyOrder('S', 'A.BQ', A.TS);
            }
        } else {
            objrules.rule11 = false;
        }
    }
    rule11();
    _callback(objrules);
}
So in the loop, for the 1st record it checks rule11; if rule11 is true it has to execute 'modifyOrder' with the given variables, and only after 'modifyOrder' is completed should it go back to the loop and check the 2nd record. If rule11 is false for the 1st record, it should go straight back to the loop and check the 2nd record.
Currently, with the given changes, the 2nd record check gets triggered before 'modifyOrder' is complete. Maybe the issue is that the code does not wait for a callback from 'modifyOrder'? Is that the issue? How can I make the code wait until 'modifyOrder' is complete, if it was started?

This code may solve your problem.
This kind of problem occurs because of the async nature of JavaScript.
function asyncLoop(i, rows, cb) {
    if (i < rows.length) {
        rules.checRules(rows[i], myhttp, function (rules_res) {
            console.log(rules_res);
            asyncLoop(i + 1, rows, cb);
            //your code
        });
    } else {
        cb();
    }
}

function MasterGet() {
    connection.query(
        'SELECT * FROM MASTER',
        function selectCb(error, rows, fields) {
            if (error) {
                console.log('Log 045 - GetData Error: ' + error.message);
                return;
            }
            asyncLoop(0, rows, () => {
                // after the async loop completes...
            });
        });
    countCheckRule++;
    setTimeout(funcL, 4000);
}
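If checking record 2 also has to wait for modifyOrder to finish, checRules itself needs to hold back its callback until modifyOrder reports completion. A minimal sketch, assuming modifyOrder can accept a completion callback (that signature is an assumption; it isn't shown in the question):

exports.checRules = function (A, myhttp, _callback) {
    var objrules = { 'rule11': false };
    if (A.NQ > 0 && A.PSQ > 0) {
        objrules.rule11 = true;
        if (config.execute) {
            // assumed signature: modifyOrder(side, qty, ts, done)
            modifyOrder('S', A.BQ, A.TS, function () {
                _callback(objrules); // continue the loop only after modifyOrder has finished
            });
            return;
        }
    }
    _callback(objrules); // rule false or nothing to execute: continue immediately
};

Because asyncLoop only recurses inside the checRules callback, record 2 is then processed strictly after record 1's modifyOrder has completed.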

Related

Can't break a while loop from within after obtaining the result from a spawned process

I have been trying to make a video-listing function that uses Node.js's spawn to spawn a yt-dlp process, whose output gets stored in a database.
It works, though not as expected (the save order gets messed up even then), when I give it the size of the playlist it must process; but when the submitted playlist size is not known, I can't stop the while loop that I have been using to run it.
Here is the function:
const { Sequelize, DataTypes } = require('sequelize'); // including this just in case
const { spawn } = require("child_process");
async function list_background(body_url, start_num, stop_num, chunk_size) {
    // sleep just to make it possible to catch
    // await sleep(2 * 1000);
    console.log('\nlisting in background');
    var i = 0;
    var dont_stop = true;
    // need to find a way to make the loop work only until the time we get a response
    // empty response means we should stop
    // while (dont_stop) { // this is disastrous as the variable never gets updated
    while (i < 10) {
        // prepare an empty string to append all the data to
        var response = '';
        // make the start and stop numbers
        start_num = parseInt(start_num) + chunk_size;
        stop_num = parseInt(stop_num) + chunk_size;
        console.log("\nsupplied data:", "\ni:", i, "\nbody_url:", body_url, "\nstart_num:", start_num, "\nstop_num:", stop_num, "\nchunk_size", chunk_size);
        // actually spawn the thing
        const yt_list = spawn("yt-dlp", ["--playlist-start", start_num, "--playlist-end", stop_num, "--flat-playlist",
            "--print", '%(title)s\t%(id)s\t%(webpage_url)s', body_url]);
        yt_list.stdout.on("data", async data => {
            response += data;
        });
        yt_list.stderr.on("data", data => {
            response = `stderr: ${data}`;
        });
        yt_list.on('error', (error) => {
            response = `error: ${error.message}`;
        });
        // apparently await has no effect on this expression
        // but then how are we supposed to know when to stop?
        // the listing only ends when dont_stop is false
        yt_list.on("close", async (code) => {
            end = `child process exited with code ${code}`;
            response_list = response.split("\n");
            // remove the "" from the end of the list
            response_list.pop();
            // get the status at the end
            console.log("\ndata after processing\ni:", i, "response:\n", response, "\nresponse_list:", response_list, "\nresponse_list.length:", response_list.length, "\n");
            if (response_list == '') {
                // basically when the response is empty it means that all
                // the items have been listed and the function can just return
                // this should then break the outer listing loop
                console.log("no videos found", "\ni:", i, "\n");
                // break won't work as `Jump target cannot cross function boundary.ts(1107)`
                // so I am returning false to dont_stop and if dont_stop is true then the loop
                // should stop in the next iteration
                dont_stop = false;
            } else {
                // adding the items to db
                console.log("adding items to db", "\ni:", i, "\n");
                await Promise.all(response_list.map(async (element) => {
                    var items = element.split("\t");
                    // console.log(items, items.length, "\ni:", i, "\n");
                    // update the videos too here by looking for any changes that could have been made
                    // use find or create here to update the entries
                    if (items.length == 3) {
                        try {
                            if (items[0] == "[Deleted video]" || items[0] == "[Private video]") {
                                item_available = false;
                            } else {
                                item_available = true;
                            }
                            const [found, created] = await vid_list.findOrCreate({
                                where: { url: items[2] },
                                defaults: {
                                    id: items[1],
                                    reference: body_url,
                                    title: items[0],
                                    downloaded: false,
                                    available: item_available
                                }
                            })
                            //if (created)
                            //    console.log("\nsaved", items[0], "\ni:", i, "\n");
                            //else
                            if (found) {
                                if (!item_available) {
                                    found.available = false;
                                    //console.log("\nfound", items[0], "updated", "\ni:", i, "\n");
                                }
                                else {
                                    //console.log("\nfound", items[0], "no changes", "\ni:", i, "\n");
                                }
                                found.changed('updatedAt', true);
                            }
                        } catch (error) {
                            // remember to uncomment this later, the sequelize errors are not relevant here now
                            // console.error(error);
                        }
                    }
                }));
                dont_stop = true;
            }
        });
        console.log('\n\ndont_stop', dont_stop, "\ni:", i, "\n");
        i++;
    }
    console.log('\noutside the loop, and presumably done', "\ni:", i, "\n");
}
this is the test data that I use:
const daft_punk_essentials = { url: "https://www.youtube.com/playlist?list=PLSdoVPM5WnneERBKycA1lhN_vPM6IGiAg", size: 22 }
// first 10 will be listed by the main method so the number of videos that we should get here is total-10
list_background(daft_punk_essentials['url'], 1, 10, 10);
I recorded the output of the execution to find out what is happening
can't_stop.log
From my observations I have found that the spawn doesn't start until after the loop has finished. I had to limit the loop to 10 iterations because without a limit it just crashes my computer (see the log file for how it happens).
Now, I know about await Promise.all() to wait for its internal work to complete, but I don't get how to implement this for a while loop that needs to process parts of a list in order to add them to a DB.
I am not sure if this is the right approach. I used a while loop because there can be up to 5000 videos in a playlist, and using a for loop to make chunks would be wasteful if the playlist has, say, fewer than 500 videos.
The beauty of using promises and async/await is that you can use a normal flow of control with loops, break, return, etc., because your code isn't running inside event-triggered callback functions that have no control over the higher-level scope.
So, the first thing to clean up here is to take all the .on() event handling for the spawn() and wrap it in a promise, so that it can all be abstracted away into a separate function that you can await.
Then, I'd also suggest breaking some of the complexity you have into separate functions, as that will also allow you to more simply see and control the flow.
I did not follow everything you were trying to do in this loop or how you want to handle all possible error conditions, so I'm sure this will need some further tweaking, but here's the general idea.
Synopsis of Changes
Put the spawn operation into a separate function, which I called getVideoInfo(), that returns a promise that resolves/rejects when it's done. This wraps all the .on() event handlers in a promise that the caller can deal with more simply.
Break out the functionality that adds items to the DB into its own function. This is done just to simplify the code and make the main control flow easier to follow and see and write.
Just use a while (true) loop and when you're done, you can simply return. No need for stop loop variables or any of that.
Here's the general idea for how that could look (you will likely have to fix up some details and error handling since I can't run this myself).
const { Sequelize, DataTypes } = require('sequelize'); // including this just in case
const { spawn } = require("child_process");

function getVideoInfo(body_url, start_num, stop_num) {
    return new Promise((resolve, reject) => {
        // actually spawn the thing
        let response = "";
        const yt_list = spawn("yt-dlp", [
            "--playlist-start",
            start_num,
            "--playlist-end",
            stop_num,
            "--flat-playlist",
            "--print", '%(title)s\t%(id)s\t%(webpage_url)s',
            body_url
        ]);
        yt_list.stdout.on("data", data => {
            response += data;
        });
        yt_list.stderr.on("data", data => {
            reject(new Error(`stderr: ${data}`));
        });
        yt_list.on("close", async (code) => {
            resolve(response);
        });
        yt_list.on("error", reject);
    });
}
async function addItemsToDb(response_list, body_url) {
    // adding the items to db
    console.log("adding items to db");
    await Promise.all(response_list.map(async (element) => {
        const items = element.split("\t");
        // update the videos too here by looking for any changes that could have been made
        // use find or create here to update the entries
        if (items.length === 3) {
            try {
                const item_available = !(items[0] === "[Deleted video]" || items[0] === "[Private video]");
                const [found, created] = await vid_list.findOrCreate({
                    where: { url: items[2] },
                    defaults: {
                        id: items[1],
                        reference: body_url,
                        title: items[0],
                        downloaded: false,
                        available: item_available
                    }
                });
                if (found) {
                    if (!item_available) {
                        found.available = false;
                        //console.log("\nfound", items[0], "updated\n");
                    }
                    else {
                        //console.log("\nfound", items[0], "no changes\n");
                    }
                    found.changed('updatedAt', true);
                }
            } catch (error) {
                // remember to uncomment this later, the sequelize errors are not relevant here now
                // console.error(error);
            }
        }
    }));
}
async function list_background(body_url, start_num, stop_num, chunk_size) {
    console.log('\nlisting in background');
    start_num = parseInt(start_num);
    stop_num = parseInt(stop_num);
    while (true) {
        // make the start and stop numbers
        start_num += chunk_size;
        stop_num += chunk_size;
        const response = await getVideoInfo(body_url, start_num, stop_num);
        const response_list = response.split("\n");
        // remove the "" from the end of the list
        response_list.pop();
        // get the status at the end
        if (response_list == '') {
            return;
        } else {
            await addItemsToDb(response_list, body_url);
        }
    }
}
P.S. I don't understand why you're adding chunk_size to start_num before you ever use it. It seems like you'd want to do that after you do the first iteration so you start at start_num, not start at start_num + chunk_size. But, this is how your original code was written so I left it that way.
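If you do want the first request to start at the original start_num, one option is to advance the counters at the end of each iteration instead; a fragment of the loop above, rearranged (untested):

while (true) {
    const response = await getVideoInfo(body_url, start_num, stop_num);
    // ... split the response, stop on an empty list, add items to the DB as above ...
    start_num += chunk_size;
    stop_num += chunk_size;
}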

Nodejs async.eachSeries

I asked a few questions about this a few months ago, and recently I got back to that script. I figured out some things and a friend helped with the script, but now I have another problem.
This is my script now:
var j = schedule.scheduleJob('*/5 * * * * *', function(){
    var steamids = [];
    con.query("SELECT * FROM counterStrikeGlobalOffensive", function (err, rows) {
        for (var i = 0; i < rows.length; i++) {
            steamids.push(rows[i].steam64ID);
        }
        //const steamIDs = ["2342342341234123", "23423412341234", "2342314123423"]; // Steam IDs to check
        eachSeries(steamids, (steamID, callback) => {
            CSGOCli.playerProfileRequest(CSGOCli.ToAccountID(steamID));
            CSGOCli.on("playerProfile", function(profile) {
                console.log(JSON.stringify(profile, null, 2));
                callback();
            });
        }, (err) => {
            // error thrown = set, else we're done
        });
    });
});
When I use the constant steamIDs, it works perfectly, but when I use steamids, it gives me an error (I will explain below).
When I do console.log(steamids); it returns this:
[ '76561198152643711', '76561198213530057' ]
and steamIDs is
const steamIDs = ["2342342341234123", "23423412341234", "2342314123423"];
so it's almost the same as the constant steamIDs, except the constant has " " around the numbers, which shouldn't be why it isn't working, but maybe I'm wrong?
Also, I have the callback(), but how can I make it stop giving me this error:
Error: Callback was already called.
Ask for any other info please :)
You get the Error: Callback was already called. because the CSGOCli.on() handler is executed multiple times. It calls the callback once, and later the event fires again, so the callback gets called again even though it should only be called once.
For a simple reproduction see this example:
async.eachSeries([1, 2, 3], (data, callback) => {
    console.log("Data:", data);
    for (let i = 0; i < 2; i++) {
        callback();
    }
},
(err) => {
    console.log("Callback: ", err);
});
But if you add return before the callback like this: return callback();, then the problem disappears because the function will return and no callbacks will be called again.
So change your code to this and see if it works:
CSGOCli.on("playerProfile", function(profile) {
console.log(JSON.stringify(profile, null, 2));
return callback();
});
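If CSGOCli is a standard Node.js EventEmitter, another option is to register the handler with .once() instead of .on(), so each registered handler can only ever fire a single time (assuming that fits how the library emits the event). That also avoids piling up a new "playerProfile" listener on every iteration of eachSeries:

CSGOCli.once("playerProfile", function (profile) {
    console.log(JSON.stringify(profile, null, 2));
    callback();
});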

Not able to insert a huge number of records (e.g. 10000) with insertMany in MongoDB

So I made a script that inserts more than 10000 records into a collection. First I inserted the 10000 records into my collection using an insertMany query. But when I tried to insert the same number of records again, they did not get inserted. Then I tried 5000 records, but those records also did not get inserted.
So what is the problem here? Does MongoDB have a limit on records? Please help me with this.
Here is my script:
var resultArr = [];
var getUID = function () {
    return (((1 + Math.random()) * 0x10000) | 0).toString(16).substring(1);
}
var date = new Date();
date.setDate(date.getDate() + 12);
for (var i = 0; i < 10000; i++) {
    resultObj = {
        "guid": (getUID() + getUID() + "-" + getUID() + "-4" + getUID().substr(0, 3) + "-" + getUID()),
        title: 'something'
    }
    resultArr.push(resultObj);
    resultObj = {};
}
db.getCollection('5891847dc654e8e2150b3bff').insertMany(resultArr, function (err, result) {
    print('data saved')
    if (err) {
        deferred.reject(err)
        db.close()
    };
});
Use the following code instead:
var async = require('async');

var resultArray = [];
var getUID = function () {
    return (((1 + Math.random()) * 0x10000) | 0).toString(16).substring(1);
}
var date = new Date();
date.setDate(date.getDate() + 12);
var count = 0;

async.whilst(
    // Check to see if we continue
    function() { return count < 10000 },
    // The loop function
    function(callback) {
        count++;
        resultArray.push({
            "guid": (getUID() + getUID() + "-" + getUID() + "-4" + getUID().substr(0, 3) + "-" + getUID()),
            "title": "something"
        });
        if (resultArray.length >= 1000) {
            db.getCollection('5891847dc654e8e2150b3bff').bulkWrite(
                resultArray.map(function(d) {
                    return { "insertOne": { "document": d } };
                }),
                function(err, res) {
                    resultArray = [];
                    callback(err);
                }
            );
        } else {
            callback()
        }
    },
    // This is on completion or error
    function(err, n) {
        if (err) throw err;
        if (resultArray.length > 0) {
            db.getCollection('5891847dc654e8e2150b3bff').bulkWrite(
                resultArray.map(function(d) {
                    return { "insertOne": { "document": d } };
                }),
                function(err, res) {
                    resultArray = [];
                    db.close();
                }
            );
        } else {
            db.close();
        }
    }
);
This controls the loop's flow around the inner async calls with async.whilst, which gives you a callback to invoke in order to continue the "loop", so you don't blow up the call stack in Node with concurrently running operations.
The actual .bulkWrite() call is made inside the loop whenever the number of items in the array reaches 1000, which is the maximum the driver is going to send in one go anyway. You can make it larger, but we are saving memory and this is about the "sweet spot".
Using the callback here, we clear the array and invoke the "loop's callback" in order to allow the loop to continue.
Finally, when the "loop" is complete, you would typically check whether anything is left unprocessed after the even batches of 1000. In this case we know it's a round number, but I'm leaving the check in to show what to do with a real list.
That part is in the final callback argument to async.whilst, which is reached when the loop is completed and we can actually exit.
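For comparison, with the official MongoDB Node.js driver on a Node version that supports async/await, the same "send in batches of 1000" idea can be written without async.whilst; a rough sketch, not a drop-in replacement for the shell-style code above:

async function insertInBatches(collection, docs, batchSize) {
    for (let i = 0; i < docs.length; i += batchSize) {
        const slice = docs.slice(i, i + batchSize);
        // ordered: false lets the server keep going past individual failed documents
        await collection.insertMany(slice, { ordered: false });
    }
}

// usage: await insertInBatches(db.collection('5891847dc654e8e2150b3bff'), resultArr, 1000);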

DynamoDB javascript SDK batchWriteItem doesn't complete unless I increase write capacity

I'm running a series of unit tests (node.js 4.x, aws-sdk, mocha) which load data into a table before each test and then clear the table after the test.
I have two tests that are failing because of a ConditionExpression which triggers a ConditionCheckFailedException. But if I increase the read/write capacity, the tests pass.
It's my understanding that the SDK handles throttling exceptions and retries them for you, so why wouldn't my tests just run slower and pass? Instead it seems as though the tests fail to complete the scan -> batchWriteItem process, and so there are records still left in the table when a new test starts.
I'm told by team members that they've seen similar problems and just increased the throughput to fix them. That doesn't sit right with me. Either I'm doing something wrong and there's a race condition in my tests, or there should be a pattern I can implement to make sure my operations complete while being throttled. I should be able to use throttling metrics to tell me when I need to increase throughput, but I should still be able to keep retrying until I run out of memory.
Has anyone else run into this and what have you done to handle the problem?
After some debugging I noticed the UnprocessedItems response element. After looking up UnprocessedItems in the docs, I realized I should have read more closely. The code below runs a retry loop with a delay (exponential back-off):
var clearEventTable = function (tableName, client, cleared) {
    var exclusiveStartKey = null;
    var retryCount = 0;

    var read = function(query, callback) {
        client.scan(query, function (err, page) {
            if (err) {
                console.log(err);
                return callback(err);
            }
            retryCount = 0;
            exclusiveStartKey = page.LastEvaluatedKey || null;
            if (page.Count == 0) {
                return callback(null, {});
            }
            if (page.Count < 25 && exclusiveStartKey) {
                console.log("read capacity limit reached: " + JSON.stringify(page, null, 2));
            }
            var keys = _.map(page.Items, function(n) {
                return { DeleteRequest: { Key: n } };
            });
            var batch = {
                RequestItems: {},
                ReturnConsumedCapacity: "INDEXES",
                ReturnItemCollectionMetrics: "SIZE"
            };
            batch.RequestItems[tableName] = keys;
            callback(null, batch);
        });
    };

    var write = function(batch, callback) {
        if (batch && batch.RequestItems) {
            client.batchWriteItem(batch, function(err, result) {
                if (err) {
                    console.log(err);
                    return callback(err);
                }
                if (Object.keys(result.UnprocessedItems).length !== 0) {
                    console.log("Retry batchWriteItem: " + JSON.stringify(result, null, 2));
                    retryCount++;
                    var retry = {
                        RequestItems: result.UnprocessedItems,
                        ReturnConsumedCapacity: "INDEXES",
                        ReturnItemCollectionMetrics: "SIZE"
                    };
                    // retry with exponential backoff
                    var delay = retryCount > 0 ? (50 * Math.pow(2, retryCount - 1)) : 0;
                    setTimeout(function () { write(retry, callback); }, delay);
                    return;
                }
                callback(null, result);
            });
        } else {
            callback(null);
        }
    };

    var params = {
        TableName: tableName,
        ProjectionExpression: "aggregateId,id",
        Limit: 25, // max 25 per batchWriteItem
        ConsistentRead: false,
        ReturnConsumedCapacity: "TOTAL"
    };

    async.doWhilst(function (next) {
        // retrieve entities
        if (exclusiveStartKey)
            params.ExclusiveStartKey = exclusiveStartKey;
        async.compose(write, read)(params, function (err, result) {
            if (err) next(err);
            else next(null, result);
        });
    }, function () {
        // test if we need to load more
        return exclusiveStartKey !== null;
    }, function (err, r) {
        // return results
        if (err) {
            console.log(err);
            return cleared(err);
        }
        return cleared(null);
    });
};
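For reference, on a newer Node runtime than the 4.x mentioned in the question, the aws-sdk's .promise() support lets the same "resend UnprocessedItems with back-off" idea be written more compactly; a sketch, not a drop-in replacement for the function above (requests must already be chunked to at most 25 items, as in the scan loop):

async function batchWriteWithRetry(client, tableName, requests) {
    let unprocessed = { [tableName]: requests };
    let attempt = 0;
    while (Object.keys(unprocessed).length > 0) {
        const result = await client.batchWriteItem({ RequestItems: unprocessed }).promise();
        unprocessed = result.UnprocessedItems || {};
        if (Object.keys(unprocessed).length > 0) {
            attempt++;
            // exponential back-off before resending only the unprocessed items
            await new Promise(resolve => setTimeout(resolve, 50 * Math.pow(2, attempt - 1)));
        }
    }
}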
Also take a look at the amount of memory provisioned for the Lambda. It might be too low, and hitting the max leads to unpredictable results in my experience.

While loop to check uniqueness of custom id

I have a MongoDB database set up with some objects that have a unique code (not the primary key).
I should also note that I'm using Node.js, and this code is in my server.js, which connects to the MongoDB database.
To generate a new code, I generate one randomly and want to check if it already exists. If not, we use it, no problem; but if it already exists, I want to generate another code and check it again. This is the code I use to check whether the id already exists:
function createPartyId(callback) {
    var min = 10000, max = 99999;
    var partyId = -1, count = -1;
    async.whilst(
        function () { return count != 0; },
        function (callback) {
            partyId = min + Math.floor(Math.random() * (max - min + 1));
            partyId = 88888;
            getPartyIdCount(partyId, function(num) {
                count = num;
            });
        },
        function (err) {
        }
    );
}

function getPartyIdCount(partyId, callback) {
    count = -1;
    db.db_name.find({id: partyId}, function(err, records) {
        if (err) {
            console.log("There was an error executing the database query.");
            callback(count);
        }
        count = records.length;
        callback(count);
    });
}
First of all, is there any particular reason you're not using a simple incrementing number sequence? This type of code is prone to inefficiency: the more numbers you generate, the more chance you have of collisions, which means you're going to spend more time generating an ID for your data than you are on the rest of your processing. Not a good idea.
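For instance, with the official MongoDB Node.js driver, a counter document updated via findOneAndUpdate with $inc gives you collision-free sequential IDs in one round trip. A rough sketch (the collection and field names are just illustrative, and the exact option names vary between driver versions):

function nextPartyId(db, callback) {
    db.collection('counters').findOneAndUpdate(
        { _id: 'partyId' },
        { $inc: { seq: 1 } },
        { upsert: true, returnDocument: 'after' },   // older drivers use returnOriginal: false
        function (err, result) {
            if (err) return callback(err);
            callback(null, result.value.seq);        // result.value is the updated counter document
        }
    );
}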
But I can still tell you what's going wrong.
OK, so getPartyIdCount() will only, ever, always, without fail, return undefined (or, basically, nothing).
Your mongo call processes the return value in a callback, and that callback doesn't assign its value to anything, so return records.length just gets lost into nothingness.
You've mixed up createPartyId(), which it appears you want to run synchronously, with your mongo call, which must run asynchronously.
return always goes with the nearest containing function, so in this case it goes with function(err, records), not function getPartyIdCount(partyId).
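A stripped-down illustration of that point (not your exact code): the return below belongs to the anonymous database callback, not to getPartyIdCount, so the outer function has already finished and returned undefined by the time the query completes.

function getPartyIdCount(partyId) {
    db.db_name.find({ id: partyId }, function (err, records) {
        return records.length;   // returns from the inner callback only; nothing receives this value
    });
    // getPartyIdCount itself falls off the end here, i.e. it returns undefined
}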
(Expanding my comment from above)
The issue is that createPartyId is an asynchronous function, but you're trying to return the value synchronously. That won't work. Once you touch an async operation, the rest of the call stack has to be async as well.
You don't include the code that's calling this, but I assume you want it to be something like:
var partyId = createPartyId();
// do stuff...
That's not going to work. Try this:
function createPartyId(callback) {
    var min = 10000, max = 99999;
    var partyId = -1, count = -1;
    async.whilst(
        function () { return (count != 0); },
        function (callback) {
            partyId = min + Math.floor(Math.random() * (max - min + 1));
            partyId = 88888;
            getPartyIdCount(partyId, function(err, num) {
                if (!err) {
                    count = num;
                }
                callback(err);
            });
        },
        function (err) {
            // this is called when the loop ends, error or not
            // Invoke the outer callback to return the result
            callback(err, partyId);
        }
    );
}

function getPartyIdCount(partyId, callback) {
    var count = -1;
    db.db_name.find({id: partyId}, function(err, records) {
        if (err) {
            console.log("There was an error executing the database query.");
            return callback(err);
        }
        count = records.length;
        callback(null, count);
    });
}
(I've also adopted the default node.js convention of always returning errors as the first argument to callback functions.)
So, to use this you would do:
createPartyId(function (err, partyId) {
    if (err) { return aughItFellOver(err); }
    // do stuff
});
