async.whilst with internal callback - node.js

I am trying to loop while the count of some array is less than 50, stopping early once the loop has gone through more than 14 iterations. This seems like a perfect use for async.whilst.
However, my complication is that my work function has an asynchronous query inside of it (a database query).
Here is a simple version of my code:
// Shared state that the loop below reads and updates.
var items = [];
var key = 20150713;
var iterations = 0;
async.whilst(
// Test function: returns true while fewer than 50 items were collected OR
// fewer than 14 iterations have run.
// NOTE(review): with `||` both limits must be reached before this turns
// false; if the loop should stop at whichever limit comes first this
// needs `&&` - confirm the intent.
function(){
return items.length < 50 || iterations < 14;
},
// Work function: issues one query and signals completion through callback().
function(callback){
iterations+=1;
dbQuery("my query", function(err, res){
// NOTE(review): err is never inspected before the loop continues.
key -=1;
//add res to items.
callback();
});
},
// Completion handler: empty here, so nothing is done with the collected items.
function(err){
});
Of course this code doesn't work because dbQuery() returns immediately, so async.whilst just blows through 14 iterations and returns an empty array before the first dbQuery even returns.
How do I handle this so that async.whilst waits for the return of the inner function before running again?
Or is async.whilst not suited to my task?

You're using it correctly. Only, from the code you're posting it doesn't look like you're doing anything with the result:
// Runs the query repeatedly until 50 items were collected or 14 iterations
// were used up (whichever comes first), then reports the outcome.
// Relies on `items`, `iterations`, `key` and `dbQuery` from the enclosing scope.
// NOTE(review): the synchronous test function matches the async API used in
// the question; newer async releases may expect a different test signature.
async.whilst(
    function () {
        // `&&` (not `||`): stop as soon as EITHER limit is reached. With `||`
        // the loop would keep querying until both limits were exceeded and
        // could run forever when the query yields no rows.
        return items.length < 50 && iterations < 14;
    },
    function (callback) {
        iterations += 1;
        dbQuery("my query", function (err, res) {
            if (err) {
                // Hand the error to whilst so the loop stops and the final
                // handler below receives it.
                return callback(err);
            }
            key -= 1;
            //add res to items.
            callback();
        });
    },
    function (err) {
        // this function will be called when whilst completes
        // or when there's an error
        if (!err) {
            // use items:
            console.log(items);
        }
        else {
            console.log('OOps.. something went wrong somewhere', err);
        }
    }
);

Related

Nodejs async.eachSeries

I asked few questions about this few months ago and recently I got back to that script. I figured out some thing and a friend helped with the script but now I have another problem.
This is my script now:
// Scheduled job: loads the stored Steam IDs and requests each player's
// profile one after another.
// presumably fires every 5 seconds ('*/5' in the first of six cron fields) -
// confirm against the scheduler's documentation.
var j = schedule.scheduleJob('*/5 * * * * *', function(){
var steamids = [];
con.query("SELECT * FROM counterStrikeGlobalOffensive", function (err, rows) {
// NOTE(review): err is not checked before rows is read.
for (var i = 0; i < rows.length; i++) {
steamids.push(rows[i].steam64ID);
}
//const steamIDs = ["2342342341234123", "23423412341234", "2342314123423"]; // Steam IDs to check
eachSeries(steamids, (steamID, callback) => {
CSGOCli.playerProfileRequest(CSGOCli.ToAccountID(steamID));
// A new "playerProfile" listener is registered for every Steam ID and none
// is removed here, so earlier listeners (and their callbacks) stay attached.
CSGOCli.on("playerProfile", function(profile) {
console.log(JSON.stringify(profile, null, 2));
callback();
});
}, (err) => {
// Final handler: err is set when an iteration reported an error; nothing is
// done with it here.
});
});
});
When I use constant steamIDs, it works perfectly, but when I use steamids, it gives me an error.(I will explain)...
When I do, console.log(steamids); it returns me this
[ '76561198152643711', '76561198213530057' ]
and steamIDs is
const steamIDs = ["2342342341234123", "23423412341234", "2342314123423"];
so its almost the same as constant SteamIDs but constant has " " around the numbers which shouldn't be why it isn't working but maybe I'm wrong?
Also, I have the callback() but how can I make it stop giving me an error
Error: Callback was already called.
Ask for any other info please :)
You get the Error: Callback was already called. because the CSGOCli.on() is executed multiple times. So it calls the callback once, and later the event fires again. So the callback gets called again but it should only be called once.
For a simple reproduction see this example:
// Minimal reproduction: the iteratee deliberately invokes `callback` twice
// for the same item, which is the situation the error message complains about.
async.eachSeries([1, 2, 3], (data, callback) => {
console.log("Data:", data);
// Two calls per item - the second one is the offending call.
for(let i = 0; i < 2; i++) {
callback();
}
},
// Final handler: logs whatever error (if any) was reported.
(err) => {
console.log("Callback: ", err);
});
But if you add return before the callback like this: return callback();, then the problem disappears because the function will return and no callbacks will be called again.
So change your code to this and see if it works:
// Register the listener with once() instead of on(): the listener is removed
// after its first invocation, so a later "playerProfile" event cannot call
// this iteration's callback a second time. (`return` alone only ends the
// current invocation; it does not stop the listener from firing again.)
// NOTE(review): assumes CSGOCli is a standard Node.js EventEmitter - confirm.
CSGOCli.once("playerProfile", function(profile) {
    console.log(JSON.stringify(profile, null, 2));
    return callback();
});

node.js - sqlite3 read all records in table and return

I'm trying to read all records in a sqlite3 table and return them via callback. But it seems that despite using serialize these calls are still ASYNC. Here is my code:
// Attempts to collect every row of MediaTable and pass the array to `callback`.
var readRecordsFromMediaTable = function(callback){
var db = new sqlite3.Database(file, sqlite3.OPEN_READWRITE | sqlite3.OPEN_CREATE);
var allRecords = [];
db.serialize(function() {
// Row handler: logs each row and appends it to allRecords.
db.each("SELECT * FROM MediaTable", function(err, row) {
myLib.generateLog(levelDebug, util.inspect(row));
allRecords.push(row);
}
// NOTE(review): the db.each( call above is never closed with `);`, so this
// snippet does not parse as written.
// callback is invoked right after db.each is issued; the question reports
// that the array is still empty at that point.
callback(allRecords);
db.close();
});
}
When the callback gets fired the array prints '[]'.
Is there another call that I can make (instead of db.each) that will give me all rows in one shot. I have no need for iterating through each row here.
If there isn't, how do I read all records and only then call the callback with results?
I was able to find answer to this question. Here it is for anyone who is looking:
var sqlite3 = require("sqlite3").verbose();
// Reads every row of MediaTable in one call and hands the outcome to `callback`.
//
// callback(result) is invoked exactly once:
//   - with the array of rows on success,
//   - with the error object when the query fails.
// (The single-argument convention of the original is kept so existing callers
// continue to work.)
var readRecordsFromMediaTable = function(callback){
    var db = new sqlite3.Database(file, sqlite3.OPEN_READONLY);
    db.serialize(function() {
        db.all("SELECT * FROM MediaTable", function(err, allRows) {
            // The query has finished either way; release the handle on both paths.
            db.close();
            if(err != null){
                console.log(err);
                // Return here: falling through would invoke callback a second
                // time with undefined rows.
                return callback(err);
            }
            console.log(util.inspect(allRows));
            callback(allRows);
        });
    });
}
A promise based method
// Promise flavour: resolves with { statement, rows } once the query finished,
// or rejects with { error } when it failed. `db` comes from the enclosing
// scope and is closed after the promise has been settled.
var readRecordsFromMediaTable = function () {
    return new Promise(function (resolve, reject) {
        db.all("SELECT * FROM MediaTable", null, function onRows(err, rows) {
            // Must stay a regular function: `this` is whatever db.all binds
            // the callback to, and it is exposed as `statement`.
            var statement = this;
            if (err) {
                reject({ 'error': err });
            } else {
                resolve({ statement: statement, rows: rows });
            }
            db.close();
        });
    });
}
The accepted answer using db.all with a callback is correct since db.each wasn't actually needed. However, if db.each was needed, the solution is provided in the node-sqlite3 API documentation, https://github.com/mapbox/node-sqlite3/wiki/API#databaseeachsql-param--callback-complete:
Database#each(sql, [param, ...], [callback], [complete])
...
After all row callbacks were called, the completion callback will be called if present. The first argument is an error object, and the second argument is the number of retrieved rows
So, where you end the first callback, instead of just } put }, function() {...}. Something like this:
// Streams MediaTable row by row with db.each and passes the collected rows to
// `callback` from the completion callback, i.e. only after every row callback
// has run.
//
// callback(allRecords): receives the array of rows that were read successfully.
var readRecordsFromMediaTable = function(callback){
    var db = new sqlite3.Database(file, sqlite3.OPEN_READWRITE | sqlite3.OPEN_CREATE);
    var allRecords = [];
    db.serialize(function() {
        db.each("SELECT * FROM MediaTable", function(err, row) {
            if (err) {
                // Skip rows that could not be read instead of pushing undefined.
                console.log(err);
                return;
            }
            myLib.generateLog(levelDebug, util.inspect(row));
            allRecords.push(row);
        }, function(err, count) {
            // Completion callback: all row callbacks have been called.
            if (err) {
                console.log(err);
            }
            callback(allRecords);
            db.close();
        }); // closes db.each( - this parenthesis was missing in the original
    });
}
I know I'm kinda late, but since you're here, please consider this:
Note that it first retrieves all result rows and stores them in memory. For queries that have potentially large result sets, use the Database#each function to retrieve all rows or Database#prepare followed by multiple Statement#get calls to retrieve a previously unknown amount of rows.
As described in the node-sqlite3 docs, you should use .each() if you're after a very large or unknown number of rows, since .all() will store the entire result set in memory before handing it over.
That being said, take a look at Colin Keenan's answer.
I tackled this differently, since these calls are asynchronous you need to wait until they complete to return their data. I did it with a setInterval(), kind of like throwing pizza dough up into the air and waiting for it to come back down.
// Polling approach: `reply` starts as an empty string, is overwritten by the
// query callback, and a timer checks every 10 ms whether that has happened.
var reply = '';
db.all(query, [], function(err, rows){
// Store either the error or the rows; both count as "a reply arrived".
if(err != null) {
reply = err;
} else {
reply = rows;
}
});
var callbacker = setInterval(function(){
// check that our reply has been modified yet
if( reply !== '' ){
// clear the interval
clearInterval(callbacker);
// do work
// NOTE(review): at this point reply may hold an error object instead of
// rows - the code above stores both in the same variable.
}
}, 10); // every ten milliseconds
Old question, but I came across the issue, with a different approach as to solve the problem. The Promise option works, though being a little too verbose to my taste, in the case of a db.all(...) call.
I am using instead the event concept of Node:
// require('events') returns the EventEmitter class itself, not an emitter.
// Create an instance so that eventHandler.emit(...) and eventHandler.on(...)
// are the instance methods the code below expects.
var EventEmitter = require('events');
var eventHandler = new EventEmitter();
In your Sqlite function:
// Runs the query and reports the outcome through `eventHandler`
// (an EventEmitter instance):
//   - 'done'  is emitted with the original status message and, as a second
//             argument, the rows that were read;
//   - 'error' is emitted with the error object when the query fails.
// Callers should attach an 'error' listener: Node throws when 'error' is
// emitted on an emitter that has none.
function queryWhatever(eventHandler) {
    db.serialize(() => {
        db.all('SELECT * FROM myTable', (err, rows) => {
            if (err) {
                // Do not announce completion for a failed query.
                eventHandler.emit('error', err);
                return;
            }
            // At this point, the query is completed
            // You can emit a signal
            eventHandler.emit('done', 'The query is completed', rows);
        });
    });
}
Then, give your callback function to the eventHandler, that "reacts" to the 'done' event:
// Registers the handler that runs each time a 'done' event is emitted on
// eventHandler; any arguments passed to emit are ignored by this handler.
eventHandler.on('done', () => {
// Do something
})

While loop to check uniqueness of custom id

I have a MongoDB database set up with some objects that have a unique code (not the primary key).
I should also note that I'm using NodeJS and this code is in my server.js to connect to the MongoDB database.
To generate a new code, I generate one randomly and I want to check if it already exists. If not then we use it no problem, but if it already exists I want to generate another code and check it again. This is the code I use to check if the id already exists:
// Question version: tries to generate a party id that is not yet in use.
function createPartyId(callback) {
var min = 10000, max = 99999;
var partyId = -1, count = -1;
async.whilst(
// Test function: true until a lookup reports zero existing records.
function () { return count != 0; },
// Work function. NOTE(review): its `callback` parameter is never invoked,
// so the loop is never told that an iteration has finished.
function (callback) {
partyId = min + Math.floor(Math.random() * (max - min + 1));
// Overrides the random value just computed with a fixed id.
partyId = 88888;
getPartyIdCount(partyId, function(num) {
count = num;
});
},
// Completion handler: empty, so the outer `callback` is never called and
// nothing is returned to the caller.
function (err) {
}
);
}
// Question version: looks up records with the given id and passes the number
// found to `callback`.
function getPartyIdCount(partyId, callback) {
// NOTE(review): `count` is assigned without a declaration in this function.
count = -1;
db.db_name.find({id: partyId}, function(err, records) {
if(err) {
console.log("There was an error executing the database query.");
callback(count);
// NOTE(review): no return here, so execution continues below and
// callback is invoked a second time after an error.
}
count = records.length;
callback(count);
});
}
First of all, is there any particular reason you're not using a simple number increment sequence? This type of code is prone to inefficiency, the more numbers you generate the more chance you have of collisions which means you're going to be spending more time on generating an ID for your data than you are on the rest of your processing. Not a good idea.
But I can still tell you what's going wrong.
OK, so getPartyIdCount() will only, ever, always, without fail, return undefined (or, basically, nothing).
Your mongo call processes the return value in a callback, and that callback doesn't assign its value to anything, so return records.length just gets lost into nothingness.
You've mixed up createPartyId(), which it appears you want to run synchronously, with your mongo call, which must run asynchronously.
return always goes with the nearest containing function, so in this case it goes with function(err, records), not function getPartyIdCount(partyId).
(Expanding my comment from above)
The issue is that createPartyId is an asynchronous function, but you're trying to return the value synchronously. That won't work. Once you touch an async operation, the rest of the call stack has to be async as well.
You don't include the code that's calling this, but I assume you want it to be something like:
var partyId = createPartyId();
// do stuff...
That's not going to work. Try this:
// Generates a random 5-digit party id that is not yet present in the database.
//
// callback(err, partyId):
//   err     - error reported by getPartyIdCount, if any
//   partyId - the unused id (only meaningful when err is not set)
function createPartyId(callback) {
    var min = 10000, max = 99999;
    var partyId = -1, count = -1;
    async.whilst(
        // Keep looping WHILE the last candidate was already taken. `count`
        // starts at -1 so the first iteration always runs; the original test
        // (count == 0) was inverted and never entered the loop.
        function () { return count !== 0; },
        function (next) {
            // Draw a fresh candidate on every iteration (the hard-coded
            // debug id 88888 that overrode it has been removed).
            partyId = min + Math.floor(Math.random() * (max - min + 1));
            getPartyIdCount(partyId, function (err, num) {
                if (!err) {
                    count = num;
                }
                next(err);
            });
        },
        function (err) {
            // this is called when the loop ends, error or not
            if (err) {
                return callback(err);
            }
            // Return the id itself; `count` is always 0 on success.
            callback(null, partyId);
        }
    );
}
// Counts the records whose `id` equals partyId.
//
// callback(err, count) follows the Node.js error-first convention and is
// invoked exactly once:
//   err   - the database error, or null on success
//   count - number of matching records (omitted on error)
function getPartyIdCount(partyId, callback) {
    db.db_name.find({id: partyId}, function(err, records) {
        if(err) {
            console.log("There was an error executing the database query.");
            // Return so we neither read `records` nor call back twice.
            return callback(err);
        }
        callback(null, records.length);
    });
}
(I've also adopted the default node.js convention of always returning errors as the first argument to callback functions.)
So, to use this you would do:
// Call the function by the name it is defined under (createPartyId); the
// result arrives in the callback, error first.
createPartyId(function (err, num) {
    if (err) { return aughItFellOver(err); }
    // do stuff
});

accessing an array outside the function in node js

I know node.js run asynchronously, so outer functions execute earlier than the inner. But what is the way to access the notification array outside the for loop? I would like to access all the values in array at once, is this feasible?
// Question version: starts six getNotification calls and stores each result
// in `notification` from inside its callback.
var notification=[];
// `j` is declared with var, so all six callbacks share the same variable.
for(var j=0;j<6; j++)
{
getNotification(response[j].sender_id,function(results) // a function called
{
notification[j] =results;
console.log(notification); // output: correct
});
}
// Runs right after the loop has started the calls, before their callbacks.
console.log(notification); // output: [], need notification array values here
EDIT: If you don't want to use third party libs, this is how to do this in your own code.
/* jshint node:true*/
// Fetches the notification for every entry of `responses`, one request at a
// time, and passes the collected data to callbackToMainProgramLogic(results).
//
// responses - array of objects with a sender_id; it is NOT modified
// results   - collected starting from the LAST response (the order the
//             original pop()-based version produced)
// An empty `responses` array yields an immediate callback with [].
function getNotifications(responses, callbackToMainProgramLogic) {
    'use strict';
    var results = [];
    // Work on a private copy so the caller's array is left untouched.
    var pending = responses.slice();
    function getNotificationAsync(response) {
        getNotification(response.sender_id, function (data) {
            results.push(data);
            if (pending.length) {
                getNotificationAsync(pending.pop());//If there are still responses, launch another async getNotification.
            } else {
                callbackToMainProgramLogic(results);//If there aren't, we're done, and we return to main program flow
            }
        });
    }
    if (pending.length === 0) {
        // Nothing to fetch; pop() would return undefined and crash below.
        callbackToMainProgramLogic(results);
        return;
    }
    getNotificationAsync(pending.pop());
}
// Example call: the callback receives the collected data and logs it as
// JSON with 4-space indentation.
getNotifications(someArrayOfResonses, function (dataFromNotifications) {
console.log('The collected data: ' + JSON.stringify(dataFromNotifications, 0, 4));
});
If you absolutely must, you could do something ridiculous like this. Your logic in loopUntilDataReceived would be waiting for array sizes, not waiting for a non-empty string, but the idea is similar, and you shouldn't be using this anyway! :)
// `fileData` starts as an empty string and is replaced once the read finishes.
var fileData = '';
fs.readFile('blah.js', function (err, data) { //Async operation, similar to your issue.
'use strict';
// NOTE(review): err is not checked; `data` is stored as-is.
fileData = data;
console.log('The Data: ' + data);
});
// Polls the outer `fileData` variable once per turn of the event loop until
// it is no longer the empty string, then logs its content.
function loopUntilDataReceived() {
    'use strict';
    // setImmediate (not process.nextTick): callbacks queued with nextTick run
    // before the event loop is allowed to continue, so rescheduling with
    // nextTick would starve I/O and the pending read could never complete.
    // setImmediate yields to the event loop between checks.
    setImmediate(function () {//A straight while loop would block the event loop, so we do this once per loop around the event loop.
        if (fileData === '') {
            console.log('No Data Yet');
            loopUntilDataReceived();
        } else {
            console.log('Finally: ' + fileData);
        }
    });
}
loopUntilDataReceived();
Did I mention this is ridiculous? Honestly, this is an awful idea, but it may help you understand what is going on and how the Node event loop works, and why what you want is not possible. AND why the other posts about callbacks, and flow control libraries are the way to go.
First off, you're having a closure issue in your code (please see the details here)
Then, you simply can't have the array values just next to the loop, because the values are not ready at this point.
You need to wait until all 6 of your getNotification calls get resolved. You can do that with the async library. Something like:
// Collects the notifications for the first six responses in parallel.
var notification = [];

// Builds one task for async.parallel. The index is captured per task, so
// each result lands in its own slot of `notification`.
function createRequest (index) {
    return function (callback) {
        getNotification(response[index].sender_id, function(results) {
            notification[index] = results;
            // Error-first convention: the first argument is the error slot.
            // Passing the result there would make async.parallel treat it
            // as a failure.
            callback(null, results);
        });
    }
}

var requests = [];
for(var j=0;j<6; j++) {
    requests.push(createRequest(j));
}

async.parallel(requests, function (err, allResults) {
    if (err) {
        console.log(err);
        return;
    }
    // notification array is ready at this point
    // the data should also be available in the allResults array
    console.log(notification);
});
Send a callback to the notification loop like this:
// The array is filled by getNotificationArray (declared below; function
// declarations are hoisted) and logged from the callback passed to it.
var notification=[];
getNotificationArray( function() {
console.log(notification);
});
// Requests the notification for each of the first six entries of `response`,
// stores every result in the outer `notification` array at the matching
// index, and invokes `callback` once after ALL six results have arrived.
function getNotificationArray (callback)
{
    var total = 6;
    var remaining = total;

    // One helper call per index, so each callback sees its own `index`
    // instead of the shared loop variable.
    function request(index)
    {
        getNotification(response[index].sender_id, function(results) // a function called
        {
            notification[index] = results;
            console.log(notification); // output: correct
            remaining -= 1;
            if (remaining === 0) {
                // Last outstanding request finished - the array is complete.
                callback();
            }
        });
    }

    for (var j = 0; j < total; j++)
    {
        request(j);
    }
}

NodeJS async queue too fast (Slowing down async queue method)

I have an HTTP Get request and I want to parse the response and save it to my database.
If I call crawl(i) independently I get good results. But I have to call crawl() from 1 to 2000.
I get good results but some responses seem to get lost and some responses are duplicates. I don't think I understand how to call thousands of asynchronous functions. I am using the async module queue function but so far I am still missing some data and still have some duplicates. What am I doing wrong here? Thanks for your help.
What i am crawling
My node functions :
// Builds the request options for the RSS feed whose id is `i`.
function getOptions(i) {
    var feedPath = '/syndicate/rss/index.php?id=' + i + '&unit=uk';
    var options = {
        host: 'magicseaweed.com',
        path: feedPath,
        method: 'GET'
    };
    return options;
}
// Fetches the feed for id `i` and passes the COMPLETE response body to
// parseLocation.
//
// i        - id handed to getOptions
// callback - optional; called once as callback(err) when the request has
//            finished (err is null on success). Existing callers that pass
//            no callback keep working.
function crawl(i, callback){
    var finished = false;
    // Guarantees the completion callback fires at most once, even if both
    // an error and an end event are reported.
    function done(err) {
        if (finished) { return; }
        finished = true;
        if (typeof callback === 'function') {
            callback(err || null);
        }
    }
    var req = http.request(getOptions(i), function(res) {
        var chunks = [];
        // 'data' may fire several times with partial content; parsing each
        // chunk on its own yields missing or duplicated results, so collect
        // the chunks and parse once the response has ended.
        res.on('data', function (chunk) {
            chunks.push(Buffer.from(chunk));
        });
        res.on('end', function () {
            parseLocation(Buffer.concat(chunks).toString('utf8'));
            done(null);
        });
        res.on('error', done);
    });
    req.on('error', done);
    req.end();
}
// Parses a feed body and, when it contains an `rss` element, saves a Location
// built from the channel title and the first item's link.
function parseLocation(body){
    parser.parseString(body, function onParsed(err, result) {
        // Nothing to store when parsing produced no rss document.
        if (!result || typeof result.rss == 'undefined') {
            return;
        }
        var title = result.rss.channel[0].title;
        var link = result.rss.channel[0].item[0].link[0];
        // The id is the third '/'-separated segment of the link.
        var fields = {
            id: link.split('/')[2],
            name: title
        };
        var record = new Location(fields);
        record.save();
    });
}
// Question version of the queue set-up.
// NOTE(review): N is assigned without a declaration.
N = 2 //# of simultaneous tasks
var q = async.queue(function (task, callback) {
// crawl() is given the task's full URL, while crawl/getOptions use their
// argument as the feed id when building the request path.
crawl(task.url);
// Called right after crawl() has been started - crawl takes no callback, so
// the queue is not told when the request actually finishes.
callback();
}, N);
// Assigned as the queue's drain handler; logs a message.
q.drain = function() {
console.log('Crawling done.');
}
// Enqueue one task per id from 0 to 99.
for(var i = 0; i < 100; i++){
q.push({url: 'http://magicseaweed.com/syndicate/rss/index.php?id='+i+'&unit=uk'});
}
[EDIT] WELL, after a lot of testing it seems that the service I am crawling cannot handle so many request that fast. Because when I do each requests sequentially, I can get all the good responses.
Is there a way to SLOW DOWN ASYNC queue method?
You should have a look at this great module, async which simplifies async tasks like this. You can use queue, simple example:
// Number of simultaneous tasks. 5 is a placeholder: tune it for the target
// site and connection (the text below suggests values such as 5, 10 or 20).
var N = 5;

// Worker: performs one request and frees its slot by invoking `callback`
// only after the request has finished.
var q = async.queue(function (task, callback) {
    somehttprequestfunction(task.url, function(){
        callback();
    }); // closes somehttprequestfunction( - this parenthesis was missing
}, N);

q.drain = function() {
    console.log('all items have been processed');
}

for(var i = 0; i < 2000; i++){
    q.push({url:"http://somewebsite.com/"+i+"/feed/"});
}
It will have a window of ongoing actions, and a slot becomes available for a future task only when you invoke the callback function. The difference is that your code now opens 2000 connections immediately, and obviously the failure rate is high. Limiting it to a reasonable value, 5, 10, 20 (depends on site and connection) will result in a better success rate. If a request fails, you can always try it again, or push the task to another async queue for another trial. The key point is to invoke callback() in the queue function, so that a slot becomes available when the task is done.
var q = async.queue(function (task, callback) {
crawl(task.url);
callback();
}, N);
You are starting the next task immediately after starting the previous one, without waiting for it to finish, so the queue is meaningless. You should modify your code like this:
// first, modify your 'crawl' function to take a callback argument, and call this callback after the job is done.
// then
// Worker for the queue: the task is reported as finished (via `next`) only
// from inside the callback that is handed to crawl.
var q = async.queue(function (task, next/* naming this argument 'next' is more meaningful */) {
crawl(task.url, function () {
// This one is done; let the queue start the next one.
next();
});
// Or, more simply: crawl(task.url, next);
}, N);
Another option if you want. Vanilla JS without fancy libraries.
// Timer-based throttle: the function below runs every 500 ms; the counter
// stops the timer once it reaches 100.
var incrementer = 0;
var resultsArray = [];
var myInterval = setInterval(function() {
incrementer++
if(incrementer == 100){
clearInterval(myInterval)
//when done parse results array
}
// Note: the lines below still run on the tick that clears the interval.
//make request here
//push request result to array here
}, 500);
Invokes the function every half second. Easy way to force sync and exit after x requests.
I know I am a little late to the question, however here is a solution I wrote to slow down the number of requests when testing an api endpoint, using node 4 or node 5:
var fs = require('fs');
var supertest = require('supertest');
var request = supertest("http://sometesturl.com/api/test/v1/")
var Helper = require('./check.helper');
var basicAuth = Helper.basicAuth;
var options = Helper.options;
fs.readFile('test.txt', function(err, data){
var parsedItems = JSON.parse(data);
var urlparts = []
// create a queue
for (let year of range(1975, 2016)) {
for (var make in parsedItems[year]){
console.log(year, make, '/models/' + year + '/' + make)
urlparts.push({urlpart:'/models/' + year + '/' + make, year: year, make: make})
}
}
// start dequeue
waitDequeue();
// This function calls itself after the makeRequest promise completes
function waitDequeue(){
    // Takes the next entry off `urlparts`; when none is left the collected
    // data is written out.
    var item = urlparts.pop()
    if (!item){
        write(parsedItems)
        return
    }
    makeRequest(item)
    .catch(function(err){
        // A failed request must not stop the whole run (or surface as an
        // unhandled rejection): log it and carry on with the next item.
        console.error('Request failed for ' + item.urlpart + ':', err)
    })
    .then(function(){
        // wait this time before next dequeue
        setTimeout(function() {
            waitDequeue();
        }, 3000);
    })
}
// make a request, mutate parsedItems then resolve
// Requests item.urlpart and records every model of the response body under
// parsedItems[item.year][item.make].
// Returns a Promise that resolves (without a value) once parsedItems has been
// updated and rejects with the request error when the request fails.
function makeRequest(item){
    return new Promise((resolve, reject)=>{
        request
        .get(item.urlpart)
        .set(options.auth[0], options.auth[1])
        .set(options.type[0], options.type[1])
        .end(function(err, res) {
            // Settle the promise on failure; the original called an
            // undefined done1() here and left the promise pending.
            if (err) return reject(err);
            console.log(res.body)
            res.body.forEach(function(model){
                parsedItems[item.year][item.make][model] = {}
            });
            resolve()
        })
    })
}
// write the results back to the file
// Serialises parsedItems as JSON with 4-space indentation and writes it to
// 'test.txt'.
function write(parsedItems){
fs.writeFile('test.txt', JSON.stringify(parsedItems, null, 4), function(err){
// Logs the callback's err argument unconditionally, whatever its value.
console.log(err)
})
}
})
A little late but I have found this works!
Using async you can slow down the queue by using whilst inside the task handler eg:
// Priority queue (concurrency 5) whose worker waits before reporting a task
// as finished, which slows the queue down.
var q = async.priorityQueue(function(task, callback) {
    // your code process here for each task
    //when ready to complete the task delay it by calling

    // Per-task counter. It has to be declared and reset for every task:
    // a shared, never-reset counter would delay only the very first task.
    var count = 0;
    async.whilst( // waits about 10 seconds: 10 iterations of 1000 ms each
        function() {
            return count < 10;
        },
        function(next) {
            count++;
            setTimeout(function() {
                next(null, count);
            }, 1000);
        },
        function (err, n) {
            // n seconds have passed
            callback(err); //callback to q handler
        }
    ); //whilst
} , 5);

Resources