I am using async queue to process a huge amount of data. The queue works great until I try to perform an update in the database with MongoDB findOneAndUpdate method.
I first establish the queue, and start pushing objects to it from a Node Stream:
//Create queue to process all items
let q = async.queue(processLink, 2);
// Create Node Stream
let createStream = function () {
let stream = fs.createReadStream(LinkData, {encoding: 'utf8'});
let parser = JSONStream.parse('RECDATA.*');
return stream.pipe(parser);
};
//Listen to 'data' event on stream and add object to queue
createStream().on('data', function(link){
q.push(link)
});
Here is my task function, 'processLink'. It is here, that I am having trouble tracking down the problem. Whenever the findOneAndUpdate callback is fired, it enters one of the conditional blocks, and I am getting the message logged out to the console, but when I call the async callback processComplete(), the task does not finish as expected.
As the title suggests, Why are my async callbacks not completing each task?
function processLink(link, processComplete){
if(_.includes(link.URL, 'www.usda.gov') && _.includes(link.URL, '?recid=')){
let url_items = _.split(link.URL, '=',2);
let facilityOrgID = url_items[1];
let update = {$push: {"links": link}};
if(_.isNumber(parseInt(facilityOrgID)) && facilityOrgID.length > 4 ){
Facility.findOneAndUpdate({facilityOrgID: parseInt(facilityOrgID)}, update, (err, result) => {
if(err !== null){
console.log("Error:",err);
return processComplete(err); /** NOT FIRING **/
} else if(err === null && result !== null){
console.log("Link added to:", result.name);
return processComplete(); /** NOT FIRING **/
}else if(err === null && result === null){
console.log('Facility not in database');
processComplete(); /** NOT FIRING **/
}else{
console.log('Something has gone terrible wrong');
}
});
}else{
console.log("Invalid facilityID");
return processComplete();
}
}else{
console.log('Link Discarded:', link.URL);
processComplete(); /** Fires normally **/
}
}
I solved my problem this morning. The callbacks were fine, but rather there was a conditional state of the data I was not aware of, thus it was leading to a state in which the code would never call the processComplete() callback.
If anyone else is finding themselves in a similar bind, and you have tons of data, I would ensure that your data is being reported accurately and there are not some edge cases that were not being processed as expected.
Related
So I'm working with websockets to process data from website's API. For every new event I also send some http requests back to the website in order to obtain more data. Up untill now everything has worked fine, but now that I started using async requests to speed it up a bit things got a bit different. My code used to process one event and then move on to the next one (these events come in extremely quick - around 10 per second) but now it just seems to ignore the async (non blocking) part and move on to the next event and that way it just skips over half of the code. Note that the code works fine outside the Pusher. I'm using the 'pusher-client' module. My code looks like this:
var Request = require("request");
var requestSync = require('sync-request');
var Pusher = require('pusher-client');
var events_channel = pusher.subscribe('inventory_changes');
events_channel1.bind('listed', function(data)
{
var var2;
//Async request (to speed up the code)
function myFunction(callback){
request("url", function(error, response, body) {
if (!error && response.statusCode == 200)
{
result = JSON.stringify(JSON.parse(body));
return callback(null, result);
}
else
{
return callback(error, null);
}
});
}
myFunction(function(err, data){
if(!err)
{
var2 = data
return(data);
}
else
{
return(err);
}
});
//The part of the code below waits for the callback and the executes some code
var var1 = var2;
check();
function check()
{
if(var2 === var1)
{
setTimeout(check, 10);
return;
}
var1 = var2;
//A CHUNK OF CODE EXECUTES HERE (connected to the data from the callback)
}
});
In conclusion the code works, but not inside the pusher due to the pusher skipping the asynchronous request. How would I make the pusher wait for my async request to finish, before processing the next event (I have no idea)? If you happen to know, please let me know :)
You need to implement a queue to handle events one after another. I'm curious how it worked before, even without Pusher you'd have to implement some queue mechanism for it.
const eventsQueue = []
events_channel1.bind('listed', function(data) {
eventsQueue.push(data)
handleNewEvent()
})
let processingEvent = false
function handleNewEvent() {
if (processingEvent) return // do nothing if already processing an event
processingEvent = true
const eventData = eventsQueue.shift() // pick the first element from array
if (!eventData) return // all events are handled at the moment
... // handle event data here
processingEvent = false
handleNewEvent() // handle next event
}
Also, you should call clearTimeout method to clear your timeout when you don;t need it anymore.
And it's better to use promises or async/await instead of callbacks. Your code will be much easier to read and maintain.
I have the following piece of code where I iterate through a collection and do another db query and construct an object within its callback. Finally I save that object to another collection.
I wish to call another function after all items have been saved, but can't figure out how. I tried using the async library, specifically async whilst when item is not null, but that just throws me in an infinite loop.
Is there a way to identify when all items have been saved?
Thanks!
var cursor = db.collection('user_apps').find({}, {timeout:false});
cursor.each(function (err, item) {
if (err) {
throw err;
}
if (item) {
var appList = item.appList;
var uuid= item.uuid;
db.collection('app_categories').find({schema_name:{$in: appList}}).toArray(function (err, result) {
if (err) throw err;
var catCount = _.countBy(result, function (obj) {
return obj.category;
})
catObj['_id'] = uuid;
catObj['total_app_num'] = result.length;
catObj['app_breakdown'] = catCount;
db.collection('audiences').insert(catObj, function (err) {
if (err) console.log(err);
});
});
}
else {
// do something here after all items have been saved
}
});
The key here is to use something that is going to respect the callback signal when performing the "loop" operation. The .each() as implemented here will not do that, so you need an "async" loop control that will signify that each loop has iterated and completed, with it's own callback within the callback.
Provided your underlying MongoDB driver is at least version 2, then there is a .forEach() which has a callback which is called when the loop is complete. This is better than .each(), but it does not solve the problem of knowing when the inner "async" .insert() operations have been completed.
So a better approach is to use the stream interface returned by .find(), where this is more flow control allowed. There is a .stream() method for backwards compatibility, but modern drivers will just return the interface by default:
var stream = db.collection('user_apps').find({});
stream.on("err",function(err){
throw(err);
});
stream.on("data",function(item) {
stream.pause(); // pause processing of stream
var appList = item.appList;
var uuid= item.uuid;
db.collection('app_categories').find({schema_name:{ "$in": appList}}).toArray(function (err, result) {
if (err) throw err;
var catCount = _.countBy(result, function (obj) {
return obj.category;
})
var catObj = {}; // always re-init
catObj['_id'] = uuid;
catObj['total_app_num'] = result.length;
catObj['app_breakdown'] = catCount;
db.collection('audiences').insert(catObj, function (err) {
if (err) console.log(err);
stream.resume(); // resume stream processing
});
});
});
stream.on("end",function(){
// stream complete and processing done
});
The .pause() method on the stream stops further events being emitted so that each object result is processed one at a time. When the callback from the .insert() is called, then the .resume() method is called, signifying that processing is complete for that item and a new call can be made to process the next item.
When the stream is complete, then everything is done so the "end" event hook is called to continue your code.
That way, both each loop is signified with an end to move to the next iteration as well as there being a defined "end" event for the complete end of processing. As the control is "inside" the .insert() callback, then those operations are respected for completion as well.
As a side note, you might consider including your "category" information in the source collection, as it seems likely your results can be more efficiently returned using .aggregate() if all required data were in a single collection.
I have a problem in a nodeJS app with mongoDB, i'm trying to do a forum and for each topic i want a button to display every sub topics.
So i need to get everything in the request:
One array with main topics
Another map array with ['topic1'] containing sub topics
Without the mapping (not an actual problem) i have this:
Post.find({'path': path})
.exec(function (err, posts){
if(err)
console.log("Get post list:" + err);
else
{
var sub_posts = new Array; // Second array with sub topics
for (var i = 0; posts[i]; i++) //Here is the loop for each topic
{
var tmp_path = ... // Path and next request works
Post.find({'path': tmp_path}) // Here is the second request
.exec(function(err, bis_posts) {
if (err) console.log('Error loading subforum');
else sub_posts.push(bis_posts); // Affectation fail !!!
})
}
res.render(... 'post_list': posts, 'sub_posts': sub_posts); // Send result
}
})}
So i get it's a scope problem and i should use callback but with the loop i can't resolve this problem.
Sorry for my english and thanks for your answers !
I have no idea what you mean by "affectation fail", but it looks like you're calling res.render too early — callbacks are invoked asynchronously after your current context finishes executing, so when you call res.render(...) after your for loop has finished, your Post.find(...)... operations still haven't finished and their callbacks haven't been invoked, so sub_posts will be still empty.
My node.js and Mongo are rusty, so perhaps this isn't the canonical way to do it, but I'd add a counter to track the state of the pending requests and only call res.render when all subposts have been fetched:
var sub_posts = new Array;
var pending = 0;
for (var i = 0; posts[i]; i++)
{
var tmp_path = ...
Post.find({'path': tmp_path})
.exec(function(err, bis_posts) {
if (err) console.log('Error loading subforum');
else sub_posts.push(bis_posts);
pending -= 1;
if (!pending) {
// all pending subpost lookups finished, render the response:
res.render(... 'post_list': posts, 'sub_posts': sub_posts);
}
});
pending += 1;
}
I want to stop of executing of my async.queue after first task error was occurred. I need to perform several similar actions in parallel with the concurrency restriction, but stop all the actions after first error. How can I do that or what should I use instead?
Assuming you fired 5 parallel functions, each will take 5 seconds. While in 3rd second, function 1 failed. Then how you can stop the execution of the rest?
It depends of what those functions do, you may poll using setInterval. However if your question is how to stop further tasks to be pushed to the queue. You may do this:
q.push(tasks, function (err) {
if (err && !called) {
//Will prevent async to push more tasks to the queue, however please note that
//whatever pushed to the queue, it will be processed anyway.
q.kill();
//This will not allow double calling for the final callback
called = true;
//This the main process callback, the final callback
main(err, results);
}
});
Here a full working example:
var async = require('async');
/*
This function is the actual work you are trying to do.
Please note for example if you are running child processes
here, by doing q.kill you will not stop the execution
of those processes, so you need actually to keep track the
spawned processed and then kill them when you call q.kill
in 'pushCb' function. In-case of just long running function,
you may poll using setInterval
*/
function worker(task, wcb) {
setTimeout(function workerTimeout() {
if (task === 11 || task === 12 || task === 3) {
return wcb('error in processing ' + task);
}
wcb(null, task + ' got processed');
}, Math.floor(Math.random() * 100));
}
/*
This function that will push the tasks to async.queue,
and then hand them to your worker function
*/
function process(tasks, concurrency, pcb) {
var results = [], called = false;
var q = async.queue(function qWorker(task, qcb) {
worker(task, function wcb(err, data) {
if (err) {
return qcb(err); //Here how we propagate error to qcb
}
results.push(data);
qcb();
});
}, concurrency);
/*
The trick is in this function, note that checking q.tasks.length
does not work q.kill introduced in async 0.7.0, it is just setting
the drain function to null and the tasks length to zero
*/
q.push(tasks, function qcb(err) {
if (err && !called) {
q.kill();
called = true;
pcb(err, results);
}
});
q.drain = function drainCb() {
pcb(null, results);
}
}
var tasks = [];
var concurrency = 10;
for (var i = 1; i <= 20; i += 1) {
tasks.push(i);
}
process(tasks, concurrency, function pcb(err, results) {
console.log(results);
if (err) {
return console.log(err);
}
console.log('done');
});
async documentation on github page is either outdated or incorrect, while inspecting the queue object returned by async.queue() method I do not see the method kill().
Nevertheless there is a way around it. Queue object has property tasks which is an array, simply assigning a reference to an empty array did the trick for me.
queue.push( someTasks, function ( err ) {
if ( err ) queue.tasks = [];
});
I have the following code and am attempting to make a Turntable bot using node.js. this piece of code says when a user types "q+" we have to make sure its not already on the queue, that its not already DJing, and if it meets those 2 requirements, add them to the queue. Otherwise if it doesnt meet one of those first 2 criteria, tell the user and do not touch the queue.
My problem is the "isCurrentDJ(userId)". When I pass a userId through that function, the function gives me the correct answer. However the function ALWAYS passes back "false" even when the answer is "true" and the console.log() function within the isCurrentDJ(userId) function proves so.
I am not the most js-savvy person, so I think this may be a variable scope issue. But I am really not sure and have been struggling with it for hours! Any help would be greatly appreciated. Thanks!
// When someone speaks, listen to see if it is one of the q commands
bot.on('speak', function (data) {
var name = data.name;
var text = data.text;
var userId = data.userid;
// q+ :: Add to Queue
if (text.match(/^q\+$/)) {
//Index is the position in the queue that this person's name is found.
//If its not found, -1 is returned.
var index = queue.indexOf(name);
//Function to determine if the user is currently a DJ
function isCurrentDJ(user_id, callback){
bot.roomInfo(false, function (data) {
var djList = data.room.metadata.djs;
for (i = 0; i < djList.length; i++){
if (djList[i] == user_id){
console.log('recognized as a DJ'); //Consistently printed!
callback(true);
}
}
callback(false);
});
}
isCurrentDJ(userId, function(isDJ) {
//If the user is already in the queue
if(index > -1){
//Tell them they are already in there
bot.speak('You are already on the list');
} else if(isDJ){
//Otherwise if they are already a DJ tell them that
bot.speak('You are already a DJ, '+name);
}else{
//Otherise if they are not in the queue add user to end of queue
queue.push(name);
//Tell them about it and the updated q
bot.speak(name+' has been added to queue.');
}
});
}
Your problem is that bot.roomInfo is an asynchronous function.
When you call it, it immediately returns and currDJ is still false. A little while later, the callback (function(data) {...) is called. Most of node.js's API are async so that your code never blocks.
Here's how you should rewrite your code:
// When someone speaks, listen to see if it is one of the q commands
bot.on('speak', function (data) {
var name = data.name;
var text = data.text;
var userId = data.userid;
// q+ :: Add to Queue
if (text.match(/^q\+$/)) {
//Index is the position in the queue that this person's name is found.
//If its not found, -1 is returned.
var index = queue.indexOf(name);
//Function to determine if the user is currently a DJ
function testCurrentDJ(user_id, cb){
bot.roomInfo(false, function (data) {
var djList = data.room.metadata.djs;
for (i = 0; i < djList.length; i++){
if (djList[i] == user_id){
console.log('recognized as a DJ'); //Consistently printed!
return cb(true);
}
}
cb(false);
});
}
//If the user is already in the queue
if(index > -1){
//Tell them they are already in there
bot.speak('You are already on the list');
return;
}
testCurrentDJ(userId, function(isDJ) {
//Otherwise if they are already a DJ tell them that
if(isDJ) {
bot.speak('You are already a DJ, '+name);
} else {
//Otherise if they are not in the queue add user to end of queue
queue.push(name);
//Tell them about it and the updated q
bot.speak(name+' has been added to queue. Is Current DJ? '+isDJ);
}
})
}
I've just updated your code to show you the basic idea. In node.js's API, the first argument of callbacks is usually an error object that is null if everything went fine.
Is bot.roomInfo perhaps an asynchronous function? If so the value of currDJ will be set to true, but too late because you've already returned it. You can no operate on the value of currDJ untill that callback is called.
How familiar are you with the concept of asynchronous functions?