Mongoose callback not working using Forever - node.js

I am trying to run a background process with Forever that adds data from an external service into my mongodb database every hour (I am new with node and had no idea about how to do this). I am using Node with Express and running the forever task using
forever -o out.log -e err.log start background/collector.js
so I have some feedback from the process. The code is the following:
var request = require('request');
var mongoose = require('mongoose');
var Model = require('../models/Model.js');
// Starting the collector process
addNewData();
function addNewData() {
request('external_service_url', function (error, response, body) {
if (!error && response.statusCode == 200) {
var models = JSON.parse(body);
console.log('Adding the new models...')
for(var i = 0; i < models.length; i++)
{
console.log(i);
Model.create(models[i], function (error, post, result) {
console.log('Test');
if (error) console.log('Something went wrong adding the document!');
if ( ! result.updatedExisting)
{
console.log('A new document has been added!');
}
});
}
console.log('Models added: ' + models.length);
// Every hour we will collect new data
console.log('Waiting for an hour...');
setTimeout(addNewData, 36000000);
}
})
}
I don't know why but seems that the .create() callback is not working, the console.logs placed inside the create callback are not showing anything in the logs. Forever does not work with mongoose? Do you have some idea?

Well here's the thing. You never "connected" your mongoose adapter to the database. Simply fixed by adding:
mongoose.connect('mongodb://myhost/mydatabase');
Somewhere near the top before you attempt to do anything else. It won't necessarily "connect" in that order, but the mongoose methods will work it out. "Asynchronous Programming", read on for the rest.
Also you should not be looping like this since you are not waiting for the "callback" ( it's not a closure it's a callback ) to complete or otherwise "throttle" the connection, which can lead to problems.
Use something like "async" instead to handle this. In this example the .eachLimit() in order to limit the number of parallel operations to a reasonabe level:
async.eachLimit(models,10,function(el,callback) {
Model.create(models[i], function (error, result) {
// do something maybe but call
callback(err); // to signal it is done
});
},function(err) {
// comes here on completion or error
});
Remember that "asychronous" code needs to execute on "callbacks" or "promises" in order to signal when things are done. Unline in "scynchronous" programming where each next line only executes when the previous lines of code are complete.
Your "for" loop otherwise just "sends everything" to the server and does not wait for completion before continuing execution.
But the main problem here was the connection. But you still should fix the rest as shown.

Related

Websocket - Waiting for a http request callback to execute before next pusher event

So I'm working with websockets to process data from website's API. For every new event I also send some http requests back to the website in order to obtain more data. Up untill now everything has worked fine, but now that I started using async requests to speed it up a bit things got a bit different. My code used to process one event and then move on to the next one (these events come in extremely quick - around 10 per second) but now it just seems to ignore the async (non blocking) part and move on to the next event and that way it just skips over half of the code. Note that the code works fine outside the Pusher. I'm using the 'pusher-client' module. My code looks like this:
var Request = require("request");
var requestSync = require('sync-request');
var Pusher = require('pusher-client');
var events_channel = pusher.subscribe('inventory_changes');
events_channel1.bind('listed', function(data)
{
var var2;
//Async request (to speed up the code)
function myFunction(callback){
request("url", function(error, response, body) {
if (!error && response.statusCode == 200)
{
result = JSON.stringify(JSON.parse(body));
return callback(null, result);
}
else
{
return callback(error, null);
}
});
}
myFunction(function(err, data){
if(!err)
{
var2 = data
return(data);
}
else
{
return(err);
}
});
//The part of the code below waits for the callback and the executes some code
var var1 = var2;
check();
function check()
{
if(var2 === var1)
{
setTimeout(check, 10);
return;
}
var1 = var2;
//A CHUNK OF CODE EXECUTES HERE (connected to the data from the callback)
}
});
In conclusion the code works, but not inside the pusher due to the pusher skipping the asynchronous request. How would I make the pusher wait for my async request to finish, before processing the next event (I have no idea)? If you happen to know, please let me know :)
You need to implement a queue to handle events one after another. I'm curious how it worked before, even without Pusher you'd have to implement some queue mechanism for it.
const eventsQueue = []
events_channel1.bind('listed', function(data) {
eventsQueue.push(data)
handleNewEvent()
})
let processingEvent = false
function handleNewEvent() {
if (processingEvent) return // do nothing if already processing an event
processingEvent = true
const eventData = eventsQueue.shift() // pick the first element from array
if (!eventData) return // all events are handled at the moment
... // handle event data here
processingEvent = false
handleNewEvent() // handle next event
}
Also, you should call clearTimeout method to clear your timeout when you don;t need it anymore.
And it's better to use promises or async/await instead of callbacks. Your code will be much easier to read and maintain.

Express.js - while loop before sending response

I'm trying to implement and existing solution in node.js, specifically, using express.js framework. Now, the existing solution works as follows:
server exposes a GET service that clients can connect to
when a client calls the GET service, the client number increments (a global variable) and then the number of clients is checked;
if there are not at least 3 clients connected, the service is in endless loop, waiting for other clients to connect
if (or rather, when) the rest of the two clients connect, the service sends respond to everyone that enough clients are connected (a 'true' value).
So what basically happens is, the client connects and the connection is active (in a loop) until enough clients connect, then and only then there is a response (to all clients at the same time).
Now I'm not expert in these architectures, but from what I think, this is not a correct or good solution. My initial thought was: this must be solved with sockets. However, since the existing solution works like that (it's not written in node.js), I tried to emulate such behaviour:
var number = (function(){
var count = 0;
return {
increase: function() {
count++;
},
get: function(){
return count;
}
};
})();
app.get('/test', function(req, res){
number.increase();
while (number.get() < 3) {
//hold it here, until enough clients connect
}
res.json(number.get());
});
Now while I think that this is not a correct solution, I have a couple of questions:
Is there any alternative to solving this issue, besides using sockets?
Why does this "logic" work in C#, but not in express.js? The code above hangs, no other request is processed.
I know node.js is single-threaded, but what if we have a more conventional service that responds immediately, and there are 20 requests all at the same time?
I would probably use an event emitter for this:
var EventEmitter = require('events').EventEmitter;
var emitter = new EventEmitter();
app.get('/', function(req, res) {
// Increase the number
number.increase();
// Get the current value
var current = number.get();
// If it's less than 3, wait for the event emitter to trigger.
if (current < 3) {
return emitter.once('got3', function() {
return res.json(number.get());
});
}
// If it's exactly 3, emit the event so we wake up other listeners.
if (current === 3) {
emitter.emit('got3');
}
// Fall through.
return res.json(current);
});
I would like to stress that #Plato is correct in stating that browsers may timeout when a response takes too much time to complete.
EDIT: as an aside, some explanation on the return emitter.once(...).
The code above can be rewritten like so:
if (current < 3) {
emitter.once('got3', function() {
res.json(number.get());
});
} else if (current === 3) {
emitter.emit('got3');
res.json(number.get());
} else {
res.json(number.get());
}
But instead of using those if/else statements, I return from the request handler after creating the event listener. Since request handlers are asynchronous, their return value is discarded, so you can return anything (or nothing). As an alternative, I could also have used this:
if (current < 3) {
emitter.once(...);
return;
}
if (current === 3) {
...etc...
Also, even though you return from the request handler function, the event listener is still referencing the res variable, so the request handler scope is maintained by Node until res.json() in the event listener callback is called.
Your http approach should work
You are blocking the event loop so node refuses to do any other work while it is in the while loop
You're really close, you just need to check every now and then instead of constantly. I do this below with process.nextTick() but setTimeout() would also work:
var number = (function(){
var count = 0;
return {
increase: function() {
count++;
},
get: function(){
return count;
}
};
})();
function waitFor3(callback){
var n = number.get();
if(n < 3){
setImmediate(function(){
waitFor3(callback)
})
} else {
callback(n)
}
}
function bump(){
number.increase();
console.log('waiting');
waitFor3(function(){
console.log('done');
})
}
setInterval(bump, 2000);
/*
app.get('/test', function(req, res){
number.increase();
waitFor3(function(){
res.json(number.get());
})
});
*/

What's going on with Meteor and Fibers/bindEnvironment()?

I am having difficulty using Fibers/Meteor.bindEnvironment(). I tried to have code updating and inserting to a collection if the collection starts empty. This is all supposed to be running server-side on startup.
function insertRecords() {
console.log("inserting...");
var client = Knox.createClient({
key: apikey,
secret: secret,
bucket: 'profile-testing'
});
console.log("created client");
client.list({ prefix: 'projects' }, function(err, data) {
if (err) {
console.log("Error in insertRecords");
}
for (var i = 0; i < data.Contents.length; i++) {
console.log(data.Contents[i].Key);
if (data.Contents[i].Key.split('/').pop() == "") {
Projects.insert({ name: data.Contents[i].Key, contents: [] });
} else if (data.Contents[i].Key.split('.').pop() == "jpg") {
Projects.update( { name: data.Contents[i].Key.substr(0,
data.Contents[i].Key.lastIndexOf('.')) },
{ $push: {contents: data.Contents[i].Key}} );
} else {
console.log(data.Contents[i].Key.split('.').pop());
}
}
});
}
if (Meteor.isServer) {
Meteor.startup(function () {
if (Projects.find().count() === 0) {
boundInsert = Meteor.bindEnvironment(insertRecords, function(err) {
if (err) {
console.log("error binding?");
console.log(err);
}
});
boundInsert();
}
});
}
My first time writing this, I got errors that I needed to wrap my callbacks in a Fiber() block, then on discussion on IRC someone recommending trying Meteor.bindEnvironment() instead, since that should be putting it in a Fiber. That didn't work (the only output I saw was inserting..., meaning that bindEnvironment() didn't throw an error, but it also doesn't run any of the code inside of the block). Then I got to this. My error now is: Error: Meteor code must always run within a Fiber. Try wrapping callbacks that you pass to non-Meteor libraries with Meteor.bindEnvironment.
I am new to Node and don't completely understand the concept of Fibers. My understanding is that they're analogous to threads in C/C++/every language with threading, but I don't understand what the implications extending to my server-side code are/why my code is throwing an error when trying to insert to a collection. Can anyone explain this to me?
Thank you.
You're using bindEnvironment slightly incorrectly. Because where its being used is already in a fiber and the callback that comes off the Knox client isn't in a fiber anymore.
There are two use cases of bindEnvironment (that i can think of, there could be more!):
You have a global variable that has to be altered but you don't want it to affect other user's sessions
You are managing a callback using a third party api/npm module (which looks to be the case)
Meteor.bindEnvironment creates a new Fiber and copies the current Fiber's variables and environment to the new Fiber. The point you need this is when you use your nom module's method callback.
Luckily there is an alternative that takes care of the callback waiting for you and binds the callback in a fiber called Meteor.wrapAsync.
So you could do this:
Your startup function already has a fiber and no callback so you don't need bindEnvironment here.
Meteor.startup(function () {
if (Projects.find().count() === 0) {
insertRecords();
}
});
And your insert records function (using wrapAsync) so you don't need a callback
function insertRecords() {
console.log("inserting...");
var client = Knox.createClient({
key: apikey,
secret: secret,
bucket: 'profile-testing'
});
client.listSync = Meteor.wrapAsync(client.list.bind(client));
console.log("created client");
try {
var data = client.listSync({ prefix: 'projects' });
}
catch(e) {
console.log(e);
}
if(!data) return;
for (var i = 1; i < data.Contents.length; i++) {
console.log(data.Contents[i].Key);
if (data.Contents[i].Key.split('/').pop() == "") {
Projects.insert({ name: data.Contents[i].Key, contents: [] });
} else if (data.Contents[i].Key.split('.').pop() == "jpg") {
Projects.update( { name: data.Contents[i].Key.substr(0,
data.Contents[i].Key.lastIndexOf('.')) },
{ $push: {contents: data.Contents[i].Key}} );
} else {
console.log(data.Contents[i].Key.split('.').pop());
}
}
});
A couple of things to keep in mind. Fibers aren't like threads. There is only a single thread in NodeJS.
Fibers are more like events that can run at the same time but without blocking each other if there is a waiting type scenario (e.g downloading a file from the internet).
So you can have synchronous code and not block the other user's events. They take turns to run but still run in a single thread. So this is how Meteor has synchronous code on the server side, that can wait for stuff, yet other user's won't be blocked by this and can do stuff because their code runs in a different fiber.
Chris Mather has a couple of good articles on this on http://eventedmind.com
What does Meteor.wrapAsync do?
Meteor.wrapAsync takes in the method you give it as the first parameter and runs it in the current fiber.
It also attaches a callback to it (it assumes the method takes a last param that has a callback where the first param is an error and the second the result such as function(err,result).
The callback is bound with Meteor.bindEnvironment and blocks the current Fiber until the callback is fired. As soon as the callback fires it returns the result or throws the err.
So it's very handy for converting asynchronous code into synchronous code since you can use the result of the method on the next line instead of using a callback and nesting deeper functions. It also takes care of the bindEnvironment for you so you don't have to worry about losing your fiber's scope.
Update Meteor._wrapAsync is now Meteor.wrapAsync and documented.

Asynchronous Database Queries with PostgreSQL in Node not working

Using Node.js and the node-postgres module to communicate with a database, I'm attempting to write a function that accepts an array of queries and callbacks and executes them all asynchronously using the same database connection. The function accepts a two-dimensional array and calling it looks like this:
perform_queries_async([
['SELECT COUNT(id) as count FROM ideas', function(result) {
console.log("FUNCTION 1");
}],
["INSERT INTO ideas (name) VALUES ('test')", function(result) {
console.log("FUNCTION 2");
}]
]);
And the function iterates over the array, creating a query for each sub-array, like so:
function perform_queries_async(queries) {
var client = new pg.Client(process.env.DATABASE_URL);
for(var i=0; i<queries.length; i++) {
var q = queries[i];
client.query(q[0], function(err, result) {
if(err) {
console.log(err);
} else {
q[1](result);
}
});
}
client.on('drain', function() {
console.log("drained");
client.end();
});
client.connect();
}
When I ran the above code, I expected to see output like this:
FUNCTION 1
FUNCTION 2
drained
However, the output bizarrely appears like so:
FUNCTION 2
drained
FUNCTION 2
Not only is the second function getting called for both requests, it also seems as though the drain code is getting called before the client's queue of queries is finished running...yet the second query still runs perfectly fine even though the client.end() code ostensibly killed the client once the event is called.
I've been tearing my hair out about this for hours. I tried hardcoding in my sample array (thus removing the for loop), and my code worked as expected, which leads me to believe that there is some problem with my loop that I'm not seeing.
Any ideas on why this might be happening would be greatly appreciated.
The simplest way to properly capture the value of the q variable in a closure in modern JavaScript is to use forEach:
queries.forEach(function(q) {
client.query(q[0], function(err, result) {
if(err) {
console.log(err);
} else {
q[1](result);
}
});
});
If you don't capture the value, your code reflects the last value that q had, as the callback function executed later, in the context of the containing function.
forEach, by using a callback function isolates and captures the value of q so it can be properly evaluated by the inner callback.
A victim of the famous Javascript closure/loop gotcha. See my (and other) answers here:
I am trying to open 10 websocket connections with nodejs, but somehow my loop doesnt work
Basically, at the time your callback is executed, q is set to the last element of the input array. The way around it is to dynamically generate the closure.
It will be good to execute this using async module . It will help you to reuse the code also . and will make the code more readable . I just love the auto function provided by async module
Ref: https://github.com/caolan/async

How to wait for all async calls to finish

I'm using Mongoose with Node.js and have the following code that will call the callback after all the save() calls has finished. However, I feel that this is a very dirty way of doing it and would like to see the proper way to get this done.
function setup(callback) {
// Clear the DB and load fixtures
Account.remove({}, addFixtureData);
function addFixtureData() {
// Load the fixtures
fs.readFile('./fixtures/account.json', 'utf8', function(err, data) {
if (err) { throw err; }
var jsonData = JSON.parse(data);
var count = 0;
jsonData.forEach(function(json) {
count++;
var account = new Account(json);
account.save(function(err) {
if (err) { throw err; }
if (--count == 0 && callback) callback();
});
});
});
}
}
You can clean up the code a bit by using a library like async or Step.
Also, I've written a small module that handles loading fixtures for you, so you just do:
var fixtures = require('./mongoose-fixtures');
fixtures.load('./fixtures/account.json', function(err) {
//Fixtures loaded, you're ready to go
};
Github:
https://github.com/powmedia/mongoose-fixtures
It will also load a directory of fixture files, or objects.
I did a talk about common asyncronous patterns (serial and parallel) and ways to solve them:
https://github.com/masylum/i-love-async
I hope its useful.
I've recently created simpler abstraction called wait.for to call async functions in sync mode (based on Fibers). It's at an early stage but works. It is at:
https://github.com/luciotato/waitfor
Using wait.for, you can call any standard nodejs async function, as if it were a sync function, without blocking node's event loop. You can code sequentially when you need it.
using wait.for your code will be:
//in a fiber
function setup(callback) {
// Clear the DB and load fixtures
wait.for(Account.remove,{});
// Load the fixtures
var data = wait.for(fs.readFile,'./fixtures/account.json', 'utf8');
var jsonData = JSON.parse(data);
jsonData.forEach(function(json) {
var account = new Account(json);
wait.forMethod(account,'save');
}
callback();
}
That's actually the proper way of doing it, more or less. What you're doing there is a parallel loop. You can abstract it into it's own "async parallel foreach" function if you want (and many do), but that's really the only way of doing a parallel loop.
Depending on what you intended, one thing that could be done differently is the error handling. Because you're throwing, if there's a single error, that callback will never get executed (count won't be decremented). So it might be better to do:
account.save(function(err) {
if (err) return callback(err);
if (!--count) callback();
});
And handle the error in the callback. It's better node-convention-wise.
I would also change another thing to save you the trouble of incrementing count on every iteration:
var jsonData = JSON.parse(data)
, count = jsonData.length;
jsonData.forEach(function(json) {
var account = new Account(json);
account.save(function(err) {
if (err) return callback(err);
if (!--count) callback();
});
});
If you are already using underscore.js anywhere in your project, you can leverage the after method. You need to know how many async calls will be out there in advance, but aside from that it's a pretty elegant solution.

Resources