I am just playing around with Node and Mongoose and I am curious about the following issue:
I am trying to save documents to mongo from within a loop / interval.
The following works just fine:
setInterval(function(){
    var doc = new myModel({ name: 'test' });
    doc.save(function (err, doc){
        if (err) return console.error(err);
        doc.speak();
    });
}, 1);
The following does not work:
while(true){
    var doc = new myModel({ name: 'test' });
    doc.save(function (err, doc){
        if (err) return console.error(err);
        doc.speak();
    });
}
What is the explanation for this behavior? The save callback never fires in scenario 2.
Additionally, can someone comment on best practices for building "long-running workers"? I am interested in using Node to build background workers that process queues of data. Is a while() loop a bad idea? Is setInterval() better? I also plan to use the forever module to keep the process alive.
Thanks!
Node.js is single-threaded, so while(true) fully occupies that single thread and never gives the doc.save callback a chance to run.
The second part of your question is too broad, though, and you should really only ask one question at a time anyway.
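If you want back-to-back saves without blocking, one option is to start the next save from the previous save's callback, so control returns to the event loop between iterations. A minimal sketch, reusing myModel and speak() from the question:

function saveNext() {
    var doc = new myModel({ name: 'test' });
    doc.save(function (err, saved) {
        if (err) return console.error(err);
        saved.speak();
        setImmediate(saveNext); // schedule the next save, yielding to the event loop first
    });
}

saveNext();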
I have a NodeJS app that is supposed to generate a lot of data sets in a synchronous manner (multiple nested for-loops). Those data sets are supposed to be saved to my MongoDB database so I can look them up more efficiently later on.
I use the mongodb driver for NodeJS and have a daemon running. The connection to the DB works fine, and according to the daemon window the first group of data sets is stored successfully. Every ~400-600ms there is another group to store, but after the first data set there is no output in the MongoDB console anymore (not even an error), and as the file sizes don't increase I assume those write operations don't work (I can't wait for it to finish, as it would take multiple days to run fully).
If I restart the NodeJS script it won't even save the first key anymore, possibly because of duplicates? If I delete the db folder content, the first one is saved again.
This is the essential part of my script, and I wasn't able to find anything that I did wrong. I assume the problem is more in the inner logic (weird duplicate checks, not running concurrently, etc.).
var MongoClient = require('mongodb').MongoClient, dbBuffer = [];

MongoClient.connect('mongodb://127.0.0.1/loremipsum', function(err, db) {
    if(err) return console.log("Cant connect to MongoDB");
    var collection = db.collection('ipsum');
    console.log("Connected to DB");

    for(var q=startI;q<endI;q++) {
        for(var w=0;w<words.length;w++) {
            dbBuffer.push({a:a, b:b});
        }
        if(dbBuffer.length) {
            console.log("saving "+dbBuffer.length+" items");
            collection.insert(dbBuffer, {w:1}, function(err, result) {
                if(err) {
                    console.log("Error on db write", err);
                    db.close();
                    process.exit();
                }
            });
        }
        dbBuffer = [];
    }
    db.close();
});
Update
db.close() is never called and the connection doesn't drop.
Changing to a bulk insert doesn't change anything.
The callback for the insert is never called - this could be the problem! The MongoDB console does tell me that the insert process was successful, but it looks like the communication between the driver and MongoDB isn't working properly for insertion.
I "solved" it myself. One misconception I had was that every insert transaction is confirmed in the MongoDB console, while it actually only confirms the first one, or inserts with some time between the commands. To check whether the insert process really works, one needs to run the script for some time and wait for MongoDB to dump the data to the local file (approx. 30-60s).
In addition, the insert operations followed each other too quickly, and MongoDB appears not to handle this correctly under Win10 x64. I changed from the array buffer to the internal buffer (see comments) and only continued with the process after the previous data had been inserted.
This is the simplified resulting code
db.collection('seedlist', function(err, collection) {
    syncLoop(0, 0, collection);
    //...
});

function syncLoop(q, w, collection) {
    batch = collection.initializeUnorderedBulkOp({useLegacyOps: true});
    for(var e=0;e<words.length;e++) {
        batch.insert({a:a, b:b});
    }
    batch.execute(function(err, result) {
        if(err) throw err;
        //...
        return setTimeout(function() {
            syncLoop(qNew, wNew, collection);
        }, 0); // Timer to prevent memory leak
    });
}
I have a method that reads and writes a log file; it is called on every request by every user and writes the request path to the file. The questions are:
1) Is it safe to read and then write a file asynchronously, considering concurrency?
2) If yes, will the code below work correctly under concurrency?
3) If yes for the first question, how should I do it?
Please disregard exceptions and performance; this is didactic code.
var fs = require('fs');
var logFile = '/tmp/logs/log.js';

app.get("/", function(req){
    var entry = {path: req.path, date: new Date().getTime()};
    log(entry);
});

function log(data){
    fs.exists(logFile, function(exists){
        if(exists){
            fs.readFile(logFile, function (err, contents) {
                if (err){
                    throw err;
                }
                var logData = JSON.parse(contents.toString());
                logData.push(data);
                writeLog(logData);
            });
        }else{
            writeLog([data]);
        }
    });
}

function writeLog(base){
    fs.writeFile(logFile, JSON.stringify(base, null, '\t'), function(err) {
        if(err)
            throw err;
    });
}
I strongly suggest that you don't just "log asynchronously", because you want the log to be ordered according to the order in which things happened in your app, and there is no guarantee that will happen if you don't synchronize it somehow.
You can, for instance, use a promise chain to synchronize it:
var fs = require('fs');
var _queue = Promise.resolve();

function log(message){
    _queue = _queue.then(function(){ // chain to the queue
        return new Promise(function(resolve){
            fs.appendFile("/tmp/logs/log.txt", new Date() + message + "\n", function(err){
                if(err) console.log(err); // don't die on log exceptions
                resolve(); // always signal to the queue so it can progress
            });
        });
    });
}
You can now call log and it will queue messages and write them asynchronously at some later time. It will never hold more than a single file descriptor open or exhaust the server either.
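For example, here is how it might be wired into the route from the question (a sketch; the res parameter and the response body are assumptions added so the handler completes):

app.get("/", function(req, res){
    // each call is appended to the promise chain, so entries
    // end up in the file in the order the requests were handled
    log(req.path + " at " + new Date().getTime());
    res.send("logged");
});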
Consider using a logging solution instead of rolling your own logger btw.
In your example you're already using the asynchronous versions of those functions. If you're concerned about the order of your operations, then you should use the synchronous versions of those functions:
readFileSync
writeFileSync
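For instance, a minimal synchronous variant of the logger from the question might look like this (a sketch; it reuses the logFile variable and assumes fs.existsSync is acceptable here):

var fs = require('fs');
var logFile = '/tmp/logs/log.js';

function logSync(entry){
    var logData = [];
    if (fs.existsSync(logFile)) {
        // read and parse the existing log before appending
        logData = JSON.parse(fs.readFileSync(logFile).toString());
    }
    logData.push(entry);
    // write the whole array back; nothing can interleave here,
    // because every step blocks until it has finished
    fs.writeFileSync(logFile, JSON.stringify(logData, null, '\t'));
}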
Also note that JSON.parse() is a synchronous operation. You can give it an "asynchronous" style using the async module, e.g. async.asyncify(JSON.parse).
As noted by @BenjaminGruenbaum, async.asyncify() doesn't actually make JSON.parse() truly asynchronous, but it does provide a more "async" style for the control flow of the operations.
As the code is too large to post here, I am adding my GitHub repo: https://github.com/DiegoGallegos4/Mongo
I am trying to use the NodeJS driver to update some records that fulfil one criterion, but first I have to find the records that fulfil another criterion. The update uses the records found and filtered by the find operation. That is:
file: weather1.js
MongoClient.connect(some url, function(err, db){
    db.collection(collection_name).find({}, {}, sort criteria).toArray(function(err, data){
        // ... find the data and append it to an array
        // ... then, inside a for loop over that data:
        db.collection(collection_name).update(data[i], {$set: ...}, callback)
    })
})
That's the structure used to solve the problem. Regarding when to close the connection: it is closed when the length of the data array equals the number of callbacks from the update operation. For more details you can refer to the repo.
file: weather.js
In the other approach, .each is used instead of toArray to iterate over the cursor.
I've looked for a solution to this for a week now on several forums.
I've read about pooling connections, but I want to know what the conceptual error in my code is. I would appreciate a deep insight on this topic.
The way you pose your question is very misleading. All you really want to know is: "When is the processing complete so I can close the connection?"
The answer to that is that you need to respect the callbacks and generally only move through the cursor of results once each update is complete.
The simple way, without other dependencies, is to use the stream interface supported by the driver:
var MongoClient = require('mongodb').MongoClient;

MongoClient.connect('mongodb://localhost:27017/data', function(err, db){
    if (err) throw err;
    var coll = db.collection('weather');
    console.log('connection established');

    var stream = coll.find().sort([['State', 1], ['Temperature', -1]]);

    stream.on('error', function(err) {
        throw err;
    });

    stream.on('end', function() {
        db.close();
    });

    var month_highs = [];
    var state = '';
    var length = 0;

    stream.on('data', function(doc) {
        stream.pause();               // pause processing documents

        if (doc) {
            length = month_highs.length;
            if (state != doc['State']) {
                month_highs.push(doc['State']);
                //console.log(doc);
            }
            state = doc['State'];

            if (month_highs.length > length) {
                coll.update(doc, {$set: {'month_high': true}}, function(err, updated) {
                    if (err) throw err;
                    console.log(updated);
                    stream.resume();  // resume processing documents
                });
            } else {
                stream.resume();
            }
        } else {
            stream.resume();
        }
    });
});
That's just a copy of the code from your repo, refactored to use a stream. So all the important parts are where the word "stream" appears and, most importantly, where its methods are being called.
In a nutshell, the "data" event is emitted for each document in the cursor results. First you call .pause() so new documents do not overrun the processing. Then you do your .update() and, within its callback, on return you call .resume(), and the flow continues with the next document.
Eventually "end" is emitted when the cursor is depleted, and that is where you call db.close().
That is basic flow control. For other approaches, look at the node async library as a good helper. But do not loop arrays with no async control, and do not use .each(), which is DEPRECATED.
You need to signal when the .update() callback is complete before moving on to a new "loop iteration" in any case. This is the basic, no-additional-dependency approach.
P.S. I am a bit suspicious of the general logic of your code, especially testing whether the length of something is greater right after you read it without anything possibly changing that length. But this is all about how to implement "flow control", not about fixing the logic in your code.
Using Node.js and the node-postgres module to communicate with a database, I'm attempting to write a function that accepts an array of queries and callbacks and executes them all asynchronously using the same database connection. The function accepts a two-dimensional array and calling it looks like this:
perform_queries_async([
    ['SELECT COUNT(id) as count FROM ideas', function(result) {
        console.log("FUNCTION 1");
    }],
    ["INSERT INTO ideas (name) VALUES ('test')", function(result) {
        console.log("FUNCTION 2");
    }]
]);
And the function iterates over the array, creating a query for each sub-array, like so:
function perform_queries_async(queries) {
    var client = new pg.Client(process.env.DATABASE_URL);

    for(var i=0; i<queries.length; i++) {
        var q = queries[i];
        client.query(q[0], function(err, result) {
            if(err) {
                console.log(err);
            } else {
                q[1](result);
            }
        });
    }

    client.on('drain', function() {
        console.log("drained");
        client.end();
    });

    client.connect();
}
When I ran the above code, I expected to see output like this:
FUNCTION 1
FUNCTION 2
drained
However, the output bizarrely appears like so:
FUNCTION 2
drained
FUNCTION 2
Not only is the second function getting called for both requests, it also seems as though the drain code is getting called before the client's queue of queries is finished running...yet the second query still runs perfectly fine even though the client.end() code ostensibly killed the client once the event is called.
I've been tearing my hair out about this for hours. I tried hardcoding in my sample array (thus removing the for loop), and my code worked as expected, which leads me to believe that there is some problem with my loop that I'm not seeing.
Any ideas on why this might be happening would be greatly appreciated.
The simplest way to properly capture the value of the q variable in a closure in modern JavaScript is to use forEach:
queries.forEach(function(q) {
    client.query(q[0], function(err, result) {
        if(err) {
            console.log(err);
        } else {
            q[1](result);
        }
    });
});
If you don't capture the value, your code only ever sees the last value that q had, because the callback functions execute later, in the context of the containing function.
forEach, by using a callback function, isolates and captures the value of q so it can be properly evaluated by the inner callback.
A victim of the famous Javascript closure/loop gotcha. See my (and other) answers here:
I am trying to open 10 websocket connections with nodejs, but somehow my loop doesnt work
Basically, at the time your callback is executed, q is set to the last element of the input array. The way around it is to dynamically generate the closure.
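As a rough sketch of what "dynamically generating the closure" could look like here, keeping the original for loop from the question (an IIFE per iteration; the names are taken from the question):

for (var i = 0; i < queries.length; i++) {
    (function(q) {              // q is captured separately for each iteration
        client.query(q[0], function(err, result) {
            if (err) {
                console.log(err);
            } else {
                q[1](result);   // now refers to the matching sub-array
            }
        });
    })(queries[i]);
}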
It would be good to execute this using the async module. It will also help you reuse the code and make it more readable. I particularly like the auto function provided by the async module.
Ref: https://github.com/caolan/async
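As a sketch of one way the async module could be applied here (using eachSeries rather than auto, and assuming the same queries array and pg client from the question):

var async = require('async');
var pg = require('pg');

function perform_queries_async(queries) {
    var client = new pg.Client(process.env.DATABASE_URL);
    client.connect();

    // run the queries one after another; each callback gets its own q
    async.eachSeries(queries, function(q, done) {
        client.query(q[0], function(err, result) {
            if (err) return done(err);
            q[1](result);
            done();
        });
    }, function(err) {
        if (err) console.log(err);
        client.end(); // close the connection once everything has finished
    });
}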
This is my first post on here.
I am learning Node and MongoDB. I have installed the node-mongodb-native driver and found some unexpected things.
My script is below, based on the official tutorial.
var test; // shortcut for later use

require('mongodb').MongoClient.connect("mongodb://localhost:27017/exampleDb", function(err, db) {
    if (err) {return console.log("some error", err);}
    console.log("database connected");
    test = db.collection('test');
    test.find().toArray(function(err, items) {
        if (err) {console.log("some error");}
        else {console.log("still connected");}
    });
});

var rep = function () {
    setTimeout(function () {
        test.find().toArray(function(err, items) {
            if (err) {console.log("some error: " + err);}
            else {console.log("still connected");}
        });
        rep();
    }, 2000);
}

rep();
So every 2 seconds the console.log outputs "still connected"; this is expected.
If the mongod.exe window is shut down (to simulate a loss of connection to the database), I expect to see "some error" in the console.log. However, no error message is logged.
When mongod.exe is restarted (to simulate a reconnection), the console.log outputs many streams of "still connected".
I have not been able to find the answer in the manual or other online sources, so I have 2 questions:
1) What is the current best practice to detect a sudden MongoDB disconnection, apart from on('close'), and should the find() query emit an error when such a disconnection happens?
2) Upon reconnection, the console.log outputs many lines of the same message, as if they had been queued during the disconnection and are now being logged at once. Is this normal? I can think of a real-life issue: instead of find(), an admin may be using insert() to add some data. The database connection is disrupted but there is no error message, so the admin may keep doing many insert()s and still see no result. Then, upon reconnection, a swarm of insert() queries inundates the database?
Please point out my ignorance...