Node, mongo and loop, how to break loop when I find data - node.js

My code looks similar to that:
var mongo_client = require('mongodb').MongoClient, dataStorage;
lib = {
[...]
find: function(res, param, callback) {
var parentPath = param.path;
while (parentPath !== '/') {
collection.findOne({'paths' : parentPath}, {...}, function(err, data)) {
if (data) {
dataStorage = data;
callback(data, res);
}
}
if (dataStorage) {
return;
}
parentPath = lib.removeLastBlockOfPath(parentPath);
}
if (!dataStorage) {
callback(someDefaultData, res);
}
}
[...]
}
What I want to do is find a path stored in mongo or, if there is no match, find the first matching parent path.
I can't set the dataStorage value from the findOne callback; is there any way to do that? Even if I find the path, the loop always runs through all the path blocks.

Node is asynchronous, so your code must be written accordingly. An option is to use the async module, that has lots of tools to manage asynchronous flows.
For example, you could use the whilst function to manage your while loop:
find: function(res, param, callback) {
var parentPath = param.path,
dataStorage = null;
async.whilst(
function () { return parentPath !== '/'; },
function (done) {
collection.findOne({'paths' : parentPath}, {...}, function(err, data) {
if (data) {
dataStorage = data;
return callback(data, res);
}
parentPath = lib.removeLastBlockOfPath(parentPath);
done();
});
},
function (error) {
if (!dataStorage) return callback(someDefaultData, res);
}
);
}
Don't forget to install and require the async module:
var async = require('async');

Your code is written as if it were "traditional synchronous" code -- which it's not. You cannot check dataStorage for validity until the results from findOne() come back -- so your checks need to be moved all the way into the inner "if (data)" statement. This is not a mongodb issue; this is purely how nodejs works: everything is asynchronous and works on callbacks.

Related

page renders before getting all the values sorted

I think the rendering takes place before the search for the string in the files has finished. I have tried different methods but can't seem to get this working. Any help will be appreciated; I'm a noob at nodejs. I'm trying to get the id of the user, query all of his data, then check whether he is in any of the given lists, and finally render the page.
const j = [];
let name = '';
const filename = [];
var ext = '';
module.exports = function(app, express) {
app.use(bodyParser.urlencoded({ extended: false }));
app.use(bodyParser.json());
app.post('/cusdetails', isLoggedIn, function (req, res) {
var cusid=req.body.cusid;
var insertQuerys = "SELECT * FROM customer WHERE cusid=? ORDER BY rowid DESC LIMIT 1";
connection.query(insertQuerys,[cusid],
function(err, rows){
rows.forEach( (row) => {
name=row.fncus;
});
fs.readdir('./views/iplist', function(err, files) {
if (err)
throw err;
for (var index in files) {
j.push(files[index])
}
j.forEach(function(value) {
var k = require('path').resolve(__dirname, '../views/iplist/',value);
fs.exists(k, function(fileok){
if(fileok) {
fs.readFile(k, function(err, content) {
if (err) throw err;
if (content.indexOf(name) > -1) {
ext = path.extname(k);
filename.push(path.basename(k, ext));
}
});
}
else {
console.log(" FileNotExist ");
}
});
});
});
console.log(filename);
res.render('cusdetails.ejs', {rows: rows, user:req.user , aml: filename });
});
})
You can create simple Promise wrapper and then use it inside async/await function to pause execution until resolved.
// use mysql2 package as it provides promise, less work to write promise wrappers
const mysql = require('mysql2/promise');
// create the connection to database
// With mysql2/promise, createConnection() returns a Promise that would have to
// be awaited before query() can be called. createPool() returns synchronously
// and its query() returns a promise, so `connection.query(...)` below works.
const connection = mysql.createPool({
host: 'localhost',
user: 'root',
database: 'test'
});
// sample wrapper
/**
 * Sample promise wrapper: checks whether the file at `k` contains `needle`.
 *
 * The promise always settles, so `await some(k)` can never hang:
 *  - file contains `needle`      -> resolves with [basename of k without extension]
 *  - file does not contain it    -> resolves with []
 *  - file does not exist         -> logs " FileNotExist " and resolves with []
 *  - any other read error        -> rejects with that error
 *
 * @param {string} k - Path of the file to inspect.
 * @param {string} [needle=name] - Text to look for; defaults to the
 *   module-level `name` variable that the original snippet relied on.
 * @returns {Promise<string[]>} Array with zero or one file name.
 */
function some(k, needle = name) {
  return new Promise((resolve, reject) => {
    // Read directly instead of calling the deprecated fs.exists() first;
    // a missing file is reported by readFile itself as ENOENT.
    fs.readFile(k, (err, content) => {
      if (err) {
        if (err.code === 'ENOENT') {
          console.log(" FileNotExist ");
          return resolve([]);
        }
        // Return so that `content` (undefined here) is never touched.
        return reject(err);
      }
      if (content.indexOf(needle) === -1) {
        return resolve([]);
      }
      resolve([path.basename(k, path.extname(k))]);
    });
  });
}
// note the use of async
app.post('/cusdetails', isLoggedIn, async function (req, res) {
var cusid=req.body.cusid;
var insertQuerys = "SELECT * FROM customer WHERE cusid=? ORDER BY rowid DESC LIMIT 1";
// using await to pause excution, waits till query is finished
const [rows] = await connection.query(insertQuerys,[cusid])
rows.forEach( (row) => {
name=row.fncus;
});
// then you can
var result = await some(k)
...
Note, however, that this way you lose the advantage of concurrent execution, as it's kind of blocking. If the result of one call is not used in another, you can start both calls in parallel and await the results afterwards to achieve sequencing, like
// Start both operations without awaiting so they run concurrently.
// Do not destructure here: query() returns a promise, and a promise is not iterable.
const rowsPromise = connection.query(insertQuerys,[cusid])
const resultPromise = some(k)
// Await both together; the query resolves to an array whose first element is the rows.
const [[rows], result] = await Promise.all([rowsPromise, resultPromise])
console.log(rows) // do something
console.log(result) // do something
JavaScript is asynchronous. This means that if you have a function with a callback (i.e. your query), the callback will be called asynchronously, at an unknown time, while the other code executes.
You need to look up some tutorials how to deal with callbacks, to get a proper understanding of it. Another method is using async/await and/or promises.
Basically, if you take the following code:
console.log("this will print first");
setTimeout(function () {
console.log("this will print last");
}, 1000);
console.log("this will print second");
If you run the code above, the top level is executed synchronously, so, it first calls console.log, then it executes setTimeout, which is synchronous. It sets a timeout, then says "I'm ready", and the code continues to the other console.log. After 1 second (1000 milliseconds), the callback in the setTimeout function is executed, and only then that console.log is called. You can not make the rest of the code wait this way, you need to restructure your code or read into promises.

Foreach loop with mongoDB call / node.js

result.forEach(element => {
//Get each element
console.log("LOOP");
dbo.collection("users").findOne({email: emailGiven, "friends.email": element.email},function(errT, resultT) {
if (errT){
console.log("Query Error Inside!");
res.status(errT.status); // or use err.statusCode instead
console.log(errT);
//db.close();
//return res.send(errT.message);
}
else {
if (resultT) {
var oneUser = {
email: element.email,
username: element.username,
fullName: element.fullName,
status: resultT
};
//console.log(resultT);
foundUsers.push(oneUser);
} else {
//Not found means not added or pending
var oneUser = {
email: element.email,
username: element.username,
fullName: element.fullName,
status: 0
};
foundUsers.push(oneUser);
//console.log(emailGiven + " " + element.email)
console.log(oneUser);
}
}
});
});
I have an array of objects. For each element I would like to make a mongoDB call and, depending on the result, push an entry into an array, as I'm doing above. The problem is that mongoDB is async, so my main thread finishes before I can push the results into the foundUsers array. How may I fix this issue?
As you said, you need to handle an asynchronous operation inside a synchronous loop. To do this, you can use the async library. It is very useful for such operations.
Just install async module in your project first
npm install --save async
Afterwards, you can do sth like this:
// for use with Node-style callbacks...
var async = require("async");
var obj = {dev: "/dev.json", test: "/test.json", prod: "/prod.json"};
var configs = {};
// Reads and parses every file listed in obj, storing each result under its key.
async.forEachOf(obj, (value, key, callback) => {
    fs.readFile(__dirname + value, "utf8", (err, data) => {
        if (err) return callback(err);
        try {
            configs[key] = JSON.parse(data);
        } catch (e) {
            // Malformed JSON is reported through the same error channel.
            return callback(e);
        }
        callback();
    });
}, err => {
    if (err) {
        console.error(err.message);
        // configs may be incomplete here, so do not hand it on.
        return;
    }
    // configs is now a map of JSON data
    doSomethingWith(configs);
});
This library provides the async.forEachOf function, which is used instead of a simple forEach loop. Three parameters are passed to this function.
The 1st parameter passed to async.forEachOf is the collection to iterate over (obj, which in this example is an object rather than an array).
The 2nd parameter is a function that is applied to each item in obj; it receives the item's value, its key, and a callback to call when that item is done.
The 3rd and last parameter passed to async.forEachOf is another callback function. It is called when the iteration over every item in obj has finished.

Trying to make my own RxJs observable

I'm trying to convert an existing API to work with RxJS... fairly new to node, and very new to RxJs, so please bear with me.
I have an existing API (getNextMessage), that either blocks (asynchronously), or returns a new item or error via a node-style (err, val) callback, when the something becomes available.
so it looks something like:
getNextMessage(nodeStyleCompletionCallback);
You could think of getNextMessage like an http request, that completes in the future, when the server responds, but you do need to call getNextMessage again, once a message is received, to keep getting new items from the server.
So, in order to make it into an observable collection, I have to get RxJs to keep calling my getNextMessage function until the subscriber is disposed();
Basically, I'm trying to create my own RxJs observable collection.
The problems are:
I don't know how to make subscriber.dispose() kill the async.forever
I probably shouldn't be using async.forever in the first place
I'm not sure I should be even getting 'completed' for each message - shouldn't that be at the end of a sequence
I'd like to eventually remove the need for using fromNodeCallback, to have a first class RxJS observable
Clearly I'm a little confused.
Would love a bit of help, thanks!
Here is my existing code:
var Rx = require('rx');
var port = require('../lib/port');
var async = require('async');
function observableReceive(portName)
{
var observerCallback;
var listenPort = new port(portName);
var disposed = false;
var asyncReceive = function(asyncCallback)
{
listenPort.getNextMessage(
function(error, json)
{
observerCallback(error, json);
if (!disposed)
setImmediate(asyncCallback);
}
);
}
return function(outerCallback)
{
observerCallback = outerCallback;
async.forever(asyncReceive);
}
}
var receive = Rx.Observable.fromNodeCallback(observableReceive('rxtest'));
var source = receive();
var subscription = source.forEach(
function (json)
{
console.log('receive completed: ' + JSON.stringify(json));
},
function (error) {
console.log("receive failed: " + error.toString());
},
function () {
console.log('Completed');
subscription.dispose();
}
);
So here's probably what I would do.
// The module id is lowercase 'rx' (as in the question's own code); 'Rx' fails to resolve on case-sensitive file systems.
var Rx = require('rx');
// Stand-in for the real getNextMessage, just for kicks: a counter kept in a closure.
// Every call answers after 5 ms, node-style: callback(undefined, n) for n = 1..10,
// and from then on callback(message) with an error message only.
var getNextMessage = (function () {
  var counter = 1;

  // Delivers either the next number or, once past ten, the error message.
  function respond(callback) {
    if (counter <= 10) {
      callback(undefined, counter++);
      return;
    }
    callback("lawdy lawd it's ova' ten, ya'll.");
  }

  return function (callback) {
    setTimeout(respond, 5, callback);
  };
}());
// Observable wrapper around a single getNextMessage call: each subscription
// requests one message, then either errors or emits that message and completes.
var nextMessageAsObservable = Rx.Observable.create(function (observer) {
  getNextMessage(function (err, val) {
    if (err) {
      observer.onError(err);
      return;
    }
    observer.onNext(val);
    observer.onCompleted();
  });
});
// This repeats the call to getNextMessage as many times (11) as you want.
// "take" will cancel the subscription after receiving 11 items.
nextMessageAsObservable
.repeat()
.take(11)
.subscribe(
function (x) { console.log('next', x); },
function (err) { console.log('error', err); },
function () { console.log('done'); }
);
I realize this is over a year old, but I think a better solution for this would be to make use of recursive scheduling instead:
// Builds an observable that keeps invoking the node-style function `next`:
// each value it yields is emitted at the start of the following scheduled
// step, which then calls `next` again. Errors from `next` go to the observer.
//   next      - node-style function, called with a single (err, value) callback
//   scheduler - optional; falls back to Rx.Scheduler.default when falsy
Rx.Observable.forever = function(next, scheduler) {
  // Each statement ends with a semicolon; the original chained these two
  // assignments with a trailing comma operator.
  scheduler = scheduler || Rx.Scheduler.default;
  //Internally wrap the callback into an observable
  var nextAsObservable = Rx.Observable.fromNodeCallback(next);
  return Rx.Observable.create(function(observer) {
    var disposable = new Rx.SingleAssignmentDisposable(),
        hasState = false;
    disposable.setDisposable(scheduler.scheduleRecursiveWithState(null,
      function(state, self) {
        // The very first step carries no value (state is the null seed).
        if (hasState) {
          observer.onNext(state);
        }
        hasState = false;
        // NOTE(review): this inner subscription is not tracked by `disposable`,
        // so a call that is in flight is presumably not cancelled on dispose -- confirm.
        nextAsObservable().subscribe(function(x){
          hasState = true;
          // Schedule the next step with the received value as its state.
          self(x);
        }, observer.onError.bind(observer));
      }));
    return disposable;
  });
};
The idea here is that you can schedule new items once the previous one has completed. You call next() which invokes the passed in method and when it returns a value, you schedule the next item for invocation.
You can then use it like so:
Rx.Observable.forever(getNextMessage)
.take(11)
.subscribe(function(message) {
console.log(message);
});
See a working example here

Iterating over a mongodb cursor serially (waiting for callbacks before moving to next document)

Using mongoskin, I can do a query like this, which will return a cursor:
myCollection.find({}, function(err, resultCursor) {
resultCursor.each(function(err, result) {
}
}
However, I'd like to call some async functions for each document, and only move on to the next item on the cursor after this has called back (similar to the eachSeries structure in the async.js module). E.g:
myCollection.find({}, function(err, resultCursor) {
resultCursor.each(function(err, result) {
externalAsyncFunction(result, function(err) {
//externalAsyncFunction completed - now want to move to next doc
});
}
}
How could I do this?
Thanks
UPDATE:
I don't want to use toArray() as this is a large batch operation, and the results might not fit in memory in one go.
A more modern approach that uses async/await:
const cursor = db.collection("foo").find({});
while(await cursor.hasNext()) {
const doc = await cursor.next();
// process doc here
}
Notes:
This may be even more simple to do when async iterators arrive.
You'll probably want to add try/catch for error checking.
The containing function should be async or the code should be wrapped in (async function() { ... })() since it uses await.
If you want, add await new Promise(resolve => setTimeout(resolve, 1000)); (pause for 1 second) at the end of the while loop to show that it does process docs one after the other.
If you don't want to load all of the results into memory using toArray, you can iterate using the cursor with something like the following.
// Pulls documents from the cursor one at a time; the next document is only
// requested after externalAsyncFunction has called back for the current one.
myCollection.find({}, function(err, resultCursor) {
  if (err) {
    console.error(err);
    return;
  }
  function processItem(err, item) {
    if (err) {
      // Stop iterating: without this check a failed read would be passed on as an item.
      console.error(err);
      return;
    }
    if(item === null) {
      return; // All done!
    }
    externalAsyncFunction(item, function(err) {
      // Best effort: report a failed item, then carry on with the next document.
      if (err) {
        console.error(err);
      }
      resultCursor.nextObject(processItem);
    });
  }
  resultCursor.nextObject(processItem);
});
since node.js v10.3 you can use async iterator
const cursor = db.collection('foo').find({});
for await (const doc of cursor) {
// do your thing
// you can even use `await myAsyncOperation()` here
}
Jake Archibald wrote a great blog post about async iterators, that I came to know after reading #user993683's answer.
This works with large dataset by using setImmediate:
var cursor = collection.find({filter...}).cursor();
cursor.nextObject(function fn(err, item) {
if (err || !item) return;
setImmediate(fnAction, item, arg1, arg2, function() {
cursor.nextObject(fn);
});
});
function fnAction(item, arg1, arg2, callback) {
// Here you can do whatever you want to do with your item.
return callback();
}
If someone is looking for a Promise way of doing this (as opposed to using callbacks of nextObject), here it is. I am using Node v4.2.2 and mongo driver v2.1.7. This is kind of an asyncSeries version of Cursor.forEach():
/**
 * Runs `iterator` on every document of `cursor`, strictly one after another:
 * the next document is fetched only after the previous iterator call settled.
 *
 * @param {{next: function(): Promise}} cursor - Object whose next() returns a
 *   promise for the next document, or for null/undefined when exhausted.
 * @param {function(*): (Promise|*)} iterator - Called with each document; may
 *   return a promise (awaited) or a plain value.
 * @returns {Promise<number>} Resolves with the number of processed documents.
 *   Rejects as soon as cursor.next() or iterator throws or rejects, so a
 *   failure can no longer leave the returned promise pending forever.
 */
function forEachSeries(cursor, iterator) {
  var count = 0;
  function processDoc(doc) {
    if (doc == null) {
      return count;
    }
    count++;
    // Returning the chain links every step to the outer promise, so any
    // rejection propagates to the caller.
    return Promise.resolve(iterator(doc)).then(function() {
      return cursor.next();
    }).then(processDoc);
  }
  // The executor turns a synchronous throw from the first next() into a rejection.
  return new Promise(function(resolve) {
    resolve(cursor.next());
  }).then(processDoc);
}
To use this, pass the cursor and an iterator that operates on each document asynchronously (like you would for Cursor.forEach). The iterator needs to return a promise, like most mongodb native driver functions do.
Say, you want to update all documents in the collection test. This is how you would do it:
var theDb;
MongoClient.connect(dbUrl).then(function(db) {
  theDb = db; // save it, we'll need to close the connection when done.
  var cur = db.collection('test').find();
  return forEachSeries(cur, function(doc) { // this is the iterator
    return db.collection('test').updateOne(
      {_id: doc._id},
      {$set: {updated: true}} // or whatever else you need to change
    );
    // updateOne returns a promise, if not supplied a callback. Just return it.
  });
})
.then(function(count) {
  console.log("All Done. Processed", count, "records");
})
.catch(function(err) {
  // Without a handler, a failed connect or update would be an unhandled rejection.
  console.error("Update failed:", err);
})
.then(function() {
  // Runs after success and after failure; theDb is unset if connect itself failed.
  if (theDb) {
    return theDb.close();
  }
});
You can do something like this using the async lib. The key point here is to check if the current doc is null. If it is, it means you are finished.
async.series([
function (cb) {
cursor.each(function (err, doc) {
if (err) {
cb(err);
} else if (doc === null) {
cb();
} else {
console.log(doc);
array.push(doc);
}
});
}
], function (err) {
callback(err, array);
});
You could use a Future:
// Reads the cursor one document at a time, using a Future to wait for each
// nextObject() call before moving on to the next loop iteration.
myCollection.find({}, function(err, resultCursor) {
  resultCursor.count(Meteor.bindEnvironment(function(err,count){
    for(var i=0;i<count;i++)
    {
      var itemFuture=new Future();
      // The callback is an argument of nextObject, so the call is closed with `});`.
      resultCursor.nextObject(function(err,item){
        // NOTE(review): fibers' Future appears to resolve via `return(value)`;
        // confirm that `result` exists in the Future implementation in use.
        itemFuture.result(item);
      });
      var item=itemFuture.wait();
      //do what you want with the item,
      //and continue with the loop if so
    }
  }));
});
You can get the result in an Array and iterate using a recursive function, something like this.
// Loads all documents into an array and processes them one by one, starting
// the next call only after the previous one has called back.
myCollection.find({}).toArray(function (err, items) {
  if (err) {
    console.error(err);
    return;
  }
  // Walk forward from index 0. The original started at items[items.length]
  // (undefined) and stopped before ever reaching items[0].
  var index = 0;
  var fn = function () {
    if (index >= items.length) {
      return; // all done; this also covers an empty result set
    }
    externalAsyncFunction(items[index], function () {
      index += 1;
      fn();
    });
  };
  fn();
});
Edit:
This is only applicable for small datasets, for larger one's you should use cursors as mentioned in other answers.
A more modern approach that uses for await:
const cursor = db.collection("foo").find({});
for await(const doc of cursor) {
// process doc here with await
await processDoc(doc);
}
You could use simple setTimeout calls. This is an example in TypeScript running on nodejs (I am using promises via the 'when' module, but it can be done without them as well):
import mongodb = require("mongodb");
var dbServer = new mongodb.Server('localhost', 27017, {auto_reconnect: true}, {});
var db = new mongodb.Db('myDb', dbServer);
var util = require('util');
var when = require('when'); //npm install when
var dbDefer = when.defer();
db.open(function() {
console.log('db opened...');
dbDefer.resolve(db);
});
dbDefer.promise.then(function(db : mongodb.Db){
db.collection('myCollection', function (error, dataCol){
if(error) {
console.error(error); return;
}
var doneReading = when.defer();
var processOneRecordAsync = function(record) : When.Promise{
var result = when.defer();
setTimeout (function() {
//simulate a variable-length operation
console.log(util.inspect(record));
result.resolve('record processed');
}, Math.random()*5);
return result.promise;
}
var runCursor = function (cursor : MongoCursor){
cursor.next(function(error : any, record : any){
if (error){
console.log('an error occurred: ' + error);
return;
}
if (record){
processOneRecordAsync(record).then(function(r){
setTimeout(function() {runCursor(cursor)}, 1);
});
}
else{
//cursor up
doneReading.resolve('done reading data.');
}
});
}
dataCol.find({}, function(error, cursor : MongoCursor){
if (!error)
{
setTimeout(function() {runCursor(cursor)}, 1);
}
});
doneReading.promise.then(function(message : string){
//message='done reading data'
console.log(message);
});
});
});

how to solve 'this' problems with node libraries like async and request

I've written a node script that gets some data by requesting REST API data (using the library request). It consists of a couple of functions like so:
var data = { /* object to store all data */ },
function getKloutData() {
request(url, function() { /* store data */}
}
// and a function for twitter data
Because I want to do some stuff after fetching all the data, I used the async library to run all the fetch functions like so:
async.parallel([ getTwitterData, getKloutData ], function() {
console.log('done');
});
This all works fine, however I wanted to put everything inside a object pattern so I could fetch multiple accounts at the same time:
function Fetcher(name) {
this.userID = ''
this.user = { /* data */ }
this.init();
}
Fetcher.prototype.init = function() {
async.parallel([ this.getTwitterData, this.getKloutData ], function() {
console.log('done');
});
}
Fetcher.prototype.getKloutData = function(callback) {
request(url, function () { /* store data */ });
};
This doesn't work because async and request change the this context. The only way I could get around it is by binding everything I pass through async and request:
Fetcher.prototype.init = function() {
async.parallel([ this.getTwitterData.bind(this), this.getKloutData.bind(this) ], function() {
console.log('done');
});
}
Fetcher.prototype.getKloutData = function(callback) {
function saveData() {
/* store data */
}
request(url, saveData.bind(this);
};
Am I doing something basic wrong or something? I think reverting to the script and forking it to child_processes creates too much overhead.
You're doing it exactly right.
The alternative is to keep a reference to the object always in context instead of using bind, but that requires some gymnastics:
// Runs both fetchers in parallel and logs 'done' once each has called back.
// `self` keeps the instance reachable inside the wrappers, so no bind() is needed.
Fetcher.prototype.init = function() {
  var self = this;
  async.parallel([
    // Each task must forward the callback async.parallel gives it;
    // otherwise the final callback below never fires.
    function(callback){ self.getTwitterData(callback); },
    function(callback){ self.getKloutData(callback); }
  ], function() {
    console.log('done');
  });
}
// Requests `url` and stores the response on this instance, then reports
// completion (or the request error) through the node-style `callback`.
Fetcher.prototype.getKloutData = function(callback) {
  var self = this;
  function saveData(error) {
    if (error) {
      return callback(error);
    }
    // store data
    self.blah();
    // Signal completion so async.parallel can run its final callback.
    callback();
  }
  request(url, saveData);
};
You can also do the binding beforehand:
// Replaces the two fetch methods with copies bound to this instance, so they
// can be passed around as bare callbacks without losing `this`.
Fetcher.prototype.bindAll = function(){
  // Instances have no `prototype` property; the methods are found through
  // the normal prototype lookup on `this`.
  this.getKloutData = this.getKloutData.bind(this);
  this.getTwitterData = this.getTwitterData.bind(this);
};
Fetcher.prototype.init = function(){
this.bindAll();
async.parallel([ this.getTwitterData, this.getKloutData ], function() {
console.log('done');
});
};
You can save this into another variable:
var me = this;
Then me is your this.
Instantiate object with this function:
/**
 * Instantiates `klass` (without arguments) and binds every enumerable
 * function-valued property, own or inherited, to the new instance.
 *
 * Uses a plain for...in loop, so no jQuery (`$`) is needed. Methods declared
 * with ES2015 `class` syntax are non-enumerable and are therefore not bound.
 *
 * @param {Function} klass - Constructor to instantiate.
 * @returns {Object} The new instance with its methods bound to itself.
 */
function newClass(klass) {
  var obj = new klass();
  for (var key in obj) {
    var value = obj[key];
    if (typeof value === "function") {
      obj[key] = value.bind(obj);
    }
  }
  return obj;
}
This will automatically bind all functions, so you get an object in the habitual OOP style,
where methods inside the object have their object as context.
So you instantiate your objects not through:
var obj = new Fetcher();
But:
var obj = newClass(Fetcher);

Resources