I'm trying to count how many people of each gender there are in a JSON list sent by the client with a POST request (on a Node.js server). I have trouble understanding JavaScript asynchronous execution, callbacks and closures.
What i want is:
getting a list from the client,
for every entry ask my collection if that is a m, a f or a u,
count how many fs, ms and us there are,
send an array to the client with the three values.
I always get "Can't set headers after they are sent" or similar errors due to async execution. I tried different callback orders and many different options.
This is how the functions on the server looks like:
// POST /genderize handler as posted in the question: it builds a counter,
// starts the per-name lookups through count(), and tries to send `result`.
app.post('/genderize', function(req, res){
createCounter("conto", req, function(req,contat ){
// count() only starts the database lookups; it accepts no completion callback.
count(req, contat);
// NOTE(review): createCounter has no return statement, so there is no value to
// call .then on. In addition, res.send( result ) is evaluated right here as an
// argument expression, i.e. before any lookup callback has had a chance to run.
}).then(res.send( result ));
});
// Builds a counter object and hands it, together with req, to `callback`.
// `nome` is accepted but never read inside this function.
function createCounter( nome, req, callback ) {
// NOTE(review): assigned without var/let/const, so `result` is an implicit
// global shared by every request.
result = [0,0,0];
var contatore = function(){
var m = 0;
var f = 0;
var u = 0;
// NOTE(review): the helpers below are also assigned without a declaration.
// addM updates result[1], while addF/addU update the locals f and u, which
// never make it into `result`.
addM = function(){ console.log( "m++ "+result[1]);result[1]++; };
addF = function(){ f++; };
addU = function(){ u++; };
// Reads this.m, not the local variable m declared above.
getM = function(){ return this.m;};
getResult = function(){
console.log( result+ " * "+ getM() + " * " + this.u + " * "+ this.f );
return result;
};
return {
addM: addM,
addF: addF,
addU: addU,
getResult: getResult
};
}
// The callback is invoked synchronously; nothing is returned to the caller.
callback( req, contatore() );
}
// Starts one collection.find per entry of req.body.data and bumps the matching
// counter inside each query callback. The function takes no completion
// callback, so the caller is never told when the lookups have finished.
function count( req, counter ){
var collection = db.get('nomi');
var data = req.body.data;
data.forEach(function(value, i){
// Lookup key: first space-separated word of the entry's name, upper-cased.
collection.find({ nome : req.body.data[i].name.split(" ")[0].toUpperCase() }, { fields: {_id:0, nome:0}}, function (err, docs) {
// NOTE(review): `err` is never inspected; isEmptyObject is not defined in this snippet.
if (!isEmptyObject(docs)) {
docs = JSON.parse(JSON.stringify(docs));;
// Anything whose sesso is not "M" is counted as female.
if(docs[0].sesso == "M"){
counter.addM();
} else {
counter.addF();
}
} else {
counter.addU();
}
});
});
}
There are several issues with this example, but the main thing that you missed is that when you perform your database query, the collection.find call will return immediately, but will only execute its callback (function(err, docs)) at some later time after the database has replied.
Here's a working rewrite:
/**
 * POST /genderize — counts how many submitted names are male, female or unknown.
 *
 * Expects a body of the form { data: [{ name: "..." }, ...] } and replies with
 * the array [males, females, unknown]. Replies 400 when `data` is not an
 * array and 500 when a lookup fails.
 */
app.post('/genderize', function(req, res) {
  // Array.isArray rejects strings and array-like objects, which also expose
  // `length` but whose items have no `name`; the `req.body &&` guard keeps a
  // request without a parsed body from throwing.
  var data = req.body && req.body.data;
  if (!Array.isArray(data)) {
    return res.status(400).send('Invalid request body.');
  }
  countGenders(db.get('nomi'), data, function (err, genders) {
    if (err) return res.status(500).send('Unable to process request.');
    res.send([genders.M, genders.F, genders.U]);
  });
});
/**
 * Looks up the gender stored for the first word of `name`.
 *
 * @param {Object} collection - object exposing find(query, options, callback).
 * @param {string} name - full name; only its first whitespace-separated word
 *   is used, upper-cased, as the `nome` lookup key.
 * @param {function(?Error, string=)} next - receives 'M', 'F', or 'U' when no
 *   record matches or the name is not a usable string.
 */
function getGenderFromName(collection, name, next) {
  // Names come from the client: tolerate non-strings and surrounding
  // whitespace instead of throwing inside the request.
  var firstName = (typeof name === 'string') ? name.trim().split(/\s+/)[0] : '';
  if (!firstName) {
    // Nothing to look up. Defer the callback so it never fires before this
    // function has returned, matching the database path.
    process.nextTick(function () { next(null, 'U'); });
    return;
  }
  collection.find({nome : firstName.toUpperCase()}, {fields: {_id:0, nome:0}}, function (err, docs) {
    if (err) return next(err);
    var gender = 'U';
    if (docs && docs.length > 0) {
      // Any stored value other than "M" is reported as female.
      gender = (docs[0].sesso === "M") ? 'M' : 'F';
    }
    next(null, gender);
  });
}
/**
 * Tallies the genders of every entry in `data`, one lookup at a time.
 *
 * @param {Object} collection - passed through to getGenderFromName.
 * @param {Array<{name: string}>} data - entries to classify.
 * @param {function(?Error, Object=)} next - receives { M, F, U } counts, or
 *   the first lookup error.
 */
function countGenders(collection, data, next) {
  var tally = { M: 0, F: 0, U: 0 };

  // Handles the entry at `index`, then moves on to the following one from
  // inside the lookup callback, so lookups never overlap.
  function processEntry(index) {
    if (index == data.length) {
      next(null, tally);
      return;
    }
    getGenderFromName(collection, data[index].name, function onGender(err, gender) {
      if (err) {
        next(err);
        return;
      }
      tally[gender] += 1;
      processEntry(index + 1);
    });
  }

  processEntry(0);
}
Let's review the changes:
Removed the createCounter structure. No need for a heavy, get/set pattern for this simple example.
Checked for error values in every asynchronous callback
if (err) return next(err);
Within a route handler, typically you will want to end the request with a res.status(500).send(). In most other cases, return next(err) will 'bubble' the error up.
Moved the database query into a new function, getGenderFromName. It mostly retains your original code. This was optional, but substantially improves the readability of the count function.
Finally, rewrote the count function using an appropriate asynchronous iteration pattern, courtesy of http://book.mixu.net/node/ch7.html. Mixu gives a very easy to understand explanation of asynchronous node, give it a read.
An even better option would be to use the excellent async module. You could rewrite the countGenders function as
/**
 * Tallies the genders of every entry in `data` using async.eachSeries.
 *
 * @param {Object} collection - passed through to getGenderFromName.
 * @param {Array<{name: string}>} data - entries to classify.
 * @param {function(?Error, Object=)} next - receives { M, F, U } counts, or
 *   the first lookup error.
 */
function countGenders(collection, data, next) {
  var result = { M: 0, F: 0, U: 0 };
  async.eachSeries(
    data,
    // `done` is the per-item callback; named differently from the outer
    // `next` so the two cannot be confused.
    function (value, done) {
      getGenderFromName(collection, value.name, function(err, gender) {
        if (err) return done(err);
        result[gender]++;
        done();
      });
    },
    function (err) {
      if (err) return next(err);
      next(null, result);
    }
  );
}
Async includes lots of different control flow methods to use, not just simple iterations.
Here is a better way to do this. This really cleans up the asynchronous nature of javascript. Checkout the async library that I am using here.
var collection = db.get('nomi');
var async = require('async');
/**
 * POST /genderize — looks up every submitted name with async.each and replies
 * with a JSON object { females, males, unknown }.
 */
app.post('/genderize', function(req, res){
  let countingObject = {
    females: 0,
    males: 0,
    unknown: 0
  };
  async.each(req.body.data, function(entry, callback) {
    // Each list entry is an object carrying the full name in `name`; the
    // lookup key is its first word, upper-cased.
    collection.findOne({ nome : entry.name.split(" ")[0].toUpperCase() }, { fields: {_id:0, nome:0}}, function (err, nameObject) {
      // Forward query failures so the final callback can answer with an error.
      if (err) return callback(err);
      //instead, maybe check if it is male, female, or otherwise mark as unknown?
      if (!isEmptyObject(nameObject)) {
        //this object probably has getters that you could use instead
        nameObject = JSON.parse(JSON.stringify(nameObject));
        if(nameObject.sesso == "M"){
          countingObject.males++;
        } else {
          countingObject.females++;
        }
      } else {
        countingObject.unknown++;
      }
      callback();
    });
  }, function(err) {
    if (err) return res.status(500).send('Unable to process request.');
    res.setHeader('Content-Type', 'application/json');
    res.send(JSON.stringify(countingObject));
  });
});
Related
I have a REST API in Node.js using Mongoose. I have the following function that does something specific for my application. The problem is that I set test.questions value and after a particular loop, I find it is losing scope of those variables. What is the problem here? Here is my code:
// Handler from the question: tries to pick 5 random questions, attach their
// ids to a new Test, and save it.
randomizeTest = (req, res) => {
const test = new Test;
let questions: String[] = [];
let num = 5;
// The loop body only *starts* a count query per iteration; the callbacks
// that fill `questions` are written to run once the queries complete.
while (num >= 1) {
var self = this;
this.quesmodel.count().exec(function (err, count) {
// Random offset in [0, count).
var random = Math.floor(Math.random() * count)
self.quesmodel.findOne().skip(random).exec(
function (err, result) {
// NOTE(review): neither `err` nor a null `result` is checked here.
questions.push(result._id);
test.questions = questions;
console.log(test.questions); // prints data
});
});
num--;
}
// Reached as soon as the loop has finished issuing queries.
console.log(test.questions); // prints nothing
test.save(function (err, test) {
if (err) {
res.sendStatus(400);
console.error(err);
} else {
res.status(200).json({ test });
}
});
}
After Navid's answer, I tried this now:
// Second attempt from the question, wrapping the original while loop in asyncLoop.
let questions: String[] = [];
let num = 5;
var self = this;
// NOTE(review): the array being iterated is `questions`, which is empty at
// this point; presumably asyncLoop invokes the iterator once per element, so
// confirm whether it runs at all here.
asyncLoop(questions, function (item, next) {
// NOTE(review): the while loop still issues all queries in one go, and each
// of their callbacks calls next().
while (num >= 1) {
self.quesmodel.count().exec(function (err, count) {
var random = Math.floor(Math.random() * count)
self.quesmodel.findOne().skip(random).exec(
function (err, result) {
questions.push(result._id);
test.questions = questions;
next();
});
});
num--;
}
}, function () {
console.log(test.questions);
console.log('Finished!');
});
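In Node.js, functions that perform I/O run asynchronously: the while loop only starts the database queries and returns immediately, so the rest of your code, including console.log(test.questions);, runs before any query callback has fired and therefore still sees the previous, empty value. (Your JavaScript runs on a single thread; the callbacks are invoked later by the event loop, not by a second thread.)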
PS. one good way to handle these type of problems is using some async libraries to run your loops sequentially like node-async-loop.
// Usage template for node-async-loop: the iterator receives one array item
// plus `next`, and the last function is the completion callback.
var asyncLoop = require('node-async-loop');
var array = ['item0', 'item1', 'item2'];
asyncLoop(array, function (item, next)
{
// `do.some.action` is a placeholder for the real asynchronous call.
do.some.action(item, function (err) //database operations come here
{
if (err)
{
// Passing the error to next reports the failure to the completion callback below.
next(err);
return;
}
next();
});
}, function (err)
{
if (err)
{
console.error('Error: ' + err.message);
return;
}
// the rest of your code like console.log(test.questions); goes here
console.log('Finished!');
});
This might help you, but it's not the best way to do it:
// Picks five random questions one after another and stores their ids on
// `test.questions`; the completion callback runs after the last pick.
let questions: String[] = [];
let num = Array.from(Array(5).keys()); //this makes num = [0, 1, 2, 3, 4]
var self = this;
asyncLoop(num, function (item, next) {
  self.quesmodel.count().exec(function (err, count) {
    // Hand query failures to the loop so they reach the completion callback
    // instead of surfacing later as a TypeError.
    if (err) return next(err);
    var random = Math.floor(Math.random() * count)
    self.quesmodel.findOne().skip(random).exec(function (err, result) {
      if (err) return next(err);
      // No document at that offset (e.g. an empty collection).
      if (!result) return next(new Error('No question found'));
      questions.push(result._id);
      test.questions = questions;
      next();
    });
  });
}, function (err) {
  if(err)
    throw err;
  else {
    console.log(test.questions);
    console.log('Finished!');
  }
});
Here's a solution using the promises built into mongodb (which are a much better way to manage multiple asynchronous operations) and then simplifying things a bit with async/await so your loop actually runs serially:
// Builds a Test from five randomly chosen question ids, saves it, and
// replies with it as JSON. Any failure along the way answers with a 500.
randomizeTest = async (req, res) => {
  const test = new Test();
  const pickedIds: String[] = [];
  test.questions = pickedIds;
  try {
    let remaining = 5;
    while (remaining >= 1) {
      // Each await finishes before the next query starts, so picks are serial.
      const total = await this.quesmodel.count();
      const offset = Math.floor(Math.random() * total);
      const picked = await this.quesmodel.findOne().skip(offset).exec();
      pickedIds.push(picked._id);
      remaining -= 1;
    }
    console.log(test.questions); // prints final results
    await test.save();
    res.status(200).json({ test });
  } catch (e) {
    res.sendStatus(500);
    console.error(e);
  }
}
FYI, your scheme for picking a random record from a collection is subject to a race condition if there are other processes modifying that collection while you are selecting the random record. This is because there's a time period between when you do .count() and .skip(random) and the collection could be changed in that time window. There are multiple other techniques for picking a random item that each have their own situations where they are best.
I am definitely sure this is problem with your loop. Please use async in place of while.
So i have this 2-layer query in node.js, each query could return multiple results. My code actually just ignores that for now. This is the best i can get, it seems working.
How to correct it please, i don't know how to callback for the 2nd one.
Also the db.close() is always called before the 2nd query finishes, even i have serialize().
// From the question: reads every row of Info1DB, runs one DoorDB lookup per
// row, and calls `callback(null, list)` once as many lookups have succeeded
// as there are rows. (`...` marks code elided by the author.)
var getInfo1Db = function(callback) {
var db = new sqlite3.Database("DB.sqlite3");
var cnt = 0;
var info1JsonObj = [];
db.all("select * from Info1DB",
function(err, rows) {
// NOTE(review): `err` of the outer query is not checked before rows.length is read.
db.serialize(function() {
for(var ii=0, len=rows.length; ii<len; ii++) {
var t2 = rows[ii].info1;
var doorId = ...
// NOTE(review): the id is concatenated into the SQL text rather than bound as a parameter.
db.all("select * from DoorDB where ObjectID=" + doorId,
function(err, row2) {
if(err) {
// NOTE(review): errors are dropped here, and cnt is not incremented, so
// the final callback is never invoked once any lookup fails.
} else {
var doorName = row2[0]...
var info1JsonElem = {
"DoorName" : doorName
};
info1JsonObj.push(info1JsonElem);
cnt++;
if(cnt === rows.length) {
callback(null, info1JsonObj);
}
}
}
); // for the only door info based on door id
} // for each row of info1
// This statement follows the for loop directly, so it executes right after
// the inner queries have been issued, not from inside their callbacks.
db.close(); // why this finishes before the 2nd db.all
} ); // end of serialize
});
};
You can't implement nested queries in sqlite3 the normal way. (I mean you can't even do it the callback-hell way, because sqlite3 needs to close the connection before another query is called; otherwise you will always get an error.)
You have to use Promise, async and await to do this.
( it's worth to spend 30 minutes to learn these 3 words )
Step1. define a async function like this:
async query_1() {
new Promise(resolve => {
db = ...
db.serialize( () => {
db.get('select .. from ... where id = 1', [], (error, row) => {
// here is the KEY: put the result into resolve
// this equals to the "return" statement in non-sync method.
resolve(row)
}
})
db.close()
})
}
and also implement your query_2 function like this:
async query_2() {
let query_1_result = await this.query_1()
db = ...
db.serialize( () => {
db.get('select .. from ... where dependency_id = ' + query_1_result, [], (error, row) => {
// other code here...
}
})
db.close()
}
refer to my answer: https://stackoverflow.com/a/67881159/445908
How about using 2 function to do these ?
function db_query1(your_param,...., callback){
// database operation
db.run( sql , [param,...] , function(err,rows){
if(err) // return
else{
// get rows with callback
callback(null, rows);
}
});
}
function db_query2(your_param,...., callback){
// database operation
db.run( sql , [param,...] , function(err,rows){
if(err) // return
else{
// get rows with callback
callback(null, rows);
}
});
}
And call these function:
// Chain the two queries: the second one starts only inside the first one's callback.
db_query1(....,function(err,result1){
if(err) ...// return
// do the things with result1
// And then call query2
db_query2(....,function(err,result2){
if(err) ...// return
// do the things with result2
});
});
Hope this will help :)
You can use Promise.all, an array, and the second callback of node-sqlite3's db.each(), which is executed once all rows have been fetched, to simplify the nested query (see the node-sqlite3 db.each usage documentation).
I cannot really get the meaning of the variables you are using thus I assume that each row in Info1DB has a one-to-many relationship with DoorDB on the field doorId.
// Reads every Info1DB row, attaches the matching DoorDB rows as `doors`, and
// calls `callback(null, rows)` once the number of finished rows equals the
// number reported by the outer db.each completion callback.
// NOTE(review): sql, numVersions, countVersions, info1JsonObj and doorId are
// assigned without declarations, so they are shared between calls.
async function getInfo (callback) {
sql = "select * from Info1DB;";
numVersions = 0;
countVersions = 0;
info1JsonObj = [];
db.serialize(function() {
db.each(sql, [], (err, info1Row) => {
// NOTE(review): `err` of the outer row callback is not checked.
sql = "select * from DoorDB where ObjectID=?;";
info1Row.doors = [];
doorId = ...
db.each(sql, [doorId], (err, doorRow) => {
// Each door row (or its error) is wrapped in an already-settled promise.
info1Row.doors.push(new Promise((resolve, reject) => {
if (err) {
reject(err);
} else {
resolve(doorRow);
}
}));
}, (err, num) => {
// Completion callback of the inner db.each: unwrap the collected promises.
Promise.all(info1Row.doors)
.then((doors) => {
info1Row.doors = doors;
info1JsonObj.push(info1Row);
countVersions++;
// NOTE(review): this relies on numVersions already having been set by the
// outer completion callback below; confirm the ordering node-sqlite3
// guarantees, otherwise the final callback may never fire.
if (countVersions == numVersions) {
callback(null, info1JsonObj);
}
}).catch((err) => {
callback(err, null);
});
});
}, (err, versions) => {
// Completion callback of the outer db.each: records its second argument as the expected row count.
numVersions = versions;
});
});
}
I have three blocks of code: block one executes first, its result is passed to block two, and the final result is then passed to the third block, which has to send the data to the route.
But at the end the return is undefined.
/**
 * Fetches the menu record for a role and reports the names of its permission keys.
 *
 * @param {string} userRole - role name matched against the `name` field.
 * @param {function(?Error, string[]=)} callback - receives the keys of
 *   permissions[0] of the first matching record, an empty array when there
 *   is no such record, or the query error.
 */
function getUserKey(userRole, callback) {
  base.menuModel.find({ 'name' : userRole }, function (err, result) {
    // A value returned from this callback goes nowhere; the error has to be
    // delivered through `callback`, or the caller waits forever.
    if (err) {
      return callback(err);
    }
    // JSON round-trip kept from the original; presumably it turns the model
    // documents into plain objects so that Object.keys sees only data fields.
    var menu = JSON.parse(JSON.stringify(result));
    var permissions = menu && menu[0] && menu[0].permissions && menu[0].permissions[0];
    //returns menukeys to be shown
    callback(null, permissions ? Object.keys(permissions) : []);
  });
}
n is holding the menu keys
/**
 * Loads the global menu entries whose title is one of the given keys.
 *
 * @param {string[]} n - menu keys to match against `title`.
 * @param {function(?Error, Array=)} callback - receives the matching entries
 *   or the query error.
 */
function userMenuData(n, callback) {
  base.globalMenuModel.find({"title" : { $in : n}}, function (err, result) {
    // Deliver the error through the callback; a value returned from here is
    // discarded and the caller would never be notified.
    if (err) {
      return callback(err);
    }
    callback(null, result);
  });
}
// From the question: composes getUserKey and userMenuData and tries to
// return the composed result synchronously.
var userMenu = function(userRole,callback) {
// async.compose(f, g) builds a function that runs g first and feeds its result to f.
var userMenuTemp = async.compose(userMenuData, getUserKey);
var sendData = userRole is passed and the result is obtained
userMenuTemp(userRole,function(err,result) {
// NOTE(review): this value is returned to whoever invokes the callback, not
// to userMenu's caller; the `callback` parameter of userMenu is never used.
return result; // data success
});
console.log(sendData); //undefined
return sendData;
}
here i want to pass sendData to route in node.js
but at the console i am getting undefined.
Thanks for any help
It's the async nature of node that's getting you. The console.log is happening before any of those functions are returned. You want to check out a library that does promises like Q http://documentup.com/kriskowal/q/
I'm new to Node from the lands of C#, PHP and Python. I've been working days in many variations of the same problem - how can I retrieve a set of data, based on that data, retrieve another set, then render the results out. I've tried the method below, event based (client.on("row")) and the async module and I can't get any to produce the right results. In the end, I'd like to pass a projects object with tasks added to Express to render.
Could anyone help me out of this hole?
// Route handler from the question: loads all projects, then the tasks of
// each project, and renders the 'main' view.
exports.index = function(req, res){
// NOTE(review): user_id is hard-coded to 1 before the session check below.
req.session.user_id = 1;
if (req.session == undefined || req.session.user_id == null || req.session.user_id < 0) {
res.redirect('/login');
} else {
var pg = require('pg');
var conString = "postgres://jason#localhost:5432/simpleproject";
var client = new pg.Client(conString);
client.connect(function(err) {
client.query("SELECT * FROM project", function(err, projects) {
// The loop issues one task query per project without waiting for results.
for (var i=0; i<projects.rowCount; i++) {
// `var` is function-scoped: every query callback below shares this one
// `project` variable and the one loop counter `i`.
var project = projects.rows[i];
client.query("SELECT * FROM task WHERE project_id="+projects.rows[i].id, function(err, subrows) {
if (subrows.rowCount > 0) {
project.tasks = subrows.rows;
console.log("adding tasks");
} else {
project.tasks = null;
}
// Compares the shared counter, not the index of the project this callback belongs to.
if (i==projects.rowCount) {
console.log("rendering");
res.render('main', { title: 'My Projects', projects: projects });
}
});
}
// NOTE(review): the error is only inspected after projects.rowCount has already been read.
if (err != null) { console.log(err); }
}
);
});
}
};
UPDATE: Meryn below provides a good solution to my issue, just to share that information, in the end, below his code with a little touch up to get it to operate: (thanks Meryn!)
var async = require('async');
exports.index = function(req, res){
req.session.user_id = 1;
if (req.session == undefined || req.session.user_id == null || req.session.user_id < 0) {
res.redirect('/login');
} else {
var pg = require('pg');
var conString = "postgres://jason#localhost:5432/simpleproject";
var client = new pg.Client(conString);
var addTasksToProject = function(projectRow, cb) { // called once for each project row
client.query("SELECT * FROM task WHERE project_id="+projectRow.id, function(err, result) {
console.log("tasks");
if(err) return cb(err); // let Async know there was an error. Further processing will stop
projectRow.tasks = result.rows;
cb(null); // no error, continue with next projectRow, if any
});
};
client.connect(function(err) {
client.query("SELECT * FROM project", function(err, projects) {
console.log("projects");
if (err) return console.error(err);
async.each(projects.rows, addTasksToProject, function(err) {
if (err) return console.error(err);
// all project rows have been handled now
console.log(projects.rows);
res.render('main', { title: 'My Projects', projects: projects.rows});
});
});
});
}
};
You need to familiarize yourself with asynchronous flow-control. It can be tricky because the async functions (postgres queries in this case) will execute right after another in the same turn of the event loop, while the results come trickling in in subsequent turns.
For your code example, this effectively means that i will reach projects.rowCount and project will be set to projects.rows[projects.rowCount-1] almost instantly, while the queries have only been queued up. They stay like this when the results for the queries come in. Not what you want.
The quickest solution is to use the Async library: https://github.com/caolan/async. This will handle the tedious bean-counting for you.
For this particular example, you'd replace the code within the client.connect callback with something like
// Loads the tasks of one project row and stores them on the row itself.
var addTasksToProject = function(projectRow, cb) { // called once for each project row
  client.query("SELECT * FROM task WHERE project_id="+projectRow.id, function(err, result) {
    if(err) return cb(err) // let Async know there was an error. Further processing will stop
    projectRow.tasks = result.rows
    cb(null) // no error, continue with next projectRow, if any
  })
}
// Load all projects, enrich each one with its tasks, then render.
client.query("SELECT * FROM project", function(err, projects) {
  if (err) return console.error(err)
  async.each(projects.rows, addTasksToProject, function(err) {
    if (err) return console.error(err)
    // all project rows have been handled now
    res.render('main', { title: 'My Projects', projects: projects.rows});
  })
})
Note that because how Javascript object references work, the objects part of the project.rows array will be actually modified in place. This wouldn't be the case if you'd actually try to assign a new value to the projectRow variable.
I am new to javascript and node.js and this is my first post, so please bear with me.
I am using ntwitter to get all previous tweets of a specific user.
My problem is that if the user has more than 200 tweets, I need to create a loop and I am not sure if I do it right.
This is the async function that gets the 200 latest tweets:
exports.getUserTimeline = function(user, callback) {
twit.getUserTimeline({ screen_name: user, count: 200 }, function(err, data) {
if (err) {
return callback(err);
}
callback(err, data);
});
}
I found a solution to do this using a recursive function, but it's quite ugly.. How can I improve it ?
// From the question: collects a user's whole timeline by requesting pages of
// 200 tweets until a page adds nothing new, then calls callback(err, data).
exports.getUserHistory = function(user, callback) {
recursiveSearch(user, callback);
// lastId: id to pass as max_id for the next page (omitted on the first call).
// data: tweets accumulated so far.
function recursiveSearch(user, callback, lastId, data) {
var data = data || []
, args = {screen_name: user, count: 200};
if(typeof lastId != "undefined") args.max_id = lastId;
twit.getUserTimeline(args, function(err, subdata) {
if (err) {
console.log('Twitter search failed!');
return callback(err);
}
// On every page after the first, the leading tweet is dropped;
// presumably it repeats the tweet whose id was sent as max_id.
if (data.length !== 0) subdata.shift();
data = data.concat(subdata);
// NOTE(review): throws when `data` is still empty (no tweets at all), and
// parseInt yields a Number, which cannot represent ids above
// Number.MAX_SAFE_INTEGER exactly; confirm the id range.
var lastId = parseInt(data[data.length-1].id_str);
if (subdata.length !== 0) {
recursiveSearch(user, callback, lastId, data);
} else {
callback(err, data);
}
});
}
}
Thanks a lot!
Update: This is the improved (refactored) function suggested by hunterloftis with two modifications:
property max_id should not be specified on the first iteration
the case where the user exists but no tweets have been posted must be handled
code:
/**
 * Collects a user's complete timeline, 200 tweets per request.
 *
 * @param {string} user - screen name.
 * @param {function(?Error, Array=)} done - receives every collected tweet,
 *   newest first (an empty array when the user has none), or the first
 *   request error.
 */
function getUserHistory(user, done) {
  var data = [];
  search();

  // Requests one page; `lastId` is the id_str of the oldest tweet collected
  // so far and is omitted on the first request.
  function search(lastId) {
    var args = {
      screen_name: user,
      count: 200,
      include_rts: 1
    };
    if (lastId) args.max_id = lastId;
    twit.getUserTimeline(args, onTimeline);

    function onTimeline(err, chunk) {
      if (err) {
        console.log('Twitter search failed!');
        return done(err);
      }
      if (!chunk.length && !data.length) {
        console.log('User has not tweeted yet');
        return done(undefined, data);
      }
      // max_id is inclusive, so a follow-up page starts with the tweet that
      // is already stored; drop it only when it really is that tweet.
      if (chunk.length && chunk[0].id_str === lastId) chunk.shift();
      if (!chunk.length) {
        console.log(data.length + ' tweets imported');
        return done(undefined, data);
      }
      data = data.concat(chunk);
      // Keep the id as a string: converting it to a Number can round large
      // ids and make the next request start from the wrong tweet.
      return search(data[data.length - 1].id_str);
    }
  }
}
When retrieving tweets I noticed that my tweet count wasn't always the same as the 'statuses_count' property of the user. It took me some time to figure out that this difference corresponds to the number of deleted tweets :)
Does your recursive function work? Doesn't look too bad to me. I might refactor it just a little into something more like this:
/**
 * Collects a user's complete timeline, 200 tweets per request.
 *
 * @param {string} user - screen name.
 * @param {function(?Error, Array=)} done - receives every collected tweet,
 *   newest first (an empty array when the user has none), or the first
 *   request error.
 */
function getUserHistory(user, done) {
  var data = [];
  search();

  // Requests one page; `lastId` is the id_str of the oldest tweet collected
  // so far and is omitted on the first request.
  function search(lastId) {
    var args = {
      screen_name: user,
      count: 200
    };
    // Only send max_id once there is a previous page to continue from.
    if (lastId) args.max_id = lastId;
    twit.getUserTimeline(args, onTimeline);

    function onTimeline(err, chunk) {
      if (err) {
        console.log('Twitter search failed!');
        return done(err);
      }
      // max_id is inclusive, so a follow-up page starts with the tweet that
      // is already stored; drop it only when it really is that tweet.
      if (chunk.length && chunk[0].id_str === lastId) chunk.shift();
      // Nothing new: either the user has no tweets or the history is complete.
      if (!chunk.length) return done(undefined, data);
      data = data.concat(chunk);
      // Keep the id as a string: converting it to a Number can round large
      // ids and make the next request start from the wrong tweet.
      return search(data[data.length - 1].id_str);
    }
  }
}