Nodejs MongoDB updating documents with asynchronous function - node.js

I'm new to NodeJS and MongoDB. Is there a way to update a document using asynchronous functions in Node.js? I have a collection of websites whose documents have the fields _id, name and registrant. I want to set the value of registrant using an asynchronous function. The function makes a system call, and I just parse the part of the output that I want. When I run the code, the async function logs the information I want, but it is not stored in the database. Any suggestions on how I can solve this? Thanks.
/*
* { _id: 53448014b15c693931000002,
* name: 'google.com',
* registrant: 'defaultval' } */
var MongoClient = require('mongodb').MongoClient;
var ObjectID = require('mongodb').ObjectID;
// Target document id and the domain whose whois output should be stored.
var id = '53448014b15c693931000002';
var domain = 'google.com';
// Connect, then try to set `registrant` on the document with the given id.
MongoClient.connect('mongodb://127.0.0.1:27017/mydb', function(err, db) {
if (err) throw err;
var collection = db.collection('websites');
collection.findAndModify({_id:new ObjectID(id)},
{},
// NOTE: the update document is built synchronously, so `registrant` receives
// whatever test() itself returns, not what the callback below returns.
// test() (defined further down) has no return statement, so that is undefined.
{$set: {registrant: test(domain, function (output) {
// Scan the whois output line by line for the registrant organization.
var t = output.split("\n");
for (var i = 0; i < t.length; ++i) {
if (t[i].indexOf("Registrant Organization:") != -1) {
console.log(t[i].substring(t[i].indexOf(":") + 2, t[i].length));//prints correct value, need this to store in registrant doc
// This value is returned to whoever invokes the callback; test() discards it.
return t[i].substring(t[i].indexOf(":") + 2, t[i].length);
}
}
})}},
{},
function(err, object) {
if (err) console.log(err.message);
else {
console.log(object);
}
// Close the connection once the update attempt has reported back.
db.close();
});
});
var test = function(domain, cb) {
var sys = require('sys');
var exec = require('child_process').exec;
var child = exec('whois ' + domain, function(error, stdout, stderr) {
cb(stdout);
});
}

Start one process per CPU core:
// Load the cluster module, count the CPUs, and note whether we are on Windows.
var cluster = require('cluster')
, numCPUs = require('os').cpus().length
, windows = require('os').platform() == 'win32';
// The master process only forks workers and reports their exits.
if(cluster.isMaster) {
// Fork workers.
for (var i = 0; i < numCPUs; i++) {
cluster.fork();
}
cluster.on('exit', function(worker, code, signal) {
console.log('worker ' + worker.pid + ' died');
});
// Stop here in the master so the code that follows runs only in workers.
return;
}
Create function to be async:
/**
 * Task in the shape async.parallel expects: looks up one document whose `id`
 * equals a freshly drawn random number and forwards (err, document).
 *
 * @param {function(*, *)} callback - Called with the error and the found document.
 */
function mongodbDriverQuery(callback) {
  var forwardResult = function (err, world) {
    callback(err, world);
  };
  collection.findOne({ id: getRandomNumber() }, forwardResult);
}
// HTTP entry point. NOTE: this snippet is truncated; the switch statement and
// the createServer call are not closed within the excerpt.
http.createServer(function (req, res) {
// JSON response object
var hello = {message: "Hello, World!"};
var helloStr = "Hello, World!";
var path = url.parse(req.url).pathname;
// mysql on windows is not supported
if (windows && (path.substr(0, 3) == '/my' || path == '/update')) {
path = '/doesntexist';
}
switch (path) {
case '/mongodbdriver':
// Database Test
var values = url.parse(req.url, true);
// Number of queries to run comes from ?queries=..., defaulting to 1.
var queries = values.query.queries || 1;
var queryFunctions = new Array(queries);
// Every slot runs the same task; async.parallel invokes each one separately.
for (var i = 0; i < queries; i += 1) {
queryFunctions[i] = mongodbDriverQuery;
}
res.writeHead(200, {'Content-Type': 'application/json; charset=UTF-8'});
// and run it
async.parallel(queryFunctions, function(err, results) {
// A single query is answered with the bare document rather than an array.
if (queries == 1) {
results = results[0];
}
res.end(JSON.stringify(results));
});
break;

Related

nodejs + Mongodb: Inserting into two collections in sequence repeats last value in second

I am using the following to insert into MongoDB.
// For every incoming tag, find the logger parameter with the same id and
// insert one raw document followed by one history document.
var tagData = JSON.parse(data);
var allTags = tagData.tags;
for (var j = 0; j < allTags.length; j++) {
var p = allTags[j].tagId.toString();
for (var k = 0; k < loggerParams.length; k++) {
var q = Object.keys(loggerParams[k]).toString();
if (p === q) {
// Prepare raw data tag
// NOTE: `var` is function-scoped, so every iteration reassigns the same
// `tagRawDoc` binding that the insert callback below closes over.
var tagRawDoc = {};
// Simple key-value assignment here
// Document prepared; ready to insert into MongoDB
database.addDocument('tagraw', tagRawDoc, function (err) {
if (err) {
log.info(util.format('Error adding document to tagrawdatas. %s', err.message));
throw err;
} else {
// Prepare history tag
var historyTagDoc = {};
// Reads `tagRawDoc` when this callback runs. If that is after the loops have
// finished (presumably, since the insert is asynchronous), it sees the last
// document assigned rather than the one inserted by this iteration.
historyTagDoc.tagNameAlias = tagRawDoc.tagNameAlias;
// Simple key-value assignment here
// Document prepared; ready to insert into MongoDB
database.addDocument('taghistory', historyTagDoc, function (err) {
if (err) {
log.info(util.format('Error adding document to tagrawdatas. %s', err.message));
throw err;
}
});
}
});
// Match found; exit loop
break;
}
}
}
loggerParams is a simple JSON document read from a file elsewhere. It is used as a look-up in this code to build the document to be inserted. There will be 12 values in the allTags array. These 12 values are inserted successfully into the tagraw collection. However, in the taghistory collection, the values from the last (most recent) entry made into the tagraw collection are repeated 12 times. Why does this happen?
The database.addDocument is shown below. It is a part of this article I am trying to replicate.
var MongoClient = require('mongodb').MongoClient;
var assert = require('assert');
var logger = require('../../util/logger');
var util = require('util');
/**
 * Small wrapper around one MongoDB connection that logs what it does.
 * `db` holds the sentinel string "empty" until connect() has succeeded.
 */
function DB() {
  this.db = "empty";
  this.log = logger().getLogger('mongoMange-DB');
}

/**
 * Opens a connection to `uri` unless one is already held.
 *
 * @param {string} uri - MongoDB connection string.
 * @param {function(Error=)} callback - Called with the connection error, or
 *   with no arguments once a connection is available.
 */
DB.prototype.connect = function(uri, callback) {
  this.log.info(util.format('About to connect to DB'));
  if (this.db != "empty") {
    callback();
    this.log.info('Already connected to database.');
  } else {
    // Keep a reference for the driver callback, where `this` is not the DB.
    var _this = this;
    MongoClient.connect(uri, function(err, database) {
      if (err) {
        _this.log.info(util.format('Error connecting to DB: %s', err.message));
        callback(err);
      } else {
        _this.db = database;
        _this.log.info(util.format('Connected to database.'));
        callback();
      }
    });
  }
};

/**
 * Closes the held connection and then invokes `callback` with no arguments.
 *
 * @param {function()} callback - Called after close() has been issued.
 */
DB.prototype.close = function(callback) {
  // Use the instance logger; there is no free-standing `log` in this module.
  this.log.info('Closing database');
  this.db.close();
  this.log.info('Closed database');
  callback();
};

/**
 * Inserts one document into the named collection.
 *
 * @param {string} coll - Collection name.
 * @param {Object} doc - Document to insert.
 * @param {function(Error=)} callback - Called with the driver's Error object
 *   on failure (so callers can read `err.message` or rethrow it), or with no
 *   arguments on success.
 */
DB.prototype.addDocument = function(coll, doc, callback) {
  var collection = this.db.collection(coll);
  var _this = this;
  collection.insertOne(doc, function(err, result) {
    if (err) {
      _this.log.info(util.format('Error inserting document: %s', err.message));
      callback(err);
    } else {
      _this.log.info(util.format('Inserted document into %s collection.', coll));
      callback();
    }
  });
};
module.exports = DB;
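That's because you are mixing synchronous for loops with asynchronous code (database.addDocument), which causes problems with variable scope in Node.js: by the time the callbacks run, the loop has already finished and the function-scoped variables hold their last values.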
A simple example of this kind of thing:
// Demonstration of the pitfall: `var i` is function-scoped, so all ten timer
// callbacks share one `i`. They run after the loop has finished, so each logs 10.
for(var i = 0; i < 10; i++){
setTimeout(() => console.log(i), 0);
}
You should use a package like async to handle flow control when iterating arrays/object asynchronously.
Simple example of your code refactored to use async:
var async = require('async');
var tagData = JSON.parse(data);
var allTags = tagData.tags;
// Process every tag with its own function scope, so each insert callback
// sees the documents that belong to its own tag.
async.each(allTags, function(tag, done){
  var p = tag.tagId.toString();
  var loggerParam = loggerParams.find(function(loggerParam){
    var q = Object.keys(loggerParam).toString();
    return p === q;
  });
  // The original nested loop only inserted when a logger parameter matched
  // the tag; skip unmatched tags instead of inserting empty documents.
  if (!loggerParam) return done();
  var tagRawDoc = {};
  // Simple key-value assignment here
  // Document prepared; ready to insert into MongoDB
  return database.addDocument('tagraw', tagRawDoc, function (err){
    if (err) return done(err);
    // Prepare history tag
    var historyTagDoc = {};
    historyTagDoc.tagNameAlias = tagRawDoc.tagNameAlias;
    // Simple key-value assignment here
    // Document prepared; ready to insert into MongoDB
    return database.addDocument('taghistory', historyTagDoc, done);
  });
}, (err) => {
  // Runs once, after every tag has finished or as soon as one reports an error.
  if(err) throw err;
  console.log('All done');
});

Redis in Nodejs for loop not working properly

I have a for loop like below which isn't getting executed as expected.
var redis = require('redis');
var client = redis.createClient();
// Keys to process: delete every key whose index is divisible by 3,
// increment the others, and create any key that does not exist yet.
var arr = [{title:"title1"},{title:"title2"},{title:"title3"},{title:"title4"}];
for(var i =0; i<arr.length; i++){
//console.log(arr[i]);
// `var` is function-scoped: every callback created below shares this one
// `obj1` binding (and the loop's `i`).
var obj1 = arr[i];
client.get(obj1.title, function(err, response){
if(err){
console.log(err);
}
// `i` and `obj1` are read when the reply arrives, not when get() was issued.
if(response){
if(i%3==0){
client.del(obj1.title);
}else{
client.incr(obj1.title);
}
}else{
client.set(obj1.title, 1);
}
});
}
Running the code below afterwards produced the following output:
// Read back every key and let redis.print report each reply.
arr.forEach(function (entry) {
  client.get(entry.title, redis.print);
});
The output:
Reply: null
Reply: null
Reply: null
Reply: null
Reply: null
Reply: null
Reply: 2
which was not what I expected, since every value except the ones whose index is divisible by 3 should be at least 1.
Please create a new function. In the new function, you can delete, increment or creating the new key.
The below code works fine for me. Please check.
var redis = require('redis');
var client = redis.createClient();
// The four keys the example works on, as objects with a single `title` field.
var arr = ['title1', 'title2', 'title3', 'title4'].map(function (title) {
  return { title: title };
});
/**
 * Looks up `obj1.title` in redis and then deletes, increments or creates it.
 *
 * Because the entry and its index are function parameters, each reply handler
 * works on its own pair instead of on shared loop variables.
 *
 * @param {{title: string}} obj1 - Entry whose `title` is the redis key.
 * @param {number} i - Index of the entry; keys at indexes divisible by 3 are
 *   deleted when they exist, other existing keys are incremented.
 */
function delOrIncr(obj1, i) {
  client.get(obj1.title, function(err, response) {
    if (err) {
      console.log(err);
      // Without a valid reply we cannot tell whether the key exists, so do
      // not fall through and overwrite it with 1.
      return;
    }
    if (response) {
      if (i % 3 === 0) {
        console.log('Deleting >' + obj1.title);
        client.del(obj1.title);
      } else {
        console.log('Increment >' + obj1.title);
        client.incr(obj1.title);
      }
    } else {
      console.log('Creating new >' + obj1.title);
      client.set(obj1.title, 1);
    }
  });
}
// Hand every entry together with its index to delOrIncr.
for (var idx = 0, total = arr.length; idx < total; idx += 1) {
  delOrIncr(arr[idx], idx);
}
Note:-
Please run the get as a separate program to check the result of the above program.
var redis = require('redis');
var client = redis.createClient();
// Annotated copy of the question's loop. It is intentionally left buggy so the
// comments can point at the two closure mistakes.
var arr = [{title:"title1"},{title:"title2"},{title:"title3"},{title:"title4"}];
for(var i =0; i<arr.length; i++){
  //console.log(arr[i]); // this is cool
  var obj1 = arr[i];
  client.get(obj1.title, function(err, response){
    if(err){
      console.log(err);
    }
    if(response){
      if(i%3==0){
        // mistake 1:
        // the replies arrive after the loop is over, so `i` is already
        // arr.length (4) here for every callback
        // mistake 2:
        // obj1 will be arr[3] here, not what you were expecting :D
        client.del(obj1.title);
      }else{
        client.incr(obj1.title);
      }
    }else{
      // so only the last obj gets stored.
      client.set(obj1.title, 1);
    }
  });
}

Node.js async - build object from loop, then do something with object

I'm trying to run a function and once that function is complete, then run another function. The first function reads a CSV file, makes a GET request, and builds an object. The second function uses that newly created object to create a new CSV file.
The problem I'm having is that the new CSV file is being created prior to the GET requests finishing.
I'm using async.parallel to set the flow, but not able to get the logic right.
I'd love to know what I'm doing wrong and better understand how node thinks about these tasks.
// Require
var request = require('request');
var fs = require('fs');
var json2csv = require('json2csv');
var csv = require('csv');
var async = require('async');
// Params
var emailHunter_apiKey = '0000';
var emails = [];
var fields = ['email'];
var i = 0;
// Start
// Single-task async.parallel: read the CSV, look up e-mails for the first
// rows, then (in the final callback) write them to export.csv.
async.parallel([
function(callback){
setTimeout(function(){
var file = fs.readFileSync('file.csv');
csv.parse(file, {delimiter: ','}, function (err, data) {
for (var key in data) {
// Only rows seen while i < 5 are looked up; i counts every row.
if (i < 5) {
if (data.hasOwnProperty(key)) {
var h = data[key];
// Column 5 is used as the domain to query.
if (h[5] != '') {
var url = h[5];
url = url.replace('//', '');
url = url.replace('www.', '');
request('https://api.emailhunter.co/v1/search?domain=' + url + '&api_key=' + emailHunter_apiKey + '', function (error, response, body) {
if (!error && response.statusCode == 200) {
var json = JSON.parse(body);
for (var subObj in json) {
if (json.hasOwnProperty(subObj) && subObj == 'emails') {
var emailObj = json[subObj];
// Collect each address as {email: ...} in the shared `emails` array.
for (var key in emailObj) {
var email = {
'email': emailObj[key]['value']
};
emails.push(email);
}
}
}
}
});
}
}
}
i++;
}
});
// NOTE: this runs right after csv.parse() has been called and does not wait
// for any request() reply, so `emails` is presumably still empty here.
callback(null, emails);
}, 200);
// Logged before the 200 ms timer above has fired.
console.log(emails);
}
],
function(err, results){
// `results` holds one entry per task, i.e. an array containing the `emails`
// array. NOTE(review): confirm this is the shape json2csv expects.
json2csv({data: results, fields: fields}, function (err, csv) {
if (err) console.log(err);
fs.writeFile('export.csv', csv, function (err) {
if (err) throw err;
console.log('file saved');
});
});
console.log(results);
});
As laggingreflex mentioned, you're using async incorrectly.
First you should build an array of the functions that you want to execute in parallel, and then use async to execute them.
Furthermore, your callback was getting executed immediately because csv.parse() is an async function. Therefore node fires it immediately and then executes callback(). You need to move the callback inside of parse().
Try this...
// Params
var emailHunter_apiKey = '0000';
var emails = [];
var fields = ['email'];
var maxRows = 5; // only the first five CSV rows are looked up
var functionsToRunAsync = [];
var file = fs.readFileSync('file.csv');

/**
 * Builds one task for async.parallel that fetches the e-mail addresses known
 * for `domain` and appends them to the shared `emails` array.
 *
 * A factory gives every task its own `domain`; closing over a loop's `var`
 * directly would make all tasks query the domain of the last row.
 * The returned task calls `done` exactly once, whether or not the lookup
 * produced any addresses.
 *
 * @param {string} domain - Domain to search for.
 * @returns {function(function())} Task accepted by async.parallel.
 */
function makeLookupTask(domain) {
  return function (done) {
    request('https://api.emailhunter.co/v1/search?domain=' + domain + '&api_key=' + emailHunter_apiKey + '', function (error, response, body) {
      if (error || response.statusCode != 200) {
        // Best effort, as before: a failed lookup contributes no addresses.
        return done();
      }
      var json;
      try {
        json = JSON.parse(body);
      } catch (parseError) {
        console.log(parseError);
        return done();
      }
      var emailObj = json && json.emails;
      for (var key in emailObj) {
        emails.push({
          'email': emailObj[key]['value']
        });
      }
      done();
    });
  };
}

csv.parse(file, {delimiter: ','}, function (err, data) {
  if (err) {
    console.log(err);
    return;
  }
  data.slice(0, maxRows).forEach(function (h) {
    // Column 5 holds the site URL; rows without one are skipped.
    if (h[5]) {
      var url = h[5];
      url = url.replace('//', '');
      url = url.replace('www.', '');
      // add a new function to an array, to be executed later
      functionsToRunAsync.push(makeLookupTask(url));
    }
  });
  // now that we have all of the functions in an array, we run them in parallel
  async.parallel(
    functionsToRunAsync,
    function (err) { // all async functions complete
      // The tasks report nothing through async; the collected rows live in `emails`.
      json2csv({data: emails, fields: fields}, function (err, csvText) {
        if (err) {
          console.log(err);
          return;
        }
        fs.writeFile('export.csv', csvText, function (err) {
          if (err) throw err;
          console.log('file saved');
        });
      });
      console.log(emails);
    });
});

Using npm async to run in parallel async methods and return a unified response

I have 2 async methods that can run independently one from each other. I would like to call a callback once both are finished. I have tried using async.parallel() (npm) but this seems to be for non async methods. How can I implement this?
Here is my async.parallel call(); note that asyncTasks is my function array, where the functions are async.
// Run all tasks and report the combined outcome through the outer `callback`.
// NOTE(review): the final handler takes a single parameter named resultFinal;
// the answer's example uses (err, result), so the first slot is presumably
// the error rather than the results. Confirm against the async documentation.
async.parallel(asyncTasks, function(resultFinal){
console.log("--------->>>>> message: "+JSON.stringify(resultFinal));
console.log("");
callback(new RequestResponse(true, resultFinal));
});
In short, what I really want is a way to execute multiple async methods in parallel and consider that method finished when the callback provided for that function is triggered.
UPDATE
for a better understanding, I've included the two functions I am using
getGroups
// Task 1: collects the user's groups, the members of those groups and the
// groups' events into `responseData` (declared outside this snippet), then
// reports through callback_async_1. Runs four queries, each issued from the
// callback of the previous one.
var getGroups = function (callback_async_1) { //get groups + members
pg.connect(datebasePath, function (err, client, done) {
// NOTE: the `err` from pg.connect is not checked before `client` is used.
var s = squel.select();
s.from("groups_members");
s.where("user_id = ?", userId);
console.log("query: " + s.toString());
client.query(s.toString(), function (err, result) { //get groups ids in which i am a member
if (err) {
console.error("error...1 " + err);
callback_async_1(responseData);
} else {
// console.log("init -- get from group_members " + JSON.stringify(result.rows));
var groupIds = [];
if (result.rows.length > 0) {
for (var i = 0; i < result.rows.length; i++) {
groupIds.push(result.rows[i].group_id); // create group ids list
}
// console.log("group ids : " + groupIds);
}
// NOTE: there is no return after this callback, so with an empty id list the
// queries below still run and the callback can fire a second time.
if (groupIds.length === 0) {
callback_async_1(responseData);
}
var s = squel.select();
s.from("groups");
s.where("id IN ?", groupIds);
client.query(s.toString(), function (err, result2) { // retrieve all the groups in which i take part
if (err) {
console.error("error...2 " + err);
callback_async_1(responseData);
return;
} else {
// var groupIds2 = [];
// console.log("init -- get from groups " + JSON.stringify(result2.rows));
var groups = [];
// var groups_members = [];
for (var i = 0; i < result2.rows.length; i++) {
groups.push(result2.rows[i]); // adding group info to list
// var groupId = result2.rows[i].id;
// groupIds2.push(groupId);
}
// console.log("");
//console.log(" ------->>>> " + JSON.stringify(groups));
// console.log("");
// responseData.groups = groups;
responseData.push({ //pushing groups into response
"groups": groups
});
var s = squel.select();
s.from("groups_members");
s.where("group_id IN ?", groupIds);
client.query(s.toString(), function (err, result3) { // get all the members in my groups
// NOTE: unlike the other queries, `err` is not checked before result3 is read.
//console.log("get from group_members --- " + JSON.stringify(result3.rows));
var groupMembers = [];
for (var i = 0; i < result3.rows.length; i++) {
groupMembers.push({
groupMember: result3.rows[i] // pushing all the group members
});
}
//console.log("");
// console.log(" ------->>>> " + JSON.stringify(groupMembers));
// console.log("");
responseData.push({
"groupsMembers": groupMembers
});
// console.log("resulting json till now; Groups : " + JSON.stringify(responseData));
//fetching now events
var s = squel.select();
s.from("events");
s.where("group_id IN ?", groupIds);
client.query(s.toString(), function (err, result4) { //selecting all events that have my groups
if (err) {
console.error("error...3 " + err);
callback_async_1(responseData);
return;
} else {
var events = [];
for (var i = 0; i < result4.rows.length; i++) {
events.push(result4.rows[i]);
}
// responseData.events = events;
responseData.push({
"events": events
});
//responseData.push (events);
// Success path: all four result sets have been pushed.
callback_async_1(responseData);
// asyncTasks[1](callback);
}
});
});
}
});
}
});
// NOTE: done() runs as soon as the first query has been issued, i.e. before
// any of the query callbacks above have had a chance to run.
done();
});
};
getRegisteredContacts
// Task 2: looks up which of `arrayOfContacts` are registered users and pushes
// them into `responseData` (both declared outside this snippet), then reports
// through callback_async_2.
var getRegisteredContacts = function (callback_async_2) { // get registered contacts
pg.connect(datebasePath, function (err, client, done) {
// NOTE: the `err` from pg.connect is not checked before `client` is used.
//get contacts that are registered
var s = squel.select();
s.from("users");
s.where("phone_number IN ?", arrayOfContacts);
client.query(s.toString(), function (err, result5) { // retriving registered contacts -- should be run with async parallel, it does not depend on the other calls
if (err) {
console.error(err);
callback_async_2(responseData);
} else {
// Only add a "registeredContacts" entry when at least one row matched.
if (result5.rows.length > 0) {
var contacts = [];
for (var i = 0; i < result5.rows.length; i++) {
contacts.push(result5.rows[i]);
}
responseData.push({
"registeredContacts": contacts
});
}
//console.log("");
//console.log(" ------->>>> " + JSON.stringify(events));
// console.log("");
// console.log("final ---> " + JSON.stringify(responseData));
callback_async_2(responseData);
}
});
// NOTE: done() runs right after the query is issued, before its callback.
done();
});
};
You need your task function to take a parameter which you then call when the task is done
// Smallest possible task: do the work, then signal completion by calling the
// supplied callback with a null error.
var task = function(callback){
  var noError = null;
  console.log('Task');
  callback(noError);
};
When you are then doing something async within the task then your task would look like
// Task that does asynchronous work: completion is signalled from inside the
// request callback, so async only moves on once the response has arrived.
var task = function(callback){
  console.log('Task');
  request.get('http://www.google.com', function (error, response, body){
    if (error) {
      // No response object exists when the request itself failed; report the
      // error instead of dereferencing `response`.
      return callback(error);
    }
    console.log('Task - ' + response.statusCode);
    callback(null);
  });
};
Example
var async = require('async');
var request = require('request');
// Synchronous task: logs, then reports success with a null error.
var task1 = function(callback){
  var noError = null;
  console.log('Task 1');
  callback(noError);
};
// Asynchronous task: completes only once the HTTP request has answered.
var task2 = function(callback){
  console.log('Task 2');
  request.get('http://www.google.com', function (error, response, body){
    if (error) {
      // A failed request has no response object; hand the error to async.
      return callback(error);
    }
    console.log('Task 2 - ' + response.statusCode);
    callback(null);
  });
};
// Run both tasks and print a marker from the final callback.
var asyncTasks = [task1, task2];
var onAllDone = function(err, result){
  console.log('--DONE--');
};
async.parallel(asyncTasks, onAllDone);
Outputs
Task 1
Task 2
Task 2 - 200
--DONE--
Based on your new code listing the most obvious thing is done() is called too early for both of your tasks. It needs to be like
// Skeleton of the task with the pooled client released at the right moment.
var getRegisteredContacts = function (callback_async_2) {
  pg.connect(datebasePath, function (err, client, done) {
    if (err) {
      // No usable client was handed out, so there is nothing to query;
      // report the connection error to async instead of using `client`.
      return callback_async_2(err);
    }
    var s = squel.select();
    s.from("users");
    s.where("phone_number IN ?", arrayOfContacts);
    client.query(s.toString(), function (err, result5) {
      done(); // <---- done() to be here: release the client once the query has answered
      if (err) {
        //
      } else {
        //
      }
      callback_async_2();
    });
  });
};
You should also lint your code. If you had, you would have noticed that you never check whether the pg.connect callback received an err (linting also keeps the code easier to read correctly).

Node.js + Mongoose consume all memory on inserts

I need to fill Mongo collection with about 80 million of records like
//Example
{
"_id" : "4gtvCPATZ",
"isActivated" : false
}
Where _id is randomly generated. I do this using Node.js Express and Mongoose:
// Creates 100000 Code documents per request and then renders the index page.
app.get('/api/codes', function(req, res) {
for (var i = 0; i < 100000; i++) {
var code = new Code({
_id: randomStr(9),
isActivated: 0
});
// NOTE: save() is given no callback and the loop does not wait for it, so all
// 100000 saves are started before any of them is known to have finished.
code.save();
code = null;
}
// Rendered immediately after the loop, independent of the saves' outcome.
res.render('index');
});
/**
 * Builds a random string from letters and digits.
 *
 * The alphabet deliberately leaves out the look-alike characters `O`, `o`
 * and `0`. Characters are drawn with Math.random().
 * NOTE(review): Math.random() is not cryptographically secure; if these codes
 * must be unguessable, switch to a crypto-based source.
 *
 * @param {number} [m=9] - Desired length; any falsy value (including 0)
 *   falls back to 9.
 * @returns {string} Random string of the requested length.
 */
function randomStr(m) {
  var length = m || 9;
  var alphabet = 'ABCDEFGHIJKLMNPQRSTUVWXYZabcdefghijklmnpqrstuvwxyz123456789';
  // Declared locally: the result and the alphabet must not leak into globals.
  var result = '';
  for (var i = 0; i < length; i++) {
    result += alphabet.charAt(Math.floor(Math.random() * alphabet.length));
  }
  return result;
}
On 'index' jade layout has JS code that reloads the page to generate next 100000 records:
script(type='text/javascript').
location.href='http://localhost:3000/api/codes'
node.js process starts to consume memory and after 4-5 page reloads hangs up having 1GB of memory.
What am I doing wrong?
UPDATE:
Considering the robertklep's comment I've updated the code and it works properly:
// Inserts 100000 Code documents one after another, starting each save only
// after the previous one has reported back, then renders the index page.
app.get('/api/codes', function(req, res) {
  var inserted = 0;

  // Loop condition: keep going until 100000 saves have been started.
  var hasMore = function () {
    return inserted < 100000;
  };

  // Loop body: create and save one document, then continue.
  var insertNext = function (next) {
    inserted += 1;
    var code = new Code({
      _id: randomStr(9),
      isActivated: 0
    });
    code.save(function(){
      next();
    });
  };

  // Runs once the condition turns false.
  var finish = function (err) {
    res.render('index');
  };

  async.whilst(hasMore, insertNext, finish);
});
UPDATE 2: I've tested the freakish's advice and made my app work faster:
var MongoDB = require("mongodb");
// Inserts ten batches of 100000 generated codes with the native driver and
// renders the index page once all batches have been attempted.
app.get('/api/codes', function(req, res) {
  var MongoClient = require('mongodb').MongoClient;
  MongoClient.connect('mongodb://127.0.0.1:27017/code', function(err, db) {
    if (err) {
      // Without a connection there is no `db` to work with; answer the
      // request instead of crashing on the next line.
      console.log(err.message);
      res.statusCode = 500;
      return res.end('Could not connect to MongoDB');
    }
    var collection = db.collection('codes');
    var count = 0;
    async.whilst(
      function () { return count < 10; },
      function (callback) {
        count++;
        var docs = [];
        for (var i = 0; i < 100000; i++) {
          docs.push({
            _id: randomStr(9),
            isActivated: 0
          });
        }
        collection.insert(docs, function(err, docs) {
          // Best effort, as before: a failed batch does not stop the loop,
          // but the failure is no longer silent.
          if (err) console.log(err.message);
          callback();
        });
      },
      function (err) {
        // This handler opened the connection, so it also closes it; otherwise
        // every request would leave one connection behind.
        db.close();
        res.render('index');
      }
    );
  });
});
Now it writes about 1M records in 60-70 seconds.
Thanks!

Resources