I am trying to write code with NodeJS where I grab data from an external API and then populate it in MongoDB using Mongoose. In between, I check whether that particular record already exists in Mongo or not. Below is my code.
router.route('/report') // the REST api address
    .post(function(req, res) { // calling a POST
        console.log('calling report API');
        var object = "report/" + reportID; // related to the API
        var parameters = '&limit=100'; // related to the API
        var url = link + object + apiKey + parameters; // related to the API
        var data = "";
        https.get(url, function callback(response) {
            response.setEncoding("utf8");
            response.on("data", function(chunk) {
                data += chunk.toString();
            });
            response.on("end", function() {
                var jsonData = JSON.parse(data);
                var array = jsonData['results']; // data is returned as an array of objects; accessing only that array
                var length = array.length;
                console.log(length);
                for (var i = 0; i < length; i++) {
                    var report = new Report(array.pop()); // Report is the schema model defined.
                    console.log('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^');
                    console.log(i);
                    console.log('*****************************');
                    console.log(report);
                    console.log('*****************************');
                    // console.log(report['id']);
                    /*report.save(function(err) {
                        if (err)
                            res.send(err);
                    });*/
                    Report.find({id: report['id']}).count(function(err, count) { // checks if the id of that specific data already exists in Mongo
                        console.log(count);
                        console.log('*****************************');
                        if (count == 0) { // if count == 0, i.e. it does not exist yet, only then save
                            report.save(function(err) {
                                console.log('saved');
                                if (err)
                                    res.send(err);
                            });
                        }
                    });
                }
                res.json({
                    message: 'Grabbed Report'
                });
            });
            response.on("error", console.error);
        });
    })
My problem is that since NodeJS callbacks run asynchronously, they are not called sequentially. My end result looks something like this:
1. Calling report API
2. console.log(length) = 100
3. ^^^^^^^^^^^^^^^^^^^^^^^^
4. console.log(i) = starts with 0
5. *******************************
6. console.log(report) = the data which will be stored inside Mongo
7. *******************************
8. numbers 3 - 7 repeat 100 times, as the length equals 100
9. console.log(count) = either 0 or 1
10. number 9 repeats 100 times
11. console.log('saved')
12. number 11 repeats 100 times
Lastly, only the last of the 100 records is stored into Mongo.
What I need is some technique or method of handling these callbacks so that they execute one after the other, following the loop, instead of all at once. I am pretty sure this is the problem, as my other REST APIs are all working.
I have looked into async methods, promises, recursive functions and a couple of others, none of which I could really understand well enough to solve this problem. I really hope someone can shed some light on this matter.
Feel free also to correct me if I made any mistakes in the way I'm asking the question. This is my first question posted on StackOverflow.
This problem is termed "callback hell".
There are lots of other approaches, such as the Promise and async libraries, which you'll come across.
I'm most excited about the native async/await that ES7 will bring,
which you can actually start using today with the transpiler library Babel.
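To give a taste, here is a rough sketch of the same flow with async/await. The fetchReport helper is hypothetical (it would wrap the https.get call in a Promise), and it assumes a Mongoose version whose queries can be awaited:

async function handleReport(req, res) {
    const data = await fetchReport(url);                // hypothetical promisified GET
    const results = JSON.parse(data).results;
    for (const entry of results) {
        const count = await Report.count({ id: entry.id }); // assumes thenable queries
        if (count === 0) {
            await new Report(entry).save();             // saves run strictly in sequence
        }
    }
    res.json({ message: 'Grabbed Report' });
}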
But by far the simplest approach I've found is the following:
You take out the long callback functions and define them outside.
router.route('/report') // the REST api address
.post(calling_a_POST)
function calling_a_POST(req, res) {
...
var data = "";
https.get(url, function callback(response) {
...
response.on("end", response_on_end_callback); // --> take out
response.on("error", console.error);
});
}
function response_on_end_callback() { // <-- define here
...
for (var i = 0; i < length; i++) {
var report = new Report(array.pop());
...
Report.find({ id: report['id'] })
.count(Report_find_count_callback); // --> take out
};
res.json({
message: 'Grabbed Report'
});
}
function Report_find_count_callback(err, count) { // <-- define here
...
if (count == 0) {
report.save(function(err) { // !! report is undefined here
console.log('saved');
if (err)
res.send(err); // !! res is undefined here
});
}
}
A caveat is that you won't be able to access all the variables inside what used to be the callback,
because you've taken them out of the scope.
This could be solved with a "dependency injection" wrapper of sorts to pass the required variables.
router.route('/report') // the REST api address
.post(calling_a_POST)
function calling_a_POST(req, res) {
...
var data = "";
https.get(url, function callback(response) {
...
response.on("end", function(err, data){ // take these arguments
response_on_end(err, data, res); // plus the needed variables
});
response.on("error", console.error);
});
}
function response_on_end(err, data, res) { // and pass them to function defined outside
...
for (var i = 0; i < length; i++) {
var report = new Report(array.pop());
...
Report.find({ id: report['id'] })
.count(function(err, count){
Report_find_count(err, count, report, res); // same here
});
};
res.json({ // res is now available
message: 'Grabbed Report'
});
}
function Report_find_count(err, count, report, res) { // same here
...
if (count == 0) {
report.save(function(err) { // report is now available
console.log('saved');
if (err)
res.send(err); // res is now available
});
}
}
When I execute the response_on_end function, I am getting the "undefined:1 unexpected token u" error.
I am pretty sure it has something to do with this line in my response_on_end: var jsonData = JSON.parse(data); // problem here
I realize I made an error here:
function calling_a_POST(req, res) {
...
var data = "";
https.get(url, function callback(response) {
...
//response.on("end", function(err, data){ // the old line: this data parameter shadowed the outer data variable
response.on("end", function(err){ // data shouldn't be a parameter here
response_on_end(err, data, res);
});
response.on("error", console.error);
});
}
Another problem I could foresee, which actually may not arise here, but would still be better to talk about anyway:
the data variable is a string, and since strings are a primitive type, unlike objects, they are "passed by value".
More info
It's better to wrap the variable in an object and pass the object around, because objects in JavaScript are always "passed by reference".
function calling_a_POST(req, res) {
...
// var data = ""; //
var data_wrapper = {};
data_wrapper.data = {}; // wrap it in an object
https.get(url, function callback(response) {
...
response.on("data", function(chunk){
data_wrapper.data += chunk.toString(); // use the dot notation to reference
});
response.on("end", function(err){
response_on_end(err, data_wrapper, res); // and pass that object
});
response.on("error", console.error);
});
}
function response_on_end(err, data_wrapper, res) { // name must match the function called above
var data = data_wrapper.data; // later redefine the variable
...
for (var i = 0; i < length; i++) {
var report = new Report(array.pop());
...
You can use the async library to control your execution flow, and it also ships iterators for working with arrays.
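For instance, a sketch of the original loop rewritten with async.eachSeries, which walks the array strictly one item at a time (Report and res as in the question; this would replace the for loop inside the "end" handler):

var async = require('async');

async.eachSeries(jsonData.results, function (entry, next) {
    var report = new Report(entry);
    Report.find({ id: entry.id }).count(function (err, count) {
        if (err) return next(err);
        if (count > 0) return next(); // already stored, skip it
        report.save(next);            // save, then move on to the next item
    });
}, function (err) {
    if (err) return res.send(err);
    res.json({ message: 'Grabbed Report' }); // sent only after every item is done
});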
Related
I have a problem with callback functions and loops in Node.js: how can I make the response be sent only after all the callback functions have finished executing?
app.post('/data', function(req,res){
var send = []
for (var i =0; i < p_objReq.body.quantity; i++) {
Instruments.count({ /* condition */ }, function(err, count){
//using count and other parameters I am generating the code which is unique
Instruments.find({id: 'rondom_generated_code'},function(err, instrumentsCount){
if(instrumentsCount.length == 0){
send.push(rondom_generated_code)
if(send.length == p_objReq.body.quantity)
p_objRes.json({success : true, data : send})
}
else{
Instruments.count({ /* condition */ }, function(err, count){
//using count and other parameters I am generating the code which is unique
send.push(rondom_generated_code)
if(send.length == p_objReq.body.quantity)
p_objRes.json({success : true, data : send})
})
}
})
})
}
})
When I wrote it like this, it sends the same random code every time, namely the last one generated. I tried pulling the whole thing out into a separate function with a callback, but that is not working either.
One solution is to use Q.js, which is one of the Promise libraries. For more of Q.js's API, please refer to its documentation. Here is a sample that may help, if I have understood your question correctly.
var Q = require('q');
app.post('/data', function(req, res){
    var send = [];
    var p = function () {
        var deferred = Q.defer();
        Instruments.count({ /* condition */ }, function(err, count){
            // using count and other parameters, generate the unique code
            if (err) {
                deferred.reject(new Error(err));
            } else {
                send.push(randomnumber);
                deferred.resolve(); // resolve on success, or Q.all will never fire
            }
        });
        return deferred.promise;
    };
    var ps = [];
    for (var i = 0; i < p_objReq.body.quantity; i++) {
        ps.push(p()); // invoke p() so the array holds promises, not bare functions
    }
    Q.all(ps).then(function(){ res.json({success : true, data : send}); });
});
Considering that my server.js looks almost like this (I've only included the relevant part): I did not receive anything from the query, although I do have data in the database, and "sendNotification" is triggered by the jQuery function on the client. Everything runs, but since var notis = []; stays empty, that empty value is what shows up as the response. I know I have to debug the SQL and that's what I'm going to do, but I still want to be sure of these other things. So my questions are:
1) Is this the right syntax for node.js, considering its async behavior (which I still don't understand)?
2) Should the query always be inside the "io.sockets.on('connection')" part?
connection = mysql.createConnection({
host: 'localhost',
user: '',
password: "",
database: 'table' //put your database name
}),
...
connection.connect(function(err) {
// connected! (unless `err` is set)
console.log(err);
});
...
var sqlquery = function(uID,vs){
var notis = [];
connection.query("SELECT * FROM notification WHERE kid = ? AND v = ? ORDER BY id DESC",[uID,vs])
.on("result", function (data){
return notis.push(data);
});
};
io.sockets.on('connection', function(socket) {
...
socket.on("sendNotification", function(data) {
var roomBName = data.room_name.replace("room-",""),
found = [];
var roomSelected = _.find(rooms, function (room) { return room.id == roomBName });
for (var person in people) {
for (var i = 0, numAttending = roomSelected.peopleAttending.length; i < numAttending; i++) {
if (people[person].name == roomSelected.peopleAttending[i]) {
found.push(person);
}
}
}
for (var i = 0, numFound = found.length; i < numFound; i++) {
result = sqlquery(9, 2); // <-- the problematic call
io.to(found[i]).emit('notification', result);
};
});
Your sqlquery() function will not accomplish anything useful. Because connection.query() is asynchronous, that means it provides the response sometime LATER after sqlquery() has already finished.
The only way in node.js to use an async result is to actually use it in the callback that provides it. You don't just stuff it into some other variable and expect the result to be there for you in other code. Instead, you use it inside that callback or you call some other function from the callback and pass it the data.
Here's one way, you could change your sqlquery() function:
var sqlquery = function(uID, vs, callback){
connection.query("SELECT * FROM notification WHERE kid = ? AND v = ? ORDER BY id DESC",[uID,vs])
.on("result", function (data){
callback(null, data);
});
// need to add error handling here if the query returns an error
// by calling callback(err)
};
Then, you could use the sqlquery function like this:
found.forEach(function(person, index) {
sqlquery(..., function(err, result) {
if (err) {
// handle an error here
} else {
io.to(person).emit('notification', result);
}
});
});
And, it looks like you probably have similar async issues in other places too like in connection.connect().
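For example, a small sketch of deferring any query work until the connection is actually established (nothing more than moving the follow-up logic into the connect callback):

connection.connect(function(err) {
    if (err) {
        console.error('MySQL connection failed:', err);
        return;
    }
    // only from this point on is it safe to run queries,
    // or to start accepting socket connections that will run them
});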
In addition to @jfriend00's answer, this could also be done with the new ES6 feature, Promise:
var sqlquery = function(uID, vs){
return new Promise(function(resolve, reject){
connection.query("SELECT * FROM notification WHERE kid = ? AND v = ? ORDER BY id DESC",[uID,vs])
.on("result", function (data){
resolve(data);
});
});
};
Now you can use it like this:
found.forEach(function(person, index) {
sqlquery(...)
.then(function(result){
io.to(person).emit('notification', result);
});
});
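And if you ever need to wait for every query before reacting (say, emitting one combined event), a sketch using Promise.all with the same hypothetical arguments as in the question:

Promise.all(found.map(function(person) {
    return sqlquery(9, 2); // one promise per person
}))
.then(function(results) {
    results.forEach(function(result, i) {
        io.to(found[i]).emit('notification', result);
    });
});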
I am trying to use getStats() from WebRTC's API to see if it provides any useful info to measure latency and other video streaming data. The problem is that there's not much info on how to use it.
Even the older existing examples are pretty rare, and the API has changed since they were written.
For example, my setup:
peerconnection.getStats(function(stats) {
    console.log(stats);
});
This returns an RTCStatsResponse object with 2 functions:
RTCStatsResponse {result: function, namedItem: function}
Calling that result() function returns an array of RTCStatsReport objects, with type 'googLibjingleSession' for the 1st object and type 'googTrack' for the 2nd. The other function, namedItem, is undefined when I try to call it.
[RTCStatsReport, RTCStatsReport]
From what little info is available (https://groups.google.com/forum/#!topic/discuss-webrtc/fpr4yn4-3sg), I should be getting a lot more RTCStatsReport objects, with more useful info than I am currently getting.
Does anyone have experience with using WebRTC's getStats? I believe I may not be doing this correctly.
The following solution works for me.
Creating peer connection
pc = new RTCPeerConnection(pc_config, pc_constraints);
Adding onaddstream handler
pc.onaddstream = onRemoteStreamAdded;
The handler itself
var onRemoteStreamAdded = function(event) {
attachMediaStream(remoteVideo, event.stream);
remoteStream = event.stream;
getStats(pc);
};
Pay attention to the getStats function called from the handler; it is defined as follows:
function getStats(peer) {
myGetStats(peer, function (results) {
for (var i = 0; i < results.length; ++i) {
var res = results[i];
console.log(res);
}
setTimeout(function () {
getStats(peer);
}, 1000);
});
}
The myGetStats function is a wrapper that makes the call work uniformly across different browsers:
function myGetStats(peer, callback) {
if (!!navigator.mozGetUserMedia) {
peer.getStats(
function (res) {
var items = [];
res.forEach(function (result) {
items.push(result);
});
callback(items);
},
callback
);
} else {
peer.getStats(function (res) {
var items = [];
res.result().forEach(function (result) {
var item = {};
result.names().forEach(function (name) {
item[name] = result.stat(name);
});
item.id = result.id;
item.type = result.type;
item.timestamp = result.timestamp;
items.push(item);
});
callback(items);
});
}
};
Every second it will fetch statistics and print the raw objects to the console log. You can inspect the log and then adapt the code to pull out the fields you need.
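As a starting point, a sketch that filters the raw items by report type. Note that these type and field names (ssrc, bytesReceived, packetsLost) are legacy Chrome names and vary by browser and version, so check your own console output first:

myGetStats(pc, function (items) {
    items.forEach(function (item) {
        if (item.type === 'ssrc') { // per-stream receive/send stats in legacy Chrome
            console.log(item.id, item.bytesReceived, item.packetsLost);
        }
    });
});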
I have an HTTP GET request and I want to parse the response and save it to my database.
If I call crawl(i) independently, I get good results. But I have to call crawl() from 1 to 2000.
I get good results, but some responses seem to get lost and some responses are duplicates. I don't think I understand how to call thousands of asynchronous functions. I am using the async module's queue function, but so far I am still missing some data and still have some duplicates. What am I doing wrong here? Thanks for your help.
What I am crawling
My node functions:
function getOptions(i) {
return {
host: 'magicseaweed.com',
path: '/syndicate/rss/index.php?id='+i+'&unit=uk',
method: 'GET'
}
};
function crawl(i){
var req = http.request(getOptions(i), function(res) {
res.on('data', function (body) {
parseLocation(body);
});
});
req.end();
}
function parseLocation(body){
parser.parseString(body, function(err, result) {
if(result && typeof result.rss != 'undefined') {
var locationTitle = result.rss.channel[0].title;
var locationString = result.rss.channel[0].item[0].link[0];
var location = new Location({
id: locationString.split('/')[2],
name: locationTitle
});
location.save();
}
});
}
var N = 2; // number of simultaneous tasks
var q = async.queue(function (task, callback) {
crawl(task.url);
callback();
}, N);
q.drain = function() {
console.log('Crawling done.');
}
for(var i = 0; i < 100; i++){
q.push({url: 'http://magicseaweed.com/syndicate/rss/index.php?id='+i+'&unit=uk'});
}
[EDIT] WELL, after a lot of testing, it seems that the service I am crawling cannot handle so many requests that fast, because when I do each request sequentially, I can get all the good responses.
Is there a way to SLOW DOWN the async queue method?
You should have a look at this great module, async, which simplifies async tasks like this. You can use queue; simple example:
var N = 5; // number of simultaneous tasks; pick a reasonable value
var q = async.queue(function (task, callback) {
    somehttprequestfunction(task.url, function(){
        callback();
    });
}, N);
q.drain = function() {
    console.log('all items have been processed');
}
for (var i = 0; i < 2000; i++){
    q.push({url: "http://somewebsite.com/" + i + "/feed/"});
}
It keeps a window of ongoing actions, and a slot becomes available for a future task only when you invoke the callback function. The difference is that your code currently opens 2000 connections immediately, so obviously the failure rate is high. Limiting the concurrency to a reasonable value, 5, 10, 20 (depending on the site and connection), will result in a better success rate. If a request fails, you can always try it again, or push the task to another async queue for another trial. The key point is to invoke callback() in the queue function, so that a slot will be available when it is done.
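For example, a sketch of the retry idea, re-pushing a failed task onto the same queue (it assumes your request function reports errors to its callback):

var q = async.queue(function (task, callback) {
    somehttprequestfunction(task.url, function (err) {
        if (err && (task.retries || 0) < 3) {
            task.retries = (task.retries || 0) + 1;
            q.push(task); // back of the queue for another attempt
        }
        callback(); // always free the slot, success or failure
    });
}, 5);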
var q = async.queue(function (task, callback) {
crawl(task.url);
callback();
}, N);
You're executing the next task immediately after starting the previous one; used this way, the queue is meaningless. You should modify your code like this:
// first, modify your 'crawl' function to take a callback argument, and call this callback after the job is done.
// then
var q = async.queue(function (task, next/* name this argument as 'next' is more meaningful */) {
crawl(task.url, function () {
// after this one is done, start next one.
next();
});
// or, more simple way, crawl(task.url, next);
}, N);
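For completeness, a sketch of what the callback-taking crawl could look like, reusing the getOptions and parseLocation helpers from the question. It also buffers the whole body before parsing, since the "data" event can fire several times per response:

function crawl(i, done) {
    var req = http.request(getOptions(i), function (res) {
        var body = '';
        res.on('data', function (chunk) {
            body += chunk; // accumulate; parsing partial chunks loses data
        });
        res.on('end', function () {
            parseLocation(body);
            done(); // tell the queue this task is finished
        });
    });
    req.on('error', function () {
        done(); // don't stall the queue on request errors
    });
    req.end();
}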
Another option if you want: vanilla JS without fancy libraries.
var incrementer = 0;
var resultsArray = [];
var myInterval = setInterval(function() {
incrementer++
if(incrementer == 100){
clearInterval(myInterval)
//when done parse results array
}
//make request here
//push request result to array here
}, 500);
It invokes the function every half second: an easy way to space out the requests and exit after x of them.
I know I am a little late to the question, however here is a solution I wrote to slow down the number of requests when testing an api endpoint, using node 4 or node 5:
var fs = require('fs');
var supertest = require('supertest');
var request = supertest("http://sometesturl.com/api/test/v1/")
var Helper = require('./check.helper');
var basicAuth = Helper.basicAuth;
var options = Helper.options;
fs.readFile('test.txt', function(err, data){
    var parsedItems = JSON.parse(data);
    var urlparts = [];
    // simple year-range helper (assumed; the original did not show its definition)
    function range(start, end) {
        var years = [];
        for (var y = start; y <= end; y++) years.push(y);
        return years;
    }
    // create a queue
    for (let year of range(1975, 2016)) {
        for (var make in parsedItems[year]){
            console.log(year, make, '/models/' + year + '/' + make);
            urlparts.push({urlpart: '/models/' + year + '/' + make, year: year, make: make});
        }
    }
// start dequeue
waitDequeue();
// This function calls itself after the makeRequest promise completes
function waitDequeue(){
var item = urlparts.pop()
if (item){
makeRequest(item)
.then(function(){
// wait this time before next dequeue
setTimeout(function() {
waitDequeue();
}, 3000);
})
} else {
write(parsedItems)
}
}
// make a request, mutate parsedItems then resolve
function makeRequest(item){
return new Promise((resolve, reject)=>{
request
.get(item.urlpart)
.set(options.auth[0], options.auth[1])
.set(options.type[0], options.type[1])
.end(function(err, res) {
if (err) return reject(err); // reject instead of done1, which is undefined here
console.log(res.body)
res.body.forEach(function(model){
parsedItems[item.year][item.make][model] = {}
});
resolve()
})
})
}
// write the results back to the file
function write(parsedItems){
fs.writeFile('test.txt', JSON.stringify(parsedItems, null, 4), function(err){
console.log(err)
})
}
})
A little late, but I have found this works!
Using async you can slow down the queue by using whilst inside the task handler, e.g.:
var q = async.priorityQueue(function(task, callback) {
    // your code to process each task here
    // when ready to complete the task, delay it by calling
    var count = 0;
    async.whilst( // wait 10 seconds (10 iterations of 1000 ms)
        function() {
            return count < 10;
        },
        function(cb) { // renamed from callback to avoid shadowing the task callback
            count++;
            setTimeout(function() {
                cb(null, count);
            }, 1000);
        },
        function (err, n) {
            // n seconds have passed
            callback(); // callback to q handler
        }
    ); // whilst
}, 5);
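If all you want is a fixed pause per task, a simpler sketch of the same idea:

var q = async.queue(function (task, callback) {
    // process the task here, then:
    setTimeout(callback, 6000); // hold the queue slot for 6 seconds
}, 5);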
In a node.js / Mongoose project, I have a schema which contains references to external image files.
var PageSchema = new Schema({
title: String
, media: {
digest: String
, name: String
}
});
Those files have additional properties which are stored in the file itself: url, width, height, exif fields, etc. Those fields will need to be populated before the model is sent to res.render().
For some fields, things are synchronous and a virtual just does the job:
PageSchema.virtual('media.url').get(function () {
return appPaths.fileUrl(this.media);
});
However, width / height, or exif fields require async calls. I thought of using middleware to populate them, but this does not seem to work:
PageSchema.post('init', function(next) {
var media = this.media;
var fileName = filedb.absoluteFilePath(media);
im.identify(fileName, function(err, features) {
if (err) {
media.width = 0;
media.height = 0;
} else {
media.width = features.width;
media.height = features.height;
}
next();
});
});
What am I doing wrong? Is there a common design pattern for solving this kind of problem? (Other than duplicating this information in the database itself?)
The real problem here is that mongoose currently seems to have a wonky implementation of post callbacks. While pre('init',function(next){ ... }); works as you expect, post('init',function(next){ ... }); does not actually get passed a next function. In fact, the post init callback does not receive any arguments whatsoever when it is called.
As such, I usually write a wrapper for my query callbacks to make a sort of DIY middleware:
var setAsyncVirtuals = function(callback){
return function(err, docs){
if(err) return callback(err);
var done = docs.length; // count of docs still pending (avoid the implicit global)
var i = done;
if(i > 0)
while(i--){
(function(i){
var filename = getFilename(); // resolve the file path for docs[i], e.g. via your filedb helper
im.identify(filename, function(err, features) {
if (err) {
docs[i].media.width = 0;
docs[i].media.height = 0;
} else {
docs[i].media.width = features.width;
docs[i].media.height = features.height;
}
done--;
if(done <= 0) callback(null, docs);
});
})(i); // bind i to hold value for async call
}
else callback(null, docs);
}
}
then
Page.find({}, setAsyncVirtuals(function(err, docs){
    if (err) return res.send(err); // surface errors from the query or im.identify
    res.send(docs); // these have media.width & media.height assigned
}));