how call a callback function, after all the functions are finished?
All functions must start at the same time, and when all functions finish running, run the callback
function step_one(callback){
parse1site();
parse2site();
parse3site();
parse4site();
parse5site();
parse6site();
parse7site();
parse8site();
parse9site();
parse10site();
parse11site();
parse12site();
parse13site();
parse14site();
parse15site();
parse16site();
parse17site();
parse18site();
parse19site();
parse20site();
}
Example function
function parse1site(){
var URL = "https://site1.com";
needle.get(URL, function(error, response){
if (!error && response.statusCode == 200){
data["site1"] = response.body;
console.log("OK");
} else{
console.log("error");
}
});
}
I would change your usage of needle to the promise API and then use Promise.all
var p1 = needle('get', 'https://server.com/posts/12');
var p2 = needle('get', 'https://server.com/posts/13');
//...
Promise.all([p1,p2]).then((data)=>{
// here you will get the response of all of your requests in array data
});
Related
I've written a script in node using two different functions getPosts() and getContent() supplying callback within them in order to print the result calling a standalone function getResult(). The selectors defined within my script is flawless.
However, when I execute my script, It prints nothing. It doesn't throw any error either. I tried to mimic the logic provied by Neil in this post.
How can I make it a go?
I've written so far:
var request = require('request');
var cheerio = require('cheerio');
const url = 'https://stackoverflow.com/questions/tagged/web-scraping';
function getPosts(callback){
request(url, function (error,response, html) {
if (!error && response.statusCode == 200){
var $ = cheerio.load(html);
$('.summary .question-hyperlink').each(function(){
var items = $(this).text();
var links = $(this).attr("href");
callback(items,links);
});
}
});
}
function getContent(item,link,callback){
request(link, function (error,response, html) {
if (!error && response.statusCode == 200){
var $ = cheerio.load(html);
var proLink = $('.user-details > a').eq(0).attr("href");
callback({item,link,proLink});
}
});
}
function getResult() {
getPosts(function(item,link) {
getContent(item,link,function(output){
console.log(output);
});
});
}
getResult();
The link value that you receive from getPosts is a relative link which means that the request fails. You can extract the hostname inside its own variable and create the full URL from the hostname + the relative link.
const host = 'https://stackoverflow.com';
const url = '/questions/tagged/web-scraping';
// ...
function getContent(item,link,callback){
// Here we use the absolute URL
request(host + link, function (error,response, html) {
if (!error && response.statusCode == 200){
var $ = cheerio.load(html);
var proLink = $('.user-details > a').eq(0).attr("href");
callback({item,link,proLink});
}
});
}
I have a post method, whose request input is a list of newLink objects (newLink with attributes linkUrl and status)
I am using async.map for iterating over my URLs to check if the links are active or not.
newLinks contains links like {www.google.com,www.nourl.com,www.xyz.com} I am expecting like after all the request are processed and setting corresponding status as true or false, I want this to send using res.send(newLinks)
But the console is giving the below results: "www.google.com is up", then calling res.send(), then executing "www.nourl.com is up" and "www.xyz.com is up"
So basically here, after the first url request , my code below is executing the function outside the async loop. I thought async will only allow the next piece of code to execute only after all the urls are validated.
app.post('/myposturl', function(req , res){
var request = require('request');
let linkDetails= req.body.linkDetails;
var i = 0;
async.map(linkDetails, function(newLink, callback) {
var Url = "url";
var url = newLink.linkUrl;
var proxiedRequest = request.defaults({'proxy': Url});
proxiedRequest(url , function (error, response, body) {
if(error){
console.log('Err: '+ error);
}
if (!error) {
if(response.statusCode == 200 || response.statusCode == 201 ||
response.statusCode == 202){
console.log(url + ' is up!!');
newLink.isActive = true;
}
if(response.statusCode == 301 || response.statusCode == 302){
console.log(url + ' is redirecting us!!');
return false;
}
}
});
callback();
} , function(err, linkDetails) {
res.send(linkDetails);
});
//tried res.send here as well.
});
}
The callback of async.map should invoke inside proxiedRequest. What your code is doing now: invoke callback immediately before the proxiedRequest finished. Also return false; does not work in asynchronous function. You should return the new status like this callback(null, newLink). After all the request are processed, the newLinkDetails will be the array of all newLink.
Note, that since this function applies the iteratee to each item in parallel, there is no guarantee that the iteratee functions will complete in order.
If you need to keep the order, user mapSeries insted.
Please read the doc of async.map for more. Hope it helps.
app.post('/myposturl', function(req , res){
//other codes
async.map(linkDetails, function(newLink, callback) {
//other codes
proxiedRequest(url , function (error, response, body) {
if(error){
console.log('Err: '+ error);
callback(error);
//^ ^ ^ ^ ^
// Validation failed, return from here
}
else {
//some validation & set newLink.isActive
callback(null, newLink);
// ^ ^ ^ ^ ^
//return newLink status by invoking the callback
}
});
}, function(err, newLinkDetails) {
//err = if any Validation failed
// now all the request are processed,newLinkDetails is array of all newLink's
res.send(newLinkDetails);
});
});
Usually when using async.js, I follow these two principles:
Always call the callback at least AND at most once during your async function.
Call the callback only when the async function is complete OR if an error has occurred. If the latter occurs, call the callback passing the error AND stop further execution of the async function e.g. return callback(error)
I would revise your code as below:
var request = require('request');
app.post('/myposturl', function (req , res) {
async.mapSeries(req.body.linkDetails || [], function(newLink, callback) {
var Url = "url";
var proxiedRequest = request.defaults({ 'proxy': Url });
proxiedRequest(newLink.linkUrl, function (err, response, body) {
if (err)
return callback(err);
// I'm assuming you don't want to stop checking the links for bad status codes
if ([301, 302].indexOf(response.statusCode) > -1){
return callback(null, url + ' is redirecting us!!');
if ([200, 201, 202].indexOf(response.statusCode) == -1) {
return callback(null, url + ' came back with ' + response.statusCode);
console.log(url + ' is up!!');
newLink.isActive = true;
callback(null, newLink);
});
}, function (err, linkDetails) {
// when all links get checked, it will come down here
// or if an error occurs during the iteration, it will come down here
console.log(err, linkdetails);
res.send(linkDetails);
});
});
If you only want to get back active links, you may also want to check out async.filterSeries(). Here the callback would need to be passed a boolean in its second argument.
Basically what I wanted to do is wait and get the first two functions results and pass that value to 3rd function. Therefore with the Node Q module I tried the below code.
getAddressDetail("", 51.528308, -0.3817812).then(function (pickupLoc) {
return pickupLoc.location;
}).then(function (pickupLocation) {
var drop = getAddressDetail(, 51.528308, -0.3817812);
return [pickupLocation,drop.location];
})
.then(function (pickupLocation, dropLocation) {
console.log("#####" + pickupLocation +"$$$" + dropLocation)
})
.done();
EDIT
function getAddressDetail(location = "", lat, long) {
var deferred = Q.defer();
var getLocation = "";
if (location == '' || location == 'undefined') {
var url = 'https://maps.googleapis.com/maps/api/geocode/json?key={APIKEY}&latlng=' + lat + ',' + long + '&sensor=false';
request({
url: url,
json: true
}, function(error, response, body) {
if (!error && response.statusCode === 200) {
getLocation = body.results[0].formatted_address;
deferred.resolve({
'location': getLocation
});
//console.log("*******" + getLocation);
}
})
} else {
getLocation = location;
deferred.resolve({
'location': getLocation
});
}
return deferred.promise;
}
However this code doesn't return value which defined in the 2nd then code "dropLocation", it return as undefined. Do you see any issues in here?
Thanks in advance.
There are a couple of dodgy things happening in your code.
return [pickupLocation,drop.location] wil result in 1 parameter (an array) in the next handler, so instead of .then(function (pickupLocation, dropLocation) {}) you should use .then(function (results) {})
var drop = getAddressDetail(, 51.528308, -0.3817812);looks like it isn't a promise because you immediately get the location from the result. (return [pickupLocation,drop.location];) So why then not fetch this value in the next handler?
If getAddressDetail() DOES return a promise, just compose an array of promises and use the .spread() operation as the result of the first promise is not required in the second promise.
An example;
var promiseArray = [];
promiseArray.push(getAddressDetail("", 51.528308, -0.3817812));
promiseArray.push(getAddressDetail("", 51.528308, -0.3817812));
Q.spread(promiseArray, function(pickupLocation, drop){
// first parameter (pickupLocation) = result of first promise
// second parameter (drop) = result of second promise
});
In my main code, I do the following:
var module = require('./module')
module.FooA(module.FooB);
module.js contains the next code:
var request = require('request'); //using of npm "request"
exports.FooB = function(data){ /*operations on data here*/ };
exports.FooA = function(callback){
var url = some_link;
request(url, function (error, response, body) {
if (!error && response.statusCode == 200) {
callback(body);
};
});
};
The issue is that apparently, callback(body) doesn't run even if the conditions meet. var result = request(url) followed by exports.FooB(result) does the job, but as far as I can see, obviously does not act like a callback, and would produce troubles.
What is the proper way of defining a callback function in such a case? Do I need at all, or it is actually synchronous and I missed to notice it?
Use first function callback params with error, this is an default in node.js core and is google for your project functions.
And like #ShanSan commend, use console.log, console.error or console.trace for debug.
Example:
var request = require('request'); //using of npm "request"
exports.FooB = function(error, data){ /*operations on data here*/ };
exports.FooA = function(callback){
var url = some_link;
request(url, function (error, response, body) {
if (error || response.statusCode != 200) {
// pass error to callback and if use return you dont need other code block bellow
console.error('Error in request', error);
return callback(error, null);
}
// here run if dont have errors
// if need more info use the console.log(request); or console.log(body);
// use error in first param in callback functions
callback(null, body);
});
};
I have var movieRecommendation which is being populated from data coming from Mongo DB. Issue is Mongoose Movie.findOne() call is asycn call which is not allowing me to get my final populated movieRecommendation which I need to send back as response.
exports.getRecommendation=function(req,res){
var movieRecommendation = [];
var id=req.params.id;
console.log('----- Get User Recommendation - ' + id);
var url = 'http://52.8.48.113:8080/recommender-server/recommender/v1/recommendations/'+id+'.do';
//make http get request
request({
url: url,
json: true
}, function (error, response, recommendations) {
// res.json(recommendations);
if (!error && response.statusCode === 200) {
recommendations.forEach(function(entry) {
**Movie.findOne({'id':parseInt(entry.itemId)},function(err, movieData){**
entry.movie = movieData;
movieRecommendation.push(entry);
//console.log('rec', movieRecommendation);
console.log(movieRecommendation.length);
});
});
}
console.log("====Final========"+movieRecommendation.length);
//Output = 0
});
res.json(movieRecommendation); // Here movieRecommendation is coming as black Array
};
Please let me know how I can get finally populated movieRecommendation var at end to make it available for response.
For this type of issues we can use Async library. To populate the data finally once all the operations done, we can use async.each collection from Async Library.
For example:
NOTE:Install Async by this command
npm install async to use async library
var async = require("async");
var recomenmendations = [{"data2" : "value2"} , {"data1" : "value2"}, {"data3" : "value3"}, {"data4" : "value4"} ]
var movieRecommendation = [];
async.each(recomenmendations,
function(recomenmendationItem, callback){
console.log("Here you can query the required data using current recomenmendations ITEM");
console.log(recomenmendationItem);
callback();
// Movie.find({'id':parseInt(recomenmendationItem.itemId)},function(err, movieData){
// recomenmendationItem.movie = movieData;
// movieRecommendation.push(entry);
// callback();
// });
},
function(err){
console.log("here you can send your resopnse");
console.log("This section will be executed once all the recomenmendations are processed");
//res.json(movieRecommendation)
}
);
You can query the mongoDB as shown with comment section. You should use callback() once all the operations performed for an iteration.
As I mentioned in one of my comments, use the callback passed to the iterator function and call it inside the Movie.findOne() callback. That way, async.each will know when each step has finished:
async.each(recomendations, function (recomendationItem, callback) {
Movie.findOne({'id':parseInt(entry.itemId)},function(err, movieData){
if (err) return callback(err); // if you have an error on you search, just pass it to the iterator callback
recommendationItem.movie = movieData;
movieRecommendation.push(recommendationItem);
callback();
});
}, function (error) {
if (error) return res.json ({ error: error }); // you should also check if an error ocurred
res.json(movieRecomendation);
});
Just to point out: you can also use async.eachSeries, that will just call the next step of your iteration when the previous one has returned (if that matters to you, but I think it's not your case though) and it has the same signature.
#Vivek Panday replace the following code inside your exports.getRecommendation function to get your expected output. We don't need to use the count variable if we use the callback function. And an important thing is we have to use callback(); once all the process done. I think you have not used callback function properly in The example you have worked out. Use the following code If there is any issue please let me know.
var async = require('async');
var request = require("request");
var movieRecommendation = [];
var id=req.params.id;
console.log('----- Get User Recommendation - ' + id);
var url = 'http://52.8.48.113:8080/recommender-server/recommender/v1/recommendations/'+id+'.do';
//make http get request
request({
url: url,
json: true
}, function (error, response, recommendations) {
if (!error && response.statusCode === 200) {
console.log('recommendation lenght '+ recommendations.length);
async.each(recommendations,
function(recommendationItem, callback){
Movie.findOne({'id':parseInt(recommendationItem.itemId)},function(err, movieData){
recommendationItem.movie = movieData;
movieRecommendation.push(recommendationItem);
//you have to use callback(); once all your process is done
callback();
});
},
function(err){
//you should use this function, this will be execute once all the process done
console.log(movieRecommendation);
console.log("finally callback");
res.json(movieRecommendation);
}
);
}
});
I have tried as per given suggestion above ..
var async = require("async");
var recomenmendations = [{"data2" : "value2"} , {"data1" : "value2"}, {"data3" : "value3"}, {"data4" : "value4"} ]
var movieRecommendation = [];
async.each(recomenmendations,
function(recomenmendationItem, callback){
console.log("Here you can query the required data using current recomenmendations ITEM");
console.log(recomenmendationItem);
// Movie.find({'id':parseInt(recomenmendationItem.itemId)},function(err, movieData){
recomenmendationItem.movie = movieData;
movieRecommendation.push(entry);
console.log("any data"); // line y
});
callback();
},
function(err){
console.log("here you can send your resopnse"); // line x
console.log("This section will be executed once all the
recomenmendations are processed");
//res.json(movieRecommendation)
}
);
But still face same issue line x is printing before line y ,which is making again same issue.
However I have tried something given below and achieved expected result .
exports.getRecommendation=function(req,res){
var movieRecommendation = [];
var id=req.params.id;
console.log('----- Get User Recommendation - ' + id);
var url = 'http://52.8.48.113:8080/recommender-server/recommender/v1/recommendations/'+id+'.do';
//make http get request
request({
url: url,
json: true
}, function (error, response, recommendations) {
// res.json(recommendations);
if (!error && response.statusCode === 200) {
console.log('recommendation lenght '+ recommendations.length);
// recommendations.forEach(function(entry) {
var count=0;
async.each(recommendations,function(recommendationItem){
// console.log(recommendationItem);
Movie.findOne({'id':parseInt(recommendationItem.itemId)},function(err, movieData){
recommendationItem.movie = movieData;
movieRecommendation.push(recommendationItem);
count ++;
console.log('final res length : ' + movieRecommendation.length);
console.log('final res length count : ' + count +' and item recomm lenght ' + recommendations.length );
if(count === recommendations.length){
console.log(' =====Final=====> here you can send your response =========' + movieRecommendation.length);
res.json(movieRecommendation);
}
});
// callback();
});
}
});
};
Still I am open for any feedback and suggestions.