I'm new to Promises.
I use Bookshelf.js as ORM.
I fetch a number of webpages, get person info (about actors) from those pages and add them into my database if they don't exist.
But there's a problem, even though console.log(name) returns actor names in the right order, my query checks for only one actor, the latest one, which is 9.
What's wrong here?
var entities = require("entities");
var request = require('request');
var cheerio = require('cheerio');
// create promisified version of request()
/**
 * Promise adapter around the callback-style request() function.
 *
 * @param {object} options - passed to request() unchanged.
 * @returns {Promise<*>} fulfils with the response body; rejects with the error
 *   that request() reported to its callback.
 */
function requestPromise(options) {
  const executor = (resolve, reject) => {
    request(options, (err, resp, body) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(body);
    });
  };
  return new Promise(executor);
}
var person = require('./models').person;
// GET /fetch: requests person pages 1..9 in parallel, scrapes the actor name from
// each page and checks whether a matching row exists in the database.
// NOTE(review): `res` is never used below, so no HTTP response is ever sent.
app.get('/fetch', function (req, res) {
var promises = [];
var headers = {
'User-Agent': req.headers['user-agent'],
'Content-Type': 'application/json; charset=utf-8'
};
// Start all nine page requests; Promise.all delivers the bodies in this order.
for (var i = 1; i < 10; i++) {
promises.push(requestPromise({url: "http://www.example.com/person/" + i + "/personname.html", headers: headers}));
}
Promise.all(promises).then(function (data) {
// iterate through all the data here
// data[0] is the body of /person/1, so this 0-based `i` is one less than the site's id.
for (var i = 0; i < data.length; i++) {
// NOTE(review): `$` is assigned without a declaration, so it is not a local variable.
if ($ = cheerio.load(data[i])) {
var links = $("#container");
var name = links.find('span[itemprop="name"]').html(); // name
if (name == null) {
console.log("null name returned, do nothing");
} else {
name = entities.decodeHTML(name);
console.log(name); // returns names in the right order
// does this person exist in the database?
// where('id', i) is called during this iteration and receives the current `i`;
// the then() callback runs later and reads the function-scoped `var i` at that time.
person.where('id', i).fetch().then(function (result) {
if (result) {
console.log(i + "exists");
} else {
console.log(i + " doesn't exist");
// returns "9 doesn't exists" 9 times instead of
// checking each ID individually, why?
}
});
}
} else {
console.log("can't open");
}
}
}, function (err) {
// error occurred here
console.log(err);
});
});
EDIT #2
Now the order is broken and my IDs no longer match the IDs of the site I fetch data from. I see IDs like 11 and 13 even though I iterate from 1 to 5, and it seems to override something, since it adds duplicate entries.
Here's what I'm trying to do in a nutshell. "Visit these urls in order and add the data you fetch (e.g. names) in the same order (id1 = name1; id2 = name2, etc) to the database".
/**
 * GET /fetch: scrape person pages 1..4 and insert every person that is not stored yet.
 *
 * The id used for the database lookup/insert is the same id that appears in the page
 * URL, so id N always receives the name scraped from page N. All pages are processed
 * in parallel and the response is sent once every lookup/insert has finished.
 */
app.get('/fetch', function (req, res) {
  var headers = {
    'User-Agent': req.headers['user-agent'],
    'Content-Type': 'application/json; charset=utf-8'
  };

  var ids = [];
  for (var id = 1; id < 5; id++) {
    ids.push(id);
  }

  // Fetches one page and stores the scraped name under that page's own id.
  // Taking `id` as a parameter gives every asynchronous callback its own copy.
  function syncPerson(id) {
    var url = "http://example.com/person/" + id + "/personname.html";
    return requestPromise({url: url, headers: headers}).then(function (html) {
      var $ = cheerio.load(html);
      var name = $("#container span[itemprop='name']").text();
      if (!name) {
        console.log("null name returned, do nothing");
        return null;
      }
      // require: false makes "no such row" resolve with null, so a rejection
      // always means a real database error rather than a missing person.
      return person.where('id', id).fetch({require: false}).then(function (existing) {
        if (existing) {
          console.log(id + " exists");
          return null;
        }
        console.log(id + " does not exist");
        return new person({id: id, name: name}).save(null, {method: 'insert'}).then(function () {
          console.log("success" + id);
        });
      });
    });
  }

  Promise.all(ids.map(syncPerson)).then(function () {
    res.send('done');
  }).catch(function (err) {
    // a request, lookup or insert failed
    console.log(err);
    res.status(500).send('fetch failed');
  });
});
When you run your code through jshint, you will see a warning that says
Don't make functions within a loop.
In this piece of code the callback inside then does not run in sync with the enclosing for loop. It runs whenever the database has fetched your result.
// Excerpt from the question's loop body: the function passed to then() is not run
// here; it is only registered and invoked once the fetch has produced a result.
person.where('id', i).fetch().then(function (result) {
// `i` is read when this callback executes, not when it was created.
if (result) {
console.log(i + "exists");
} else {
console.log(i + " doesn't exist");
}
});
Therefore, when that callback runs eventually, the loop has long finished. Your callback function holds a reference to the loop counter i - which, by now, has the value 9.
It's better to use a function that accepts a parameter than to refer to a loop counter.
Luckily node makes this easy, you can use the forEach array function:
// forEach calls the function once per element with that element's own (item, i)
// pair, so the database callbacks below log the index of the page they belong to.
data.forEach((item, i) => {
  const $ = cheerio.load(item);
  const name = $("#container span[itemprop='name']").text();
  if (!name) {
    console.log("null name returned, do nothing");
    return;
  }
  console.log("successfully scraped name: " + name);
  // With {require: true} a missing row rejects instead of resolving with null.
  person.where('id', i).fetch({require: true})
    .then(() => {
      console.log(i + " exists");
    })
    .catch(() => {
      console.log(i + " does not exist");
    });
});
Note that you can make Bookshelf.js throw instead of silently passing over non-existing records with {require: true}.
More generally speaking, I don't see a real connection between scraping the name from a website and retrieving a model from the database. These two things should probably be done in separate functions that each return an individual promise for the respective thing. That way, requests to the database can run in parallel with requests to the web server.
It looks like you need a closure for person.where('id', i).fetch().
also use node-fetch instead of hand rolling request-promise.
Related
I am trying to finish my script, but for some reason I don't understand, it refuses to execute in the order I wrote it.
I've tried placing a 'wait' function between the JoinRequest update function and the following code, but when run, it acts as if the function call and wait function were the other way round, countering the point of the wait().
const Roblox = require('noblox.js')
var fs = require('fs');
var joinRequests = []
...
/**
 * Blocks the calling thread in a busy loop until the requested time has elapsed.
 *
 * Despite its name, `ms` is multiplied by 1000 before being compared with a
 * millisecond difference, so the argument is effectively a number of seconds.
 * Nothing else (timers, I/O callbacks, promise reactions) runs while this spins.
 *
 * @param {number} ms - duration; see the note above about the unit.
 */
function wait(ms) {
  const startedAt = new Date();
  const limit = ms * 1000;
  let elapsed;
  do {
    elapsed = new Date() - startedAt;
  } while (elapsed < limit);
}
...
// Requests the join requests for group 4745601 and, once they arrive, stores the
// usernames in the outer `joinRequests` variable.
// The function itself returns nothing: the assignment happens inside the then()
// callback, so callers have no way to know when `joinRequests` has been updated.
function updateJReqs() {
Roblox.getJoinRequests(4745601).then((array) => {
var i;
var final = [];
for(i = 0; i < array.length; i++) {
final.push(array[i].username);
};
// NOTE(review): `final` is an array, so this strict comparison with '' is never true.
if(final === '') {
final = '-None';
};
joinRequests = final
console.log('Updated join requests.')
});
}
// Polls for Request.txt every 400 ms. When the file can be read it triggers
// updateJReqs(), hands the current `joinRequests` to fufillRequest(), deletes the
// file and schedules the next poll.
function check() {
setTimeout(() => {
// NOTE(review): `encoding = 'utf-8'` is an assignment expression; its value
// ('utf-8') is what gets passed as the second argument.
fs.readFile('Request.txt',encoding = 'utf-8', function(err, data) {
if (err) {
check();
} else {
updateJReqs(); //for some reason this function is executed alongside the below, not before it.
// updateJReqs() only starts the request and returns; nothing here waits for it,
// so `joinRequests` is read below before the new value has been stored.
// Tried putting wait(x) in here.
console.log('Request received: ' + data)
var solution = joinRequests
console.log('Fuffiling request with ' + solution)
fufillRequest(solution)
fs.unlink('Request.txt', function(err) {
if(err) throw err;
});
check();
}
});
}, 400)
}
check();
The script is supposed to wait until a file is created (accomplished), update the list of join requests (accomplished) and then create a new file with the list of join requests in(not accomplished).
If I understand your code correctly, you are working with asynchronous code: updateJReqs needs to return a promise so that the caller can wait for it, and check needs an exit condition, because as written it recurses forever.
/**
 * Refreshes the shared `joinRequests` variable with the usernames of the pending
 * join requests for group 4745601.
 *
 * The promise returned by getJoinRequests() is returned directly, so callers can
 * wait for the update and a failed request rejects instead of being lost.
 *
 * @returns {Promise<void>} settles after `joinRequests` has been updated.
 */
function updateJReqs() {
  return Roblox.getJoinRequests(4745601).then((array) => {
    const usernames = array.map((entry) => entry.username);
    // An empty list is reported with the '-None' placeholder.
    joinRequests = usernames.length === 0 ? '-None' : usernames;
    console.log('Updated join requests.');
  });
}
/**
 * Waits until Request.txt exists, then refreshes the join requests, fulfils the
 * request with them and deletes the file.
 *
 * The file is polled every `delayMs` milliseconds. The steps run strictly in order:
 * the join requests are only read after updateJReqs() has settled.
 *
 * @param {number} [delayMs=400] - pause before each attempt to read the file.
 * @returns {Promise<string>} resolves with 'Success' once the file has been handled
 *   and removed; rejects if updating, fulfilling or deleting fails.
 */
function check(delayMs = 400) {
  return new Promise((resolve, reject) => {
    const poll = () => {
      setTimeout(() => {
        fs.readFile('Request.txt', 'utf-8', (err, data) => {
          if (err) {
            // The file is not there (yet): try again after the next delay.
            poll();
            return;
          }
          // Promise.resolve() also tolerates an updateJReqs() that returns nothing.
          Promise.resolve(updateJReqs())
            .then(() => {
              console.log('Request received: ' + data);
              const solution = joinRequests;
              console.log('Fuffiling request with ' + solution);
              fufillRequest(solution);
              fs.unlink('Request.txt', (unlinkErr) => {
                if (unlinkErr) {
                  reject(unlinkErr);
                  return;
                }
                resolve('Success');
              });
            })
            .catch(reject);
        });
      }, delayMs);
    };
    poll();
  });
}
check().then(res => console.log(res));
I have an API that i tweaked to make 2 calls to my DB and pull down information .. I can see the results in my console log so i know it is working
The next part is when it renders the view i need to show the results in two places
Here is the code for the API that makes 2 calls to the DB
// Route middleware: queries the apple prices, logs each one, renders the 'index'
// view with only `resultsapples`, then passes control on with next().
// NOTE(review): `err` from the query is never checked before `recordsetapples` is used.
function apples(req, res, next) {
sql.connect(config, function () {
var request = new sql.Request();
request.query("select price from table WHERE fruit = 'apples'", function(err, recordsetapples) {
var arrayLength = recordsetapples.length;
for (var i = 0; i < arrayLength; i++) {
console.log(recordsetapples[i]["price"]);
};
// The view is rendered here without `resultspear`.
res.render('index', { resultsapples: recordsetapples });
return next();
});
});
};
// Route middleware: queries the pear prices, logs each one, renders the 'index'
// view with only `resultspear`, then calls next().
// NOTE(review): `err` from the query is never checked before `recordsetpear` is used.
function pear(req, res, next) {
sql.connect(config, function () {
var request = new sql.Request();
request.query("select price from table WHERE fruit = 'pear'", function(err, recordsetpear) {
var arrayLength = recordsetpear.length;
for (var i = 0; i < arrayLength; i++) {
console.log(recordsetpear[i]["price"]);
};
// Second render for the same request (apples is registered before pear on the
// route), and this one is rendered without `resultsapples`.
res.render('index', { resultspear: recordsetpear });
next();
});
});
};
app.get('/fruit', apples, pear);
So after that runs I can see the price print in console log .. Then i see this error
Cannot read property 'length' of undefined
What I expect to see is the prices appearing in the page. To display them I have this view code:
tr
th.hidden-phone Fruit
th.hidden-phone Price
tr
each val in resultsapples
td.hidden-phone Apples
td.hidden-phone !{val.price}
tr
each val in resultspear
td.hidden-phone Pears
td.hidden-phone !{val.price}
The problem is your view expects both lists at the same time but you attempt to render the view twice with each list separately, which means in either scenario one list in the view will be undefined.
Even if you were to fix this, this approach won't work anyway because after the first res.render the HTTP response will end and return to the client. Ideally you would want to make one trip to the DB for both resultsets and then render the view e.g.
// One round trip to the database for both lists: the batch contains two SELECT
// statements, so the driver reports two recordsets (apples first, pears second).
// The view is rendered exactly once, with both lists present.
sql.connect(config, (connectErr) => {
  if (connectErr) {
    res.status(500).send('Unable to connect to the database.');
    return;
  }
  const request = new sql.Request();
  const batch =
    "select price from table WHERE fruit = 'apples'; " +
    "select price from table WHERE fruit = 'pear'";
  request.query(batch, (err, result) => {
    if (err) {
      res.status(500).send('Unable to load fruit prices.');
      return;
    }
    res.render('index', {
      resultsapples: result.recordsets[0],
      resultspear: result.recordsets[1]
    });
  });
});
As James mentioned your callbacks are async so you're trying to render the view twice. You also need some error handling in your sql functions
/**
 * Loads the apple prices and logs each price.
 *
 * @param {function(*, Array=)} cb - called with (err) when connecting or querying
 *   fails, otherwise with (false, rows).
 */
function apples(cb) {
  sql.connect(config, function (connectErr) {
    // Without this check a failed connection would go unnoticed until the query.
    if (connectErr) {
      return cb(connectErr);
    }
    var request = new sql.Request();
    request.query("select price from table WHERE fruit = 'apples'", function (err, recordsetapples) {
      if (err) {
        return cb(err);
      }
      recordsetapples.forEach(function (row) {
        console.log(row["price"]);
      });
      cb(false, recordsetapples);
    });
  });
}
/**
 * Loads the pear prices and logs each price.
 *
 * @param {function(*, Array=)} cb - called with (err) when connecting or querying
 *   fails, otherwise with (false, rows).
 */
function pear(cb) {
  sql.connect(config, function (connectErr) {
    // Without this check a failed connection would go unnoticed until the query.
    if (connectErr) {
      return cb(connectErr);
    }
    var request = new sql.Request();
    request.query("select price from table WHERE fruit = 'pear'", function (err, recordsetpear) {
      if (err) {
        return cb(err);
      }
      recordsetpear.forEach(function (row) {
        console.log(row["price"]);
      });
      cb(false, recordsetpear);
    });
  });
}
// GET /fruit: load the apple prices, then the pear prices, and render the view once
// with both lists. The keys match the names the view iterates over
// (`resultsapples`, `resultspear`). Every failure path ends the request, so the
// client is never left waiting.
app.get('/fruit', (req, res) => {
  apples((appleerr, appleset) => {
    if (appleerr) {
      console.error(appleerr);
      return res.status(500).send('Unable to load apple prices.');
    }
    pear((pearerr, pearset) => {
      if (pearerr) {
        console.error(pearerr);
        return res.status(500).send('Unable to load pear prices.');
      }
      res.render('index', {
        resultsapples: appleset,
        resultspear: pearset
      });
    });
  });
});
Now for the record, I'm not a fan of nesting the callbacks like this so I would actually recommend you look at Promises and/or async/await but I'm not sure on your coding level so I didn't want to throw too many concepts at you at once.
Also whereas James has merged your SQL statements into one (which is probably the right approach for you) I kept them separate not knowing if you were reusing these individual pieces of code elsewhere and as such didn't want to combine them.
If you are interested in the promise implementation it might look as follows:
/**
 * Loads the apple prices and logs each price.
 *
 * @returns {Promise<Array>} fulfils with the rows; rejects when connecting or
 *   querying fails.
 */
function apples() {
  return new Promise((resolve, reject) => {
    sql.connect(config, function (connectErr) {
      if (connectErr) {
        return reject(connectErr);
      }
      var request = new sql.Request();
      request.query("select price from table WHERE fruit = 'apples'", function (err, recordsetapples) {
        // `return` matters: after an error there is no recordset to read.
        if (err) {
          return reject(err);
        }
        recordsetapples.forEach(function (row) {
          console.log(row["price"]);
        });
        resolve(recordsetapples);
      });
    });
  });
}
/**
 * Loads the pear prices and logs each price.
 *
 * @returns {Promise<Array>} fulfils with the rows; rejects when connecting or
 *   querying fails.
 */
function pear() {
  return new Promise((resolve, reject) => {
    sql.connect(config, function (connectErr) {
      if (connectErr) {
        return reject(connectErr);
      }
      var request = new sql.Request();
      request.query("select price from table WHERE fruit = 'pear'", function (err, recordsetpear) {
        // `return` matters: after an error there is no recordset to read.
        if (err) {
          return reject(err);
        }
        recordsetpear.forEach(function (row) {
          console.log(row["price"]);
        });
        resolve(recordsetpear);
      });
    });
  });
}
// GET /fruit (promise version): load the apples, then the pears, and render the
// view once with both lists under the keys the view iterates over
// (`resultsapples`, `resultspear`). The two lookups stay sequential, as before.
app.get('/fruit', (req, res) => {
  apples()
    .then((appleSet) => pear().then((pearSet) => [appleSet, pearSet]))
    .then(([appleSet, pearSet]) => {
      res.render('index', {
        resultsapples: appleSet,
        resultspear: pearSet
      });
    })
    .catch((err) => {
      // End the request on any failure so the client is not left waiting.
      console.error(err);
      res.status(500).send('Unable to load fruit prices.');
    });
});
I am trying to do dns.reverse() on a list of ip using async.parallel().
The code is as follows:
// Builds one reverse-lookup task per entry of `data` and runs them with
// async.parallel, then stores each task's result on the matching entry.
var functions = [];
for (var i = 0; i < data.length; i++) {
// `var` is function-scoped: every iteration assigns to the same `ip` variable.
var ip = data[i].ip;
var x = function(callback) {
// `ip` is read when the task runs, not when the task was created.
dns.reverse(ip, (err, hostnames) => {
if (err) {
log.error("Error resolving hostname for [" + ip + '] ' + err);
// The error is passed as this task's result (second argument), not as an error.
return callback(null, err);
}
callback(null, hostnames);
});
};
functions.push(x);
}
async.parallel(functions, (err, results) => {
for(var i = 0; i < data.length; i++) {
data[i]['hostnames'] = results[i];
}
handler(null, data);
});
What is happening is dns.reverse() is getting called with the same ip (the last one in data array) for all the calls. May be I am doing something wrong. Can somebody explain what is my mistake?
The first callback is executed after the entire for loop finished, because it's async.
The value of ip will be the one in the last iteration of the loop.
You could put some console.log to realize what's really happening.
The correct way of doing it might be:
// One task per entry; destructuring in the parameter list gives every task its own
// `ip` binding, so each lookup uses the address of the entry it was created for.
async.parallel(data.map(({ ip }) => (callback) => {
  // A failed lookup is reported as that entry's result rather than as an error,
  // so a single unresolvable address does not end the whole batch early.
  dns.reverse(ip, (err, hostnames) => callback(null, err || hostnames));
}), (err, results) => {
  if (err) {
    return handler(err);
  }
  data.forEach((entry, i) => {
    entry.hostnames = results[i];
  });
  handler(null, data);
});
Create a new array of functions based on each ip.
Each function performs the dns.reverse lookup for its own ip and reports the outcome through its callback.
Also, it might be better to return a new data array, not changing data inside the loop:
(err, results) => {
const result = data.map((data, index) => ({
...data,
hostnames: results[index]
})
handler(null, result);
})
Thanks to #apokryfos I got a hint. To get the code working I just need to use let instead of var while declaring ip.
// Same task-building loop as in the question, with `ip` declared via `let`.
var functions = [];
for (var i = 0; i < data.length; i++) {
// `let` is block-scoped: each iteration gets a fresh `ip`, and the task created in
// that iteration keeps referring to it.
let ip = data[i].ip;
var x = function(callback) {
dns.reverse(ip, (err, hostnames) => {
if (err) {
log.error("Error resolving hostname for [" + ip + '] ' + err);
// The error is passed as this task's result (second argument), not as an error.
return callback(null, err);
}
callback(null, hostnames);
});
};
functions.push(x);
}
async.parallel(functions, (err, results) => {
// results[i] is stored on data[i].
for(var i = 0; i < data.length; i++) {
data[i]['hostnames'] = results[i];
}
handler(null, data);
});
For anybody interested in understanding following might be helpful: How do JavaScript closures work?
I'm trying to count how many people of each gender there are in a JSON list sent by the client in a POST request (on a Node.js server). I have trouble understanding JavaScript's asynchronous execution, callbacks and closures.
What i want is:
getting a list from the client,
for every entry ask my collection if that is a m, a f or a u,
count how many fs, ms and us there are,
send an array to the client with the three values.
I always get "Cant set headers after they are sent" or similar errors due to async execution. I tried different callback orders and many different options.
This is how the functions on the server looks like:
// POST /genderize: creates a counter, starts counting and tries to send the result.
app.post('/genderize', function(req, res){
createCounter("conto", req, function(req,contat ){
count(req, contat);
// NOTE(review): createCounter has no return statement, so `.then` is invoked on
// undefined. `res.send( result )` is also evaluated right here as an argument,
// i.e. immediately, rather than being deferred.
}).then(res.send( result ));
});
// Builds a counter object and hands it, together with `req`, to `callback`.
// `nome` is not used in the body. Nothing is returned.
function createCounter( nome, req, callback ) {
// NOTE(review): `result`, `addM`, `addF`, `addU`, `getM` and `getResult` are
// assigned without var/let/const, so they are not local to this function.
result = [0,0,0];
var contatore = function(){
var m = 0;
var f = 0;
var u = 0;
// addM increments result[1]; addF and addU increment the local f and u instead.
addM = function(){ console.log( "m++ "+result[1]);result[1]++; };
addF = function(){ f++; };
addU = function(){ u++; };
// `this.m`, `this.u` and `this.f` are property lookups; they do not refer to the
// local variables m, u and f declared above.
getM = function(){ return this.m;};
getResult = function(){
console.log( result+ " * "+ getM() + " * " + this.u + " * "+ this.f );
return result;
};
return {
addM: addM,
addF: addF,
addU: addU,
getResult: getResult
};
}
callback( req, contatore() );
}
// Starts one database lookup per entry of req.body.data and updates `counter` from
// inside each lookup's callback. The function returns right after the lookups have
// been started; it offers no way to learn when all callbacks have run.
function count( req, counter ){
var collection = db.get('nomi');
var data = req.body.data;
data.forEach(function(value, i){
// Looks up the upper-cased first word of the entry's name.
collection.find({ nome : req.body.data[i].name.split(" ")[0].toUpperCase() }, { fields: {_id:0, nome:0}}, function (err, docs) {
// NOTE(review): `err` is not checked.
if (!isEmptyObject(docs)) {
docs = JSON.parse(JSON.stringify(docs));;
if(docs[0].sesso == "M"){
counter.addM();
} else {
counter.addF();
}
} else {
counter.addU();
}
});
});
}
There are several issues with this example, but the main thing that you missed is that when you perform your database query, the collection.find call will return immediately, but will only execute its callback (function(err, docs)) at some later time after the database has replied.
Here's a working rewrite:
// POST /genderize: validates the body, counts the genders and replies with
// [males, females, unknown].
// The body must contain `data` as an array of objects with a string `name`; anything
// else is rejected with 400 before the database is touched. A plain `length` check
// would also accept strings, which later fail on `name.split`.
app.post('/genderize', function (req, res) {
  var data = req.body && req.body.data;
  var isValid = Array.isArray(data) && data.every(function (entry) {
    return entry != null && typeof entry.name === 'string';
  });
  if (!isValid) {
    return res.status(400).send('Invalid request body.');
  }
  countGenders(db.get('nomi'), data, function (err, genders) {
    if (err) return res.status(500).send('Unable to process request.');
    res.send([genders.M, genders.F, genders.U]);
  });
});
/**
 * Looks up the gender recorded for the first word of `name`.
 *
 * @param {object} collection - object exposing find(query, options, callback).
 * @param {string} name - full name; only the upper-cased first word is queried.
 * @param {function(*, string=)} next - called with (err) on failure, otherwise with
 *   (null, 'M' | 'F' | 'U'); 'U' means no document was found.
 */
function getGenderFromName(collection, name, next) {
  const firstName = name.split(" ")[0].toUpperCase();
  const options = {fields: {_id: 0, nome: 0}};
  collection.find({nome: firstName}, options, (err, docs) => {
    if (err) {
      next(err);
      return;
    }
    const found = docs && docs.length > 0;
    if (!found) {
      next(null, 'U');
      return;
    }
    next(null, docs[0].sesso == "M" ? 'M' : 'F');
  });
}
/**
 * Counts the genders of all entries in `data`, one lookup after the other.
 *
 * @param {object} collection - passed through to getGenderFromName.
 * @param {Array<{name: string}>} data - entries to classify.
 * @param {function(*, object=)} next - called with (err) as soon as a lookup fails,
 *   otherwise with (null, { M, F, U }) after the last entry.
 */
function countGenders(collection, data, next) {
  const tally = { M: 0, F: 0, U: 0 };
  // Each step handles one entry and starts the following step from the lookup's
  // callback, so lookups never overlap.
  const step = (position) => {
    if (data.length == position) {
      return next(null, tally);
    }
    getGenderFromName(collection, data[position].name, (err, gender) => {
      if (err) {
        return next(err);
      }
      tally[gender]++;
      step(position + 1);
    });
  };
  step(0);
}
Lets review the changes:
Removed the createCounter structure. No need for a heavy, get/set pattern for this simple example.
Checked for error values in every asynchronous callback
if (err) return next(err);
Within a route handler, typically you will want to end the request with a res.status(500).send(). In most other cases, return next(err) will 'bubble' the error up.
Moved the database query into a new function, getGenderFromName. It mostly retains your original code. This was optional, but substantially improves the readability of the count function.
Finally, rewrote the count function using an appropriate asynchronous iteration pattern, courtesy of http://book.mixu.net/node/ch7.html. Mixu gives a very easy to understand explanation of asynchronous node, give it a read.
An even better option would be use the excellent async module. You could rewrite the count method as
/**
 * Counts the genders of all entries in `data` using async.eachSeries.
 *
 * @param {object} collection - passed through to getGenderFromName.
 * @param {Array<{name: string}>} data - entries to classify.
 * @param {function(*, object)} next - called once with (err, { M, F, U }) when the
 *   series has finished or a lookup has failed.
 */
function countGenders(collection, data, next) {
  var result = { M: 0, F: 0, U: 0 };
  async.eachSeries(
    data,
    // The per-item callback is named `done` so it cannot be confused with the
    // outer `next`.
    function (value, done) {
      getGenderFromName(collection, value.name, function (err, gender) {
        if (err) return done(err);
        result[gender]++;
        done();
      });
    },
    function (err) { next(err, result); }
  );
}
Async includes lots of different control flow methods to use, not just simple iterations.
Here is a better way to do this. This really cleans up the asynchronous nature of javascript. Checkout the async library that I am using here.
var collection = db.get('nomi');
var async = require('async');
// POST /genderize: counts how many entries of req.body.data are female, male or
// unknown and replies with the counters as JSON.
// Entries may be plain name strings or objects with a `name` property. The reply is
// sent from async.each's final callback, i.e. after every lookup has called back.
app.post('/genderize', function (req, res) {
  const people = req.body && req.body.data;
  if (!Array.isArray(people)) {
    return res.status(400).send('Invalid request body.');
  }

  const countingObject = {
    females: 0,
    males: 0,
    unknown: 0
  };

  async.each(people, function (entry, callback) {
    const fullName = typeof entry === 'string' ? entry : String((entry && entry.name) || '');
    const firstName = fullName.split(" ")[0].toUpperCase();
    collection.findOne({ nome: firstName }, { fields: {_id: 0, nome: 0} }, function (err, nameObject) {
      if (err) {
        return callback(err);
      }
      if (!nameObject) {
        // No document for this name.
        countingObject.unknown++;
      } else if (nameObject.sesso == "M") {
        countingObject.males++;
      } else {
        countingObject.females++;
      }
      callback();
    });
  }, function (err) {
    if (err) {
      return res.status(500).send('Unable to process request.');
    }
    // res.json serialises the object and sets the Content-Type header.
    res.json(countingObject);
  });
});
I have 2 async methods that can run independently one from each other. I would like to call a callback once both are finished. I have tried using async.parallel() (npm) but this seems to be for non async methods. How can I implement this?
Here is my async.parallel call(); note that asyncTasks is my function array, where the functions are async.
// Runs every function in asyncTasks and reports back through `callback`.
// NOTE(review): the final callback declares a single parameter; presumably
// async.parallel passes an error as the first argument and the results as the
// second, in which case `resultFinal` receives the error. Confirm against the
// async documentation.
async.parallel(asyncTasks, function(resultFinal){
console.log("--------->>>>> message: "+JSON.stringify(resultFinal));
console.log("");
callback(new RequestResponse(true, resultFinal));
});
In short, what I really want is a way to execute multiple async methods in parallel and consider that method finished when the callback provided for that function is triggered.
UPDATE
for a better understanding, I've included the two functions I am using
getGroups
// Task: collects, for the user `userId`, the groups the user belongs to, all members
// of those groups and all events of those groups, pushing each list onto the shared
// `responseData` array. Four queries run one after another, each started from the
// previous query's callback. `callback_async_1` is called with `responseData` both
// on success and on the handled error paths.
var getGroups = function (callback_async_1) { //get groups + members
// NOTE(review): `err` of pg.connect is not checked before `client` is used.
pg.connect(datebasePath, function (err, client, done) {
var s = squel.select();
s.from("groups_members");
s.where("user_id = ?", userId);
console.log("query: " + s.toString());
client.query(s.toString(), function (err, result) { //get groups ids in which i am a member
if (err) {
console.error("error...1 " + err);
callback_async_1(responseData);
} else {
// console.log("init -- get from group_members " + JSON.stringify(result.rows));
var groupIds = [];
if (result.rows.length > 0) {
for (var i = 0; i < result.rows.length; i++) {
groupIds.push(result.rows[i].group_id); // create group ids list
}
// console.log("group ids : " + groupIds);
}
// NOTE(review): there is no `return` after this call, so with an empty id list the
// callback fires here and execution still continues with the queries below.
if (groupIds.length === 0) {
callback_async_1(responseData);
}
var s = squel.select();
s.from("groups");
s.where("id IN ?", groupIds);
client.query(s.toString(), function (err, result2) { // retrieve all the groups in which i take part
if (err) {
console.error("error...2 " + err);
callback_async_1(responseData);
return;
} else {
// var groupIds2 = [];
// console.log("init -- get from groups " + JSON.stringify(result2.rows));
var groups = [];
// var groups_members = [];
for (var i = 0; i < result2.rows.length; i++) {
groups.push(result2.rows[i]); // adding group info to list
// var groupId = result2.rows[i].id;
// groupIds2.push(groupId);
}
// console.log("");
//console.log(" ------->>>> " + JSON.stringify(groups));
// console.log("");
// responseData.groups = groups;
responseData.push({ //pushing groups into response
"groups": groups
});
var s = squel.select();
s.from("groups_members");
s.where("group_id IN ?", groupIds);
// NOTE(review): unlike the other queries, `err` is not checked in this callback.
client.query(s.toString(), function (err, result3) { // get all the members in my groups
//console.log("get from group_members --- " + JSON.stringify(result3.rows));
var groupMembers = [];
for (var i = 0; i < result3.rows.length; i++) {
groupMembers.push({
groupMember: result3.rows[i] // pushing all the group members
});
}
//console.log("");
// console.log(" ------->>>> " + JSON.stringify(groupMembers));
// console.log("");
responseData.push({
"groupsMembers": groupMembers
});
// console.log("resulting json till now; Groups : " + JSON.stringify(responseData));
//fetching now events
var s = squel.select();
s.from("events");
s.where("group_id IN ?", groupIds);
client.query(s.toString(), function (err, result4) { //selecting all events that have my groups
if (err) {
console.error("error...3 " + err);
callback_async_1(responseData);
return;
} else {
var events = [];
for (var i = 0; i < result4.rows.length; i++) {
events.push(result4.rows[i]);
}
// responseData.events = events;
responseData.push({
"events": events
});
//responseData.push (events);
// Success path: all three lists have been pushed.
callback_async_1(responseData);
// asyncTasks[1](callback);
}
});
});
}
});
}
});
// NOTE(review): done() is the statement right after the first client.query(...)
// call, so it is reached as soon as that query has been issued, presumably before
// any of the query callbacks above have run.
done();
});
};
getRegisteredContacts
// Task: selects the users whose phone number is in `arrayOfContacts` and, when any
// are found, pushes them onto the shared `responseData` array under
// "registeredContacts". `callback_async_2` is called with `responseData` on both the
// error and the success path.
var getRegisteredContacts = function (callback_async_2) { // get registered contacts
// NOTE(review): `err` of pg.connect is not checked before `client` is used.
pg.connect(datebasePath, function (err, client, done) {
//get contacts that are registered
var s = squel.select();
s.from("users");
s.where("phone_number IN ?", arrayOfContacts);
client.query(s.toString(), function (err, result5) { // retriving registered contacts -- should be run with async parallel, it does not depend on the other calls
if (err) {
console.error(err);
callback_async_2(responseData);
} else {
if (result5.rows.length > 0) {
var contacts = [];
for (var i = 0; i < result5.rows.length; i++) {
contacts.push(result5.rows[i]);
}
responseData.push({
"registeredContacts": contacts
});
}
//console.log("");
//console.log(" ------->>>> " + JSON.stringify(events));
// console.log("");
// console.log("final ---> " + JSON.stringify(responseData));
callback_async_2(responseData);
}
});
// NOTE(review): done() is the statement right after client.query(...), so it is
// reached as soon as the query has been issued, presumably before its callback runs.
done();
});
};
You need your task function to take a parameter which you then call when the task is done
// Minimal synchronous task: do the work, then signal completion by calling the
// supplied callback with a null error.
var task = (callback) => {
  console.log('Task');
  callback(null);
};
When you are then doing something async within the task then your task would look like
// Asynchronous task: the completion callback is only called from inside the request
// callback, i.e. once the request has finished.
// A failed request is passed on as the task's error; `response` is not available
// in that case, so it is not touched.
var task = function (callback) {
  console.log('Task');
  request.get('http://www.google.com', function (error, response, body) {
    if (error) {
      return callback(error);
    }
    console.log('Task - ' + response.statusCode);
    callback(null);
  });
};
Example
var async = require('async');
var request = require('request');
// Synchronous task: completes immediately.
var task1 = function (callback) {
  console.log('Task 1');
  callback(null);
};

// Asynchronous task: completes when the HTTP request has finished. A failed request
// is reported as the task's error instead of reading `response`.
var task2 = function (callback) {
  console.log('Task 2');
  request.get('http://www.google.com', function (error, response, body) {
    if (error) {
      return callback(error);
    }
    console.log('Task 2 - ' + response.statusCode);
    callback(null);
  });
};

var asyncTasks = [task1, task2];

// The final callback runs once, after every task has called its callback (or as
// soon as one task reports an error).
async.parallel(asyncTasks, function (err, result) {
  if (err) {
    console.error(err);
    return;
  }
  console.log('--DONE--');
});
Outputs
Task 1
Task 2
Task 2 - 200
--DONE--
Based on your new code listing the most obvious thing is done() is called too early for both of your tasks. It needs to be like
/**
 * Task: selects the users whose phone number is in `arrayOfContacts`.
 *
 * @param {function(*, Array=)} callback_async_2 - called exactly once, with (err)
 *   when connecting or querying fails, otherwise with (null, rows).
 */
var getRegisteredContacts = function (callback_async_2) {
  pg.connect(datebasePath, function (connectErr, client, done) {
    // Without a connection there is no client to query.
    if (connectErr) {
      return callback_async_2(connectErr);
    }
    var s = squel.select();
    s.from("users");
    s.where("phone_number IN ?", arrayOfContacts);
    client.query(s.toString(), function (err, result5) {
      done(); // <---- done() to be here: release the client once the query has finished
      if (err) {
        return callback_async_2(err);
      }
      callback_async_2(null, result5.rows);
    });
  });
};
You should also lint your code. If you had, you would have noticed that you never check the err argument of the pg.connect callback (linting also keeps the code easier to read correctly).