Wrapping nightmare in a promise / Processing one url at a time - node.js

I'm attempting to use nightmarejs to scrape information from a few websites. The problem that I'm running into is, I only want to open one window at a time and wait for it to close before processing the next url.
// index.js
var urls = // an array of urls.
var l = 10; // urls.length;
while (l--) {
// g.findById(id).then()....
// y.findById(id).then()....
UrlProcessing.findById(id).then(function(results) {
console.log(results);
});
}
Now the findByid:
//UrlProcessing.js
class UrlProcessing {
findById(id) {
var address = id;
return new Promise(function (resolve, reject) {
vo(function*(address) {
var nightmare = new Nightmare({show: true});
var link = yield nightmare
.goto(address)
.wait(2000)
.evaluate(function() {
return document.getElementsByTagName('html')[0].innerHTML;
});
yield nightmare.end();
return yield link;
})(address, function(err, res) {
if(err) reject(err);
resolve(res);
});
});
}
module.exports = UrlProcessing;
}
Any suggestions on how I can achieve this? I want to perform each findById from within the while loop.

Without modifying findById, you can simulate series- or waterfall-like behavior using reduce:
var urls = ['http://www.yahoo.com', 'http://example.com', 'http://w3c.org'];

// Fold the URL list into one promise chain. The chain starts as an already
// resolved empty array; each step waits for the previous one, calls findById
// for its own URL, and appends that result before handing the array on.
urls
  .reduce(
    (chain, url) =>
      chain.then((collected) =>
        findById(url).then((page) => {
          collected.push(page);
          return collected;
        })
      ),
    Promise.resolve([])
  )
  .then((results) => {
    //do what you need to do with the results
  });
For completeness' sake, and because I had to make a couple of touchups, the findById method with my (slight) modifications:
/**
 * Opens `address` in a fresh Nightmare window, waits two seconds, and
 * resolves with the innerHTML of the page's <html> element.
 *
 * The window is closed in a `finally` block so that a failed navigation or
 * evaluation does not leave a Nightmare instance running.
 *
 * @param {string} address - URL to load.
 * @returns {Promise<string>} Resolves with the page markup; rejects with the
 *   error reported by the vo callback.
 */
function findById(address) {
  return new Promise(function(resolve, reject) {
    vo(function * (address) {
      const nightmare = new Nightmare({
        show: true
      });
      try {
        return yield nightmare
          .goto(address)
          .wait(2000)
          .evaluate(function() {
            return document.getElementsByTagName('html')[0].innerHTML;
          });
      } finally {
        // Runs on success and on failure alike.
        yield nightmare.end();
      }
    })(address, function(err, res) {
      if (err) {
        // Stop here so resolve() is not also called on the error path.
        reject(err);
        return;
      }
      resolve(res);
    });
  });
}
... all that being said, I'm not sure this approach is best. Why do you want them one at a time in separate Nightmare instances? I realize this doesn't totally fit your original implementation, but this may be something you want to consider - you could change findById around to accept an array instead of a single URL and also (optionally) use the same Nightmare instance. Calling findById:
var urls = ['http://www.yahoo.com', 'http://example.com', 'http://w3c.org'];
findById(urls)
.then(function(results) {
//do what you need to do with the results
});
... and findById itself:
/**
 * Visits every URL in `addresses` one after another in a single Nightmare
 * window and resolves with the innerHTML of each page's <html> element, in
 * the same order as the input.
 *
 * The window is closed in a `finally` block, so a failure on any URL still
 * shuts the Nightmare instance down before the promise rejects.
 *
 * @param {string[]} addresses - URLs to load sequentially.
 * @returns {Promise<string[]>} Resolves with one markup string per URL;
 *   rejects with the error reported by the vo callback.
 */
function findById(addresses) {
  return new Promise(function(resolve, reject) {
    vo(function * (addresses) {
      const nightmare = new Nightmare({
        show: true
      });
      const results = [];
      try {
        for (const address of addresses) {
          results.push(yield nightmare
            .goto(address)
            .wait(2000)
            .evaluate(function() {
              return document.getElementsByTagName('html')[0].innerHTML;
            }));
        }
        return results;
      } finally {
        // Runs on success and on failure alike.
        yield nightmare.end();
      }
    })(addresses, function(err, res) {
      if (err) {
        // Stop here so resolve() is not also called on the error path.
        reject(err);
        return;
      }
      resolve(res);
    });
  });
}
Of course, if you still wanted fresh Nightmare instances every time, you could move the constructor call and the call to .end() inside of the for loop.

You are doing almost everything correctly; all you need to do now is sequentialize the promises, i.e. chain them. You can take a look at this answer.
Just change your code to use reduce:
// index.js
// Process the URLs strictly one at a time: each step waits for the previous
// promise before starting its own lookup. The lookup is called the same way
// the question's loop calls it (UrlProcessing.findById), passing the current
// URL; `url` is a plain string and has no findById method of its own.
urls.reduce(function (promise, url) {
  return promise.then(function () {
    return UrlProcessing.findById(url);
  }).then(function (results) {
    console.log(results);
  });
}, Promise.resolve())
.then(function () {
  console.log('All done');
});
in more condensed ES6 form, it would be:
// Same sequential chain in arrow-function form; the lookup receives the current URL.
urls.reduce((p, url) => p.then(() => UrlProcessing.findById(url)).then(r => console.log(r)), Promise.resolve())
.then(() => console.log('All done') );

Related

Not able to return value from promise in Nodejs

I have written the following code in Nodejs which is saving data in MongoDB:
// Inserts `data` into the 'AnalysisCollection' collection of `db` and returns a Promise.
// NOTE(review): as written, the returned Promise resolves with `undefined` — see the notes below.
function insertDoc(db,data){
// Only `resolve` is taken from the executor, so this Promise has no way to reject.
return new Promise(resolve => {
// `callback` is assigned without a declaration and is never read in this function.
// A `.then` handler receives a single value, so its second parameter `obj` is always undefined.
callback=db.collection('AnalysisCollection').insertOne(data).then(function(response,obj){
console.log("Inserted record");
resolve(obj);
//console.log(obj);
// response.on('end',function(){
// resolve(obj);
// });
//return resolve(obj);
// `obj` is not declared in the next handler's scope, so (unless a global `obj` exists) it throws a ReferenceError.
}).then(() => { return obj }
// Any error from the chain above is re-thrown here wrapped in a new Error; nothing downstream handles it.
).catch(function(error){
throw new Error(error);
});
})
}
I am calling the above function from the main function like this:
async function cosmosDBConnect(nluResultJSON){
try{
//console.log("Inserting to cosmos DB");
console.log(nluResultJSON);
var url = config.cosmos_endpoint;
var result="";
var data = JSON.parse(JSON.stringify(nluResultJSON));
MongoClient.connect(url, function(err, client) {
assert.equal(null, err);
var db = client.db('NLUAnalysisDB');
// insertDoc(db, data, function() {
result=insertDoc(db, data, function() {
console.log(result);
client.close();
//return data._id;
});
});
}
catch (e) {
console.log(e);
}
}
module.exports = { cosmosDBConnect };
But in cosmosDBConnect, I am getting 'undefined' for the result, though in insertDoc I am getting the output for 'obj' with the _id of the inserted record.
Please help me to return this _id to cosmosDBConnect.
You are using callbacks inside an async function, and each callback creates its own inner scope, so your return applies to the callback instead of the whole function. Inside an async function you should use Promise-based methods with await (without callbacks), or otherwise wrap the whole function in its own Promise.
Example:
/**
 * Connects to the database at `config.cosmos_endpoint`, inserts a JSON copy of
 * `nluResultJSON` through insertDoc, and resolves with the `_id` found on the
 * inserted copy.
 *
 * The client is closed on both the success and the failure path of insertDoc,
 * and an insertDoc rejection is forwarded to the caller so the returned
 * promise always settles.
 *
 * @param {object} nluResultJSON - JSON-serialisable document to store.
 * @returns {Promise<*>} Resolves with `data._id`; rejects with the connection
 *   error or the insertDoc error.
 */
function cosmosDBConnect(nluResultJSON) {
  return new Promise((resolve, reject) => {
    const url = config.cosmos_endpoint;
    // Work on a copy so the caller's object is not modified by the insert.
    const data = JSON.parse(JSON.stringify(nluResultJSON));
    MongoClient.connect(url, function(err, client) {
      if (err) return reject(err);
      const db = client.db('NLUAnalysisDB');
      insertDoc(db, data).then(
        (obj) => {
          console.log(obj);
          client.close();
          resolve(data._id);
        },
        (insertErr) => {
          client.close();
          reject(insertErr);
        }
      );
    });
  });
}
Also, you need to understand that your insertDoc returns a Promise and does not accept the callback you tried to pass.
Ref: async function
result = insertDoc(db, data).then((data) => {
console.log(data);
}).catch(err => console.error(err));

Asynchronous Results Assigned to a Sequential Array

I'm doing Exercise 9 of LearnYouNode. The goal of the exercise is to print the contents of the HTTP results in the order of the arguments given on the command line. Everything seems to be working correctly, but they are not staying in order. I realize that having the jobId inside the callback is wrong because it won't execute until it completes, but I'm still completely blocked on how to make it behave as intended. Just a FYI, I'm trying to do this without using Async or any other libraries for educational purposes. Also, any other tips based on my coding not related to my problem would be appreciated!
const http = require('http');
urls = process.argv.slice(2, process.argv.length);
// Issues an http.get for each URL, accumulates each response body as a string,
// and calls callback(null, results) once as many responses have ended as there are entries in urlList.
function multiGetter (urlList, callback) {
var results = new Array(urlList.length);
var current = 0;
var completed = 0;
var hasErrors = false;
// Forwards an error to callback, or delivers results once every response has ended.
function done(err) {
if(err) {
hasErrors = true;
return callback(err);
}
if(++completed === urlList.length && !hasErrors) {
callback(null, results);
}
}
// NOTE(review): iterates the outer `urls` variable rather than the `urlList` parameter.
urls.forEach( (url) => {
http.get(url, (res) => {
// jobId is taken when the response arrives, so slots are filled in arrival order, not in the order of the URLs.
let jobId = current;
current++;
results[jobId] = '';
res.setEncoding('utf8')
.on('error', (e) => { console.error(e.message); })
.on('data', (data) => { results[jobId] += data; })
.on('end', () => { done(null); });
// Request errors are only logged; done() is not called for them.
}).on('error', console.error);
});
}
multiGetter(urls, (err, contents) => {
if (err) {
console.error;
}
contents.forEach(result => {
console.log(result);
});
});
One way of doing this could be the following:
change the results variable into an object instead of an array: var results = {};
assign to jobId the value of url instead of current (you can get rid of the current variable)
Finally, in your callback at the bottom, change the iteration to:
urls.forEach(url => {
console.log(contents[url]);
});

Get data after map function

I am not able to get the items: the function returns [], but console.log(item) shows the correct data. I think the result is resolved before my map() has finished running. How can I solve this issue? I am new to Node.
function getBlockUsers() {
return new Promise(function (resolve, reject) {
BlockUser.find({userId:req.user._id}).populate("blockedId").lean().exec(function (err,result) {
if(err){
reject({"msg":"failed to getting block user."})
}else{
var results = [];
result.map(function(item){
Vehicle.findOne({userId:item.blockedId}).lean().exec(function(err,vehicle){
if(vehicle){
item.vehicleId = vehicle._id;
item.vehicleModel = vehicle.model;
}
results.push(item)
console.log(item)
});
});
resolve(results);
}
})
});
}
Because you call an asynchronous function inside map, which is synchronous, you need to create an array of promises and use Promise.all before the resolve to wait for all the results.
The code below should fix your issue.
/**
 * Loads the block-user records for `req.user._id` and, for each record, looks
 * up a vehicle whose userId equals the record's blockedId. When a vehicle is
 * found its _id and model are copied onto the record.
 *
 * @returns {Promise<Array>} Resolves with the records once every vehicle
 *   lookup has finished; rejects with a message object when the first query
 *   fails, or with the lookup error when a vehicle query fails.
 */
function getBlockUsers() {
  // One promise per record: settles when that record's vehicle lookup returns.
  const attachVehicle = (item) =>
    new Promise((resolveItem, rejectItem) => {
      Vehicle.findOne({ userId: item.blockedId }).lean().exec((err, vehicle) => {
        if (err) {
          rejectItem(err);
          return;
        }
        if (vehicle) {
          item.vehicleId = vehicle._id;
          item.vehicleModel = vehicle.model;
        }
        resolveItem(item);
      });
    });

  return new Promise((resolve, reject) => {
    BlockUser.find({ userId: req.user._id })
      .populate("blockedId")
      .lean()
      .exec((err, result) => {
        if (err) {
          reject({ "msg": "failed to getting block user." });
          return;
        }
        // Resolving with Promise.all makes the outer promise wait for every lookup.
        const lookups = result.map((item) => attachVehicle(item));
        resolve(Promise.all(lookups));
      });
  });
}
The problem is you have non-blocking code inside your result.map().
You should try using just one DB query. Then resolve all the items in the exec callback. Otherwise use a promise for the original query.
// One query for all vehicles: `$in` goes under the field it filters,
// i.e. { field: { $in: [values] } }.
Vehicle.find({ userId: { $in: result.map( item => item.blockedId) } }).lean().exec( (err, results) => {
  // Forward a failed query through the enclosing promise's reject.
  if (err) return reject(err);
  // add the vehicle / model ids to each item in results
  resolve(results)
})

Do something async with underscore map

function addSomething(data) {
var defer = q.defer();
data = _.map(data, function(item) {
item['something'] = callSomethingAsync();
return item;
});
return defer.promise;
}
How can I handle this problem. The only way I found is using Async.js.
But maybe there is a better way using $q?
EDIT:
// Queries the Something table for the row matching item.id and returns a deferred promise
// that the query callback resolves with `item` (or rejects with the query error).
function getScopes(item) {
var defer = q.defer();
// NOTE(review): item.id is concatenated straight into the SQL text — an injection risk if item.id is untrusted.
var query = "SELECT somevalue FROM Something WHERE ID = '" + item.id + "'";
mysql.query(query, function(err, data) {
if (err) {
defer.reject(err);
} else {
// `newkey` is not defined anywhere in this snippet.
item[newkey] = data
defer.resolve(item);
}
});
// Runs synchronously, before the query callback above; `data` is not declared in this scope.
defer.resolve(data)
return defer.promise;
}
//add necessary scopes to the audit
function addScopes(data) {
var promises = _.map(data, function(item) {
return getScopes(item);
});
return Promise.all(promises);
}
How I can prevent using defer in the getScopes function?
Edit 2:
var query = "SELECT * FROM tiscope";
Q.nfcall(mysql.query, query).then(function(data) {
console.log(data);
});
there is nothing returned.
Here is how I use mysql:
var sql = require('mysql');
var connection = sql.createConnection({
host : 'xxx',
user : 'xxx',
password : 'xxx',
database : 'xxx'
});
connection.connect(function(err) {
if (err) {
console.error('error connecting: ' + err.stack);
} else {
console.log('mysql connection established');
}
});
module.exports = connection;
Maybe there is the mistake.
A lot of promise libraries provide a map function. It seems Q does not. No matter: the same can be accomplished with vanilla promises (and Q) anyway, using the all function.
First things first. Avoid defer. It makes code more difficult to reason about and maintain. There are only a few rare cases when defer is needed. The rest of the time a normal promise constructor/helper functions will work better.
Normal Promises Example
/**
 * Starts callSomethingAsync for every item and waits for all of them.
 *
 * @param {Array} data - Items to process (the same parameter the question's
 *   addSomething takes).
 * @returns {Promise<Array>} Resolves with one result per item, in input order.
 */
function addSomething(data) {
  const promises = _.map(data, function(item) {
    return callSomethingAsync(item);
  });
  return Promise.all(promises);
}
Q Promises Example
/**
 * Q flavour of the same helper: starts callSomethingAsync for every item and
 * combines the promises with $q.all.
 *
 * @param {Array} data - Items to process (the same parameter the question's
 *   addSomething takes).
 * @returns {Promise<Array>} Whatever $q.all returns for the list of promises.
 */
function addSomething(data) {
  const promises = _.map(data, function(item) {
    return callSomethingAsync(item);
  });
  return $q.all(promises);
}
Presumably callSomethingAsync returns a promise. If not use the promise constructor pattern:
/**
 * Adapts a callback-style function to a promise.
 *
 * `asyncFn` is invoked as asyncFn(callback, args); the callback follows the
 * (err, result) convention. A truthy `err` rejects the promise, anything else
 * resolves it with `result`.
 *
 * @param {Function} asyncFn - Function taking (callback, args).
 * @param {*} args - Value handed to asyncFn as its second argument.
 * @returns {Promise<*>} Settles according to the callback invocation.
 */
function toPromise(asyncFn, args) {
  return new Promise((resolve, reject) => {
    const settle = (err, result) => (err ? reject(err) : resolve(result));
    asyncFn(settle, args);
  });
}
/**
 * Wraps callSomethingAsync with toPromise for every item and waits for all
 * of the resulting promises.
 *
 * @param {Array} data - Items to process (the same parameter the question's
 *   addSomething takes).
 * @returns {Promise<Array>} Resolves with one result per item, in input order.
 */
function addSomething(data) {
  const promises = _.map(data, function(item) {
    return toPromise(callSomethingAsync, item);
  });
  return Promise.all(promises);
}

Chaining multiple chained promises with Q (loopback app)

Here is a scenario, I've implemented a loopback remote method which imports some data from REST connector to local postgresql connector.
I can do this for a single model
var importData = function (model, cb) {
migrateModel(model, cb)
.then(findImportInfo)
.then(fetchRemoteData)
.then(processFetchedData)
.then(updateImportInfo)
.then(countLocalData)
.then(importCompleted)
.catch(function (err) {
importFailed(err, cb);
})
.done(function () {
console.log('done');
});
};
So the chain does many things, and at the end importCompleted calls the provided cb, which is the callback that returns the response to the REST API.
But I can't figure how to do this with multiple models and return each result. I tried something like this, it works actually but REST API never receives a result.
var importDataAll = function (app, cb) {
var models = app.models();
var deferred = Q.defer();
var promises = [];
var results = [];
function doCallback() {
cb(null, results);
}
models.forEach(function (model) {
if (typeof model.importData === 'function') {
migrateModel(model, model.definition.name, null)
.then(findImportInfo)
.then(fetchRemoteData)
.then(processFetchedData)
.then(updateImportInfo)
.then(countLocalData)
.then(function (prevResult) {
var deferred = Q.defer();
var remoteCount = prevResult.dataCount;
var localCount = prevResult.recordCount;
var result =
{
'time': new Date(),
'remoteCount': remoteCount,
'localCount': localCount,
'started': prevResult.started,
'completed': new Date()
}
results.push(result);
deferred.resolve(result);
return deferred.promise;
})
.catch(function (err) {
promises.reject(err);
})
}
});
return Q.allSettled(promises).then(doCallback);
};
I'm lost at that point, any ideas?
EDIT
Trying @Otze's answer, I also tried this:
var importDataAll = function (app, cb) {
var models = app.models().filter(function (element, index, array) {
return typeof element.importData === 'function';
});
var promises = models.map(function (model) {
migrateModel(model, model.definition.name, null)
.then(findImportInfo)
.then(fetchRemoteData)
.then(processFetchedData)
.then(updateImportInfo)
.then(countLocalData)
.then(importResult)
.catch(function (err) {
promises.reject(err);
})
});
Q.all(promises)
.then(function (resolvedPromises) {
cb(null, results);
});
};
But the result is the same: cb gets called early, but the code actually runs in order. I just can't get the result into the response. I think it never ends, so the REST API gets no content after some time.
Have a look at Q.all or any of the other promise combination functions:
http://documentup.com/kriskowal/q/#combination
With Q.all you could do something like this:
var promises = myModels.map(doAllThePromiseThings);
Q.all(promises)
.then(function(resolvedPromises) {
doStuff();
});
Note that you need to return a promise from doAllThePromiseThings.
Since .then returns a promise you can simply do:
.then(function (prevResult) {
return {
'time': new Date(),
'remoteCount': prevResult.dataCount,
'localCount': prevResult.recordCount,
'started': prevResult.started,
'completed': new Date()
};
})
instead of
.then(function (prevResult) {
var deferred = Q.defer();
var remoteCount = prevResult.dataCount;
var localCount = prevResult.recordCount;
var result =
{
'time': new Date(),
'remoteCount': remoteCount,
'localCount': localCount,
'started': prevResult.started,
'completed': new Date()
}
results.push(result);
deferred.resolve(result);
return deferred.promise;
})
I use bluebird library's map method to accomplish such use cases:
https://github.com/petkaantonov/bluebird/blob/master/API.md#mapfunction-mapper--object-options---promise
var Promise = require('bluebird');
/**
 * Runs the import chain for every model that has an importData function,
 * one model at a time (bluebird Promise.map with concurrency 1), then calls
 * `cb` with no error, or with the first error raised by any chain.
 *
 * @param {object} app - Application object; app.models() supplies the models.
 * @param {Function} cb - Node-style callback invoked as cb(null) or cb(err).
 */
var importDataAll = function (app, cb) {
  var models = app.models().filter(function (element, index, array) {
    return typeof element.importData === 'function';
  });
  Promise.map(
    models,
    function (model) {
      // Returning the chain is what lets Promise.map wait for this model.
      return migrateModel(model, model.definition.name, null)
        .then(findImportInfo)
        .then(fetchRemoteData)
        .then(processFetchedData)
        .then(updateImportInfo)
        .then(countLocalData)
        .then(importResult)
        .then(function () {
          // ... per-model follow-up work goes here ...
          return Promise.resolve(); // don't want to break the promise chain
        });
    },
    {concurrency: 1}
  )
    .then(function () {
      debug('finished working on all the models one-by-one');
      cb(null);
    })
    .catch(function (err) {
      cb(err);
    });
};

Resources