Nightmare.js conditional browsing - node.js

I'm trying to understand how I should make a nightmare.js script using "if-then" logic. For example
// Question snippet: load example.com, read the page title, and (not yet
// implemented) navigate to a different URL depending on that title.
var Nightmare = require('nightmare');
var nightmare = Nightmare({
show: true,
paths: {
userData: '/dev/null'
}
});
nightmare
.goto('http://www.example.com/')
.wait('h1')
// the function passed to evaluate() reads the <title> element's text via `document`
.evaluate(function() {
return document.querySelector('title').innerText;
})
// here: go to url1 if title == '123' otherwise to url2
.end()
.then(function() {
console.log('then', arguments);
// NOTE: despite its 'end' label, the handler below is the rejection handler
}).catch(function() {
console.log('end', arguments);
});
How do I make this script go to a different url depending on the result of evaluate?

Since Nightmare is thenable, you can return it from a .then() to chain it like you would ordinary Promises.
var Nightmare = require('nightmare');
var nightmare = Nightmare({
  show: true,
  paths: {
    userData: '/dev/null'
  }
});

// Load the page, read its title, then branch on it. Every `.then()` handler
// returns the nightmare instance, so the actions queued on it are executed
// before the next `.then()` in the chain runs.
nightmare
  .goto('http://www.example.com/')
  .wait('h1')
  .evaluate(function() {
    return document.querySelector('title')
      .innerText;
  })
  .then(function(title) {
    // the title is compared as a string, so use strict equality
    if (title === 'someTitle') {
      return nightmare.goto('http://www.yahoo.com');
    } else {
      return nightmare.goto('http://w3c.org');
    }
  })
  .then(function() {
    //since nightmare is `then`able, this `.then()` will
    //execute the call chain described and returned in
    //the previous `.then()`
    return nightmare
      //... other actions...
      .end();
  })
  .then(function() {
    console.log('done');
  })
  .catch(function(err) {
    console.log('caught', err);
    // the `.end()` above is skipped whenever an earlier step rejects, so the
    // browser has to be closed on this path as well
    return nightmare.end();
  });
If you want a more synchronous-looking logic, you may want to consider using generators with vo or co. For example, the above rewritten with vo:
var Nightmare = require('nightmare');
var vo = require('vo');

// Generator body: each `yield` hands a nightmare chain to vo and resumes with
// the value that chain produced.
function * browse() {
  var nightmare = Nightmare({
    show: true,
    paths: {
      userData: '/dev/null'
    }
  });

  var title = yield nightmare
    .goto('http://www.example.com/')
    .wait('h1')
    .evaluate(function() {
      var titleNode = document.querySelector('title');
      return titleNode.innerText;
    });

  // pick the follow-up page from the title, then visit it
  var nextUrl = title == 'someTitle' ? 'http://www.yahoo.com' : 'http://w3c.org';
  yield nightmare.goto(nextUrl);

  //... other actions...
  yield nightmare.end();
}

// Completion callback: receives the error, if any, raised by the generator.
function report(err) {
  if (!err) {
    console.log('done');
    return;
  }
  console.log('caught', err);
}

vo(browse)(report);

Related

TypeError in Node.js application

I've copied this example program from Node.js textbook:
// Question snippet: insert one document, increment its `visits` counter four
// times, then read it back and print the counter.
var MongoClient = require('mongodb').MongoClient;
var website = {
url: 'http://www.google.com',
visits: 0
};
// NOTE: this url ('www.google.com') is not the same string as website.url
// ('http://www.google.com'), so queries built from findKey do not match the
// document inserted below.
var findKey = {
url: 'www.google.com'
}
MongoClient.connect('mongodb://127.0.0.1:27017/demo', { useNewUrlParser: true }, function(err, client) {
// NOTE: client is dereferenced here before err is checked on the next line
var db = client.db('demo');
if(err) throw err;
var collection = db.collection('websites');
collection.insert(website, function(err, docs) {
var done = 0;
// Shared callback for the four updates; only the fourth invocation goes on
// to read the document back. The err argument is not inspected.
function onDone(err) {
done++;
if(done < 4) return;
collection.find(findKey).toArray(function(err, results) {
// results[0] is undefined when the query matched nothing, which makes this
// property read throw
console.log('Visits:', results[0].visits);
//cleanup
collection.drop(function() {
client.close();
});
});
}
var incrementVisits = {
'$inc': {
'visits': 1
}
};
// four updates are issued back to back; each reports to onDone
collection.update(findKey, incrementVisits, onDone);
collection.update(findKey, incrementVisits, onDone);
collection.update(findKey, incrementVisits, onDone);
collection.update(findKey, incrementVisits, onDone);
});
});
It throws this error when I run it:
/Users/me/Documents/Beginning NodeJS/node_modules/mongodb/lib/utils.js:132
throw err;
^
TypeError: Cannot read property 'visits' of undefined
at /Users/me/Documents/Beginning NodeJS/update/2update.js:26:43
at result (/Users/me/Documents/Beginning NodeJS/node_modules/mongodb/lib/utils.js:414:17)
at executeCallback (/Users/me/Documents/Beginning NodeJS/node_modules/mongodb/lib/utils.js:406:9)
at handleCallback (/Users/me/Documents/Beginning NodeJS/node_modules/mongodb/lib/utils.js:128:55)
at self.close (/Users/me/Documents/Beginning NodeJS/node_modules/mongodb/lib/cursor.js:905:60)
at handleCallback (/Users/me/Documents/Beginning NodeJS/node_modules/mongodb/lib/utils.js:128:55)
at completeClose (/Users/me/Documents/Beginning NodeJS/node_modules/mongodb/lib/cursor.js:1044:14)
at Cursor.close (/Users/me/Documents/Beginning NodeJS/node_modules/mongodb/lib/cursor.js:1057:10)
at /Users/me/Documents/Beginning NodeJS/node_modules/mongodb/lib/cursor.js:905:21
at handleCallback (/Users/me/Documents/Beginning NodeJS/node_modules/mongodb-core/lib/cursor.js:199:5)
I can't see whats wrong here but the textbook is a few years old and I've already had issues where the code was out of date and wouldn't work so I want to check if that is the case here.
It's a pretty horrible example you are following, but basically the errors all stem from one mismatch: the document is created with the url value http://www.google.com, while findKey searches for www.google.com. Because the two strings differ, the query returns no result, so results is an empty array, results[0] is undefined, and reading a property from it throws.
The basic corrections would be to fix that, and actually use findOneAndUpdate() in all cases, since that will atomically return a document.
var MongoClient = require('mongodb').MongoClient;
var website = {
  url: 'http://www.google.com',
  visits: 0
};
// must be the same url string as website.url, otherwise nothing matches
var findKey = {
  url: 'http://www.google.com'
}
MongoClient.connect('mongodb://127.0.0.1:27017/demo', { useNewUrlParser: true }, function(err, client) {
  // check the error before touching `client`, which is unusable on failure
  if(err) throw err;
  var db = client.db('demo');
  var collection = db.collection('websites');
  // Seed (or reset) the document. The fields are wrapped in $set because
  // findOneAndUpdate takes update operators rather than a replacement document.
  collection.findOneAndUpdate(
    findKey, { '$set': website }, { upsert: true },function(err, doc) {
      if(err) throw err;
      var done = 0;
      // Shared callback for the four increments: prints the counter returned
      // by each update and cleans up after the fourth one.
      function onDone(err,doc) {
        if(err) throw err;
        done++;
        console.log("Visits: %s", doc.value.visits);
        if (done >= 4) {
          collection.drop(function(err) {
            client.close();
          });
        }
      }
      var incrementVisits = {
        '$inc': {
          'visits': 1
        }
      };
      // returnOriginal: false makes the callback receive the document as it is
      // after the increment
      var options = { returnOriginal: false };
      collection.findOneAndUpdate(findKey, incrementVisits, options, onDone);
      collection.findOneAndUpdate(findKey, incrementVisits, options, onDone);
      collection.findOneAndUpdate(findKey, incrementVisits, options, onDone);
      collection.findOneAndUpdate(findKey, incrementVisits, options, onDone);
    });
});
Note those "four" calls at the end do not resolve immediately. These simply queue up async functions and there is no guaranteed order to their execution.
But the script will return:
Visits: 1
Visits: 2
Visits: 3
Visits: 4
A much better and "modern" example would instead be:
const { MongoClient } = require("mongodb");
const uri = "mongodb://localhost:27017/";
const options = { useNewUrlParser: true };
const website = {
  url: 'http://www.google.com',
  visits: 0
};
const findKey = { url: 'http://www.google.com' };

// Insert the document, increment `visits` four times one after another,
// print the counter after each increment, then drop the collection.
(async function() {
  // declared outside `try` so the `finally` block can still close it
  let client;
  try {
    client = await MongoClient.connect(uri,options);
    const db = client.db('demo');
    const collection = db.collection('websites');
    await collection.insertOne(website);
    let times = 4;
    while (times--) {
      // each update is awaited, so the increments run strictly in sequence
      let doc = await collection.findOneAndUpdate(
        findKey,
        { $inc: { visits: 1 } },
        { returnOriginal: false }
      );
      console.log("Visits: %s", doc.value.visits);
    }
    await collection.drop();
  } catch(e) {
    console.error(e);
    // make the failure visible to the caller; process.exit() below uses it
    process.exitCode = 1;
  } finally {
    // close the connection on both the success and the failure path
    if (client) {
      await client.close();
    }
    process.exit();
  }
})()
Since we actually await each call executed in the while loop, we guarantee that these are actually executed sequentially. We also await everything, so the code is clean and ordered and we can just hang up the database connection when everything is done, without waiting on callbacks to resolve or other methods.
It seems your Mongo instance returns some kind of error, which makes the results parameter undefined. So, check for errors in the line before (which you should do anyway, but maybe with more sophisticated error handling):
collection.find(findKey).toArray(function(err, results) {
  // this is added
  if( err ) {
    console.log( err );
    return;
  }
  // also guard the "no match" case: results is then an empty array and
  // results[0].visits would throw the same TypeError
  if( results.length === 0 ) {
    console.log('No document matched', findKey);
  } else {
    console.log('Visits:', results[0].visits);
  }
  //cleanup
  collection.drop(function() {
    client.close();
  });
});
Instead of
console.log('Visits:', results[0].visits);
Try printing out :
console.log('Visits:', results[0]);
so that from results[0] you can check whether a 'visits' property exists

NodeJS Promise and Async Problems (Firebase)

The problem is with the promises and the async function. "All moved" is supposed to be logged after everything in async.each is done. But nothing is ever logged.
Here is my exports functions:
// Question snippet (body of an exported handler): read every order key under
// firewall_queue/<courier_id>/orders, move each one with
// moveToWaitingFromFirewall, then remove the queue and call pushToPending.
var courier_id = data.ref.parent.key;
return admin.database().ref("firewall_queue/"+courier_id+"/orders").once('value',function(orders){
//console.log(Object.keys(orders.val()));
async.each(Object.keys(orders.val()), function (order, callback) {
// NOTE: for the "none" key this iteratee returns without ever calling
// callback, and async.each only runs its final callback after every
// iteratee has called back.
if(order != "none") {
// NOTE: callback() is invoked right here, and its return value (not the
// callback function itself) is what gets passed to .then()
return moveToWaitingFromFirewall(order).then(callback())
}
},
// final callback of async.each; the err argument is not inspected
function (err) {
console.log("All moved");
return admin.database().ref("/firewall_queue/"+courier_id+"/orders/").remove().then(()=>{
return pushToPending(courier_id,data.ref.key);
})
});
})
Here is my moveToWaitingFromFirewall function:
/**
 * Reads an order's zone and placement time, then writes the time to
 * "<zone>/wait_order_queue/<order_id>".
 *
 * The two reads are issued together and passed straight to Promise.all, so a
 * failed read rejects the returned promise instead of leaving it pending.
 *
 * @param {string} order_id - key of the order under "orders/".
 * @returns {Promise} settles with the result of the final set() call;
 *   rejects if either read or the write fails.
 */
function moveToWaitingFromFirewall(order_id){
  var orderPath = "orders/"+order_id;
  //grab zone and time first
  return Promise.all([
    admin.database().ref(orderPath+"/zone").once('value'),
    admin.database().ref(orderPath+"/time_order_placed").once('value')
  ]).then(function(snapshots){
    var zone = snapshots[0].val();
    var time = snapshots[1].val();
    return admin.database().ref(zone+"/wait_order_queue/"+order_id).set(time);
  });
}
JSON Firebase
"c98" : {
"orders" : {
"0333" : 123123,
"0345" : 12,
"0911" : 123,
"none" : "none"
}
Study this a little bit, and maybe apply to your current code.
Imagine admin.database().ref("orders/"+order_id+"/time_order_placed").once('value') is like delay(time)
// Returns a promise that resolves (with no value) once `time` milliseconds
// have passed; stands in for an asynchronous database read.
let delay = time => new Promise(done => {
  setTimeout(() => done(), time);
});
// Simulates processing one order: three timers run side by side and the
// returned promise resolves once all of them have finished.
let myPromise = function(order){
  // Logs completion and returns undefined, so nothing would be handed to
  // cb(err) even if cb were passed directly; wrapping it as ()=>cb() avoids
  // the question entirely.
  const announce = function(){
    console.log('Order complete: ',order);
  };
  const shortA = delay(500);
  const shortB = delay(500);
  const longest = delay(1000).then(announce);
  return Promise.all([shortA, shortB, longest]);
}
let orders = [1,2,3];

// Iteratee: signal success with a bare cb() and failure with cb(err).
function processOrder(order, cb){
  myPromise(order)
    .then(function(){ cb(); })
    .catch(function(err){ cb(err); });
}

// Final callback: runs once every order has called back, or on the first error.
function reportOutcome(err){
  if(!err){
    console.log('all Finished');
    return;
  }
  console.log('Err',err);
}

async.each(orders, processOrder, reportOutcome);
<script src="https://cdnjs.cloudflare.com/ajax/libs/async/2.6.0/async.js"></script>

Promises with SequelizeJS

I need to use promises with SequelizeJS but I want to make sure that my code is clean...
My code :
// Question snippet: for every item in the request, look up the project, delete
// its photo files and rows, delete its thumbnail and row, then send an empty
// response.
var items = request.body.items,
promises;
promises = _.map(items, function(item) {
return ProjectModel.findById(item.id).then(function(findedProject) {
// NOTE: this findAll chain is not returned, so the promise collected in
// `promises` does not wait for it
ProjectPhotoModel.findAll({ where: { projectId: findedProject.id } }).then(function(findedPhotos) {
// NOTE: the mapping function returns nothing, so photoPromises holds
// undefined entries rather than promises
var photoPromises = _.map(findedPhotos, function(photo) {
fs.unlink('./public/uploads/' + photo.name, function() {
photo.destroy();
});
});
// NOTE: the result of Promise.all is neither returned nor awaited
Promise.all(photoPromises);
});
// callback-style unlink; nothing in the promise chain waits for it or for
// the destroy() call inside it
fs.unlink('./public/uploads/' + findedProject.thumbnail, function() {
findedProject.destroy();
});
});
});
Promise.all(promises).then(function() {
response.send('');
});
How to use nested promises ?

nodejs - test failing but callback being called

I have a module which I export and which has a method editHeroImage which I am trying to test using mocha, chai and sinon. The modules has two objects that are passed as arguments, connection and queries. These are mySql objects, one containing the connection to the database and the other the query strings which are defined in their separate modules. The expObj which I am exporting and trying to test is a "helper" module.
I have successfully tested other methods of this module in the same way I am trying to test this method, but, however when I run into methods which use the async module for some reason, my tests no longer behave as expected. I wonder if I am missing something in this particular case, because I have tested other modules and methods which also use async and have not come across this behaviour.
When I run the tests, it logs "HELLO!" as expected but the assertion that the callbackSpy has been called, fails.
I am losing my mind here! Please help! What is going on? Could there be contamination between test suits?
Method under test:
/**
 * Sets the hero image of a post: updates the existing image record when the
 * post already has one, otherwise inserts a new image and stores its id on
 * the post.
 *
 * @param {Object} connection - object exposing query(queryString, values, cb).
 * @param {Object} queries - object whose methods return the query strings used below.
 * @param {*} postId - id of the post to edit.
 * @param {Object} postData - post fields; only postData.hero_image is read.
 * @param {Function} callback - called with an error, with null on success, or
 *   with no arguments when no post is found.
 */
expObj.editHeroImage = function(connection, queries, postId, postData, callback) {
async.waterfall([
// Step 1: look up the post and pass its current hero_image id on.
function(next) {
var qString = queries.getSinglePostById();
connection.query(qString, [postId], function(err, results) {
if (err) {
return next(err);
}
if (!results.length) {
console.log('NO POST FOUND WITH ID ' + postId);
// NOTE: calls the outer callback directly and never calls next, so the
// final waterfall handler does not run on this path
return callback();
}
next(null, results[0].hero_image);
});
},
// Step 2: if a hero image already exists, delete its resized copies and
// update its path, then stop the waterfall.
function(heroImageId, next) {
if (!heroImageId) {
console.log('HERO IMAGE IS NEW - NEXT TICK!');
// no value is passed on, so the next step receives only `next`
return next();
}
// Delete resized images of hero image
var queryStr = queries.deleteResizedImages();
var resizedVals = [heroImageId];
connection.query(queryStr, resizedVals, function(err) {
if (err) {
// NOTE: reports through the outer callback rather than next(err)
return callback(err);
}
console.log('DELETED RESIZED IMAGES OF HERO IMAGE ' + heroImageId);
var qString = queries.updateHeroImagePath();
var values = [postData.hero_image, heroImageId];
return connection.query(qString, values, function(err, results) {
if (err) {
return next(err);
}
console.log('UPDATED HERO IMAGE ' + heroImageId + ' WITH PATH ' + postData.hero_image);
// 'break' is a sentinel passed in the error position to skip the
// remaining steps; the final handler below treats it as success
next('break');
});
});
},
// Step 3 (new hero image only): insert the image and pass on its insert id.
function addHeroImage(next) {
var qString = queries.insertImage();
var values = [postData.hero_image, postId];
connection.query(qString, values, function(err, results) {
if (err) {
return next(err);
}
next(null, results.insertId);
});
},
// Step 4: store the new image id on the post.
function addHeroImagePathToPost(heroImageId, next) {
var qString = queries.saveHeroImageId();
var values = [heroImageId, postId];
connection.query(qString, values, function(err) {
if (err) {
return next(err);
}
next();
});
}
// Final handler: forwards real errors, maps the 'break' sentinel to success.
], function(err) {
if (err && err !== 'break') {
return callback(err);
}
console.log('HELLO!');
callback(null);
});
};
Test, with set-up:
'use strict';
// Question snippet: mocha/chai/sinon test for the helpers module's
// editHeroImage method, loaded through proxyquire with a stubbed
// notification-service dependency.
var chai = require('chai');
var sinonChai = require("sinon-chai");
var proxyquire = require('proxyquire');
var sinon = require('sinon');
chai.use(sinonChai);
var expect = chai.expect;
describe('HELPERS', function() {
var testedModule,
callbackSpy,
fakeConnectionObj,
fakeQueriesObj,
fakePost,
fakeSnakeCaseObj,
queryStub,
connectionStub,
manageStub,
fakeCamelCaseObj;
// Rebuilds every fake, spy and stub, and reloads the module, before each test.
beforeEach(function() {
fakePost = {};
fakeConnectionObj = {};
// every query builder is a no-op, so the query strings handed to the stub
// are undefined
fakeQueriesObj = {
getPostIdFromImage: function() {},
insertResizedImages: function() {},
createPost: function() {},
getPostImages: function() {},
getPostsAlternativesImages: function() {},
getSinglePostById: function() {},
getAllImages: function() {},
insertImage: function() {},
deleteMainImage: function() {},
deleteResizedImages: function() {},
updateHeroImagePath: function() {},
saveHeroImageId: function() {}
};
// NOTE(review): afterEach is called from inside the beforeEach body, i.e. it
// is invoked again on every beforeEach run rather than once at suite
// definition time; it probably belongs at describe level.
afterEach(function() {
queryStub.resetBehavior();
});
fakeSnakeCaseObj = {
sub_title: '123',
hero_image: '456'
};
fakeCamelCaseObj = {
subTitle: '123',
heroImage: '456'
};
callbackSpy = sinon.spy();
queryStub = sinon.stub();
manageStub = sinon.stub();
connectionStub = {query: queryStub};
testedModule = proxyquire('./../../../../lib/modules/mySql/workers/helpers', {
'./../../../factories/notification-service': {
select: function() {
return {manageSns: manageStub};
}
}
});
});
it('edits hero image', function() {
var _post = {
id: '123',
title: 'vf',
sub_title: 'vf',
slug: 'vf',
reading_time: 4,
created_at: '123',
published_at: '123',
deleted_on: false,
hero_image: 'hero_image_path'
};
var _postId = '123';
// each stubbed query call invokes its third argument (index 2) with the
// listed values
queryStub.onCall(0).callsArgWith(2, null, [{hero_image: '55'}]);
queryStub.onCall(1).callsArgWith(2, null);
queryStub.onCall(2).callsArgWith(2, null);
testedModule.editHeroImage(connectionStub, fakeQueriesObj, _postId, _post, function() {
console.log(arguments); // --> {'0': null} as expected
callbackSpy.apply(null, arguments);
});
// NOTE(review): this assertion runs synchronously, right after editHeroImage
// returns; if the callback above is invoked asynchronously, the spy has not
// been called yet at this point.
expect(callbackSpy).has.been.calledWith(null);
});
});
Your assertion is probably executing before your async function has returned.
There are a number of ways to ensure your async functions have finished executing. The cleanest is to format your mocha test differently.
// Suggested restructuring: run editHeroImage in a before() hook and keep only
// the assertion in the test itself.
describe('...', function () {
// NOTE(review): this declaration is never assigned inside this block; it
// needs a sinon spy (or the outer suite's variable) before the hook calls
// callbackSpy.apply below.
var callbackSpy;
before(function () {
var _post = {
id: '123',
title: 'vf',
sub_title: 'vf',
slug: 'vf',
reading_time: 4,
created_at: '123',
published_at: '123',
deleted_on: false,
hero_image: 'hero_image_path'
};
var _postId = '123';
queryStub.onCall(0).callsArgWith(2, null, [{
hero_image: '55'
}]);
queryStub.onCall(1).callsArgWith(2, null);
queryStub.onCall(2).callsArgWith(2, null);
// NOTE(review): mocha waits for a hook only when it returns a promise or
// accepts a done callback. editHeroImage reports completion through its
// callback - confirm it also returns a promise, otherwise this return does
// not delay the test.
return testedModule.editHeroImage(connectionStub, fakeQueriesObj, _postId, _post, function () {
console.log(arguments); // --> {'0': null} as expected
callbackSpy.apply(null, arguments);
});
});
it('edits hero image', function () {
expect(callbackSpy).has.been.calledWith(null);
});
});
Notice that I have wrapped your assertion in a describe block so we can use before. Your actual logic for setting up stubs and executing the class has been moved to the before block and a return added, this ensures the async function is complete before moving on to your assertions.
Your other tests may have passed, but they will also be susceptible to this and it is purely a timing issue.
Indeed @Varedis was right about it being a timing issue. However, following the suggestion of wrapping the assertion in a describe block and using the before function to set up the test resulted in my stubs no longer working correctly. Taking the point about timing into account, though, I managed to solve the issue by using the done callback within my test suite. Keeping the same set-up, I made a slight change and my tests suddenly passed:
// Asynchronous variant of the test: the assertions live inside the callback
// and mocha is told the test is over through done().
it('edits hero image', function(done) {
  var postId = '123';
  var post = {
    id: '123',
    title: 'vf',
    sub_title: 'vf',
    slug: 'vf',
    reading_time: 4,
    created_at: '123',
    published_at: '123',
    deleted_on: false,
    hero_image: 'hero_image_path'
  };

  // every stubbed query invokes its third argument (index 2) without an error
  queryStub.onCall(0).callsArgWith(2, null, [{hero_image: '55'}]);
  queryStub.onCall(1).callsArgWith(2, null);
  queryStub.onCall(2).callsArgWith(2, null);

  // Record the callback's arguments on the spy, assert on them, then finish.
  function onEdited() {
    callbackSpy.apply(null, arguments);
    expect(callbackSpy).has.been.calledWith(null);
    expect(callbackSpy).has.not.been.calledWith('FDgdjghg');
    done();
  }

  testedModule.editHeroImage(connectionStub, fakeQueriesObj, postId, post, onEdited);
});

web scrapy by nightmare about loop ,output is not same every time

// Question snippet: collect every <option> value from the teams page, then
// visit each corresponding page and print the src of its autoHeight iframe.
var Nightmare = require('nightmare');
var nightmare = Nightmare({ show: true });
var fs = require('fs');
// NOTE: assigned without var/let/const; fs, vo and the `result` variable below
// are not used anywhere else in this snippet
vo = require('vo');
var result;
nightmare
.goto('http://jufa-kyusyu.jp/teams/')
.wait(1000)
.evaluate(function () {
var options = document.querySelectorAll('option'),i;
var values =[]
for (i = 0; i < options.length; ++i) {
values.push(options[i].value)
}
return values;
})
.then(function (values) {
// NOTE: the loop starts one goto/evaluate chain per value on the same
// nightmare instance without waiting for the previous chain to finish
for (var i = 0; i < values.length; i++) {
// a "#" option value is replaced with a fixed team page
if(values[i] == "#") values[i] = "/teams/181.html";
nightmare
.goto("http://www.jufa-kyusyu.jp"+values[i])
.evaluate(function () {
var abc = document.querySelector('iframe[class="autoHeight"]').src.toString()
return abc;
})
.then(function (result) {
console.log(result)
})
.catch(function (error) {
console.error('Search failed:', error);
});}
})
.catch(function (error) {
console.error('Search failed:', error);
});
I want to scrape this web information in a loop with Nightmare.js. I don't know why two of the resulting links are the same, or why the results change on every run. Thank you.
You have to be careful when working with async calls inside a loop with Nightmare
Check this answer and this detailed explanation about the concept.
The main idea can be summarized by this sentence:
Executing the operations in series requires arranging them to execute
in sequential order
The documentation shows how to achieve that using plain, vanilla js and also with vo
Here is a sneak peek on how to solve this loop issue with plain Javascript:
var urls = ['http://example1.com', 'http://example2.com', 'http://example3.com'];

// Visits one url, appends its title to `results`, and resolves with the list.
function collectTitle(results, url) {
  return nightmare.goto(url)
    .wait('body')
    .title()
    .then(function(title){
      results.push(title);
      return results;
    });
}

// Build one promise chain in which each visit starts only after the previous
// one has resolved; the chain is seeded with an empty results array.
var allTitles = urls.reduce(function(chain, url) {
  return chain.then(function(results) {
    return collectTitle(results, url);
  });
}, Promise.resolve([]));

allTitles.then(function(results){
  console.dir(results);
});
Basically what you need to do is queue all your calls in a list and trigger them using Promise.resolve

Resources