Returning wrong URL - node.js

I am using phantomejs-node for facebook login . Here is my nodejs code :
var phantom = require('phantom');
phantom.create(function(ph) {
ph.createPage(function(page) {
page.open("https://facebook.com", function(status) {
setTimeout(function () {
page.evaluate((function(URL) {
document.getElementById("email").value = "wrong username";
document.getElementById("pass").value = "wrong password";
document.getElementById("u_0_1").click();
return document.URL;
}), function(result) {
console.log('Page url is ' + result);
ph.exit();
}, 5000);
});
});
//page.render("page2.png");
});
});
Instead of returning https://www.facebook.com/login.php?login_attempt=1 , its returning https://www.facebook.com/ . By the way here is Phantomjs code that I am following :
var page = require('webpage').create();
page.open("http://www.facebook.com/login.php", function(status) {
if (status === "success") {
page.evaluate(function() {
document.getElementById("email").value = "#gmail.com";
document.getElementById("pass").value = "";
document.getElementById("u_0_1").click();
});
window.setTimeout(function() {
var url = page.evaluate(
function () {
return document.URL;
}
);
console.log( "- current url is " + url );
page.render("page.png");
phantom.exit();
}, 5000);
}
});

Try this code:
var phantom = require('phantom');
phantom.create(function(ph) {
ph.createPage(function(page) {
page.open("https://facebook.com", function(status) {
page.evaluate((function() {
document.getElementById("email").value = "#gmail.com";
document.getElementById("pass").value = "password";
document.getElementById("login_form").submit();
return;
}), function() {
console.log("loaded");
setTimeout(function(){
page.evaluate(function () {
return document.URL;
},function(result){
page.render("page2.png",function(){
console.log("done rendering");
});
console.log("Page url is "+result);
ph.exit();
});
},6000)
});
});
});
});
Hope this is helpfull :)

If you're tired of callback hell you could also give phridge a try. I've written this bridge because I didn't want to wrap all assignments and function calls with callbacks. It stringifies the given function and runs it inside PhantomJS.
A-0-'s solution would look like:
var page;
// creates a new PhantomJS process
phridge.spawn()
.then(function (phantom) {
return phantom.openPage("https://facebook.com");
})
.then(function (p) {
page = p;
return page.run(function (resolve) {
// this function runs inside PhantomJS
var page = this;
page.evaluate(function () {
document.getElementById("email").value = "#gmail.com";
document.getElementById("pass").value = "password";
document.getElementById("login_form").submit();
});
setTimeout(function () {
page.render("page2.png");
resolve();
}, 6000);
});
})
.then(function () {
// page2.png rendered
});

Related

Test using Nock in a scheduleJob

I'm getting a problem with test the http request in a scheduleJob node. Well, I've changed the crontime to run every second as suggested here for easier testing. The problem is, I don't think the Nock that I created to test the http request works fine. Because when I do the console.log('res : ', res); it either returns undefined or does not print anything as shown here : enter image description here.
Does anyone have any idea how to solve this?
Index.js
schedule = require('node-schedule'),
var dateObj = Date.now();
dateObj += 1000;
schedule.scheduleJob(new Date(dateObj), function(){
console.log("get's here..");
const url_req = "https://git.ecommchannels.com/api/v4/users";
request.get(
{
url_req,
qs : {private_token: token, per_page: 10}
}, function(err, res, body){
console.log('res : ', res);
var total = res.headers['x-total-pages']
console.log('res.headers', res.headers);
for( i=1; i<=total; i++ ){
url_req2 = "https://git.ecommchannels.com/api/v4/users";
request.get({
url_req2,
qs: {private_token : token, per_page : 10, page : i},
json : true
},
(err2, res2, body2) => {
body2.forEach(element => {
if(condition){
//do something
}
})
})
}
})
});
The unit test:
describe('test the http request', function(){
var clock;
const nockingGit = () =>{
nock('https://git.ecommchannels.com')
.defaultReplyHeaders({
'x-total-pages': 10
})
.get('/api/v4/users')
.query({private_token: 'blabla', per_page: 10})
.reply(201, {});
}
beforeEach(function () {
clock = sinon.useFakeTimers();
});
afterEach(function () {
clock.restore();
});
it('test the http request', (done)=>{
nockingGit();
setTimeout(function(){
done();
},3000)
clock.tick(3000);
})
})
edited
index.js
schedule.scheduleJob(new Date(dateObj), function(){
var now = new Date();
flag = true;
console.log(now.toLocaleString('en-US', { timeZone: 'Asia/Jakarta'}));
console.log("get's here..");
gitReq();
});
function gitReq(){
const url_req = "https://git.ecommchannels.com/api/v4/users";
request.get(
{
url_req,
qs : {private_token: token, per_page: 10}
}, function(err, res, body){
console.log('res : ', res);
var total = res.headers['x-total-pages']
// console.log('res.headers', res.headers);
for( i=1; i<=total; i++ ){
url_req2 = "https://git.ecommchannels.com/api/v4/users";
request.get({
url_req2,
qs: {private_token : token, per_page : 10, page : i},
json : true
},
// "https://git.ecommchannels.com/api/v4/users?private_token=" + token + "&per_page=10&page=" + i, { json: true },
(err2, res2, body2) => {
body2.forEach(element => {
if(condition){
//do something
}
})
})
}
})
}
Unit Test
*check whether the gitReq() is called - using sinon.spy (but I'm not sure whether the implementation is correct)
const sendReq = {
gitReq: ()=>{}}
describe('test the schedule job', function(){
var clock;
beforeEach(function () {
clock = sinon.useFakeTimers();
});
afterEach(function () {
clock.restore();
});
it('should call the gitReq once', (done)=>{
let gitReq = sinon.spy(sendReq,"gitReq");
sendReq.gitReq();
console.log("callcount : ", gitReq.callCount);
gitReq.restore();
setTimeout(function(){
expect(gitReq.calledOnce).to.be.true;
done();
},3000)
clock.tick(3000);
})})
*test the http request using nock that still returns undefined for the res.
describe('test the http request', function(){
const nockingGit = () =>{
nock('https://git.ecommchannels.com')
.defaultReplyHeaders({
'x-total-pages': 10
})
.get('/api/v4/users')
.query({private_token: 'UKz2cZP9CHz2DT_o2-s9', per_page: 10})
.reply(304, {});
}
it('test the http request', function(){
nockingGit();
_import.gitReq()
})
})
*here's the test result

PhantomJS includeJS not working

I'm trying to use PhantomJS in order to get data from a website like this:
app.get('/scrape', function(req, res) {
var _ph, _page;
phantom.create()
.then(function (ph) {
_ph = ph;
return ph.createPage();
})
.then(function (page) {
_page = page;
var url = "https://banc.nl/";
return page.open(url);
})
.then(function(page) {
page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js', function() {
page.evaluate(function() {
$('.sc-dnqmqq kaMmif').filter(function () {
var data = $(this);
price = data.children().first().text();
console.log("Price: " + price);
});
});
});
})
.catch(function(err) {
console.log("Error: " , err);
_page.close();
//_ph.exit();
});
});
The problem is that I get the following error: Error: TypeError: page.includeJs is not a function
If I uncomment the ph.exit() I will get a warning also: warn: exit() was called before waiting for commands to finish. Make sure you are not calling exit() prematurely.
I found several questions about this in SO but non of the answers solved my problem.
The latest version of phantomjs-node module uses Promises so we can rewrite scripts using async functions with await operator — much more readable:
var phantom = require('phantom');
var url = 'https://www.huobipro.com';
(async function(req, res) {
const instance = await phantom.create();
const page = await instance.createPage();
await page.on('onConsoleMessage', function(msg) {
console.info(msg);
});
await page.on('onError', function(msg) {
console.info(msg);
});
const status = await page.open(url);
await console.log('STATUS:', status);
// Wait a bit for javascript to load and run
await new Promise(resolve => setTimeout(resolve, 3000))
await page.includeJs('https://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js');
await page.evaluate(function() {
$('span[price]').filter(function () {
var data = $(this);
console.log("Price: " + data.text());
});
});
await instance.exit();
})();

how to get readStream with thunk in koa ?

I want to pipe a request in a koa controller, It's work:
var s = require('through2')();
var req = http.request(url, function(res) {
res.pipe(s);
})
req.end(null);
s.on('close', function() {
console.log('has close');
});
this.body = s;
But with thunk , it seems to be not work.
var s = stream();
yield thunk(url, s);
this.body = s;
Here is the thunk:
var thunk = function (url, s) {
return function(callback) {
var req = http.request(url, function(res) {
res.pipe(s);
});
s.on('close', function() {
callback(null, null);
console.log('req inner close');
});
req.end(null);
}
}
Use a promise for this (return a promise, not a thunk). Off the top of my head, so you may need to play around with it:
function run(url, s) {
return new Promise(function(resolve, reject) {
var req = http.request(url, function(res) {
res.pipe(s);
res.on('end', function() {
req.end();
});
});
req.on('error', function(err) {
return reject(err);
});
s.on('close', function() {
console.log('req inner close');
return resolve();
});
});
}
Then:
yield run(url, s);

Node js + PhantomJs: send data to page.evaluate

How can I send variable from server to page.evaluate ?
var test = 'Lorem Ipsum';
phantom = require('phantom')
phantom.create(function(ph){
ph.createPage(function(page) {
page.open("http://www.google.com", function(status) {
page.evaluate(function(){
$('body').html(test);
});
page.render('google.pdf', function(){
console.log('Page Rendered');
ph.exit();
});
});
});
});
Thank you in advance for your help.
Edit1
Now it look like
var message = function(){
return {message: 'Hello Word'};
};
phantom = require('phantom')
phantom.create(function(ph){
ph.createPage(function(page) {
page.open("http://www.google.com", function(status) {
page.evaluate(function(content){
$('body').html(content);
}, message);
page.render('google.pdf', function(){
console.log('Page Rendered');
ph.exit();
});
});
});
});
Now I haven't any error, but I don't know how can I handle this object to use it in page.evaluate
try with
page.evaluate(function (...) {...}, function (err, data){...}, arg1, arg2, ...);
example:
var message = 'hello world';
page.evaluate(function(content){
$('body').html(content);
return 'any data'
}, function (err, anydata) {}, message);
Adding jquery to page
page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js', function(err) {
//jQuery Loaded.
//Wait for a bit if site have AJAX
setTimeout(function() {
return page.evaluate(function() {
// USE JQUERY HERE
//
//
return
}, function(err, result) {
console.log(result);
ph.exit();
});
}, 3000);
});
see readme at:
https://github.com/alexscheelmeyer/node-phantom

page.set('content') doesn't work with dynamic content in phantomjs

I tried to use phantomjs for screen capturing my page with node-phantom bridge. Here is what I'm trying:
var phantom = require('node-phantom');
phantom.create(function (err, ph) {
return ph.createPage(function (err, page) {
return page.set('content', '<html><head></head><body><p>Hello</p></body></html>', function (err, status) {
return page.render('./content.png', function (err) {
ph.exit();
});
});
});
});
That works fine, but if I try to set content which contains javascript, that doesn't work. Please help me, why does it not work?
EDIT: This doesn't work:
var phantom = require('node-phantom');
phantom.create(function (err, ph) {
return ph.createPage(function (err, page) {
page.open("about:blank", function(err,status) {
page.evaluate(function() {
document.write('<html><head></head><body><script src="http://code.jquery.com/jquery-1.9.1.min.js"></script><script>$(function(){document.write("Hello from jQuery")})</script></body>');
});
setTimeout(function () {
return page.render('./content.png', function (err) {
ph.exit();
});
}, 5000);
});
});
JavaScript code needs some time to execute. Try to have a delay between setting the page content and calling render.
I am not sure why set content does not work, It seems to be a limitation of the phantomjs api. You can just use document.write.
var phantom = require('node-phantom');
phantom.create(function (err, ph) {
return ph.createPage(function (err, page) {
page.open("about:blank", function(err,status) {
page.evaluate(function() {
document.write('<html><body><script>document.write("<h1>Hello From JS</h1>");</script><p>Hello from html</p></body></html>');
});
return page.render('./content.png', function (err) {
ph.exit();
});
});
});
});
as ariya mentioned, time is needed. there is likely an 'onLoadFinished' event for this library (there is for the node lib i use). you can handle this without an arbitrary wait time by seeing my example at the bottom of this github issue: https://github.com/amir20/phantomjs-node/issues/68
Document.prototype.captureScreenshot = function(next) {
console.log(">> Rendering screencap for " + this.id)
var self = this;
phantom.create(function(ph) {
ph.createPage(function(page) {
page.setContent(self.html);
page.set("viewportSize", {
width: 1920,
height: 1080
});
page.set('onLoadFinished', function(success) {
var outputFile = './screenshots/screenshot-' + self.id + '.png';
page.render(outputFile);
ph.exit();
console.log(">> Render complete for " + self.id)
if (next)
next(outputFile);
})
});
});
}

Resources