page.set('content') doesn't work with dynamic content in phantomjs - node.js

I tried to use phantomjs for screen capturing my page with node-phantom bridge. Here is what I'm trying:
var phantom = require('node-phantom');
phantom.create(function (err, ph) {
return ph.createPage(function (err, page) {
return page.set('content', '<html><head></head><body><p>Hello</p></body></html>', function (err, status) {
return page.render('./content.png', function (err) {
ph.exit();
});
});
});
});
That works fine, but if I try to set content which contains javascript, that doesn't work. Please help me, why does it not work?
EDIT: This doesn't work:
var phantom = require('node-phantom');
phantom.create(function (err, ph) {
return ph.createPage(function (err, page) {
page.open("about:blank", function(err,status) {
page.evaluate(function() {
document.write('<html><head></head><body><script src="http://code.jquery.com/jquery-1.9.1.min.js"></script><script>$(function(){document.write("Hello from jQuery")})</script></body>');
});
setTimeout(function () {
return page.render('./content.png', function (err) {
ph.exit();
});
}, 5000);
});
});

JavaScript code needs some time to execute. Try to have a delay between setting the page content and calling render.

I am not sure why set content does not work, It seems to be a limitation of the phantomjs api. You can just use document.write.
var phantom = require('node-phantom');
phantom.create(function (err, ph) {
return ph.createPage(function (err, page) {
page.open("about:blank", function(err,status) {
page.evaluate(function() {
document.write('<html><body><script>document.write("<h1>Hello From JS</h1>");</script><p>Hello from html</p></body></html>');
});
return page.render('./content.png', function (err) {
ph.exit();
});
});
});
});

as ariya mentioned, time is needed. there is likely an 'onLoadFinished' event for this library (there is for the node lib i use). you can handle this without an arbitrary wait time by seeing my example at the bottom of this github issue: https://github.com/amir20/phantomjs-node/issues/68
Document.prototype.captureScreenshot = function(next) {
console.log(">> Rendering screencap for " + this.id)
var self = this;
phantom.create(function(ph) {
ph.createPage(function(page) {
page.setContent(self.html);
page.set("viewportSize", {
width: 1920,
height: 1080
});
page.set('onLoadFinished', function(success) {
var outputFile = './screenshots/screenshot-' + self.id + '.png';
page.render(outputFile);
ph.exit();
console.log(">> Render complete for " + self.id)
if (next)
next(outputFile);
})
});
});
}

Related

Cannot redirect after insertion

I want to redirect to a url after the insert is success :
$(document).ready(function() {
$("#btn").on("click", function() {
$.post({
url:"/track/admin/affecterMenusProfil",
data:$("#frm").serialize()
});
});
});
router.post("/affecterMenusProfil", function(req, res) {
var profil = req.body.profil, menus = req.body.menus;
connexion.query("delete from "+db.getPrefixNomTables()+"profil_menu where profil_id ="+profil, function (errDel, rowsDel) {
if (errDel)
throw errDel;
async.eachOf(menus, function(menu, position, cb) {
connexion.query("insert into "+db.getPrefixNomTables()+"profil_menu(menu_id, profil_id) values("+menu+", "+profil+")", function (err, rows) {
if (err) throw err;
cb();
});
}, function() {
res.redirect('/track/');
});
});
});
But at runtime there is no redirection : the page of insertion is still displayed ! So what is wrong in my codes ?
Server won't redirect if your request from ajax.
In order to redirect you have to implement redirection logic on client side on success
callback.
$(document).ready(function() {
$("#btn").on("click", function() {
$.post({
url:"/track/admin/affecterMenusProfil",
data:$("#frm").serialize(),
success: function(response) {
window.location.href = response.url;
}
});
});
});
router.post("/affecterMenusProfil", function(req, res) {
var profil = req.body.profil, menus = req.body.menus;
connexion.query("delete from "+db.getPrefixNomTables()+"profil_menu where profil_id ="+profil, function (errDel, rowsDel) {
if (errDel)
throw errDel;
async.eachOf(menus, function(menu, position, cb) {
connexion.query("insert into "+db.getPrefixNomTables()+"profil_menu(menu_id, profil_id) values("+menu+", "+profil+")", function (err, rows) {
if (err) throw err;
cb();
});
}, function() {
res.send({url: '/track/'});
});
});
});
You have to send URL from Server and redirect that URL from the client side as above.
I hope it will help you.

Unable to run a function synchronously in nodejs and express

I have used wikipedia-js for this project. This is my code for summary.js file.
var wikipedia = require("wikipedia-js");
var something = "initial";
module.exports = {
wikitext: function(topicname) {
console.log("Inside wikitex funciton :" + topicname);
var options = {
query: topicname,
format: "html",
summaryOnly: false,
lang: "en"
};
wikipedia.searchArticle(options, function(err, htmlWikiText) {
console.log("Inside seararticlefunciton :");
if (err) {
console.log("An error occurred[query=%s, error=%s]", topicname, err);
return;
}
console.log("Query successful[query=%s, html-formatted-wiki-text=%s]", topicname, htmlWikiText);
something = htmlWikiText;
});
return something;
},
};
This module I am using in /wiki/:topicname route. The corresponding code in index.js is like this.
router.get('/wiki/:topicname', function(req, res, next) {
var topicname = req.params.topicname;
console.log(topicname);
var first = summary.wikitext(topicname);
res.send("Hello "+first);
});
The problem is, everytime i visit a wiki/some-topic, the last return statement of summary.js executes before htmlWikiText is populated with content. So I always see hello initial on the browser page. Although after sometime it gets printed on terminal due to console.log statement.
So how should I resolve this issue?
I'm not going to try turning this code into synchronous. I'll just correct it to work as an asynchronous version.
You need to pass in callback to wikitext() and return the value in that callback. Here is the revised code of wikitext() and the route that calls it:
var wikipedia = require("wikipedia-js");
module.exports = {
wikitext: function(topicname, callback) {
console.log("Inside wikitex funciton :" + topicname);
var options = {
query: topicname,
format: "html",
summaryOnly: false,
lang: "en"
};
wikipedia.searchArticle(options, function(err, htmlWikiText) {
console.log("Inside seararticlefunciton :");
if (err) {
console.log("An error occurred[query=%s, error=%s]", topicname, err);
return callback(err);
}
console.log("Query successful[query=%s, html-formatted-wiki-text=%s]", topicname, htmlWikiText);
callback(null, htmlWikiText);
});
}
};
router.get('/wiki/:topicname', function(req, res, next) {
var topicname = req.params.topicname;
console.log(topicname);
summary.wikitext(topicname, function(err, result) {
if (err) {
return res.send(err);
}
if (!result) {
return res.send('No article found');
}
res.send("Hello "+result);
});
});

Node async.series trouble

While building a fairly complex scraper i stumbled upon a problem with a control flow of my code.
What's going on in code below:
1) request a URL
2) scrape NEWURL from the results
3) pass it to readability API as first async function
4) here comes the trouble — i never get the next async function which saves readabilityData to DB
How to solve this problem?
I'm new to JS, so please feel free to point out at any issues with my code.
request(URL, function(error, response, html) {
if (!error) {
var $ = cheerio.load(html);
NEWURL = data.find('a').attr('href');
readabilityData = {}
var articleUrl = 'https://readability.com/api/content/v1/parser?url=' + NEWURL + token;
async.series([
function(){
request(articleUrl, function(error, response, html) {
if (!error) {
readabilityData = response.toJSON();
}
});
},
function(readabilityData){
Article.findOne({
"link": url // here's the
}, function(err, link){
if(link) {
console.log(link)
} else {
var newArticle = new Article({
// write stuff to DB
});
newArticle.save(function (err, data) {
// save it
});
}
});
}
],
function(err){
console.log('all good — data written')
});
});
}
});
You need to call the callback parameter that's passed into the functions of the async.series call when each function's work is complete. That's how async.series knows that it can proceed to the next function. And don't redefine readabilityData as a function parameter when you're trying to use it to share data across the functions.
So something like:
var readabilityData = {};
async.series([
function(callback){
request(articleUrl, function(error, response, html) {
if (!error) {
readabilityData = response.toJSON();
}
callback(error);
});
},
function(callback){
Article.findOne({
"link": url // here's the
}, function(err, link){
if(link) {
console.log(link);
callback();
} else {
var newArticle = new Article({
// write stuff to DB
});
newArticle.save(function (err, data) {
// save it
callback(err);
});
}
});
}
],
function(err){
console.log('all good — data written')
});

Returning wrong URL

I am using phantomejs-node for facebook login . Here is my nodejs code :
var phantom = require('phantom');
phantom.create(function(ph) {
ph.createPage(function(page) {
page.open("https://facebook.com", function(status) {
setTimeout(function () {
page.evaluate((function(URL) {
document.getElementById("email").value = "wrong username";
document.getElementById("pass").value = "wrong password";
document.getElementById("u_0_1").click();
return document.URL;
}), function(result) {
console.log('Page url is ' + result);
ph.exit();
}, 5000);
});
});
//page.render("page2.png");
});
});
Instead of returning https://www.facebook.com/login.php?login_attempt=1 , its returning https://www.facebook.com/ . By the way here is Phantomjs code that I am following :
var page = require('webpage').create();
page.open("http://www.facebook.com/login.php", function(status) {
if (status === "success") {
page.evaluate(function() {
document.getElementById("email").value = "#gmail.com";
document.getElementById("pass").value = "";
document.getElementById("u_0_1").click();
});
window.setTimeout(function() {
var url = page.evaluate(
function () {
return document.URL;
}
);
console.log( "- current url is " + url );
page.render("page.png");
phantom.exit();
}, 5000);
}
});
Try this code:
var phantom = require('phantom');
phantom.create(function(ph) {
ph.createPage(function(page) {
page.open("https://facebook.com", function(status) {
page.evaluate((function() {
document.getElementById("email").value = "#gmail.com";
document.getElementById("pass").value = "password";
document.getElementById("login_form").submit();
return;
}), function() {
console.log("loaded");
setTimeout(function(){
page.evaluate(function () {
return document.URL;
},function(result){
page.render("page2.png",function(){
console.log("done rendering");
});
console.log("Page url is "+result);
ph.exit();
});
},6000)
});
});
});
});
Hope this is helpfull :)
If you're tired of callback hell you could also give phridge a try. I've written this bridge because I didn't want to wrap all assignments and function calls with callbacks. It stringifies the given function and runs it inside PhantomJS.
A-0-'s solution would look like:
var page;
// creates a new PhantomJS process
phridge.spawn()
.then(function (phantom) {
return phantom.openPage("https://facebook.com");
})
.then(function (p) {
page = p;
return page.run(function (resolve) {
// this function runs inside PhantomJS
var page = this;
page.evaluate(function () {
document.getElementById("email").value = "#gmail.com";
document.getElementById("pass").value = "password";
document.getElementById("login_form").submit();
});
setTimeout(function () {
page.render("page2.png");
resolve();
}, 6000);
});
})
.then(function () {
// page2.png rendered
});

Node js + PhantomJs: send data to page.evaluate

How can I send variable from server to page.evaluate ?
var test = 'Lorem Ipsum';
phantom = require('phantom')
phantom.create(function(ph){
ph.createPage(function(page) {
page.open("http://www.google.com", function(status) {
page.evaluate(function(){
$('body').html(test);
});
page.render('google.pdf', function(){
console.log('Page Rendered');
ph.exit();
});
});
});
});
Thank you in advance for your help.
Edit1
Now it look like
var message = function(){
return {message: 'Hello Word'};
};
phantom = require('phantom')
phantom.create(function(ph){
ph.createPage(function(page) {
page.open("http://www.google.com", function(status) {
page.evaluate(function(content){
$('body').html(content);
}, message);
page.render('google.pdf', function(){
console.log('Page Rendered');
ph.exit();
});
});
});
});
Now I haven't any error, but I don't know how can I handle this object to use it in page.evaluate
try with
page.evaluate(function (...) {...}, function (err, data){...}, arg1, arg2, ...);
example:
var message = 'hello world';
page.evaluate(function(content){
$('body').html(content);
return 'any data'
}, function (err, anydata) {}, message);
Adding jquery to page
page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js', function(err) {
//jQuery Loaded.
//Wait for a bit if site have AJAX
setTimeout(function() {
return page.evaluate(function() {
// USE JQUERY HERE
//
//
return
}, function(err, result) {
console.log(result);
ph.exit();
});
}, 3000);
});
see readme at:
https://github.com/alexscheelmeyer/node-phantom

Resources