How to Use CasperJS in node.js? - node.js

I would like to use CasperJS in node.js.
I have referred to the following URL's to use CasperJS in node.js:
https://github.com/sgentle/phantomjs-node
http://casperjs.org/index.html#faq-executable
With the help of the above URLs I have written the following code:
//DISPLAY=:0 node test2.js
var phantom = require('phantom');
console.log('Hello, world!');
phantom.create(function (ph) {
ph.casperPath = '/opt/libs/casperjs'
ph.injectJs('/opt/libs/casperjs/bin/bootstrap.js');
var casper = require('casper').create();
casper.start('http://google.fr/');
casper.thenEvaluate(function (term) {
document.querySelector('input[name="q"]').setAttribute('value', term);
document.querySelector('form[name="f"]').submit();
}, {
term: 'CasperJS'
});
casper.then(function () {
// Click on 1st result link
this.click('h3.r a');
});
casper.then(function () {
console.log('clicked ok, new location is ' + this.getCurrentUrl());
});
casper.run();
});
When I run this code, I got the following error:
ERROR MSG:
tz#tz-ubuntu:/opt/workspaces/TestPhantomjs$ DISPLAY=:0 node test2.js
Hello, world!
Error: Cannot find module 'casper'
at Function._resolveFilename (module.js:332:11)
at Function._load (module.js:279:25)
at Module.require (module.js:354:17)
at require (module.js:370:17)
at /opt/workspaces/TestPhantomjs/test2.js:6:14
at Object.<anonymous> (/opt/workspaces/TestPhantomjs/node_modules/phantom/phantom.js:82:43)
at EventEmitter.<anonymous> (/opt/workspaces/TestPhantomjs/node_modules/phantom/node_modules/dnode/index.js:215:30)
at EventEmitter.emit (events.js:67:17)
at handleMethods (/opt/workspaces/TestPhantomjs/node_modules/phantom/node_modules/dnode-protocol/index.js:138:14)
at EventEmitter.handle (/opt/workspaces/TestPhantomjs/node_modules/phantom/node_modules/dnode-protocol/index.js:98:13)
phantom stdout: Unable to load casper environment: Error: Failed to resolve module fs, tried fs

You can use SpookyJS to drive CasperJS from Node.

https://groups.google.com/group/casperjs/browse_thread/thread/641e9e6dff50fb0a/e67aaef5ab4ec918?hl=zh-CN#e67aaef5ab4ec918
Nicolas Perriault
2012/2/27 天猪 蓝虫. :
I wan to use casperjs in nodejs.
and refs to:
https://github.com/sgentle/phantomjs-node and
http://casperjs.org/index.html#faq-executable
You can't run CasperJS that way; QtWebKit and V8 don't share the same
js environment (and event loop), so your node.js app won't be able to
load and use a CasperJS module. You have to run your CasperJS script
separately using a subprocess call, like this one on github. I
don't plan to make CasperJS compatible with phantomjs-node because it
uses alert()-based dirty hacks I'm not easy with.
Cheers,
-- Nicolas Perriault

CasperJS includes a web server to talk to the outside world. Node (using request, superagent etc) can now talk to casper over HTTP.
In scraper.js:
#!/usr/bin/env casperjs
// I AM NOT NODEJS
// I AM CASPER JS
// I RUN IN QTWEBKIT, NOT V8
var casper = require('casper').create();
var server = require('webserver').create();
var ipAndPort = '127.0.0.1:8585';
server.listen(ipAndPort, function(request, response) {
casper.start('https://connect.data.com/login');
casper.userAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36");
casper.then(function(){
// lots of code here, and a few more cassper.then()s
});
casper.run(function(){
console.log('\n\nFinished')
response.statusCode = 200;
var body = JSON.stringify({
phoneNumber: '1800-YOLO-SWAG'
})
response.write(body);
response.close();
});
});
You can now run scraper.js as a web server:
chmod +x scraper.js
./scraper.js
You should run it as a Linux service just like you would for a node app.

One solution (which worked for me) is to start and stop your server on a per-test basis. For example, I have a runtests.coffee which looks like:
http = require 'http'
glob = require 'glob'
spawn = require('child_process').spawn
db = require './db' # Contains all database stuff.
webapp = require './webapp' # Contains all of the Express stuff.
db.connect 'test' # Connects to the db server and creates an empty test db.
server = http.createServer webapp.makeApp()
server.listen 0, ->
port = server.address().port
process.env.URL = "http://localhost:#{ port }"
glob 'tests/*', (err, filenames) ->
child = spawn 'casperjs', ['test'].concat(filenames)
child.stdout.on 'data', (msg) -> process.stdout.write msg
child.stderr.on 'data', (msg) -> process.stderr.write msg
child.on 'exit', (code) ->
db.disconnect() # Drops the test db.
server.close()
process.exit code
And my CasperJS tests in tests/ look like:
URL = require('system').env.URL # Note, Casper code here, not Node.
casper.test.begin 'Test something', 1, (test) ->
casper.start "#{ URL }/welcome"
casper.then ->
test.assertHttpStatus 200
# ....
casper.run ->
test.done()

It basically means that your script can't find Casper; have you checked the path and made sure that
/opt/libs/casperjs
and:
/opt/libs/casperjs/bin/bootstrap.js
Are accessible by a website user ? considering the location it's probably not likely.
/opt is a unix path, but the website will be looking in {websiterootpath}/opt.
I'd create a subfolder 'casperjs' in the root folder of your website and copy the contents of
/opt/libs/casperjs
To there.
Then change your paths from
/opt/libs/casperjs
To
/casperjs

I tried to run casper by node cron job too,
here's my solution
in casper.js echo your response:
casper.then(function() {
var comments = this.evaluate(getComments);
this.echo(JSON.stringify(comments));
})
use node-cmd in node file casper_wrapper.js:
var cmd = require('node-cmd');
module.exports = function(url) {
return new Promise(function(resolve, reject) {
cmd.get(
'casperjs casper.js ' + url, // casper takes args to run the script
function(err, data, stderr){
if (err) {
reject(err);
return;
}
var obj = JSON.parse(data);
resolve(obj);
}
);
});
}

Related

Node PhantomJS script onResourceError path issue

Having some trouble using the webpage API in a phantomJS script I'm using for load testing.
I'm running the script in a child process, like so:
var path = require('path');
var childProcess = require('child_process');
var binPath = require('phantomjs').path;
var childArgs = [
path.join(__dirname, 'phantom-script.js')
];
var spawn = childProcess.spawn;
var child = spawn(binPath, childArgs);
child.stdout.on('data', function(data) {
const buf = Buffer.from(data);
console.log('stdout:', buf.toString());
});
child.stderr.on('data', function(data) {
const buf = Buffer.from(data);
console.log('stderr:', buf.toString());
});
And my simple phantomJS script:
var webPage = require('webpage');
var page = webPage.create();
page.onConsoleMessage = function (msg) {
console.log(msg);
};
page.onResourceError = function(resourceError) {
console.log(resourceError.errorCode + ':', resourceError.errorString);
};
function runScript() {
page.open('<webpage-url>', function(status) {
console.log('Status:', status);
if (status === 'success') {
page.evaluate(function() {
console.log('Title:', document.title);
});
}
});
}
runScript();
So to start the phantomJS script, if both of these files are in the test/ directory, and my current directory is up one from that: node test/child-process.js, which then spawns the child process and runs my phantomJS script.
So, this gets the script to run, but it always fails in page.open because of a resource error. Replacing my url with Google's, or really any website, works fine.
The error logged in onResourceError is stdout: 202: Cannot open file:///Users/<user>/path/to/local/current/directory: Path is a directory.
This is always the path from which I'm running this script. If I move down a directory into test/ and run it with node child-process.js, the error instead logs that directory.
As a headless browser, I assumed phantomJS would interface with a webpage like any client would, just without rendering the template--what does the current directory from which the script was run have anything to do with opening the webpage? Why would it be trying to load resources from my local directory when the webpage URL points to a public website, hosted at the IP and PORT specified in the first argument of page.open (e.g. xx.xxx.xx.xx:PORT)?
I'm at a bit of a loss here. The phantomJS path and all that is correct, since it runs the script fine. I just don't understand why page.open would attempt to open the directory from which the script was called--what does that have to do with its function, which is to open the URL and load it to the page?
Not sure if this is even worthy of answering--as opposed to just deleting.
I figured it out when I manually typed in the argument www.google.com, instead of copy/pasting from the browser, and and I got this as the path in the error: file:///Users/<user>/path/to/local/current/directory/www.google.com.
Now I know why I couldn't find a SO question for it. A stupid error on my part at any rate, it would've been a quick debug if the error had appended the IP address and PORT (my "url") to the end of the file path like it did for www.google.com, a clear indicator that it's not pinging a URL.
TL;DR: It's a URL, you need http(s)://...

Locomotive.js throws error upon calling "locomotive.boot"

I'm trying to write tests on my locomotive.js application, literally copy/pasting code from some examples on the internet. Even so, whenever I run my tests, I get an error saying
TypeError: string is not a function
When I check the number of arguments expected by locomotive.boot (using locomotive.boot.length), it says 2... But in every single example online (go ahead, google it) the documentation seems to say 3. Does anyone know what I'm doing wrong?
Here's my code:
var locomotive = require('locomotive'),
should = require('should'),
request = require('supertest');
var app, server;
describe('Application', function() {
before(function(done) {
locomotive.boot( __dirname+"/..", "test", function(err, express) {
if (err) throw err;
app = this;
express.listen(4000, '0.0.0.0', function() {
var addr = this.address();
console.log('Server started. [Env: '+SOPS.conf.get('app:environment')+'] [Addr: '+addr.address+'] [Port: '+addr.port+']');
done();
});
server = express;
});
});
it('should have started the app', function(){
should.exist(app);
should.exist(express);
});
});
There are 2 branches on the LocomotiveJS repo:
- 0.3.x (https://github.com/jaredhanson/locomotive/tree/0.3.x)
- master (https://github.com/jaredhanson/locomotive/tree/master)
If you're using version 0.3.x your code should work, the function declaration actually shows 4 arguments: dir, env, options, callback
you can have a look at the function definition here (Locomotive.prototype.boot): 0.3.x/lib/locomotive/index.js
As of version 0.4.x (branch master) the boot function only accepts 2 arguments: env, callback
the function definition for this branch is here (Application.prototype.boot): master/lib/application.js
so your code should look something like:
locomotive.boot( "test", *yourcallback* );
Hope this helps.

Node.js Beginner - Can't get past nodetuts tutorial number 2 - stdout and child process

I was trying to go through the tutorial nodetuts.com - tutorial 2 and unfortunatly could not get the example working, I'm very new to node.js and going through any tutorials I can get hold of. I understand that node.js is still beta and I figured that the code that makes this work is now obsolete. (This is the code):
var http = require('http');
var spawn = require('child_process').spawn;
http.createServer(function(request, response){
response.writeHead(200, {
'Content-Type' : 'text/plain'
});
var tail_child = spawn('tail', ['-f', 'test.txt']);
tail_child.stdout.on('data', function(data){
console.log(data.toString());
response.write(data);
});
}).listen(4000);
Anyway, determined to continue on I have been looking through the documentation on the node website and found this: http://nodejs.org/api/all.html#all_child_pid This is not exactly what I want (I want to complete that tutorial linked to up top) but I wanted to get something todo with child process working and incorperated that code into this:
var http = require('http');
var server = http.createServer(function(res, req){
res.writeHead(200);
res.end('testing');
var spawn = require('child_process').spawn,
grep = spawn('grep', ['ssh']);
console.log('Spawned child pid: ' + grep.pid);
grep.stdin.end();
}).listen(4000);
Unfortunately when I refresh the page http://localhost:4000/ I get nothing and command prompt spits out: (I know it says writeHead is a problem, but it works fine in other examples - (like nodetuts - tutorial 1))
res.writeHead(200);
^
TypeError: Object #<IncomingMessage> has no method 'writeHead'
at Server.<anonymous> (Z:\Joseph Goss Folder\Google Drive\Code\javascript_first\nodejs_first\stdoutTest.js:20:6)
at Server.EventEmitter.emit (events.js:91:17)
at HTTPParser.parser.onIncoming (http.js:1785:12)
at HTTPParser.parserOnHeadersComplete [as onHeadersComplete] (http.js:111:23)
at Socket.socket.ondata (http.js:1682:22)
at TCP.onread (net.js:404:27)
I wonder why I can't get this working, I'm obviously missing something but I don't know what, and I can't even get past tutorial number 2. :(
I'm running windows 7
I have also looked at this code Why does response.write appear to block my browser in Node.js? and his code doesn't work at all either.
You interchanged req and res in the function passed to createServer
var http = require('http');
var server = http.createServer(function(req, res){
res.writeHead(200);
res.end('testing');
var spawn = require('child_process').spawn,
grep = spawn('grep', ['ssh']);
console.log('Spawned child pid: ' + grep.pid);
grep.stdin.end();
}).listen(4000);

now.js : "Object has no method" error message when trying to start server.js

I installed node.js and now.js successfully.
For now.js, this is how I did:
npm install now -g
npm install now (had to add this one. Without it, I get a "Cannot find now..." error message)
When I start the node server and provide a server.js file like this:
var httpServer = require('http');
httpServer.createServer(function (req, res) {
res.writeHead(200, {'Content-Type': 'text/html'});
res.write('Node is ok');
res.end();
}).listen(8080);
console.log('Server runs on http://xxxxx:8080/');
Everything is fine.
Now, I'm trying to add to this file a basic use of now.js:
var nowjs = require("now");
var everyone = nowjs.initialize(httpServer);
everyone.now.logStuff = function(msg){
console.log(msg);
}
I create an index.html file in the same folder (for testing purposes)
<script type="text/javascript" src="nowjs/now.js"></script>
<script type="text/javascript">
now.ready(function(){
now.logStuff("Now is ok");
});
</script>
This time, this is what I get on the terminal when starting the server:
Server runs on http://xxxxx:8080/
[TypeError: Object #<Object> has no method 'listeners']
TypeError: Object #<Object> has no method 'listeners'
at Object.wrapServer (/home/xxxx/node_modules/now/lib/fileServer.js:23:29)
at [object Object].initialize (/home/xxxx/node_modules/now/lib/now.js:181:14)
at Object.<anonymous> (/home/xxxx/server.js:10:22)
at Module._compile (module.js:444:26)
at Object..js (module.js:462:10)
at Module.load (module.js:351:32)
at Function._load (module.js:309:12)
at module.js:482:10
at EventEmitter._tickCallback (node.js:245:11)
Please keep in mind that I'm an absolute beginner.
Thank you for your help
'npm install -g' installs modules at a global level, often with the intent of providing system-wide binaries for terminal usage. Think Ruby Gems. If you want to include a module as part of your project you need to remove the -g.
Also, your httpServer variable is not your server but rather the http module. createServer() returns a server object which you want to capture with a variable to use in your nowjs.initialize() method as follows:
var http = require('http')
, now = require('now')
// Returns an Http Server which can now be referenced as 'app' from now on
var app = http.createServer(
//... blah blah blah
)
// listen() doesn't return a server object so don't pass this method call
// as the parameter to the initialize method below
app.listen(8080, function () {
console.log('Server listening on port %d', app.address().port)
})
// Initialize NowJS with the Http Server object as intended
var everyone = nowjs.initialize(app)

How can I control a browser ( ala Selenium ) with node.js?

I've heard of soda, but it seems like it requires you to signup and there's a limit on the # of minutes ( free acct / 200 minutes ).
Does anyone know if there's some alternative way to control a browser, or more specifically invoke JS on a web page?
https://github.com/LearnBoost/soda/raw/master/examples/google.js
/**
* Module dependencies.
*/
var soda = require('../')
, assert = require('assert');
var browser = soda.createClient({
host: 'localhost'
, port: 4444
, url: 'http://www.google.com'
, browser: 'firefox'
});
browser.on('command', function(cmd, args){
console.log(' \x1b[33m%s\x1b[0m: %s', cmd, args.join(', '));
});
browser
.chain
.session()
.open('/')
.type('q', 'Hello World')
.clickAndWait('btnG')
.getTitle(function(title){
assert.ok(~title.indexOf('Hello World'), 'Title did not include the query');
})
.clickAndWait('link=Advanced search')
.waitForPageToLoad(2000)
.assertText('css=#gen-query', 'Hello World')
.assertAttribute('as_q#value', 'Hello World')
.testComplete()
.end(function(err){
if (err) throw err;
console.log('done');
});
Zombie.js might work for you. It is headless and seems really cool.
There are actually now Selenium bindings for JavaScript that work with Node.js.
Here are some basic steps to get started:
1 Install Node.js, you can find the download here.
Make sure you
have the latest Chrome driver and put it in your path.
Use npm install selenium-webdriver to get the module added to your project.
Write a test, for example:
var webdriver = require('selenium-webdriver');
var driver = new webdriver.Builder().
withCapabilities(webdriver.Capabilities.chrome()).
build();
driver.get('http://www.google.com');
driver.findElement(webdriver.By.name('q')).sendKeys('simple programmer');
driver.findElement(webdriver.By.name('btnG')).click();
driver.quit();</code>
I cover how to do this with some screenshots and how to use Mocha as a test driver in my blog post here.
Here's a pure node.js wrapper around the java API for selenium's webdriver:
https://npmjs.org/package/webdriver-sync
Here's an example:
var webdriverModule = require("webdriver-sync");
var driver = new webdriverModule.ChromeDriver;
var By = webdriverModule.By;
var element = driver.findElement(By.name("q"));
element.sendKeys("Cheese!");
element.submit();
element = driver.findElement(By.name("q"));
assert.equal(element.getAttribute('value'), "Cheese!");
Save that in a .js file and run it with node.
The module is a pure wrapper, so things like sleep or synchronous calls are entirely possible. Here's the current interface of the module:
module.exports={
ChromeDriver:ChromeDriver,
FirefoxDriver:FirefoxDriver,
HtmlUnitDriver:HtmlUnitDriver,
By:new By(),
ExpectedConditions:new ExpectedConditions(),
WebDriverWait:WebDriverWait,
Credentials:UserAndPassword,
Cookie:Cookie,
TimeUnits:TimeUnits,
/**
* #param {number} amount in mills to sleep for.
*/
sleep:function(amount){
java.callStaticMethodSync(
"java.lang.Thread",
"sleep",
new Long(amount)
);
}
};
You can see an integration test that tests the full capabilities here:
https://github.com/jsdevel/webdriver-sync/blob/master/test/integrations/SmokeIT.js
wd is "A node.js javascript client for webdriver/selenium 2"

Resources