I have a casper script that submits a form and scrapes the response.
Im trying to set up a "scraping on demand" environment where I can post the form values to a url using PhatomJS webserver, then use that data in my Casper script to scrape the page and then print out the response on the webpage. I don't see how I can pass the post variable into casper and then pass the response back to Phantom.
Heres my basic Phantom/Casper structure:
var server = require('webserver').create();
server.listen(8080, function(request, response) {
phantom.casperPath = '/source/casper/casperjs';
phantom.injectJs('/source/casper/casperjs/bin/bootstrap.js');
var address = request.post.address;
var casper = require('casper').create();
casper.start();
casper.then(function(){
address = // want to access global address here
result = begin(this, address); //Contians Casper scrape code
});
casper.run(function(){
this.exit();
});
response.statusCode = 200;
response.write(result); // from casper
response.close();
});
Is there any way to access the variables from phantom in casper, and then pass data back once I finish scraping?
Unless you are doing something in PhantomJS that cannot be done in CasperJS, you are probably better off firing up a server in CasperJS and responding with the results of your Casper functions there.
Based on: https://stackoverflow.com/a/16489950/1096083
//includes web server modules
var server = require('webserver').create();
//start web server
var service = server.listen(ip_server, function(request, response) {
var results;
var address = request.post.address; // this is not working the way you would expect, needs a little help
var casper = require('casper').create();
casper.start(address, function() {
// do some stuff with casper
// store it in results
});
casper.then(function() {
// do some more stuff with casper
// store that in results too
});
casper.run(function() {
response.statusCode = 200;
response.write(results);
response.close();
});
});
console.log('Server running at http://localhost:' + ip_server+'/');
Related
Currently, I have a lot of linux based clients downloading firmware updates from my webserver.
After the client has successfully downloaded the firmware file, my server needs to execute a few scripts, which logs in to the client and performs some tasks.
Is there a way for a node server to keep track of the clients download progress, so I can execute the needed scripts once the file has been downloaded?
Ok so I will try.
If you serve your Firmware as static files through Apache/Nginx and direct url call. You don't get the progress inside your NodeJS script.
If you serve your files via stream inside a express controller you can listen to the progress. Look at this answer here https://stackoverflow.com/a/42273080/3168392
You will have to use a socket connection to make sure the node server gets update from the client of the progress of the file being downloaded.
Something like this
CLIENT_CODE
var socket = io('http://localhost');
socket.on('connect', function(){});
socket.on('data_reciving', parts.pop(),function(percentage){
if(parse_data(percentage) === 100){
client.socket.emit('downloadCompleted', {fileName:'test'});
}else{
//do nothing
}
});
SERVER_CODE:
sockets.on('connection', function (socket) {
//listen to the event from client
socket.on('downloadCompleted', function (data) {
connect_to_client();
do_some_operation();
socket.emit('ALLDONE',{some_data});
});
});
I guess this helps ,you can use this post for reference.
If you just want to run some code when a download has finished, you can use on-finished:
const onFinished = require('on-finished');
app.use((req, res, next) => {
onFinished(res, (err, res) => {
...log some data, perform some housekeeping, etc...
});
next();
});
As is, it will attach a "finished" listener to all responses, which is probably not what you want. Since this is plain Express middleware, you can attach it to specific routes instead (but how depends on how exactly the files are being served).
I found some code that seems to fit my needs.
With the code below, I can detect both the progress of a user's download from the server-side, and fire an event once the file transfer completes.
var http = require("http");
var fs = require("fs");
var filename = "./large-file";
var serv = http.createServer(function (req, res) {
var sent = 0;
var lastChunkSize = 0;
var stat = fs.statSync(filename);
res.setHeader('Content-disposition', 'attachment; filename=large-file.iso');
res.setHeader('Accept-Ranges', 'bytes');
res.setHeader('Keep-Alive', 'timeout=5, max=100');
res.writeHeader(200, {"Content-Length": stat.size});
var fReadStream = fs.createReadStream(filename, { highWaterMark: 128 * 1024 });
fReadStream.on('data', function (chunk) {
if(!res.write(chunk)){
fReadStream.pause();
lastChunkSize = chunk.length;
console.log('Sent', sent, 'of', stat.size);
}
});
fReadStream.on('end', function () {
console.log('Transfer complete.');
res.end();
});
res.on("drain", function () {
sent += lastChunkSize;
fReadStream.resume();
});
});
serv.listen(3001);
I'm new to node and got stuck with handling multiple async tasks.
Except from node, I've got another server (S1) which doesn't return data immediately to requests, it can returns multiple types of data and also can send notifications without requesting them specifically, so node have to listen to data from it , parse it and act accordingly.
The connection to this server (S1) is done by using:
S1 = net.createConnection({'host':S1Host, 'port': S1Port});
And node listens to data with:
S1.on('data', function(data){
S1DataParse(data);
});
I have to route the correct data (after parsing it) to a specific POST request.
app.post('/GetFooFromS1', function(req, res){
// Send request to S1
S1.write({'type':'foo'});
// If got the correct data sometime in the future, send response to the browser
res.setHeader('Content-Type', 'application/json');
res.json({'status':'success', 'value':S1FooData});
});
I tried to use the async module for that, but with no success.
What I was trying to do:
var asyncTasks = [];
app.post('/GetFooFromS1', function(req, res){
asyncTasks.push(function(callback){
// Send request to S1
S1.write({'type':'foo'});
});
async.parallel(asyncTasks, function(response){
res.setHeader('Content-Type', 'application/json');
res.json({'status':'success', 'value':response});
});
});
and another task in S1DataParse:
function S1DataParse(){
if(data.type='foo'){
asyncTasks.push(function(callback){
callback(data);
});
}
}
But, of course, the second task never added to the asyncTasks array. I really got stuck with that.
Can you please help me with that?
Thanks
-=-=-=- Edit -=-=-=-
Eventually, I came accross with events and EventEmitter().
From the POST request I call the function that sends requests to the data server (DataServerClientGet).
In this function I register a listener which will get the future data.
eventEmitter.on('getData', returnDataServerData);
It all works great except for one thing. Whenever I refresh the page or add other POST requests, I get an error:
Error: Can't set headers after they are sent.
It would be great if I solve this problem. Help me, please.
Thanks ;)
The whole code looks like this:
var express = require('express');
var app = express();
var http = require('http').Server(app);
var bodyParser = require('body-parser')
var net = require('net');
var events = require('events');
var dataServerHost = '127.0.0.1';
var dataServerPort = 12345;
var dataServerClient;
var logMsg;
var eventEmitter = new events.EventEmitter();
/*******************************************/
// Init
/*******************************************/
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({extended: true}));
app.use(express.static(__dirname + '/public'));
/*******************************************/
// Connect to the data server
/*******************************************/
DataServerConnect();
/*******************************************/
// Open listener on port 3000 (to browser)
/*******************************************/
http.listen(3000, function(){
logMsg = 'listening on *:3000';
console.log(logMsg);
});
/*******************************************/
// Routing
/*******************************************/
app.get('/', function(req, res){
res.sendFile(__dirname + '/index.html');
});
app.post('/GetDataFoo', function(req, res){
var msg;
var size;
msg ='\n{"Type":"Query", "SubType":"GetDataFoo","SearchFilter":""}';
size = msg.length;
logMsg = 'Client to DataServer: GetDataFoo';
console.log(logMsg);
DataServerClientGet('GetDataFoo', size, msg, res);
});
/*******************************************/
// Functions
/*******************************************/
function DataServerConnect(){
dataServerClient = net.createConnection({'host':dataServerHost, 'port': dataServerPort}, function(){
logMsg = 'Connected to DataServer ['+dataServerHost+':'+dataServerPort+']';
console.log(logMsg);
});
dataServerClient.on('data', function(data){
logMsg = 'DataServerData>>>\n'+data.toString()+'DataServerData<<<';
console.log(logMsg);
DataServerDataParse(data.toString());
});
dataServerClient.on('end', function(){
logMsg = 'Disconnected from DataServer';
console.log(logMsg);
});
}
function DataServerClientGet(type, size, msg, res){
dataServerClient.write('Type: Json\nSize: '+size+'\n\n'+msg, function(err){
var returnDataServerData = function returnDataServerData(results){
res.setHeader('Content-Type', 'application/json');
res.json({'status':'success', 'value':results});
}
eventEmitter.on('getData', returnDataServerData);
}
function DataServerDataParse(json){
if(json.Type=='GetDataFoo')
{
var MessageList = json.MessageList;
eventEmitter.emit('getData', MessageList);
}
}
-=-=-=- Edit -=-=-=-
The Error: Can't set headers after they are sent. caused by adding the same listener of the same type each time the DataServerClientGet was called and the res was sending multiple times.
I solved this one by adding: removeListener(event, listener)
right after the res, inside the function. Anyway, I think it's wrong and can cause problems if there will be multiple calling to DataServerClientGet with the same type etc.
There is a optional callback parameter that you can pass to write function(docs), something like :
S1.write({'type':'foo'},function(err){
if(err){
//Handle error
}else{
res.setHeader('Content-Type', 'application/json');
res.json({'status':'success', 'value':response});
}
})
This can work with post route , but in your 'data' listener ,you cant send data from server to client when there is not connection initialized by client (it is not bidireccional ) if you want bidireccional behavior you can check socket.io
I have a node.js server communicating to a client web page, sending it message. This is working great based on the many tutorials and searching stack overflow :)
Where I am having an issue is when I attempt to startup a separate socket connection to a 3rd party cmd line executable instance runs as a socket server. The 3rd party executable does not adhere to the socket.io namespace/room type of events, so I read that socket.io-events may help where instead of:
socket.on('some key', function(){/**do stuff*/}); I could:
eventRouter.on('*', function(){/*do stuff*/});
For this communication, I am assuming I need to use socket.io-client on the node.js side in order to talk to the cmd executable, but I am getting exceptions trying to do a socket2.use(router); where socket2 is my socket.io-client and router is the socket.io-events object.
All runs on localhost, node to web page is port 8001 and node.js to executable is on port 8002. Please pardon the code, for I have been trying to get this to work for a few days and is a bit ugly now.
The cmd executable to execute and its arguments I have coming from the web page which works. I am able to start the exe. The EXE expects a ACK on each message sent, thus why you see the code emitting it back.
I have a interval where I set and update an element on the web page. I have another element that I set messages (msg).
var http = require('http');
var url = require('url');
var fs = require('fs');
var server;
server = http.createServer(function(req, res){
// your normal server code
var path = url.parse(req.url).pathname;
switch (path){
case '/':
res.writeHead(200, {'Content-Type': 'text/html'});
res.write('<h1>Hello! Try the Test page </h1>');
res.end();
break;
case '/socket.html':
fs.readFile(__dirname + path, function(err, data){
if (err){
return send404(res);
}
res.writeHead(200, {'Content-Type': path == 'json.js' ? 'text/javascript' : 'text/html'});
res.write(data, 'utf8');
res.end();
});
break;
default: send404(res);
}
}),
send404 = function(res){
res.writeHead(404);
res.write('404');
res.end();
};
server.listen(8001);
var str = "ack0";
var bytes = [];
for (var i = 0; i < str.length; ++i) {
bytes.push(str.charCodeAt(i));
}
// use socket.io
var io = require('socket.io').listen(server);
// define interactions with client
io.sockets.on('connection', function(socket){
//send data to client
setInterval(function(){
socket.emit('date', {'date': new Date()});
}, 1000);
//recieve client data
socket.on('client_data', function(data){
var spawn = require('child_process').spawn;
console.log('pre-spawned');
spawn(data.cmd, data.args, {});
setTimeout(function() {
console.log('hello world!');
}, 1000);
var aptIO = require('socket.io-client');
var router = require('socket.io-events')();
var socket2 = aptIO.connect('localhost:8002', {reconnect: true});
router.on('connection', function(s){
//send data to client
console.log('apt');
router.on('*', function(sock, args, next){
var name = args.shift(), msg = args.shift();
console.log(name + " " + JSON.stringify(msg));
sock.emit(bytes);
io.sockets.emit('msg', {'msg': JSON.stringify(msg)})
next();
});
s.emit(bytes);
});
console.log('spawned');
// getting runtime exceptions here...have tried various things...
socket2.use(router);
});
});
With the help from JGreenwell, I was able to resolve me issue.
I ended up having the node server communicate to the client html page via socket.io connection for messages. The node server would launch the cmd line executable providing it the port to connect to which is different from the socket.io port used.
Once started, the executable would communicate with the server via the net module. The server would just pass the information on to the socket.io connection. the js in the html page knows how to parse the message in order to increment the progress bar and list the messages in a text area control.
I took it even further by having the messages be broadcast-ed to multiple clients on the socket.io connection.
I have a button on a page that when it is clicked, i want the page to call some node.js server code. I have this wired up via socket.io. The issue I am running into is I need the socket.io code to redirect my web page based on some business logic. Not sure what is the best way to do that via express (res.redirect?) and also how to get access to that within the socket.io call. Any help would be appreciated!
App.js
var dashboard = require('./middleware/dashboard.js');
...
app.get("/dashboard", function (req, res) {
dashboard.show(req, res);
});
...
var http = require('http');
var server = http.createServer(app);
var io = require('socket.io').listen(server);
io.set('log level',2); // sets socket io log level 0=error, 1=warn, 2=info, 3=debug
...
dashboard.wireUpSocketIO(io);
...
dashboard.js
var savedResponse;
exports.show = function(req, res) {
savedResponse = res;
res.render("dashboard.jade"});
};
exports.wireUpSocketIO = function(io){
io.sockets.on('connection', function (socket) {
socket.on('dashboardOnButtonClick', function(msg) {
<biz logic>
savedResponse.render("someOtherPage.jade", {
locals: {
title: "someOtherPage",
filter: msg.filter
}
});
});
});
}
dashboard.jade
button#btnFilter(class='btn btn-info') Test Button
script(src="/socket.io/socket.io.js")
script .
var socket = io.connect('http://localhost');
var emitMessage = { blah: false, filter:"n/a"};
$('#btnFilter').click(function(){
emitMessage.blah = true;
emitMessage.filter = "filterByBlah";
socket.emit('dashboardOnButtonClick', emitMessage);
});
I could think it is easier to do from the client side, that is, javascript:
Once the server side logic is done, you send a message back to the client and change the window.location to the new one.
Once the server is done, the client can issue a request to that the server will respond with a HTTP 301/302 Redirect. This way you can call res.Redirect on that Express request.
Regards,
Here is my code:
var express = require('express'),
app = express.createServer(express.logger());
app.use(express.bodyParser());
app.get('/', function(request, response) {
var name = request.param('name', null);
response.header('Content-Type', 'text/event-stream');
var t = setInterval(function(){
response.write('Hello World by '+name+' (using http-1.1 streaming data!)<br />');
}, 2000);
/*request.socket.on('close', function() {
clearInterval(t);
}); */
});
var port = process.env.PORT || 5000;
app.listen(port, function() {
//clearInterval(t);
console.log("Listening on " + port);
});
I am using express because I am testing this in heroku and I used the guide there.
So when I open the app I get a download popup with the data in the file (downloaded) instead of the html (dom) response.
What did I do wrong? (is it a browser problem?)
Edit:
#Joe, has solved my initial problem but yet, I have two questions:
1) Why i don't need to insert a 'correct' content type? (of streaming?)
2) Why does this code out put only after along time (about half a minute).?
If you are trying to display it as HTML you will want your Content-Type to be text/html