Make parallel requests to a Selenium Webdriver grid - node.js

I'm trying to use a selenium server grid to run multiple commands in parallel.
Here is my first test code:
var webdriver = require('selenium-webdriver');
for(var u = 0; u < 3; u++) {
makeScreenshot('foo/test' + u + '.png');
}
function makeScreenshot(path) {
var driver = new webdriver.Builder().forBrowser('firefox').usingServer('http://someurl:44111/wd/hub/').build();
console.log('Get');
driver.get('http://www.somepage.com').then(function() {
console.log('Screenshot');
driver.takeScreenshot().then(function(data){
console.log(path);
//var decodedImage = new Buffer(data, 'base64')
driver.quit();
});
});
}
That is the result:
Get
Get
Get
Screenshot
foo/test0.png
Screenshot
foo/test1.png
Screenshot
foo/test2.png
screenshot of requests
The "Get" appears immediately in sequence, "driver.get" creates a promise. My idea here is that the three requests are made asynchronously and thus appear almost simultaneously. But as you can see in the screenshot they will be made one after the other.
The grid definitely has enough selenium instances so why isn't the driver working in parallel?
It seems to me that "new webdriver.Builder()" creates some kind of singleton that doesn't work async but waits for the previous request to finish!?
Thanks for any help!

The answer may be multiple control flows:
WebDriverJS supports defining "parallel" flows using
webdriver.promise.createFlow(). This function accepts a callback which
will be passed the newly created flow. Tasks scheduled within this
flow will be synchronized with each other, but will remain independent
of any other control flows. Each call to createFlow() returns a
promise that will resolve when the flow has completed.
The example at the end of the chapter (which I'll quite verbatim) shows multiple Google search terms being tested concurrently:
var terms = [
'javascript',
'selenium',
'webdriver'
];
var flows = terms.map(function(term) {
return webdriver.promise.createFlow(function() {
var driver = new webdriver.Builder().build();
driver.get('http://www.google.com');
driver.findElement(webdriver.By.name('q')).sendKeys(term);
driver.findElement(webdriver.By.name('btnG')).click();
driver.getTitle().then(function(title) {
if (title !== (term + ' - Google Search')) {
throw Error('Unexpected title: ' + title);
}
});
});
});
webdriver.promise.fullyResolved(flows).then(function() {
console.log('All tests passed!');
});
It should be easy enough to add your custom driver build and lookups into that example. Perhaps the following:
var flows = [0,1,2,3].map(function(index) {
return webdriver.promise.createFlow(function() {
var driver = new webdriver.Builder().forBrowser('firefox').usingServer('http://someurl:44111/wd/hub/').build();
console.log('Get');
driver.get('http://www.somepage.com').then(function() {
console.log('Screenshot');
driver.takeScreenshot().then(function(data){
console.log('foo/test' + index + '.png');
//var decodedImage = new Buffer(data, 'base64')
driver.quit();
});
});
});
});

Related

How to wait in node.js for a variable available on the cloud to have a specific value

I'm sorry if this is a basic question, but I am trying to implement a program in node.js that should wait for the value of a variable available trough a request to a cloud api (photon.variable()) to be 1. This variable should not be requested more than once per second. My first attempt is included in the sample code below. Despite knowing it does not work at all, I think it could be useful to show the functionality I would like to implement.
var photondata = 0;
while (photondata < 1)
{
setTimeout(function () {
photon.variable("witok", function(err, data) {
if (!err) {
console.log("data: ", data.result);
photondata = data.result;
}
else console.log(err);
})}, 1000);
}
Since you couldn't do async stuff in loops before, the traditional approach would be to create a function that adds itself to setTimeout for as long as needed, then calls some other function when it's done. You still need to do this in the browser if not using Babel.
These days, you can stop execution and wait for things to happen when using a generator function (which latest versions of Node now support). There are many libraries that will let you do this and I will advertise ours :)
CL.run(function* () {
var photondata = 0;
while (true) {
yield CL.try(function* () {
var data = yield photon.variable("witok", CL.cb());
console.log("data: ", data.result);
photondata = data.result;
}, function* (err) {
console.log(err.message);
});
if (photondata >= 1) break;
yield CL.sleep(1000);
}
// do whatever you need here
});

Return a value from phantomjs to nodejs

I'm using phantomjs using jquerygo library and am trying to this.
Visit a url
Click on a link and wait for it to load
Grab a particular tag and return it to nodejs for processing.
I realize that in phantomjs:
The execution is sandboxed, the web page has no access to the phantom object and it can't probe its own setting
But I should be able to return a simple string from the evaluate right?
But that is not working. My code is as follows:
var photogsScrapeCount = function(url, callback){
console.log("LOADED PHOTOGSSCRAPE Count");
url = decodeURIComponent(url);
//$.config.site = 'https://www.magnumphotos.com/';
$.config.addJQuery = false;
$.visit(url, function() {
$.waitForElement(".7n7np102",function() {
$.getPage(function(page) {
var imgCounterMinus = page.evaluate(function(){
$(".7n7np102 a").click(); // open the image enlarge
var temp = setTimeout(function(){
imgCounterMinus1 = $("span[id$='TotalPageCount_Lbl']").html();
imgCounterMinus1 = imgCounterMinus1.split(" ");
imgCounterMinus1 = imgCounterMinus1[2];
imgCounterMinus1 = parseInt(imgCounterMinus1);
console.log("imgCounterMinus1" + imgCounterMinus1);
return (imgCounterMinus1 - 3);
}, 4000);
return temp;
});
//console.log("After evaluate: " + imgCounterMinus)
});
});
});
};
Can this be achieved in any different way? The basic example from website is working so I am assuming that the setTimeout is giving me problems.
Any ideas or suggestions would be very helpful as I have very little experience in writing jquery, Js.
The docs say (emphasis mine):
For one, this library is not a complete API mirror of jQuery. Every API is asynchronous (due to its interaction with Phantom.js), so there are some differences.
There is also an example how page.evaluate() must be used. The result is not returned, but passed into a second callback. There is no way to return something from an asynchronous execution of a function except by using the callback. So the setTimeout syntax is also wrong.
$(".7n7np102 a").click(function(){
setTimeout(function(){
$.getPage(function(page) {
page.evaluate(function(){
var imgCounterMinus1 = $("span[id$='TotalPageCount_Lbl']").html();
imgCounterMinus1 = imgCounterMinus1.split(" ");
imgCounterMinus1 = imgCounterMinus1[2];
imgCounterMinus1 = parseInt(imgCounterMinus1);
console.log("imgCounterMinus1" + imgCounterMinus1);
return (imgCounterMinus1 - 3);
}, function(err, result){
console.log("After evaluate: " + result);
callback();
$.close();
});
});
}, 4000);
});

How use async functions with node-pdfkit?

I am trying to create a PDF file with PDFKit. I insert an image with like this:
var PDFDocument = require('pdfkit');
var doc = new PDFDocument();
doc.image(some_image_as_buffer);
and it is working like expected. But now want the image be trimmed and I found GraphicsMagick for node.js. But the problem that I have is to make it work with PDFKit. doc.image expects a filename or a buffer, but since I already have a buffer I want to work with buffers (there is no file anywhere because the buffer comes directly from the database).
The trimming works like this:
var gm = require('gm');
gm(some_image_as_buffer, 'image.png')
.trim()
.toBuffer(function(err, trimmed_image_buffer) {
// trimmed_image_buffer is correct,
// but I can't put it to the document like this:
doc.image(trimmed_image_buffer);
// beacause I don't know which page and/or position
// the doc is currently on, because of the asynchronous
// nature of this callback.
});
UPDATE:
For clarification: I want to be able to use the asynchronous trimmed image in the synchronous code for PDFKit. PDFKit only works synchronously and gm doesn't offer a synchronous interface.
UPDATE2:
var gm = require('gm');
gm(some_image_as_buffer, 'image.png')
.trim()
.toBuffer(function(err, trimmed_image_buffer) {
// trimmed_image_buffer is correct,
// but I can't put it to the document like this:
doc.image(trimmed_image_buffer);
// beacause I don't know which page and/or position
// the doc is currently on, because of the asynchronous
// nature of this callback.
});
doc.text('some text');
// is not guaranteed to run after image is inserted
// and a couple of hundred lines more
After the last line in this example there are a lot more lines of code which add content to the PDF, but I don't want to put everything (couple of hundred lines) in one callback just because I need on asynchronous function to manipulate the image.
Is there any way to make this manipulation synchronous?
UPDATE_2
You basically ask for stopping execution of a code until some asynchronous operation has completed. For sure it is not possible in general case.
In case of gm module, it is not possible either. The gm module spawns a new process for executing a command (in your case trim()) and the API for spawning new processes is asynchronous in its very nature.
UPDATE
To make use of promise in your scenario:
var gm = require('gm'),
Q = require('Q'),
PDFDocument = require('pdfkit'),
doc = new PDFDocument();
function getTrimmedImage(some_image_as_buffer){
var deferred = Q.defer();
gm(some_image_as_buffer, 'image.png')
.trim()
.toBuffer(function(err, trimmed_image_buffer) {
if(err) { deferred.reject(err); }
else { deferred.resolve(trimmed_image_buffer); }
});
return deferred.promise;
}
// here goes all manipulations before the trimmed image is inserted
getTrimmedImage(some_image_as_buffer).then(
function(trimmed_image_buffer){
doc.image(trimmed_image_buffer);
// here goes all manipulations after the trimmed image is inserted
}
);
As I wrote in the comment above, a promise based solution should work elegantly. I use Q library, but any other promise library will do the job, as well.
One option would be to collect all resources of asynchronous nature before starting manipulating the pdf. Then you are guaranteed that no race condition occur, though it may slow down the whole process. I used a toy example to have it working in the browser environment, let me know if you have any problems converting it to your use case:
function getAsyncResource(){
var defer = Q.defer();
setTimeout(function(){
var result = "Some value: " + Date.now();
console.log("Async resource resolved: " + result);
defer.resolve(result);
}, Math.random() * 5000);
return defer.promise;
}
function someOperationThatNeedsAsyncResources(A, B, C){
console.log("Have all resources: ", A, B, C);
}
var A = getAsyncResource(),
B = getAsyncResource(),
C = getAsyncResource();
Q.all([A,B,C]).spread(someOperationThatNeedsAsyncResources);
<script src="//cdnjs.cloudflare.com/ajax/libs/q.js/1.1.2/q.js"></script>
Other option would be to split the process into steps, like so:
function getAsyncResource(value){
var defer = Q.defer();
setTimeout(function(){
var result = "Some value: " + value;
console.log("Async resource resolved: " + result);
defer.resolve(result);
}, Math.random() * 5000);
return defer.promise;
}
function nextStep(resource){
console.log("Next step: " + resource);
}
var A = getAsyncResource("A"),
B = getAsyncResource("B"),
C = getAsyncResource("C");
A.then(nextStep)
.then(function(){return B;})
.then(nextStep)
.then(function(){return C;})
.then(nextStep);
<script src="//cdnjs.cloudflare.com/ajax/libs/q.js/1.1.2/q.js"></script>

Node.js sending an object with function definitions to worker thread

So I am working on a project in Node.js and I want to open up some extra threads to handle the processing load more efficiently. But I am using classes with function definitions with them and when I try to send those objects to the worker thread, the functions defined in the object disappear and I am only left with the other fields in the object. Is there a way to send the worker an object and preserve the functions so they can be called within the worker?
var cluster = require('cluster');
if(cluster.isMaster){
Monster = function(species){
this.attack = function(){
console.log('CHOMP');
};
this.name = species;
};
var vamp = new Monster('vampire'),
worker = cluster.fork();
worker.send({'monster' : vamp});
}
else{
process.on('message', function(msg) {
console.log(msg.monster); //this logs "{ name: 'vampire' }"
msg.monster.attack(); //TypeError: Object #<Object> has no method 'attack'
});
}
No, there is no way to pass functions between threads. You can pass only JS plain objects (data only) and handle it with functions defined in current thread (like create new object with received data).
Charlie, I realize you asked this question a year ago, but I was wanting to do something very similar and I came across your question which you didn't mark an answer to yet. I thought I would take a "stab" at it and show you what I have done with your code. This different way of organizing code is for me a very acceptable workaround in my node.js work. I am pretty sure this gives you a way to accomplish what you want, even though you can't do it in the manner you wanted.
Declare your "class" outside the cluster code, like this:
var cluster = require('cluster');
var Monster = function(species){
this.attack = function(){
console.log('CHOMP!');
};
this.die = function() {
console.log("Oh, what did I eat? I don't feel so good....\r\n");
process.exit(0);
};
this.scare = function() {
console.log("BOO! I am a " + this.name + "!");
};
this.name = species;
};
if(cluster.isMaster){
worker = cluster.fork();
worker.send({'species' : 'Vampire'});
}
else{
process.on('message', function(msg) {
if(typeof msg.species !== "undefined") {
myMonster = new Monster(msg.species);
myMonster.scare();
myMonster.attack();
myMonster.die();
}
});
}
Give that a whirl and see if this is an answer you can accept!
Ok, stumbled upon this answer, and I found it strange that no one brought this up, but it might be a more modern feature than the question:
eval
let str = "() => { console.log('test') }"
let func = eval(str)
func()
Think it's obvious what's going on here, you can parse any string to javascript, and you can send strings to workers, so you can build and object with functions:
let obj = { a: "() => { ... }" }
and send the object over. (JSON.stringify(obj) first, and than you will have to parse the object first, and than all the substrings seperately)

http.Clientrequest.abort() ends program

I'm having some issues using Node.js as a http client against an existing long polling server. I'm using 'http' and 'events' as requires.
I've created a wrapper object that contains the logic for handling the http.clientrequest. Here's a simplified version of the code. It works exactly as expected. When I call EndMe it aborts the request as anticipated.
var http = require('http');
var events = require('events');
function lpTest(urlHost,urlPath){
this.options = {
host: urlHost,
port: 80,
path: urlPath,
method: 'GET'
};
var req = {};
events.EventEmitter.call(this);
}
lpTest.super_ = events.EventEmitter;
lpTest.prototype = Object.create(events.EventEmitter.prototype, {
constructor: {
value: lpTest,
enumerable: false
}
});
lpTest.prototype.getData = function getData(){
this.req = http.request(this.options, function(res){
var httpData = "";
res.on('data', function(chunk){
httpData += chunk;
});
res.on('end', function(){
this.emit('res_complete', httpData);
}
};
}
lpTest.prototype.EndMe = function EndMe(){
this.req.abort();
}
module.exports = lpTest;
Now I want to create a bunch of these objects and use them to long poll a bunch of URL's. So I create an object to contain them all, generate each object individually, initiate it, then store it in my containing object. This works a treat, all of the stored long-polling objects fire events and return the data as expected.
var lpObject = require('./lpTest.js');
var objWatchers = {};
function DoSomething(hostURL, hostPath){
var tempLP = new lpObject(hostURL,hostPath);
tempLP.on('res_complete', function(httpData){
console.log(httpData);
this.getData();
});
objWatchers[hosturl + hostPath] = tempLP;
}
DoSomething('firsturl.com','firstpath');
DoSomething('secondurl.com','secondpath);
objWatchers['firsturl.com' + 'firstpath'].getData();
objWatchers['secondurl.com' + 'secondpath'].getData();
Now here's where it fails... I want to be able to stop a long-polling object while leaving the rest going. So naturally I try adding:
objWatchers['firsturl.com' + 'firstpath'].EndMe();
But this causes the entire node execution to cease and return me to the command line. All of the remaining long-polling objects, that are happily doing what they're supposed to do, suddenly stop.
Any ideas?
Could it have something to do with the fact that you are only calling getData() when the data is being returned?
Fixed code:
function DoSomething(hostURL, hostPath){
var tempLP = new lpObject(hostURL,hostPath);
tempLP.on('res_complete', function(httpData){
console.log(httpData);
});
tempLP.getData();
objWatchers[hosturl + hostPath] = tempLP;
}
I have seemingly solved this, although I'm note entirely happy with how it works:
var timeout = setTimeout(function(){
objWatchers['firsturl.com' + 'firstpath'].EndMe();
}, 100);
By calling the closing function on the object after a delay I seem to be able to preserve the program execution. Not exactly ideal, but I'll take it! If anyone can offer a better method please feel free to let me know :)

Resources