AWS Lambda Nodejs 12.x using Selenium: Chrome Driver not found

AWS Lambda Nodejs 12.x using Selenium: Chrome Driver not found - node.js

When I run my nodejs 12.x code calling selenium, I am getting the error below.
At the top level of my directory structure, I have my index.js file, a lib folder and a node_modules folder.
I am preparing the code on a Mac and then zipping it, uploading to S3 and running it in Lambda.
Where should my chrome and chromedriver executables be? I don't think I can use npm to install them as I think I need the linux versions for running in Lambda?
This is the error I am getting:
{
"errorType": "Error",
"errorMessage": "The ChromeDriver could not be found on the current PATH. Please download the latest version of the ChromeDriver from http://chromedriver.storage.googleapis.com/index.html and ensure it can be found on your PATH.",
"trace": [
"Error: The ChromeDriver could not be found on the current PATH. Please download the latest version of the ChromeDriver from http://chromedriver.storage.googleapis.com/index.html and ensure it can be found on your PATH.",
" at new ServiceBuilder (/var/task/node_modules/selenium-webdriver/chrome.js:232:13)",
" at getDefaultService (/var/task/node_modules/selenium-webdriver/chrome.js:321:22)",
" at Function.createSession (/var/task/node_modules/selenium-webdriver/chrome.js:695:44)",
" at createDriver (/var/task/node_modules/selenium-webdriver/index.js:155:33)",
" at Builder.build (/var/task/node_modules/selenium-webdriver/index.js:662:16)",
" at Runtime.exports.handler (/var/task/index.js:38:26)",
" at Runtime.handleOnce (/var/runtime/Runtime.js:66:25)"
]
}
This is my code
'use strict';
exports.handler = async (event, context, callback) => {
var webdriver = require('selenium-webdriver');
var chrome = require('selenium-webdriver/chrome');
var builder = new webdriver.Builder().forBrowser('chrome');
var chromeOptions = new chrome.Options();
const defaultChromeFlags = [
'--headless',
'--disable-gpu',
'--window-size=1280x1696', // Letter size
'--no-sandbox',
'--user-data-dir=/tmp/user-data',
'--hide-scrollbars',
'--enable-logging',
'--log-level=0',
'--v=99',
'--single-process',
'--data-path=/tmp/data-path',
'--ignore-certificate-errors',
'--homedir=/tmp',
'--disk-cache-dir=/tmp/cache-dir'
];
chromeOptions.setChromeBinaryPath("/var/task/lib/chrome");
chromeOptions.addArguments(defaultChromeFlags);
builder.setChromeOptions(chromeOptions);
//*****this is the problem line****//
var driver = builder.build();
driver.get(event.url);
driver.getTitle().then(function(title) {
console.log("Page title for " + event.url + " is " + title)
callback(null, 'Page title for ' + event.url + ' is ' + title);
});
driver.quit();
};

Check this:
https://github.com/blackboard/lambda-selenium/blob/master/README.md
Welcome to the lambda-selenium project! The purpose of this project is to show how to use Selenium Webdriver with Amazon Web Services (AWS) Lambda compute service.
or
https://github.com/smithclay/lambdium/blob/master/README.md
Lambdium uses Selenium Webdriver with Headless Chromium to run Webdriver scripts written in JavaScript on AWS Lambda.

You can use the chrome-aws-lambda package to run Chrome headless in your Lambda function. As per the documentation, 512 MB of RAM will be used: https://www.npmjs.com/package/chrome-aws-lambda

Related

Synology NodeJS Selenium - Server terminated early with status 127

I have read a lot of similar issues, but nothing suggested there works ...
I'm on Synology - DSM 7.1 (Debian) and my code is
// Opens a URL in headless Chrome through selenium-webdriver, taking the
// driver location from the `chromedriver` npm package.
const chrome = require('selenium-webdriver/chrome');
const chromedriver = require('chromedriver');
const webdriver = require('selenium-webdriver');
//const path = require('chromedriver').path;
// By, until and Builder are destructured here but not referenced below
// (the builder is reached through `webdriver.Builder` instead).
const {By, until, Builder} = require('selenium-webdriver');
/**
 * Builds a Chrome WebDriver session and navigates it to `url`.
 *
 * The driver is neither returned nor quit inside this function.
 *
 * @param {string} url - Address handed to `driver.get`.
 */
exports.getInfoFromUrl = async(url) => {
// Start the webdriver to scrape Bet Assistant
//let service = new chrome.ServiceBuilder().build();
//chrome.setDefaultService(service);
//var driver = new webdriver.Builder(path).withCapabilities(webdriver.Capabilities.chrome()).build();
// Chrome command-line switches for the session.
const options = new chrome.Options();
options.addArguments(
'--no-sandbox',
'headless',
'disable-gpu',
'--disable-dev-shm-usage'
);
// NOTE(review): chromedriver.path is passed as a constructor argument to
// Builder — confirm that the constructor actually uses it.
var driver = new webdriver.Builder(chromedriver.path)
//.forBrowser('chrome')
.withCapabilities(webdriver.Capabilities.chrome())
.setChromeOptions(options)
.build();
// Earlier attempt, kept for reference: registers a default service built from
// chromedriver.path before creating the driver.
/*chrome.setDefaultService(new chrome.ServiceBuilder(chromedriver.path).build());
var driver = new webdriver.Builder(chromedriver.path)
.setChromeOptions(new chrome.Options().addArguments(['--no-sandbox','-headless', '--disable-dev-shm-usage']))
.build();
*/
// NOTE(review): the result of driver.get(url) is neither awaited nor returned.
driver.get(url);
}
When I execute this code with "node script.js" I get this error :
/volume1/web/betassistant/node_modules/selenium-webdriver/remote/index.js:248
reject(Error(e.message))
^
Error: Server terminated early with status 127
at /volume1/web/betassistant/node_modules/selenium-webdriver/remote/index.js:248:24
at processTicksAndRejections (node:internal/process/task_queues:96:5)
I have tried several samples and code snippets to run webdriver, but nothing works. I see that some users install "default-jre" (How do I solve "Server terminated early with status 127" when running node.js on Linux?), but I don't have "apt-get" and I don't think a JRE is needed on DSM.
Any help would be appreciated :)

Cannot read property 'filename' of undefined Using html-pdf with lambda

I'm experiencing problems when I try to create a PDF from HTML using Lambda Function, I'm receiving the error below:
{
"errorType": "TypeError",
"errorMessage": "Cannot read property 'filename' of undefined",
"trace": [
"TypeError: Cannot read property 'filename' of undefined",
" at execPdfToBuffer (/var/task/node_modules/html-pdf/lib/pdf.js:48:21)",
" at ChildProcess.respond (/var/task/node_modules/html-pdf/lib/pdf.js:144:5)",
" at ChildProcess.emit (events.js:314:20)",
" at ChildProcess.EventEmitter.emit (domain.js:483:12)",
" at Process.ChildProcess._handle.onexit (internal/child_process.js:276:12)"
]
}
My code is:
try{
let arq = await new Promise((res,rej)=>{
pdf.create(html,{
format: "Letter",
orientation: "portrait",
phantomPath: '/opt/phantomjs_linux-x86_64'
}).toBuffer(function(err, buffer){
if (err){
rej(false);
}else{
res(buffer);
}
});
});
const params = {
Key: 'teste.pdf',
Body: arq, // <---------
Bucket: 'temp'
};
let S3 = new AWS.S3();
let response = await S3.upload(params).promise();
if (response){
return true;
}else{
return false;
}
}catch(err){
console.log(err);
return false;
}
I've read this topic: html-pdf package is not working on aws lambda
But the proposed solution didn't work for me.
Thank you in advance

In version 3.0.1 you can set the flag localUrlAccess: true, and it will work.

I ran into this issue, too. I fixed it by downgrading html-pdf from version 3.0.1 to version 2.2.0.
The release notes for the newer version are less than inspiring: "Not sure this module is even usable without installing phantomjs manually"
Perhaps there's a way to make version 3 work with lambda. But downgrading was a quick win for me.

Selenium testing with AWS Lambda Node.js 12.x - Error saying "Chromedriver not found"

I've been trying to run my Selenium tests on AWS Lambda with runtime Node.js 12.x, but I've been running into an error.
{
"errorType": "Error",
"errorMessage": "The ChromeDriver could not be found on the current PATH. Please download the latest version of the ChromeDriver from http://chromedriver.storage.googleapis.com/index.html and ensure it can be found on your PATH.",
"trace": [
"Error: The ChromeDriver could not be found on the current PATH. Please download the latest version of the ChromeDriver from http://chromedriver.storage.googleapis.com/index.html and ensure it can be found on your PATH.",
" at new ServiceBuilder (/var/task/aws-Lambda-Selenium-2.0/node_modules/selenium-webdriver/chrome.js:232:13)",
" at getDefaultService (/var/task/aws-Lambda-Selenium-2.0/node_modules/selenium-webdriver/chrome.js:321:22)",
" at Function.createSession (/var/task/aws-Lambda-Selenium-2.0/node_modules/selenium-webdriver/chrome.js:695:44)",
" at createDriver (/var/task/aws-Lambda-Selenium-2.0/node_modules/selenium-webdriver/index.js:155:33)",
" at Builder.build (/var/task/aws-Lambda-Selenium-2.0/node_modules/selenium-webdriver/index.js:662:16)",
" at Runtime.module.exports.testFn [as handler] (/var/task/aws-Lambda-Selenium-2.0/handler.js:35:32)",
" at Runtime.handleOnce (/var/runtime/Runtime.js:66:25)"
]
}
I know that a similar question has been answered recently, but I've used the chrome-aws-lambda npm package and this still hasn't worked. My Lambda function has 2GB of memory at its disposal, and I've limited the time outs to 5 min. Below I have placed the code of my handler.js. Any help would be greatly appreciated.
'use strict';

/**
 * AWS Lambda handler: starts headless Chrome through Selenium, loads
 * https://www.google.com/ and logs the page title.
 *
 * @param {object} event - Invocation payload (unused).
 * @param {object} context - Lambda context object (unused).
 * @param {Function} callback - Lambda callback (unused; kept so the signature is unchanged).
 * @returns {Promise<void>} Settles once the browser session has been closed.
 */
module.exports.testFn = async (event, context, callback) =>
{
  const webdriver = require("selenium-webdriver");
  const chrome = require('selenium-webdriver/chrome');

  const builder = new webdriver.Builder().forBrowser('chrome');
  const chromeOptions = new chrome.Options();
  const defaultChromeFlags =
  [
    '--headless',
    '--disable-gpu',
    '--disable-dev-shm-usage',
    '--window-size=1280x1696',
    '--no-sandbox',
    '--user-data-dir=/tmp/user-data',
    '--hide-scrollbars',
    '--enable-logging',
    '--log-level=0',
    '--v=99',
    '--single-process',
    '--data-path=/tmp/data-path',
    '--ignore-certificate-errors',
    '--homedir=/tmp',
    '--disk-cache-dir=/tmp/cache-dir'
  ];
  const HEADLESS_CHROME_PATH = '/var/task/aws-Lambda-Selenium-2.0/node_modules/chrome-aws-lambda/bin/chromium.br';
  chromeOptions.setChromeBinaryPath(HEADLESS_CHROME_PATH);
  chromeOptions.addArguments(defaultChromeFlags);
  builder.setChromeOptions(chromeOptions);

  let driver = await builder.build(); // <-- program is not getting past this line
  try {
    await driver.get("https://www.google.com/");
    // Include the fetched title in the log line; previously it was dropped.
    const title = await driver.getTitle();
    console.log("Page title for " + "https://www.google.com/" + " is " + title);
  } finally {
    // Close the browser session even if navigation or the title lookup fails.
    await driver.quit();
  }
}

Configure Puppeteer executablePath chrome in your local Windows

Puppeteer version : 1.11.0
Platform / OS version: Windows 10 pro
Node.js version: 12.6.6
When I ran a local development test on Windows, there was a problem with executablePath.
"Failed to launch chrome! spawn /usr/bin/chromium-browser ENOENT"
I saw that on Windows the complete path needs to be given; otherwise chrome.exe cannot be found.
Default in code:
const browser = await puppeteer.launch({executablePath: '/path/to/Chrome'});
In windows it worked thus:
const browser = await puppeteer.launch({executablePath: 'C:\\your_workspace\\node_modules\\puppeteer\\.local-chromium\\win64-(version)\\chrome-win\\chrome.exe'});
Visual Studio Code suggests the path:
Visual Code view explorer

You can also set the environment variable PUPPETEER_EXECUTABLE_PATH.
This is useful in conjunction with PUPPETEER_SKIP_CHROMIUM_DOWNLOAD set to true

Maybe this can help:
// Pick the browser executable path from the current OS platform.
// Platforms matching neither pattern leave `executablePath` undefined.
const osPlatform = os.platform(); // possible values are: 'darwin', 'freebsd', 'linux', 'sunos' or 'win32'
console.log('Scraper running on platform: ', osPlatform);

// Ordered [platform pattern, executable path] pairs; the patterns are mutually
// exclusive, so at most one entry can match.
const platformExecutables = [
  [/^linux/i, '/usr/bin/google-chrome'],
  [/^win/i, ''],
];
const matchedEntry = platformExecutables.find(([pattern]) => pattern.test(osPlatform));
let executablePath = matchedEntry ? matchedEntry[1] : undefined;

Protractor running tests on PhantomJS

I can't seem to get PhantomJS through a test successfully. I tried to integrate it into my project, but after that failed I tried to just run the basic Angular Docs samples and I'm getting the same issue. My steps so far:
npm install -g phantomjs
phantomjs --webdriver=9515 // ... GhostDriver - Main - running on port 9515
protractor protractorConf.js
This is the same file from the example with only browserName, and seleniumAddress port changed:
// An example configuration file.
// Exports a single `config` object with the server address, the browser
// capabilities, the spec files to run and the Jasmine-node options.
exports.config = {
// The address of a running selenium server.
seleniumAddress: 'http://localhost:9515',
// Capabilities to be passed to the webdriver instance.
capabilities: {
'browserName': 'phantomjs'
},
// Spec patterns are relative to the current working directory when
// protractor is called.
specs: ['onProtractorRunner.js'],
// Options to be passed to Jasmine-node.
jasmineNodeOpts: {
showColors: true,
}
};
I get the following error message:
UnknownError: Error Message => 'Detected a page unload event; asynchronous script execution does not work across page loads.'
I found this issue on github, which seemed to be related. I thought I had made enough sense of their brower-setup.md to include it in one of my beforeEach functions. Then I found here ptor is just wrapping the driver anyway. Wow, I know I'm a noob here in protractor/selenium land, but the signal to noise ratio is intensively dissuasive. I'd really like to get the performance benefits of using PhantomJS, but the prospect of losing several more hours on this is hurting my head. I'm on Windows 7 Enterprise 64-bit, in case that matters. Thanks!

Actually, this fix solved the same issue for me:
https://github.com/pschwartau/protractor/commit/1eeff8b1b2e3e8f3b7c8152264411f26d4665a07
As originally described here: https://github.com/angular/protractor/issues/85#issuecomment-26846255 by renanmartins
Inside protractor/lib/protractor.js Replace
this.driver.get('about:blank');
this.driver.executeScript(
'window.name = "' + DEFER_LABEL + '" + window.name;' +
'window.location.href = "' + destination + '"');
with
var driver = this.driver;
this.getCapabilities().then(function (capabilities) {
if (capabilities.caps_.browserName === 'phantomjs') {
driver.executeScript('window.name = "' + DEFER_LABEL + '" + window.name;');
driver.get(destination);
} else {
driver.get('about:blank');
driver.executeScript(
'window.name = "' + DEFER_LABEL + '" + window.name;' +
'window.location.href = "' + destination + '"');
}
// Make sure the page is an Angular page.
driver.executeAsyncScript(clientSideScripts.testForAngular, 10).
then(function(hasAngular) {
if (!hasAngular) {
throw new Error('Angular could not be found on the page ' +
destination);
}
});
});

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string