Selenium testing with AWS Lambda Node.js 12.x - Error saying "Chromedriver not found" - node.js

I've been trying to run my Selenium tests on AWS Lambda with runtime Node.js 12.x, but I've been running into an error.
{
"errorType": "Error",
"errorMessage": "The ChromeDriver could not be found on the current PATH. Please download the latest version of the ChromeDriver from http://chromedriver.storage.googleapis.com/index.html and ensure it can be found on your PATH.",
"trace": [
"Error: The ChromeDriver could not be found on the current PATH. Please download the latest version of the ChromeDriver from http://chromedriver.storage.googleapis.com/index.html and ensure it can be found on your PATH.",
" at new ServiceBuilder (/var/task/aws-Lambda-Selenium-2.0/node_modules/selenium-webdriver/chrome.js:232:13)",
" at getDefaultService (/var/task/aws-Lambda-Selenium-2.0/node_modules/selenium-webdriver/chrome.js:321:22)",
" at Function.createSession (/var/task/aws-Lambda-Selenium-2.0/node_modules/selenium-webdriver/chrome.js:695:44)",
" at createDriver (/var/task/aws-Lambda-Selenium-2.0/node_modules/selenium-webdriver/index.js:155:33)",
" at Builder.build (/var/task/aws-Lambda-Selenium-2.0/node_modules/selenium-webdriver/index.js:662:16)",
" at Runtime.module.exports.testFn [as handler] (/var/task/aws-Lambda-Selenium-2.0/handler.js:35:32)",
" at Runtime.handleOnce (/var/runtime/Runtime.js:66:25)"
]
}
I know that a similar question has been answered recently, but I've used the chrome-aws-lambda npm package and it still hasn't worked. My Lambda function has 2 GB of memory at its disposal, and I've set the timeout to 5 minutes. Below is the code of my handler.js. Any help would be greatly appreciated.
'use strict';

/**
 * AWS Lambda handler: starts headless Chrome through selenium-webdriver,
 * loads https://www.google.com/ and logs the page title.
 *
 * @param {object} event - Lambda invocation event (not read by this handler).
 * @param {object} context - Lambda context (not read by this handler).
 * @param {Function} callback - Lambda callback (not called by this handler).
 * @returns {Promise<void>} Settles after the browser session has been closed.
 */
module.exports.testFn = async (event, context, callback) => {
  const webdriver = require('selenium-webdriver');
  const chrome = require('selenium-webdriver/chrome');

  const TARGET_URL = 'https://www.google.com/';

  // NOTE(review): the `.br` suffix suggests a Brotli-compressed archive rather
  // than an executable - confirm this path can be launched directly.
  const HEADLESS_CHROME_PATH = '/var/task/aws-Lambda-Selenium-2.0/node_modules/chrome-aws-lambda/bin/chromium.br';

  const defaultChromeFlags = [
    '--headless',
    '--disable-gpu',
    '--disable-dev-shm-usage',
    '--window-size=1280x1696',
    '--no-sandbox',
    '--user-data-dir=/tmp/user-data',
    '--hide-scrollbars',
    '--enable-logging',
    '--log-level=0',
    '--v=99',
    '--single-process',
    '--data-path=/tmp/data-path',
    '--ignore-certificate-errors',
    '--homedir=/tmp',
    '--disk-cache-dir=/tmp/cache-dir',
  ];

  const chromeOptions = new chrome.Options();
  chromeOptions.setChromeBinaryPath(HEADLESS_CHROME_PATH);
  chromeOptions.addArguments(defaultChromeFlags);

  const builder = new webdriver.Builder().forBrowser('chrome');
  builder.setChromeOptions(chromeOptions);

  // <-- the program is not getting past this line: build() is where the
  // "ChromeDriver could not be found on the current PATH" error is raised.
  const driver = await builder.build();
  try {
    await driver.get(TARGET_URL);
    const title = await driver.getTitle();
    // Include the title in the message; previously the line ended at "is ".
    console.log(`Page title for ${TARGET_URL} is ${title}`);
  } finally {
    // Always close the session so a failed navigation does not leak a browser.
    await driver.quit();
  }
};

Related

Puppeteer: TypeError: Readable is not a constructor

I have been trying to use Puppeteer@15.5.0 to generate a PDF on the server side in Node.js.
import { launch } from 'puppeteer';
...
// Start a browser through puppeteer's launch() and open a new page.
const browser = await launch();
const page = await browser.newPage();
// Give the page a minimal body and print the resulting markup.
await page.setContent('COME ON!');
console.log(await page.content());
// Render the page to PDF; this is the call that appears in the reported stack trace.
const pdfBuffer = await page.pdf();
The console.log statement gives me the expected output of <html><head></head><body>COME ON!</body></html>
It then runs into the following error:
Error:
TypeError: Readable is not a constructor
at getReadableFromProtocolStream (/Users/kaziehsanaziz/Work/DocSpace/repos/docspace-pay/.webpack/service/src/public-lambda.js:405775:12)
at runMicrotasks (<anonymous>)
at processTicksAndRejections (internal/process/task_queues.js:95:5)
at async Page.pdf (/Users/kaziehsanaziz/Work/DocSpace/repos/docspace-pay/.webpack/service/src/public-lambda.js:403129:26)
at async /Users/kaziehsanaziz/Work/DocSpace/repos/docspace-pay/.webpack/service/src/public-lambda.js:329729:31
Puppeteer cannot be bundled using Webpack. The issue was that I was trying to do just that. In my case, since I was using Serverless, the solution was to tell the serverless-bundle plugin to not bundle the Puppeteer.
# serverless-bundle options: keep Puppeteer out of the bundled output.
bundle:
  packager: yarn
  esbuild: true
  # Packages excluded from the bundle.
  forceExclude:
    - aws-sdk
    - puppeteer
  # Packages left as externals instead of being bundled.
  externals:
    - puppeteer-core
    - '@sparticuz/chrome-aws-lambda'
The forceExclude setting does the trick for the local environment; the externals setting is what helps in the production environment.
I have also run into this issue. It occurs when webpack (v5 on my end) bundles puppeteer. I have solved it by explicitly declaring webpack ignore directive when importing a file which uses puppeteer. I did this via dynamic es import, but a static one could be done in a very similar way:
/**
 * Dynamically imports a module while telling webpack to leave the import alone.
 *
 * @param {string} modulePath - Specifier handed to the dynamic import().
 * @returns {Promise<object>} The imported module namespace.
 * @throws {ImportError} When the import fails for any reason.
 */
const loadModule = async (modulePath) => {
  // The magic comment must stay attached to the import() call for webpack to honour it.
  const pendingModule = import(/* webpackIgnore: true */ modulePath);
  return pendingModule.catch(() => {
    throw new ImportError(`Unable to import module ${modulePath}`);
  });
};
// Load the module at ../../renderPdf/index.js through loadModule and take its default export.
const renderPdf = (await loadModule('../../renderPdf/index.js')).default
Use require('puppeteer') instead of an import statement for puppeteer.

Synology NodeJS Selenium - Server terminated early with status 127

I have read a lot of similar issues, but none of the suggested fixes work.
I'm on Synology - DSM 7.1 (Debian) and my code is
const chrome = require('selenium-webdriver/chrome');
const chromedriver = require('chromedriver');
const webdriver = require('selenium-webdriver');
//const path = require('chromedriver').path;
const {By, until, Builder} = require('selenium-webdriver');
exports.getInfoFromUrl = async(url) => {
// Lancement du webdriver pour scrapper Bet Assistant
//let service = new chrome.ServiceBuilder().build();
//chrome.setDefaultService(service);
//var driver = new webdriver.Builder(path).withCapabilities(webdriver.Capabilities.chrome()).build();
const options = new chrome.Options();
options.addArguments(
'--no-sandbox',
'headless',
'disable-gpu',
'--disable-dev-shm-usage'
);
var driver = new webdriver.Builder(chromedriver.path)
//.forBrowser('chrome')
.withCapabilities(webdriver.Capabilities.chrome())
.setChromeOptions(options)
.build();
/*chrome.setDefaultService(new chrome.ServiceBuilder(chromedriver.path).build());
var driver = new webdriver.Builder(chromedriver.path)
.setChromeOptions(new chrome.Options().addArguments(['--no-sandbox','-headless', '--disable-dev-shm-usage']))
.build();
*/
driver.get(url);
}
When I execute this code with "node script.js" I get this error :
/volume1/web/betassistant/node_modules/selenium-webdriver/remote/index.js:248
reject(Error(e.message))
^
Error: Server terminated early with status 127
at /volume1/web/betassistant/node_modules/selenium-webdriver/remote/index.js:248:24
at processTicksAndRejections (node:internal/process/task_queues:96:5)
I have tried several samples to run the webdriver, but nothing works. I have seen that some users install "default-jre" (How do I solve "Server terminated early with status 127" when running node.js on Linux?), but I don't have "apt-get", and I don't think a JRE should be needed on DSM.
Any help would be appreciated :)

Cannot read property 'filename' of undefined Using html-pdf with lambda

I'm experiencing problems when I try to create a PDF from HTML using Lambda Function, I'm receiving the error below:
{
"errorType": "TypeError",
"errorMessage": "Cannot read property 'filename' of undefined",
"trace": [
"TypeError: Cannot read property 'filename' of undefined",
" at execPdfToBuffer (/var/task/node_modules/html-pdf/lib/pdf.js:48:21)",
" at ChildProcess.respond (/var/task/node_modules/html-pdf/lib/pdf.js:144:5)",
" at ChildProcess.emit (events.js:314:20)",
" at ChildProcess.EventEmitter.emit (domain.js:483:12)",
" at Process.ChildProcess._handle.onexit (internal/child_process.js:276:12)"
]
}
My code is:
try{
let arq = await new Promise((res,rej)=>{
pdf.create(html,{
format: "Letter",
orientation: "portrait",
phantomPath: '/opt/phantomjs_linux-x86_64'
}).toBuffer(function(err, buffer){
if (err){
rej(false);
}else{
res(buffer);
}
});
});
const params = {
Key: 'teste.pdf',
Body: arq, // <---------
Bucket: 'temp'
};
let S3 = new AWS.S3();
let response = await S3.upload(params).promise();
if (response){
return true;
}else{
return false;
}
}catch(err){
console.log(err);
return false;
}
I've read this topic: html-pdf package is not working on aws lambda
But the proposed solution didn't work for me.
Thank you in advance
In version 3.0.1 you can set the option localUrlAccess: true, and it will work.
I ran into this issue, too. I fixed it by downgrading html-pdf from version 3.0.1 to version 2.2.0.
The release notes for the newer version are less than inspiring: "Not sure this module is even usable without installing phantomjs manually"
Perhaps there's a way to make version 3 work with lambda. But downgrading was a quick win for me.

Selenium webdriver timeouts don't set when usingServer

I have tried many ways to set the timeouts, but the driver still uses the 300-second default timeout.
// Open a Firefox session on the remote Selenium server.
const { Builder } = require('selenium-webdriver');
let driver = await new Builder()
.forBrowser('firefox')
.usingServer('http://selenium-server:4000/wd/hub')
.build();
// Read the capabilities of the session that build() has already created.
const capabilities = await driver.getCapabilities();
// Writes a 'timeouts' entry through the object's `map_` property (the trailing
// underscore suggests it is internal).
// NOTE(review): this runs after the session exists and only touches the local
// object, so presumably nothing is sent to the server - that would match the
// reported behaviour of the default timeout staying in effect. Confirm against
// the selenium-webdriver documentation.
capabilities['map_'].set('timeouts', { implicit: 0, pageLoad: 60000, script: 30000 });
await driver.get(url);
I also tried driver.manage().timeouts(), but got the error: driver.manage(...).timeouts is not a function
"selenium-webdriver": "^4.0.0"
There is no stable version of selenium 4. At least I tried 4.0.0-alpha-6 and this version has a lot of problems.
Let's try 3.6.0 instead.
Then this worked:
// Set the page-load timeout to 60000 (presumably milliseconds - confirm against the selenium-webdriver 3.6.0 docs).
driver.manage().timeouts().pageLoadTimeout(60000);

AWS Lambda Nodejs 12.x using Selenium: Chrome Driver not found

When I run my nodejs 12.x code calling selenium, I am getting the error below.
At the top level of my directory structure, I have my index.js file, a lib folder and a node_modules folder.
I am preparing the code on a Mac and then zipping it, uploading to S3 and running it in Lambda.
Where should my chrome and chromedriver executables be? I don't think I can use npm to install them as I think I need the linux versions for running in Lambda?
This is the error I am getting:
{
"errorType": "Error",
"errorMessage": "The ChromeDriver could not be found on the current PATH. Please download the latest version of the ChromeDriver from http://chromedriver.storage.googleapis.com/index.html and ensure it can be found on your PATH.",
"trace": [
"Error: The ChromeDriver could not be found on the current PATH. Please download the latest version of the ChromeDriver from http://chromedriver.storage.googleapis.com/index.html and ensure it can be found on your PATH.",
" at new ServiceBuilder (/var/task/node_modules/selenium-webdriver/chrome.js:232:13)",
" at getDefaultService (/var/task/node_modules/selenium-webdriver/chrome.js:321:22)",
" at Function.createSession (/var/task/node_modules/selenium-webdriver/chrome.js:695:44)",
" at createDriver (/var/task/node_modules/selenium-webdriver/index.js:155:33)",
" at Builder.build (/var/task/node_modules/selenium-webdriver/index.js:662:16)",
" at Runtime.exports.handler (/var/task/index.js:38:26)",
" at Runtime.handleOnce (/var/runtime/Runtime.js:66:25)"
]
}
This is my code
'use strict';
exports.handler = async (event, context, callback) => {
var webdriver = require('selenium-webdriver');
var chrome = require('selenium-webdriver/chrome');
var builder = new webdriver.Builder().forBrowser('chrome');
var chromeOptions = new chrome.Options();
const defaultChromeFlags = [
'--headless',
'--disable-gpu',
'--window-size=1280x1696', // Letter size
'--no-sandbox',
'--user-data-dir=/tmp/user-data',
'--hide-scrollbars',
'--enable-logging',
'--log-level=0',
'--v=99',
'--single-process',
'--data-path=/tmp/data-path',
'--ignore-certificate-errors',
'--homedir=/tmp',
'--disk-cache-dir=/tmp/cache-dir'
];
chromeOptions.setChromeBinaryPath("/var/task/lib/chrome");
chromeOptions.addArguments(defaultChromeFlags);
builder.setChromeOptions(chromeOptions);
//*****this is the problem line****//
var driver = builder.build();
driver.get(event.url);
driver.getTitle().then(function(title) {
console.log("Page title for " + event.url + " is " + title)
callback(null, 'Page title for ' + event.url + ' is ' + title);
});
driver.quit();
};
Check this:
https://github.com/blackboard/lambda-selenium/blob/master/README.md
Welcome to the lambda-selenium project! The purpose of this project is to show how to use Selenium Webdriver with Amazon Web Services (AWS) Lambda compute service.
or
https://github.com/smithclay/lambdium/blob/master/README.md
Lambdium uses Selenium Webdriver with Headless Chromium to run Webdriver scripts written in JavaScript on AWS Lambda.
You can use the chrome-aws-lambda package to run Chrome headless in your Lambda function. According to its documentation, 512 MB of RAM will be used: https://www.npmjs.com/package/chrome-aws-lambda

Resources