Puppeteer with an AWS Lambda function to scrape data: Failed to launch chrome - node.js

I am facing an issue while using Puppeteer with AWS Lambda: "Failed to launch chrome!" (Node version 12.x).
{ errorType: 'Error',
errorMessage:
'Failed to launch chrome! spawn ./node_modules/puppeteer/.local-chromium/linux-722234/chrome-linux/chrome ENOENT\n\n\nTROUBLESHOOTING: https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md\n',
trace:
[ 'Error: Failed to launch chrome! spawn ./node_modules/puppeteer/.local-chromium/linux-722234/chrome-linux/chrome ENOENT',
'',
'',
'TROUBLESHOOTING: https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md',
'',
' at onClose (/var/task/node_modules/puppeteer-core/lib/Launcher.js:342:14)',
' at ChildProcess.<anonymous> (/var/task/node_modules/puppeteer-core/lib/Launcher.js:333:64)',
' at ChildProcess.emit (events.js:223:5)',
' at Process.ChildProcess._handle.onexit (internal/child_process.js:270:12)',
' at onErrorNT (internal/child_process.js:456:16)',
' at processTicksAndRejections (internal/process/task_queues.js:81:21)' ] }

You should use chrome-aws-lambda (https://github.com/alixaxel/chrome-aws-lambda).
Just change the import to:
const chromium = require('chrome-aws-lambda');
and change the Puppeteer launch call to:
browser = await chromium.puppeteer.launch({
args: chromium.args,
defaultViewport: chromium.defaultViewport,
executablePath: await chromium.executablePath,
headless: chromium.headless,
ignoreHTTPSErrors: true,
});

Related

Unable to load script on puppeteer AWS Lambda

I'm trying to add an external JavaScript file to a page with Puppeteer on Lambda.
const chromium = require('chrome-aws-lambda');
const browser = await chromium.puppeteer.launch({
....
});
const webPage = await browser.newPage();
await webPage.goto(url, { waitUntil: 'networkidle2', timeout: 0 });
await webPage.addScriptTag({
url: `https://code.jquery.com/jquery-3.6.0.min.js`,
});
But I am unable to load the script:
ERROR Error: Loading script from https://code.jquery.com/jquery-3.6.0.min.js failed
at DOMWorld.addScriptTag (/var/task/node_modules/puppeteer-core/lib/cjs/puppeteer/common/DOMWorld.js:171:23)
at processTicksAndRejections (internal/process/task_queues.js:95:5)
at async I (/var/task/src/functions/preparePages/handler.js:20:3689)
at async Runtime.E [as handler] (/var/task/src/functions/preparePages/handler.js:20:5400)

Puppeteer Fails to launch the browser

After creating a directory with an index.js file with the following code:
const puppeteer = require('puppeteer');
/**
 * Launches Chrome through Puppeteer, opens https://example.com and saves a
 * screenshot of the page to example.png.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 *   Rejects if launching the browser, navigating or taking the screenshot fails.
 */
async function main() {
  // NOTE(review): headless: false presumably needs a display to show the
  // browser window; on a system without a GUI, try headless: true — confirm.
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--no-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://example.com');
    await page.screenshot({
      path: 'example.png',
    });
  } finally {
    // Close the browser even when navigation or the screenshot throws, so a
    // failed run does not leave a Chrome process behind.
    await browser.close();
  }
}
// Start the script
main();
and then running npm init, and npm install puppeteer, the following error is returned:
node index.js
/mnt/c/Users/trgre/OneDrive/Desktop/puppeteer
test/node_modules/puppeteer/lib/cjs/puppeteer/node/BrowserRunner.js:214
reject(new Errors_js_1.TimeoutError(`Timed out after ${timeout} ms while trying to
connect to the browser! Only Chrome at revision r${preferredRevision} is guaranteed to
work.`));
^
TimeoutError: Timed out after 30000 ms while trying to connect to the browser! Only Chrome at
revision r901912 is guaranteed to work.
at Timeout.onTimeout (/mnt/c/Users/trgre/OneDrive/Desktop/puppeteer
test/node_modules/puppeteer/lib/cjs/puppeteer/node/BrowserRunner.js:214:20)
at listOnTimeout (node:internal/timers:557:17)
at processTimers (node:internal/timers:500:7)
Node.js v17.1.0
Any ideas on what to do in order to run a Puppeteer program? I am on Windows, using Ubuntu 20.

Puppeteer error while running in ubuntu machine

when I run puppeteer on Ubuntu I get this error:
UnhandledPromiseRejectionWarning: Error: Unable to launch browser, error message: Failed to launch the browser process!
[2098647:2098647:0520/162023.317120:ERROR:vaapi_wrapper.cc(594)] Could not get a valid VA display
[2098647:2098647:0520/162023.317252:ERROR:gpu_init.cc(426)] Passthrough is not supported, GL is egl
TROUBLESHOOTING: https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md
at Cluster.<anonymous> (/root/Desktop/Copart/node_modules/puppeteer-cluster/dist/Cluster.js:119:23)
at Generator.throw (<anonymous>)
at rejected (/root/Desktop/Copart/node_modules/puppeteer-cluster/dist/Cluster.js:6:65)
at process._tickCallback (internal/process/next_tick.js:68:7)
Here are my puppeteer options:
pupOptions: {
headless: false,
args: [
"--incognito",
"--disable-gpu",
"--disable-dev-shm-usage",
"--disable-setuid-sandbox",
"--no-first-run",
"--no-sandbox",
"--no-zygote",
],
defaultViewport: null,
slowMo: 10,
sameDomainDelay: 1000,
retryDelay: 3000,
workerCreationDelay: 3000,
timeout: 30000000,
userDataDir: "/root/.config/google-chrome",
executablePath: "/opt/google/chrome/google-chrome",
}
Also, here are the plugins that I use:
const puppeteer = require("puppeteer-extra");
const RecaptchaPlugin = require("puppeteer-extra-plugin-recaptcha");
I tried killing the Google Chrome instance before running the code, but it still didn't work.
Also, I would like to mention that it works when using "puppeteer-cluster"
Anyone have any idea or solution for this? Thanks a lot for the help!
I had to remove "--disable-gpu" from args.
If you are running puppeteer on Ubuntu server, you should try turning
headless: false
to
headless: true
If there is no GUI on your system, then it can't show you the browser.

chrome-aws-lambda Amazon linux 2 getting Error: socket hang up

Background:
I have this code working on the Amazon Linux AMI with Node 8 for Lambda. Since Amazon has discontinued Node 8 in Lambda, I have been working on transitioning to Node 10, which now uses Amazon Linux 2. Since upgrading, I have been unable to get past the "Error: socket hang up" issue.
Version sets
Node v10.18.1
chrome-aws-lambda 2.0.2
puppeteer 2.0.0
Amazon Linux release 2 (Karoo)
Snippet of code:
console.log('start 1')
try {
// create the browser session and page. Then go to url
const browser = await puppeteer.launch({
// devtools: true
args: chrome.args,
defaultViewport: chrome.defaultViewport,
executablePath: await chrome.executablePath,
headless: chrome.headless,
})
console.log('start 2')
const page = await browser.newPage()
console.log('starting browser logic')
// set the page timeout in milliseconds, currently 2
page.setDefaultTimeout(pageTimeOut)
// goes to webpage waits for network traffic to die off
const [startPage] = await Promise.all([
page.goto(url),
page.waitForNavigation({waitUntil: "networkidle0"})
])
Error:
The Error occurs at await puppeteer.launch
bash-4.2# node run.js
starting check: LoginCheck
start 1
ErrorEvent {
target:
WebSocket {
domain: null,
_events:
[Object: null prototype] { open: [Function], error: [Function] },
_eventsCount: 2,
_maxListeners: undefined,
readyState: 3,
protocol: '',
_binaryType: 'nodebuffer',
_closeFrameReceived: false,
_closeFrameSent: false,
_closeMessage: '',
_closeTimer: null,
_closeCode: 1006,
_extensions: {},
_receiver: null,
_sender: null,
_socket: null,
_isServer: false,
_redirects: 0,
url:
'ws://127.0.0.1:41553/devtools/browser/cd72d3b1-e70e-4a34-aa65-351ef1857587',
_req: null },
type: 'error',
message: 'socket hang up',
error:
{ Error: socket hang up
at createHangUpError (_http_client.js:323:15)
at Socket.socketOnEnd (_http_client.js:426:23)
at Socket.emit (events.js:203:15)
at Socket.EventEmitter.emit (domain.js:448:20)
at endReadableNT (_stream_readable.js:1145:12)
at process._tickCallback (internal/process/next_tick.js:63:19) code: 'ECONNRESET' } }
I was able to resolve this issue by installing the following Amazon Linux 2 packages:
pango.x86_64 libXcomposite.x86_64 libXcursor.x86_64 libXdamage.x86_64 libXext.x86_64 libXi.x86_64 libXtst.x86_64 cups-libs.x86_64 libXScrnSaver.x86_64 libXrandr.x86_64 alsa-lib.x86_64 gtk3.x86_64 xorg-x11-fonts-100dpi xorg-x11-utils xorg-x11-fonts-Type1 xorg-x11-fonts-misc xorg-x11-fonts-cyrillic xorg-x11-fonts-75dpi ipa-gothic-fonts atk.x86_64 GConf2.x86_64 avahi.x86_64

python-shell from a node application

Question:
How do I run my python script in my node app?
This works:
From command line, I run this, and it works.
python generatePersonTerraform.py -s http://localhost:8080/api/person/239/exportPersonGeneration
Code that does not work in Node server.js
var PythonShell = require('python-shell');
...
/**
 * Runs generatePersonTerraform.py through python-shell and logs the error
 * argument passed to the completion callback.
 *
 * @param request  Not read by this function.
 * @param response Not read by this function; nothing is written to it here.
 */
var runPythonRoutine = function (request, response) {
var PythonShell = require('python-shell');
var options = {
mode: 'text',
pythonPath: 'python',
// NOTE(review): pythonOptions presumably go to the python interpreter rather
// than to the script, so '-s' here is not the script's own -s flag that the
// working command line uses — confirm against the python-shell docs.
pythonOptions: ['-s'],
scriptPath: '.',
args: ['http://localhost:8080/api/person/135/exportPersonGeneration']
};
// Only err is logged; results is received but never used.
PythonShell.run('generatePersonTerraform.py', options, function (err, results) {
console.log(err);
});
}
Error:
at PythonShell.parseError (/root/my-app/node_modules/python-shell/index.js:191:17)
at terminateIfNeeded (/root/my-app/node_modules/python-shell/index.js:98:28)
at ChildProcess.<anonymous> (/root/my-app/node_modules/python-shell/index.js:89:9)
at emitTwo (events.js:106:13)
at ChildProcess.emit (events.js:194:7)
at Process.ChildProcess._handle.onexit (internal/child_process.js:215:12)
executable: 'python',
options: [ '-s' ],
script: 'generatePersonTerraform.py',
args: [ 'http://localhost:8080/api/person/239/exportPersonGeneration' ],
exitCode: 1 }
Note
I have been trying to use https://www.npmjs.com/package/python-shell
EDIT 1
I changed the options to:
var options = {
mode: 'text',
pythonPath: 'python',
pythonOptions: [],
scriptPath: '.',
args: ['-s', 'http://localhost:8080/api/serviceType/135/exportPluginGeneration']
};
and got this error:
at PythonShell.parseError (/root/my-app/node_modules/python-shell/index.js:191:17)
at terminateIfNeeded (/root/my-app/node_modules/python-shell/index.js:98:28)
at ChildProcess.<anonymous> (/root/my-app/node_modules/python-shell/index.js:89:9)
at emitTwo (events.js:106:13)
at ChildProcess.emit (events.js:194:7)
at Process.ChildProcess._handle.onexit (internal/child_process.js:215:12)
executable: 'python',
options: null,
script: 'generatePersonTerraform.py',
args:
[ '-s',
'http://localhost:8080/api/person/135/exportPersonGeneration' ],
exitCode: 0 }
However, the script also ran and worked; it just stalls the Node app. So, if I can figure out how to make it not stall, then I am all good. The Python script seems to build all the files it is supposed to.
'-s' should be in args, not pythonOptions: pythonOptions are passed to the Python interpreter itself, while args are passed to your script.

Resources