Run Chai and Puppeteer Test in Google Cloud Functions not Working - node.js

I am running a script based on Puppeteer, Chai and Mocha that runs fine locally, but not in Google Cloud Functions.
I can't make Chai work in Google Cloud Functions.
This is the error I get:
and this is the code I am using in index.js:
/**
 * End-to-end Puppeteer scenario: log in, open the export page, choose the
 * export options and send the file.
 *
 * `describe`, `before`, `after`, `beforeEach`, `afterEach` and `it` are not
 * defined in this file; they are expected to be provided by the test runner
 * that loads it.
 *
 * NOTE(review): the steps below rely on `page.waitForTimeout`, which older
 * Puppeteer releases do not provide — confirm the installed Puppeteer version
 * supports it.
 * NOTE(review): several steps sleep for 10-80 s; confirm the runner's
 * per-test timeout is configured to allow that.
 */
const puppeteer = require('puppeteer');
//const expect = require('chai')

const LOGIN_URL = 'https://my.website.com/user/'
const METRICS_URL = 'https://my.website.com/stadistic'
const USERNAME_INPUT = '#mat-input-0'
const PASSWORD_INPUT = '#mat-input-1'
const LOGIN_BUTTON =
  'body > sbnb-root > sbnb-login > section > article > sbnb-login-form > form > button'
const EXPORT_CONTAINER =
  'body > sbnb-root > sbnb-exports > div > main > section.create-export-container'
// The "Reviews" step waits for one element and clicks a different one.
const REVIEWS_READY = `${EXPORT_CONTAINER} > div:nth-child(2) > section:nth-child(2) > div > div:nth-child(1) > div > a`
const REVIEWS_TARGET = `${EXPORT_CONTAINER} > div:nth-child(2) > section:nth-child(2) > div > div.text__small`
const SINGLE_FILE_OPTION = `${EXPORT_CONTAINER} > div:nth-child(8) > div:nth-child(1)`
const SEND_BUTTON = `${EXPORT_CONTAINER} > div.export-final > button`

describe('My First Puppeteer Test', () => {
  let browser
  let page

  // Waits until `selector` is present on the page, then clicks it.
  const clickWhenReady = async (selector) => {
    await page.waitForSelector(selector)
    await page.click(selector)
  }

  // One browser and one page are shared by every step so the login session
  // carries over from step to step.
  before(async function () {
    browser = await puppeteer.launch({
      headless: true,
      devtools: false,
    })
    page = await browser.newPage()
    await page.setDefaultTimeout(100000)
    await page.setDefaultNavigationTimeout(20000)
  })

  after(async function () {
    await browser.close()
  })

  beforeEach(async function () {
    //Runs before each Test step
  })

  afterEach(async function () {
    //Runs after each Test step
  })

  it('login', async function () {
    // Navigate once; the original repeated this identical navigation twice.
    await page.goto(LOGIN_URL)
    await page.waitForSelector(USERNAME_INPUT)
    await page.type(USERNAME_INPUT, 'MyUsername')
    await page.waitForTimeout(1000)
    await page.waitForSelector(PASSWORD_INPUT)
    await page.type(PASSWORD_INPUT, 'MyPassword')
    await page.waitForTimeout(1000)
    await page.click(LOGIN_BUTTON)
    await page.waitForTimeout(10000)
  })

  it('Go To metrics', async function () {
    await page.goto(METRICS_URL)
    //Click Reviews
    await page.waitForSelector(REVIEWS_READY)
    await page.click(REVIEWS_TARGET)
    await page.waitForTimeout(10000)
    // Click Single File
    await clickWhenReady(SINGLE_FILE_OPTION)
    await page.waitForTimeout(10000)
  })

  it('Click send and close', async function () {
    // Click Send File
    await clickWhenReady(SEND_BUTTON)
    await page.waitForTimeout(80000)
  })
})
and this is what my package.json looks like:
{
"name": "sample-http",
"version": "0.0.1",
"scripts": {
"test": "mocha --reporter spec"
},
"dependencies": {
"chai": "^4.3.4",
"mocha": "^9.1.3",
"puppeteer": "^1.20.0"
}
}
How can I make it work in Google Cloud Functions?
Are there any ideas about how to rewrite the code using only Puppeteer so that it works?

Based on the GCP documentation and a related question, it looks like the proper way to test a Cloud Function is to separate the testing and the actual code to be tested. My suggestion is to build your Puppeteer function and deploy it (here is a guide for using Puppeteer with Cloud functions), and then perform tests against it, as shown in the documentation.
As for the specific error you are seeing, it appears that it’s caused by not running the tests through the correct mocha commands. You will be able to run these commands by refactoring your code according to the GCP documentation.

Related

Crontab not running SOME node js scripts

I have a VPS running amazon linux with Node JS. I'm trying to run a node js script that sends a GET request to an API to get data, then sends a POST request to another API with said data. The script works when running using node in the terminal, but not after adding it to sudo crontab -e.
Now I know about the complications with absolute paths when using crontab, I've read many of the stackoverflow questions and I'm fairly certain pathing isn't the issue. To test this, I created a test script that creates a file in the root directory, and it works in crontab. This test script is in the SAME directory as the main script, which has me very confused.
The script uses two packages, Axios and Puppeteer. Here is the script with some information replaced with placeholders:
import axios from "axios";
import puppeteer from "puppeteer";
// Credentials and identifiers are read from the environment when the module loads.
const { API_KEY: apiKey, USER_ID, SITE_KEY: sitekey } = process.env
// Endpoint placeholders.
const surl = 'url'
const url = 'url'
/**
 * Opens `url` in a headless browser, writes `data` into the `data` input
 * and clicks the submit button.
 *
 * The browser is always closed before the returned promise settles: after a
 * 5 second grace period on success, immediately if any step throws.
 *
 * @param {string} data - Value to place in the form's `data` input.
 * @returns {Promise<void>} Settles once the browser has been closed.
 */
async function webSubmit (data) {
  const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] });
  try {
    const page = await browser.newPage();
    await page.goto(url);
    await page.$eval("input[name='data']", (el, data) => el.value = data, data)
    await page.click('#submitButton')
    // Keep the page open for 5 s after the click before tearing the browser down.
    await new Promise(resolve => setTimeout(resolve, 5000))
  } finally {
    // Runs on success and on failure, so no browser process is left behind.
    await browser.close()
  }
}
/**
 * Requests `url` and, while the response body is 'NOT_READY', retries every
 * 10 seconds. Once a result arrives, its payload (the body minus its first
 * three characters) is handed to `webSubmit`.
 *
 * Errors from the request and from the submission are logged, not rethrown.
 *
 * @param {string} id - Identifier carried through to each retry.
 */
function checkStatus (id) {
  axios.get(url)
    .then(response => {
      if (response.data === 'NOT_READY') {
        setTimeout(() => {
          checkStatus(id)
        }, 10000)
        return undefined
      }
      const data = response.data.slice(3)
      // Return the promise so a failed submission reaches the .catch below
      // instead of becoming an unhandled rejection.
      return webSubmit(data)
    })
    .catch(e => console.log(e))
}
/**
 * Sends the initial POST to `url`, takes the id from the response body
 * (everything after its first three characters) and starts polling for the
 * result 30 seconds later. Failures are logged.
 */
function post() {
  const scheduleStatusCheck = (response) => {
    const id = response.data.slice(3)
    setTimeout(() => {
      checkStatus(id)
    }, 30000)
  }
  const logFailure = (error) => console.log(error)

  axios.post(url)
    .then(scheduleStatusCheck)
    .catch(logFailure)
}

// Script entry point.
post()
Test Script:
import * as fs from 'fs'
// Writes a small file with a randomised name under "/" and reports the outcome.
const targetPath = "/vote" + Math.random()
fs.writeFile(targetPath, "Hi", (writeError) => {
  if (writeError) {
    console.log(writeError)
    return
  }
  console.log("File saved")
})
Crontab Jobs:
* * * * * /opt/bitnami/node/bin/node /home/bitnami/app/index.js
* * * * * cd /home/bitnami/app && /opt/bitnami/node/bin/node index.js
Both these jobs work if I change the index.js script to test.js, but not as index.js.
Any help would be appreciated, thank you!

Puppeteer in headless false mode on ubuntu error

I have this web-scraping function
/**
 * Opens the Finviz map page, clicks the "Fullscreen" and "Share map" buttons
 * and logs the `src` of the `img.w-full` element.
 *
 * Any error is logged rather than rethrown, and the browser is closed
 * whether or not the steps succeed.
 */
async () => {
  try {
    const browser = await puppeteer.launch({
      headless: false,
      ignoreHTTPSErrors: true,
      // The second flag was misspelled ("--disabled-setupid-sandbox").
      args: ['--no-sandbox', '--disable-setuid-sandbox']
    })
    try {
      const page = await browser.newPage();
      await page.goto('https://finviz.com/map.ashx');
      await page.waitForTimeout(3000);
      await page.click('.content #root > div > div:nth-child(3) > button:nth-child(1)'); // Fullscreen
      await page.click('.content #root > div > div:nth-child(3) > button:nth-child(2)'); // Share map
      await page.waitForTimeout(3000);
      const imageUrl = await page.$eval('img.w-full', el => el.src);
      console.log(imageUrl);
    } finally {
      // Close the browser even when a step above throws.
      await browser.close();
    }
  } catch (err) {
    console.log(err);
  }
};
When I try to run it on ubuntu I get an error
Missing X server or $DISPLAY The platform failed to initialize.
Exiting. NaCl helper process running without a sandbox!
If I try to run it in headless mode, I get an error
Error: No node found for selector: .content #root > div > div:nth-child(3) > button:nth-child(1)
On my local machine the script runs fine in mode headless : true
How can you get out of this situation?

Firebase function will not deploy when requiring outside packages

I am having trouble deploying my web scraping function and do not know how to fix the issue.
Index.js
const functions = require("firebase-functions");
const pup = require("puppeteer");
const WebsiteData = require('./schema');
exports.scrape = functions
.runWith({ memory: '1GB' })
.pubsub.schedule('0 0 * * *')
.onRun(async (context) => {
const browser = await pup.launch({
headless: true,
args: ['--no-sandbox', '--disable-setuid-sandbox']
});
const page = await browser.newPage();
await page.setUserAgent(
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36'
)
var pages = [
{
name: 'aave-protocol',
fee:'0.09',
url: 'https://aave.com',
},
{
name: 'uniswap-v2',
fee:'0.30',
url: 'https://uniswap.org',
},
{
name: 'dydx',
fee:'0.00',
url: 'https://dydx.exchange',
},
{
name: 'dodo-bsc',
fee:'0.00',
url: 'https://dodoex.io',
},
{
name: 'cream-finance',
fee:'0.03',
url: 'https://cream.finance',
},
{
name: 'multiplier-finance',
fee:'0.06',
url: 'https://multiplier.finance',
}
]
var result = [];
for (var each in pages) {
await page.goto(`https://www.dapp.com/app/${pages[each].name}`, { waitUntil: 'networkidle0' })
var users = await page.evaluate(() => {
return document.querySelector('#app > div.app-container > div.root.dapp-detail > section.detail-page-outer > div > div.left-sec > div.stats-card > div.chart-section > div:nth-child(2) > div:nth-child(4) > span.value > span').innerText
})
var volume = await page.evaluate(() => {
return document.querySelector('#app > div.app-container > div.root.dapp-detail > section.detail-page-outer > div > div.left-sec > div.stats-card > div.chart-section > div:nth-child(3) > div:nth-child(4) > span.value > span:nth-child(2)').innerText
})
var obj = { "name": `${pages[each].name}`, "nDUniqueUsers": `${users}`, "nDTransactions": `${volume}`, "fee": `${pages[each].fee}`, "url": `${pages[each].url}`};
result.push(obj);
}
await browser.close();
const websiteMongo = new WebsiteData({ sites: result });
await websiteMongo.save(function(err,data){
if (err) {
console.log(err);
return null;
}
});
console.log("Done.")
return null;
});
The function is meant to use puppeteer to open up around 5 pages collect the info and upload the data to a MongoDB database. The code works perfectly in local host, but when I run firebase deploy, I get an error. Here is the error message: "Function failed on loading user code. This is likely due to a bug in the user code. Error message: Error: please examine your function logs to see the error cause: https://cloud.google.com/functions/docs/monitoring/logging#viewing_logs. Additional troubleshooting documentation can be found at https://cloud.google.com/functions/docs/troubleshooting#logging. Please visit https://cloud.google.com/functions/docs/troubleshooting for in-depth troubleshooting documentation."}"
I know that the problem consists of these two lines:
const pup = require("puppeteer");
const WebsiteData = require('./schema');
When I comment out those two lines I can deploy the function. Here is the code for schema.js:
var mongoose = require('mongoose')
// The connection is requested as soon as this module is loaded.
mongoose.connect("URI");

const { Schema } = mongoose

// Every per-site field is a mandatory string; each call returns a fresh descriptor.
const requiredString = () => ({
  type: String,
  required: true,
})

// One document holds the list of scraped sites.
const websiteData = new Schema({
  sites: [{
    name: requiredString(),
    nDUniqueUsers: requiredString(),
    nDTransactions: requiredString(),
    fee: requiredString(),
    url: requiredString(),
  }],
})

const WebsiteData = mongoose.model("SiteData", websiteData);
module.exports = WebsiteData
I do not know how to fix this. Any help would be greatly appreciated.
I suspect you haven't included "puppeteer" in the dependencies section of the package.json deployed alongside your function. Or possibly you've inadvertently included it in the devDependencies section instead of dependencies.
// package.json
{
// ...
"dependencies": {
"puppeteer": "^13.5.1" // be sure you have this
}
}

Node puppeteer scraping YouTube and encountering redirected you too many times

I'm trying to scrape a YouTube playlist's URL using Node / Puppeteer. It was working, but now I'm getting an ERR_TOO_MANY_REDIRECTS error. I can still access the page using Chrome from my desktop.
I've tried using the chromium browser and chrome browsers. I've also tried using the puppeteer-extra stealth plugin and the random-useragent.
This is how my code stands at the moment:
// Launch a visible Chrome, open a page with a random user agent, load the
// playlist and wait for the cookie-consent button.
const browser = await puppeteer.launch({
  // NOTE(review): `stealth` does not look like a documented launch option — confirm it has any effect.
  stealth: true,
  // The comma must come before the comment; "false // true," commented it out
  // and made the object literal a syntax error.
  headless: false, // true
  executablePath: "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
  args: [
    '--disable-notifications', '--disable-features=site-per-process'
  ],
  defaultViewport: null
});
const page = await browser.newPage()
await page.setUserAgent(random_useragent.getRandom());
// '<playlist-url>' is a placeholder for the real playlist address.
await page.goto('<playlist-url>', {
  waitUntil: 'networkidle2',
  timeout: 0
})
// The attribute selector was missing its closing "]".
await page.waitForSelector('button[aria-label="Agree to the use of cookies and other data for the purposes described"]')
It bombs at the page.goto call. And it happens even if I try going to https://www.youtube.com.
Any suggestions for what I should try next? I tried a proxy server but couldn't get it to work. I suspect I need a proxy to actually route through.
If all you need is playlist IDs for a given channel, it's possible to query a feed at:
https://youtube.com/feeds/videos.xml?channel_id=<Channel ID>
To get IDs of videos you can query a feed at:
https://youtube.com/feeds/videos.xml?playlist_id=PLAYLIST_ID
You can get playlist (and Mix) links from YouTube as in the code example below (also check the full code in the online IDE):
const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
puppeteer.use(StealthPlugin());
// Text to search YouTube for.
const searchString = "java course";
// Values shared by the page-side scraper and the search-URL builder.
const requestParams = {
  baseURL: `https://www.youtube.com`,
  // encodeURIComponent (not encodeURI) because this value is placed inside a
  // query parameter: characters such as "&", "=", "+" and "#" must be escaped
  // too, otherwise they would alter the query string.
  encodedQuery: encodeURIComponent(searchString), // what we want to search for in URI encoding
};
/**
 * Collects the Mix and playlist cards from the currently loaded results page.
 *
 * The extraction runs inside the page via `page.evaluate`, so every helper it
 * needs is declared within the evaluated callback.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`.
 * @returns {Promise<object[]>} Mix entries first, then playlist entries.
 */
async function fillPlaylistsDataFromPage(page) {
  return page.evaluate(({ baseURL }) => {
    // Trimmed text of a node, or undefined when the node is missing.
    const trimmedText = (node) => node?.textContent.trim();
    // Prefixes a site-relative href with the site origin.
    const absolute = (href) => `${baseURL}${href}`;
    // href attribute of a node, or undefined when the node is missing.
    const hrefOf = (node) => node?.getAttribute("href");
    // Child video links listed inside a Mix or playlist card.
    const childVideos = (card) =>
      Array.from(card.querySelectorAll("ytd-child-video-renderer a"), (anchor) => ({
        title: trimmedText(anchor.querySelector("#video-title")),
        link: absolute(anchor.getAttribute("href")),
        length: trimmedText(anchor.querySelector("#length")),
      }));
    const thumbnailOf = (card) => card.querySelector("a#thumbnail #img")?.getAttribute("src");

    const mixes = Array.from(document.querySelectorAll("#contents > ytd-radio-renderer"), (card) => ({
      title: trimmedText(card.querySelector("a > h3 > #video-title")),
      link: absolute(hrefOf(card.querySelector("a#thumbnail"))),
      videos: childVideos(card),
      thumbnail: thumbnailOf(card),
    }));

    const playlists = Array.from(document.querySelectorAll("#contents > ytd-playlist-renderer"), (card) => ({
      title: trimmedText(card.querySelector("a > h3 > #video-title")),
      link: absolute(hrefOf(card.querySelector("a#thumbnail"))),
      channel: {
        name: trimmedText(card.querySelector("#channel-name a")),
        link: absolute(hrefOf(card.querySelector("#channel-name a"))),
      },
      videoCount: trimmedText(
        card.querySelector("yt-formatted-string.ytd-thumbnail-overlay-side-panel-renderer")
      ),
      videos: childVideos(card),
      thumbnail: thumbnailOf(card),
    }));

    return mixes.concat(playlists);
  }, requestParams);
}
/**
 * Launches a browser, opens the YouTube results page for the configured
 * query, waits for the first video result and returns the Mix/playlist data
 * scraped from that page.
 *
 * The browser is closed whether scraping succeeds or throws.
 *
 * @returns {Promise<object[]>} Entries produced by `fillPlaylistsDataFromPage`.
 */
async function getYoutubeSearchResults() {
  const browser = await puppeteer.launch({
    headless: false,
    args: ["--no-sandbox", "--disable-setuid-sandbox"],
  });
  try {
    const page = await browser.newPage();
    // Named searchUrl so it does not shadow the global URL constructor.
    const searchUrl = `${requestParams.baseURL}/results?search_query=${requestParams.encodedQuery}`;
    await page.setDefaultNavigationTimeout(60000);
    await page.goto(searchUrl);
    await page.waitForSelector("#contents > ytd-video-renderer");
    return await fillPlaylistsDataFromPage(page);
  } finally {
    // Runs on both paths so a navigation or selector timeout does not leave
    // the browser running.
    await browser.close();
  }
}

getYoutubeSearchResults()
  .then(console.log)
  .catch((err) => {
    // Report the failure and signal it through the exit code.
    console.error(err);
    process.exitCode = 1;
  });
📌Note: to get thumbnail you need to scroll playlist into view (using .scrollIntoView() method).
Output:
[
{
"title":"Java Complete Course | Placement Series",
"link":"https://www.youtube.com/watch?v=yRpLlJmRo2w&list=PLfqMhTWNBTe3LtFWcvwpqTkUSlB32kJop",
"channel":{
"name":"Apna College",
"link":"https://www.youtube.com/c/ApnaCollegeOfficial"
},
"videoCount":"35",
"videos":[
{
"title":"Introduction to Java Language | Lecture 1 | Complete Placement Course",
"link":"https://www.youtube.com/watch?v=yRpLlJmRo2w&list=PLfqMhTWNBTe3LtFWcvwpqTkUSlB32kJop",
"length":"18:46"
},
{
"title":"Variables in Java | Input Output | Complete Placement Course | Lecture 2",
"link":"https://www.youtube.com/watch?v=LusTv0RlnSU&list=PLfqMhTWNBTe3LtFWcvwpqTkUSlB32kJop",
"length":"42:36"
}
],
"thumbnail":null
},
{
"title":"Java Tutorials For Beginners In Hindi",
"link":"https://www.youtube.com/watch?v=ntLJmHOJ0ME&list=PLu0W_9lII9agS67Uits0UnJyrYiXhDS6q",
"channel":{
"name":"CodeWithHarry",
"link":"https://www.youtube.com/c/CodeWithHarry"
},
"videoCount":"113",
"videos":[
{
"title":"Introduction to Java + Installing Java JDK and IntelliJ IDEA for Java",
"link":"https://www.youtube.com/watch?v=ntLJmHOJ0ME&list=PLu0W_9lII9agS67Uits0UnJyrYiXhDS6q",
"length":"19:00"
},
{
"title":"Basic Structure of a Java Program: Understanding our First Java Hello World Program",
"link":"https://www.youtube.com/watch?v=zIdg7hkqNE0&list=PLu0W_9lII9agS67Uits0UnJyrYiXhDS6q",
"length":"14:09"
}
],
"thumbnail":null
}
]
You can read more about scraping YouTube playlists from blog post Web scraping YouTube secondary search results with Nodejs.

Can you run multiple tests in one browser context Playwright Javascript?

Is it possible to run multiple tests in one browser window for playwright/test?
currently it will hit browser.close(); after every test even though they are testing on the same page which puts a lot of extra time on the tests.
// Runs once before the tests: creates a context and a page and navigates it.
// The page created here is a local of this hook only.
test.beforeAll(async ({ browser }) => {
  const ctx = await browser.newContext();
  const setupPage = await ctx.newPage();
  await setupPage.goto('https://example.com');
});

// Each test below takes `page` from its own argument.
test('nav test', async ({ page }) => {
  const navTitle = await page.innerText('.navbar__title');
  expect(navTitle).toBe('Playwright');
});

test('header test', async ({ page }) => {
  const headerTitle = await page.innerText('.navbar__header');
  expect(headerTitle).toBe('Playwright');
});
When you create a test like this, test('header test', async ({page}) => {, you're specifying page and telling it to create a new page context.
Remove the page from the test - and share the one you create from your .beforeAll
Try this:
// Shares one context and one page across every test in the group: both are
// created once in beforeAll and released once in afterAll.
test.describe('1 page multiple tests', () => {
  let context;
  let page;

  test.beforeAll(async ({ browser }) => {
    context = await browser.newContext();
    page = await context.newPage();
    await page.goto('https://example.com');
  });

  test.afterAll(async () => {
    // The original `browser.close;` only read the property and never called
    // it. Close the context created in beforeAll, which also closes its page.
    await context.close();
  });

  // The tests take no `page` argument, so they use the shared `page` above.
  test('nav test', async () => {
    const name = await page.innerText('h1');
    expect(name).toContain('Example');
  });

  test('header test', async () => {
    const name = await page.innerText('h1');
    expect(name).toContain('Domain');
  });
});
Run it like this :
npx playwright test .\StackTests_SinglePage.spec.ts --headed
(you can see the name of my file in there)
You might need to toggle it down to 1 worker if it tries to parallel run your test.
For me, that code opens 1 browser and 1 page, passes both tests, then closes out.
Can you try wrapping the tests in a describe block, so that they are treated as a group and not as individual tests?
// Groups both checks under one describe block; each test still takes `page`
// from its own argument.
test.describe('two tests for same page', () => {
  test('nav test', async ({ page }) => {
    const navTitle = await page.innerText('.navbar__title');
    expect(navTitle).toBe('Playwright');
  });

  test('header test', async ({ page }) => {
    const headerTitle = await page.innerText('.navbar__header');
    expect(headerTitle).toBe('Playwright');
  });
});

Resources