Record google meet audio using puppeteerr - node.js

I am trying to record a google meet using puppeteer, so far I am able to join the meet as a bot and record the video. But I am not able to record the audio, I tried few packages like puppeteer-screen-recorder, but it is creating an empty file for audio.
Here is my script code
Pls change the meeting URL to replicate and test
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { executablePath } from 'puppeteer';
import fs from 'fs';
import AudioRecorder from 'node-audiorecorder';
import * as PuppeteerScreenRecorder from 'puppeteer-screen-recorder'
import RecordRTC from 'recordrtc';
import { getStream } from "puppeteer-stream";
const file = fs.createWriteStream("./test.webm");
puppeteer.use(StealthPlugin());
(async () => {
const browser = await puppeteer.launch({
headless: false,
defaultViewport: null,
devtools: false,
args: [
"--window-size=1920,1080",
"--window-position=1921,0",
"--autoplay-policy=no-user-gesture-required",
],
ignoreDefaultArgs: ["--mute-audio"],
executablePath: executablePath(),
});
const page = await browser.newPage();
const navigationPromise = page.waitForNavigation();
const context = browser.defaultBrowserContext();
await context.overridePermissions(
"https://meet.google.com/", ["microphone", "camera", "notifications"]
);
// going to Meet after signing in
await page.waitForTimeout(2500);
await page.goto('https://meet.google.com/cmp-zzwo-adb' + '?hl=en', {
waitUntil: 'networkidle0',
timeout: 10000,
});
await navigationPromise;
await page.waitForSelector('input[aria-label="Your name"]', {
visible: true,
timeout: 50000,
hidden: false,
});
// turn off cam using Ctrl+E
await page.waitForTimeout(1000);
await page.keyboard.down('ControlLeft');
await page.keyboard.press('KeyE');
await page.keyboard.up('ControlLeft');
await page.waitForTimeout(1000);
//turn off mic using Ctrl+D
await page.waitForTimeout(1000);
await page.keyboard.down('ControlLeft');
await page.keyboard.press('KeyD');
await page.keyboard.up('ControlLeft');
await page.waitForTimeout(1000);
//click on input field to enter name
await page.click(`input[aria-label="Your name"]`);
//enter name
await page.type(`input[aria-label="Your name"]`, 'Bot');
//click on ask to join button
await page.click(
`button[class="VfPpkd-LgbsSe VfPpkd-LgbsSe-OWXEXe-k8QpJ VfPpkd-LgbsSe-OWXEXe-dgl2Hf nCP5yc AjY5Oe DuMIQc LQeN7 jEvJdc QJgqC"]`
);
const stream = await getStream(page, { audio: true, mimeType: "audio/mp3" });
console.log("recording");
stream.pipe(file);
// setTimeout(async () => {
// await stream.destroy();
// file.close();
// console.log("finished");
// }, 1000 * 30);
const recorder = new PuppeteerScreenRecorder.PuppeteerScreenRecorder(page);
await recorder.start('./report/video/simple.webm'); // supports extension - mp4, avi, webm and mov
// const devices = await page.evaluate(() =>
// navigator.mediaDevices.getUserMedia(
// { audio: true }
// )
// )
// let x = await navigator.mediaDevices.getUserMedia({audio: true});
// console.log(x, "Available devices");
// navigator.mediaDevices.getUserMedia({
// video: false,
// audio: true
// }).then(async function (stream) {
// let recorder = RecordRTC(stream, {
// type: 'audio'
// });
// recorder.startRecording();
// const sleep = m => new Promise(r => setTimeout(r, m));
// await sleep(3000);
// recorder.stopRecording(function () {
// let blob = recorder.getBlob();
// invokeSaveAsDialog(blob);
// });
// });
setTimeout(async () => {
await recorder.stop();
await stream.destroy();
file.close();
console.log("finished");
await browser.close();
}, 15000)
})();
Here is my package.json file
{
"name": "own",
"version": "1.0.0",
"description": "",
"main": "index.js",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"node-audiorecorder": "^3.0.0",
"puppeteer": "^19.6.3",
"puppeteer-extra": "^3.3.4",
"puppeteer-extra-plugin-stealth": "^2.11.1",
"puppeteer-screen-recorder": "^2.1.2",
"puppeteer-stream": "^2.1.4",
"recordrtc": "^5.6.2",
"screencap": "^1.0.0",
"ws": "^8.12.1"
}
}
I tried using different available packages of puppeteer but it's creating an empty file only for audio. I was able to record the video with different packages but audio is not capturing. I want to record the audio of meeting attendees.

Related

how to trigger chrome extension function from puppeteer Cluster

I need to trigger a function in background.js in extension from puppeteer Cluster
here is my code :
const wait = (ms) => new Promise(resolve => setTimeout(resolve, ms));
(async () => {
const puppeteer = addExtra(puppeteerStream);
const pathToExtension = "C:/Users/Proj/test-extension";
const extensionId = 'lpnlgnlkloegindjk443sfdbjipo';
const cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_PAGE,
maxConcurrency: 50,
timeout: 86400000,
retryLimit: 10,
retryDelay: 1000,
puppeteer: puppeteer,
executablePath: executablePath(),
puppeteerOptions: {
executablePath: "C:/Program Files/Google/Chrome/Application/chrome.exe",
timeout: 120000,
headless: false,
pipe: true,
defaultViewport:null,
ignoreDefaultArgs: [
],
args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-notifications", "--allow-http-screen-capture", "--autoplay-policy=no-user-gesture-required",
'--disable-gpu',
'--disable-dev-shm-usage',
'--no-first-run',
'--enable-usermedia-screen-capturing',
'--auto-select-desktop-capture-source=pickme',
'--no-zygote',
`--whitelisted-extension-id=${extensionId}`,
'--enable-remote-extensions',
'--start-maximized',
`--disable-extensions-except=${pathToExtension}`,
`--load-extension=${pathToExtension}`,
]
}
});
// setup the function to be executed for each request
cluster.task(async ({ page, data: url }) => {
return start(page, url);
});
app.get('/', async function (req, res) {
res.send("index page");
});
app.get('/start', async function (req, res) { // expects URL to be given by ?url=...
try {
// run the task function for the URL
const resp = cluster.queue(req.query.url);
res.send(resp);
} catch (err) {
res.end('Error: ' + err.message);
}
});
async function start(page, Url) {
const context = page.browser().defaultBrowserContext();
await page.goto(Url);
}
})();
now I can run mt puppeteer and click a button in my extension manually but what I need is call the function (same as button click in extension) from puppeteer cluster automatically after await page.goto(Url);
I can do that when I'm using puppeteer without cluster like :
const browser = await puppeteer.launch({
headless: false,
args: [
`--whitelisted-extension-id=${extensionId}`,
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
`--disable-extensions-except=${pathToExtension}`,
`--load-extension=${pathToExtension}`,
],
});
const page = await browser.newPage();
await page.goto('https://www.youtube.com/', { waitUntil: 'networkidle2' });
const targets = await browser.targets();
const backgroundPageTarget = targets.find(target => target.type() === 'background_page' && target.url().startsWith(`chrome-extension://${extensionId}/`));
const backgroundPage = await backgroundPageTarget.page();
backgroundPage.on('console', msg => {
for (let i = 0; i < msg.args().length; i++) {
console.log(`${i}: ${msg.args()[i]}`);
}
});
await backgroundPage.evaluate(() => {
startFunction();
return Promise.resolve(42);
});
But I cannot get the targets in cluster any suggestion

node puppeteer working ok on local ,but getting timeout on heroku/render code=H12

when trying to run the below code the /test and /testp are working fine but the / is getting time out on both Heroku and render services
index.js
const express = require("express");
const app = express();
const port = process.env.PORT || 5000;
const scrapper = require("./scraper");
const scrapper2 = require("./scrapper2");
app.get("/", async (req, res) => {
const link =
"http://yit.maya-tour.co.il/Drop_Report.aspx?client_code=2660&coordinator_code=2669";
console.log(link);
let searchQery = req.query.search;
const scrapp = await scrapper(link);
if (searchQery) {
let station = scrapp.filter((station) =>
station?.location?.includes(searchQery)
);
return res.json(station);
}
return res.json(scrapp);
});
app.get("/test", async (req, res) => {
return res.json("helo");
});
app.get("/testp", async (req, res) => {
const scrapp = await scrapper2();
return res.json(scrapp);
});
app.listen(port, () => {
console.log(`scrapper on${port}`);
});
scrapper.js
const puppeteer = require("puppeteer");
module.exports = async (link) => {
try {
const browserFetcher = puppeteer.createBrowserFetcher();
const revisionInfo = await browserFetcher.download("938248");
//essential boilerpalte
const browser = await puppeteer.launch({
executablePath: revisionInfo.executablePath,
headless: true,
args: ["--no-sandbox"],
});
const page = await browser.newPage();
await page.goto(link);
await page.waitForSelector("#PassListView1 > tbody", { timeout: 3000 });
// getting all the buttons
let buttons = await page.$$eval("a", (links) =>
links.map((a) => ({ name: a.innerText, link: a.href, button: a }))
);
//filtering the non number ones like sorting by name
buttons = buttons.filter((obj) => Number(obj.name));
let array = [];
//getting the first page
const options = await page.$$eval("#PassListView1 > tbody>tr", (options) =>
options.map((option) => ({
driverName: option.innerText.split("\t")[0],
vehicle: option.innerText.split("\t")[1],
station: option.innerText.split("\t")[2],
location: option.innerText.split("\t")[3],
hour: option.innerText.split("\t")[4],
all: option.innerText.split("\t"),
}))
);
array = [...array, ...options];
// iterating on all the buttons -click-> await for page to load -> getting the content of the page
for await (const [i, { name, link, button }] of buttons.entries()) {
await Promise.all([
//waiting page to load
page.waitForNavigation(),
//clicking on button
page.click(
`#PassListView1 > tbody > tr:nth-child(14) > td > table > tbody > tr > td:nth-child(${
i + 2
}) > a `
),
//waiting for a main selector to make sure page loaded
page.waitForSelector("#PassListView1 > tbody", { timeout: 1000 }),
]);
//scraping through data
const options = await page.$$eval(
"#PassListView1 > tbody>tr",
(options) =>
options.map((option) => ({
driverName: option.innerText.split("\t")[0],
vehicle: option.innerText.split("\t")[1],
station: option.innerText.split("\t")[2],
location: option.innerText.split("\t")[3],
hour: option.innerText.split("\t")[4],
all: option.innerText.split("\t"),
}))
);
array = [...array, ...options];
}
await browser.close();
return array;
} catch (error) {
console.log(error);
}
};
scrapper2.js --just to see something is working
this one actually does work
const puppeteer = require("puppeteer");
module.exports = async () => {
try {
const browserFetcher = puppeteer.createBrowserFetcher();
const revisionInfo = await browserFetcher.download("938248");
//essential boilerpalte
const browser = await puppeteer.launch({
executablePath: revisionInfo.executablePath,
headless: true,
args: ["--no-sandbox"],
});
const page = await browser.newPage();
await page.goto("https://www.youtube.com/", {
waitUntil: "load",
// Remove the timeout
timeout: 0,
});
const tilte = await page.title();
await browser.close();
return tilte;
} catch (error) {
console.log(error);
}
};
package.json
{
"name": "webscrapping",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "nodemon index.js"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"express": "^4.17.2",
"nodemon": "^2.0.15",
"puppeteer": "^13.0.1",
"puppeteer-core": "^1.9.0"
}
}
Heroku log
2021-12-28T19:27:18.246133+00:00 app[web.1]: http://yit.maya-tour.co.il/Drop_Report.aspx?client_code=2660&coordinator_code=2669
2021-12-28T19:27:48.246622+00:00 heroku[router]: at=error code=H12 desc="Request timeout" method=GET path="/" host=my-station-server.herokuapp.com request_id=2f68f71e-e593-4124-936d-41d63e68ff17 fwd="77.139.50.119" dyno=web.1 connect=0ms service=30000ms status=503 bytes=0 protocol=http
2021-12-28T19:29:26.522035+00:00 app[web.1]: Error: net::ERR_CONNECTION_TIMED_OUT at http://yit.maya-tour.co.il/Drop_Report.aspx?client_code=2660&coordinator_code=2669
2021-12-28T19:29:26.522043+00:00 app[web.1]: at navigate (/app/node_modules/puppeteer/lib/cjs/puppeteer/common/FrameManager.js:155:23)
2021-12-28T19:29:26.522044+00:00 app[web.1]: at processTicksAndRejections (internal/process/task_queues.js:95:5)
2021-12-28T19:29:26.522044+00:00 app[web.1]: at async FrameManager.navigateFrame (/app/node_modules/puppeteer/lib/cjs/puppeteer/common/FrameManager.js:130:21)
2021-12-28T19:29:26.522045+00:00 app[web.1]: at async Frame.goto (/app/node_modules/puppeteer/lib/cjs/puppeteer/common/FrameManager.js:500:16)
2021-12-28T19:29:26.522045+00:00 app[web.1]: at async Page.goto (/app/node_modules/puppeteer/lib/cjs/puppeteer/common/Page.js:1167:16)
2021-12-28T19:29:26.522046+00:00 app[web.1]: at async module.exports (/app/scraper.js:16:5)
2021-12-28T19:29:26.522047+00:00 app[web.1]: at async /app/index.js:13:18
also, see Heroku build packs installed
https://github.com/CoffeeAndCode/puppeteer-heroku-buildpack.git
https://github.com/jontewks/puppeteer-heroku-buildpack
https://buildpack-registry.s3.amazonaws.com/buildpacks/jontewks/puppeteer.tgz
and heroku/nodejs
as it turns out the website will only accept Israeli IP address -so needs a proxy for that, still looking for one.

How to send data from node/express to preview pdf in front without saving file?

My aim is to generate pdf contact with puppeteer with an html page that I built.
I succesfully generate this pdf in my back. But I have a problem to send data to my front. I tried many things but... One I got an arrayBuffer, once a blob, now a readableStream and I can read with my front none of theses...
Is there a way to easily send pdf and preview it in browser (in modal) ?
Here is my back :
const date = Date.now();
const pathPDF = `contract-${date}.pdf`;
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(
`http://localhost:3000/admin/reservation/${req.params.reservation}/contract`,
{
waitUntil: 'networkidle2',
}
);
const pdf = await page.pdf({
path: pathPDF,
format: 'A4',
printBackground: true,
});
await browser.close();
// res.json(pdf) <-- I tried this first, don't work
// res.contentType('application/pdf');
// res.sendFile(pathPDF); <-- Then this, not working...
// const data = fs.readFileSync(`./${pathPDF}`);
// res.contentType('application/pdf');
// res.send(data); <-- I tryed this too, same...
Here action :
export const createContract = (reservation) => {
return fetch(`${API}/reservation/contract/${reservation}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
},
})
.then((response) => {
// return response.blob(); <-- Tried this
return response.json();
})
.catch((err) => console.log(err));
};
Here my call in page :
const generateContract = () => {
setLoading(true);
createContract(reservation._id).then((result) => {
if (result.error) {
setLoading(false);
snackbarShowMessage(`${result.error}`);
} else {
setPdf(URL.createObjectURL(result)); <-- Tried this
setPdf(result) <-- This too
setLoading(false);
snackbarShowMessage(`${result.message}`, 'success');
setOpen(true);
}
});
};
Do you have any idea where I doing wrong..?

Cannot upload file to Azure blob storage using Azure functions Node

NOTE: There is no issue here, The issue was with my functions.json script file location. It was pointing to an old script file. The minute I pointed to a new one, it started working.
I am not sure why this is happening, I have a try-catch block and the function never hits the catch block but the image I am trying upload never shows up in the container.
I am new to NODEJS. Since I cant achieve the same thing using C# functions, I decided to write it in the node.
Problem: Azure function Service bus topic trigger, take the message payload and grabs a screenshot of the page using puppeteer. The output from the buffer is in form of the buffer, I am trying to upload this to Azure storage blob.
import { AzureFunction, Context } from "#azure/functions";
import { ServiceBusMessage } from "#azure/service-bus";
import * as puppeteer from 'puppeteer';
import * as BlobServiceClient from "azure-storage";
import { Readable } from 'stream';
const serviceBusTopicTrigger: AzureFunction = async function (context: Context, mySbMsg: ServiceBusMessage): Promise<void> {
try {
const promotionId = context.bindingData.userProperties.promotionId;
context.log('Player Screen Grabber ServiceBus topic trigger function processing message started', promotionId);
const playerURL = process.env['playerURL'] + promotionId + '/';
let browser = await puppeteer.launch({ headless: true });
let page = await browser.newPage();
await page.goto(playerURL, { waitUntil: 'networkidle2' });
await page.setViewport({ width: 1920, height: 1080 });
const screenshotBuffer = await page.screenshot({
encoding: 'binary'
});
await page.close();
await browser.close();
const newPlayerScreenShotStream = new Readable({
read() {
this.push(screenshotBuffer);
},
});
var fileName = promotionId + ".png";
context.bindings.fileName = fileName;
context.bindings.storage = screenshotBuffer;
context.done();
context.log('Player Screen Grabber ServiceBus topic trigger function processing message ended', promotionId);
}
catch (error) {
throw error;
}
};
According to your infromation you provide, you want to use dymaic name in Azure function blob storage output bining. If so, we cannot use context.bindings.<> to implement it. For more details, please refer to here and here
If you want to implement it, you have the following two choices.
Using Azure Functions binding expression patterns
if you define the message's body as json, we can directly read the value with binding expression in function
For example
My message
function.json
{
"bindings": [
{
"name": "mySbMsg",
"type": "serviceBusTrigger",
"direction": "in",
"topicName": "",
"subscriptionName": "",
"connection": "MYSERVICEBUS"
},
{
"type": "blob",
"direction": "out",
"name": "outputBlob",
"path": "outcontainer/{fileName}.png",
"connection": "AzureWebJobsStorage"
}
],
"scriptFile": "../dist/ServiceBusTopicTrigger1/index.js"
}
Function code
import { AzureFunction, Context } from "#azure/functions";
import * as puppeteer from "puppeteer";
const serviceBusTopicTrigger: AzureFunction = async function (
context: Context,
mySbMsg: any
): Promise<void> {
try {
context.log("ServiceBus topic trigger function processed message", mySbMsg);
const promotionId = context.bindingData.userProperties.promotionId;
const playerURL =
"https://learn.microsoft.com/en-us/azure/azure-functions/functions-reference-node?tabs=v2";
let browser = await puppeteer.launch({ headless: true });
let page = await browser.newPage();
await page.goto(playerURL, { waitUntil: "networkidle2" });
await page.setViewport({ width: 1920, height: 1080 });
const screenshotBuffer = await page.screenshot({
encoding: "binary",
});
await page.close();
await browser.close();
context.bindings.outputBlob = screenshotBuffer;
} catch (error) {
throw error;
}
};
export default serviceBusTopicTrigger;
Using Azure Blob storage sdk
Function code
import { AzureFunction, Context } from "#azure/functions";
import * as puppeteer from "puppeteer";
import { BlobServiceClient } from "#azure/storage-blob";
const serviceBusTopicTrigger: AzureFunction = async function (
context: Context,
mySbMsg: any
): Promise<void> {
try {
context.log("ServiceBus topic trigger function processed message", mySbMsg);
const promotionId = context.bindingData.userProperties.promotionId;
const playerURL =
"https://learn.microsoft.com/en-us/azure/azure-functions/functions-reference-node?tabs=v2";
let browser = await puppeteer.launch({ headless: true });
let page = await browser.newPage();
await page.goto(playerURL, { waitUntil: "networkidle2" });
await page.setViewport({ width: 1920, height: 1080 });
const screenshotBuffer = await page.screenshot({
encoding: "binary",
});
await page.close();
await browser.close();
// the storage account connection string
const constr = process.env["AzureWebJobsStorage"];
const blobserviceClient = BlobServiceClient.fromConnectionString(constr);
const containerClient = blobserviceClient.getContainerClient("output");
const blob = containerClient.getBlockBlobClient(`${promotionId}.png`);
await blob.uploadData(screenshotBuffer);
} catch (error) {
throw error;
}
};
export default serviceBusTopicTrigger;
My message
Result

How to download PDF blob using puppeteer?

When the download button is clicked, a new tab is opened where the user can view a PDF statement.
This new tab has a URL starting with blob:, e.g.: blob:https://some-domain.com/statement-id.
How could I download this PDF statement to the file system?
Note: I'm using { headless: false } mode.
Trying to simulate the case:
import puppeteer from 'puppeteer';
import { writeFileSync } from 'fs';
// Minimal PDF from https://github.com/mathiasbynens/small#documents
const minimalPdf = `%PDF-1.
1 0 obj<</Pages 2 0 R>>endobj
2 0 obj<</Kids[3 0 R]/Count 1>>endobj
3 0 obj<</Parent 2 0 R>>endobj
trailer <</Root 1 0 R>>`;
const browser = await puppeteer.launch({ headless: false, defaultViewport: null });
try {
const [page] = await browser.pages();
await page.goto('http://example.com/');
await page.evaluate((pdf) => {
const url = URL.createObjectURL(new Blob([pdf], {type: 'application/pdf'}));
window.open(url);
}, minimalPdf);
const newTarget = await page.browserContext().waitForTarget(
target => target.url().startsWith('blob:')
);
const newPage = await newTarget.page();
const blobUrl = newPage.url();
page.once('response', async (response) => {
console.log(response.url());
const pdfBuffer = await response.buffer();
console.log(pdfBuffer.toString());
console.log('same:', pdfBuffer.toString() === minimalPdf);
writeFileSync('minimal.pdf', pdfBuffer);
});
await page.evaluate((url) => { fetch(url); }, blobUrl);
} catch(err) { console.error(err); } finally { /* await browser.close(); */ }

Resources