I want to send the form infos to the same url, get the documents that match the infos on the url, render the page with the documents and then print that page with puppeteer in a pdf format (every document will become a page and then a pdf document at the end).
this is the code I am using:
the route that receives the request in documentRoutes:
router.use(
'/:document',
documentController.getDocument,
documentController.printDocument
);
the middlewares:
getDocument:
exports.getDocument = async (req, res, next) => {
const course = req.params.document.split('-')[0];
const document = req.params.document.split('-')[1];
const documents = await Document.find({ CODE: course });
res.status(200).render(`${document}`, {
title: document,
documents,
});
next();
};
printDocument:
exports.printDocument = async (req, res, next) => {
const url = req.params.document;
const browser = await puppeteer.launch({
executablePath: '/usr/bin/chromium',
});
const page = await browser.newPage();
await page.goto(`http://localhost:3000/${url}`);
await page.pdf({
path: path.join(appRoot.path, 'downloads/certificate.pdf'),
format: 'A4',
margin: {
top: '1cm',
left: '1cm',
right: '1cm',
bottom: '1cm',
},
});
await browser.close();
console.log('pdf generated successfully');
await browser.close();
next();
};
error message:
Error Unexpected extraInfo events for request 70460993C8EB5E671E4D72B16003040A
Related
I try to send a pdf made by Puppeteer to my Angular client but I get this error:
The server-side code:
app.get('/convCVtoPDF',auth, jsonParser,async (req,res)=>{
const id = req.header('id')
const options = {
margin: {
top: '50px',
right: '50px',
bottom: '50px',
left: '50px',
},
pageSize: 'A4',
landscape: false,}
try {
const browser = await puppeteer.launch({"headless": true});
const page = await browser.newPage();
await page.setExtraHTTPHeaders({
'id':id
});
await page.goto('http://localhost:4200/BasicCvPDF');
const pdf = await page.pdf(options);
await browser.close();
// Send the PDF buffer in the response
res.setHeader('Content-Type', 'application/pdf');
res.send(pdf);
} catch (e) {
console.log(e);
res.status(500).send();
}
})
And the client side is:
async ngOnInit(){
await this.http.get<any>('http://localhost:3000/getUserById',{
headers:new HttpHeaders({'Content-Type':'application/json'})
}).toPromise()
}
I am sure the PDF is valid because If I specify a path for Puppeteer to save it, I can open and view it without a problem.
Any thoughts on why it can happen?
I have a nextjs page which consists of a react video player which plays a YouTube video based on some id passed in the url. The YouTube video is fetched in getServerSideProps based on the id. Then on the client side I am using /api/some-route to take a screenshot of that video player div using Puppeteer. Problem is when in api side I am opening a browser with Puppeteer with that particular URL, getServerSideProps is called and again my api/some-routes is getting called. So It has made a loop and is not finishing. How do I stop this?
My page:
export default function Home() {
useEffect(() => {
if (typeof window === undefined) {
return;
}
const url = window.location.href;
setTimeout(() => {
fetch(`/api/scrapper?url=${url}`)
.then((res) => {
res.json();
})
.then((data) => {
console.log(data);
});
}, 10000);
}, [params.slug[0]);
return (
<>
<Layout>
<Frame id="capture" />
</Layout>
</>
);
}
export const getServerSideProps = async ({ params }) => {
return {
props: { params, serverData },
};
}
/api/scrapper.js
import puppeteer from "puppeteer";
export default async function My(req, res) {
const url = req.query.url;
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(url);
const img = await page.screenshot({ path: "output.png" });
console.log("img", img);
await page.close();
await browser.close();
return res.json("done");
}
My aim is to generate pdf contact with puppeteer with an html page that I built.
I succesfully generate this pdf in my back. But I have a problem to send data to my front. I tried many things but... One I got an arrayBuffer, once a blob, now a readableStream and I can read with my front none of theses...
Is there a way to easily send pdf and preview it in browser (in modal) ?
Here is my back :
const date = Date.now();
const pathPDF = `contract-${date}.pdf`;
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(
`http://localhost:3000/admin/reservation/${req.params.reservation}/contract`,
{
waitUntil: 'networkidle2',
}
);
const pdf = await page.pdf({
path: pathPDF,
format: 'A4',
printBackground: true,
});
await browser.close();
// res.json(pdf) <-- I tried this first, don't work
// res.contentType('application/pdf');
// res.sendFile(pathPDF); <-- Then this, not working...
// const data = fs.readFileSync(`./${pathPDF}`);
// res.contentType('application/pdf');
// res.send(data); <-- I tryed this too, same...
Here action :
export const createContract = (reservation) => {
return fetch(`${API}/reservation/contract/${reservation}`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
},
})
.then((response) => {
// return response.blob(); <-- Tried this
return response.json();
})
.catch((err) => console.log(err));
};
Here my call in page :
const generateContract = () => {
setLoading(true);
createContract(reservation._id).then((result) => {
if (result.error) {
setLoading(false);
snackbarShowMessage(`${result.error}`);
} else {
setPdf(URL.createObjectURL(result)); <-- Tried this
setPdf(result) <-- This too
setLoading(false);
snackbarShowMessage(`${result.message}`, 'success');
setOpen(true);
}
});
};
Do you have any idea where I doing wrong..?
I can't get my puppet to click on the "Followers" link in Instagram.
I assume that Instagram has done some anti-bot magic, but maybe I'm just too conspiratory about it.
How could I get the "Followers" modal to show?
This is my code:
const puppeteer = require('puppeteer');
var instagram = {
username: 'username',
password: 'password'
}
var run = ()=>{
(async () => {
const browser = await puppeteer.launch({headless: false, args: ['--lang=en-EN,en']});
const page = await browser.newPage();
await page.setExtraHTTPHeaders({ 'Accept-Language': 'en' });
await page.goto('https://www.instagram.com/accounts/login');
await page.waitFor(1500);
await page.click("input[type=\"text\"]")
await page.keyboard.type(instagram.username);
await page.click("input[type=\"password\"]")
await page.keyboard.type(instagram.password);
await page.click("button[type=\"submit\"]");
await page.waitForNavigation();
await page.click(".HoLwm");
await page.click(".glyphsSpriteUser__outline__24__grey_9")
await page.waitForNavigation();
await page.waitFor(2500);
await page.waitForSelector('a.-nal3');
await page.evaluate(() => document.querySelector('a.-nal3')) //does not work
//await page.click(".-nal3") //does not work
await page.waitFor(3000);
await page.waitFor(1000);
//await browser.close();
})();
}
run()
While reviewing your script I noticed that not all of your selectors are the same in my Instagram so I fixed it trying not to use exact selectors since they may change tomorrow.
But this works today (see comments on what changed in the script):
var run = ()=>{
(async () => {
const browser = await puppeteer.launch({headless: false, args: ['--lang=en-EN,en']});
const rand = function(){ return Math.floor(1000 + Math.random() * 2000) }
const page = await browser.newPage();
await page.setExtraHTTPHeaders({ 'Accept-Language': 'en' });
await page.goto('https://www.instagram.com/accounts/login');
// When you can try not to `waitFor` set periods of time
// Wait for selectors, wait for random periods of time
await page.waitForSelector('button[type=\"submit\"]');
await page.click("input[type=\"text\"]")
await page.keyboard.type(instagram.username);
await page.waitFor(rand())
await page.click("input[type=\"password\"]")
await page.keyboard.type(instagram.password);
await page.waitFor(rand())
await page.click("button[type=\"submit\"]");
await page.waitForNavigation();
await page.waitFor(rand())
// After login we're back on the main page
// Wait till React starts and paints the interface
// We're waiting for "Profile" icon to be visible
await page.waitForSelector("span[aria-label='Profile']");
// Then we click on the link inside of which the icon is located
// That is link to the profile
await page.evaluate(() => document.querySelector("span[aria-label='Profile']").parentNode.click() );
await page.waitForNavigation();
await page.waitFor(rand())
// Do not rely on a selector
// Find a link that contains "followers" in its href
await page.waitForSelector("a[href*='followers']");
const followers = await page.evaluate(() => document.querySelector("a[href*='followers']").textContent)
console.log("Total followers: " + followers);
// Click on the followers link
await page.evaluate( () => document.querySelector("a[href*='followers']").click() )
// Wait for the followers modal and profiles
await page.waitFor("div[role='presentation'] div[role='dialog'] div:nth-child(2) ul li");
// Get followers that are in the list in the second div of that modal
const people = await page.evaluate(() => {
return [...document.querySelectorAll("div[role='presentation'] div[role='dialog'] div:nth-child(2) ul li")]
.map(user => {
const profLink = user.querySelector("a[title]")
return {
"name" : profLink.textContent,
"url" : profLink.href
};
})
})
console.log(people)
// await browser.close();
})();
}
Just getting started with Puppeteer and i can launch the browser, go to a url, run a bunch of actions and then close the browser. What i am looking to see if i can do though is open the browser and loop over a set of actions in the same session.
I have a JSON object with urls i want to visit, so want to loop over that
// teams.js
module.exports = {
premier_league: [
{ team_name: "Team 1", url: "https://url-of-site/team_1"},
{ team_name: "Team 2", url: "https://url-of-site/team_2"}
]
}
My script to launch puppeteer is as follows
// index.js
const TEAM = require('./teams');
const puppeteer = require('puppeteer');
(async () => {
// Initialise Browser
const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();
await page.setViewport({
width: 1280,
height: 800
});
await page.goto('login page');
await page.click('login_box');
await page.keyboard.type('username');
await page.click('login_password');
await page.keyboard.type('password');
await page.click('login_button');
await page.waitForNavigation();
// Go To Team URL
await page.goto('Team URL')
await browser.close();
})();
So to loop over my JSON object I can use
Object.keys(TEAM['premier_league']).forEach(function(key) {
// Output url of each team
console.log(TEAM['premier_league'][key]['url'])
});
If i wrap my go to url with my loop, then page is no longer accessible
// index.js
const TEAM = require('./teams');
const puppeteer = require('puppeteer');
(async () => {
// Initialise Browser
const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();
await page.setViewport({
width: 1280,
height: 800
});
await page.goto('login page');
await page.click('login_box');
await page.keyboard.type('username');
await page.click('login_password');
await page.keyboard.type('password');
await page.click('login_button');
await page.waitForNavigation();
Object.keys(TEAM['premier_league']).forEach(function(key) {
// Go To Team URL
await page.goto(TEAM['premier_league'][key]['url'])
});
await browser.close();
})();
The actual error is
await page.goto(TEAM[args][key]['url'])
^^^^
SyntaxError: Unexpected identifier
Your Object.keys callback function need to use async as well in order to use await inside. Try to change as below
Object.keys(TEAM['premier_league']).forEach( async function(key) {
// Go To Team URL
await page.goto(TEAM['premier_league'][key]['url'])
});
Hope it helps