Retrieving <title> of a page with URL in NodeJS - node.js

I am trying to get the <title> of one or more URLs without using third-party packages. Is it possible to get the title of a page from its URL without using third-party packages?
The route can receive a single address or multiple addresses:
/I/want/title/?address=http://yahoo.com
/I/want/title/?address=google.com&address=www.dawn.com/events/
app.js
const http = require('http');
const url = require('url');
const app = require('express')();
// GET /I/want/title/?address=<url>[&address=<url>...]
// Placeholder handler from the question: it parses the query string but does
// not fetch any page yet, and always answers with a fixed body.
app.get('/I/want/title/', (req, res) => {
  // Parsed with parseQueryString=true, so `urls` is an object such as
  // { address: 'http://yahoo.com' }; a repeated `address` becomes an array.
  const urls = url.parse(req.url, true).query;
  // The response ends here. `next()` is deliberately NOT called: the request
  // has already been answered, so no later middleware should see it.
  res.send('Sucess');
});

const server = http.createServer(app);
server.listen(3000);

You need to fetch the requested page and parse its HTML to extract its <title>.
The url module is only for parsing a URL string :-) — for example, working out that 'https://nodejs.org/en/' uses the https protocol — not for fetching the remote content.
Use something like this.
const fetch = require('node-fetch'),
express = require('express'),
app = express()
/**
 * Extract the text of the first <title> element from an HTML document.
 *
 * The tag name is matched case-insensitively and may carry attributes
 * (e.g. `<title data-rh="true">`). The text is returned exactly as written
 * in the document (not trimmed, entities not decoded).
 *
 * @param {string} body - HTML source of the page.
 * @returns {string} Contents of the <title> element (may be an empty string).
 * @throws {Error} When no <title>…</title> pair is found.
 */
const parseTitle = (body) => {
  // `[^<]*` stops at the first `<`, so a title containing markup is rejected
  // rather than over-matched.
  const match = /<title(?:\s[^>]*)?>([^<]*)<\/title\s*>/i.exec(body);
  if (!match) {
    throw new Error('Unable to parse the title tag');
  }
  return match[1];
};
// GET /?url=<page address>
// Downloads the page and replies with the text of its <title> element.
// 400 when the `url` parameter is absent, 500 when fetching or parsing fails.
app.get('/', async (req, res) => {
  const target = req.query.url;
  if (!target) {
    return res.status(400).end('Missing url query parameter');
  }

  try {
    const page = await fetch(target);
    const html = await page.text(); // response body as text
    const title = parseTitle(html); // pull out the <title> contents
    res.send(title);
  } catch (e) {
    res.status(500).end(e.message);
  }
});

app.listen(3000);
Your server will listen on http://localhost:3000; just open these URLs in your browser:
http://localhost:3000/?url=https://google.com gives Google
http://localhost:3000/?url=https://stackoverflow.com/questions/64051968/retrieving-title-of-a-page-with-url-in-nodejs gives node.js - Retrieving <title> of a page with URL in NodeJS - Stack Overflow

If you insist on using no 3rd-party libraries (bad idea?? Also... express is a 3rd party library...), you can use the following:
const http = require('http');
let app = require('express')();
// Download the page at `address` with Node's built-in http/https client and
// resolve with its body decoded as UTF-8 text. Redirects are not followed.
// Throws 'Supplied url is invalid' unless `address` is a single absolute
// http:// or https:// URL string.
const fetchHtml = address => {
  let target;
  try {
    // The WHATWG URL parser splits protocol/host/port/path for us; the
    // previous hand-written regex captured the port with its leading ':'.
    target = new URL(address);
  } catch (err) {
    throw new Error('Supplied url is invalid');
  }
  // `typeof` check rejects a repeated ?address=…&address=… (an array).
  if (typeof address !== 'string') throw new Error('Supplied url is invalid');
  let protocol = target.protocol.slice(0, -1); // 'https:' -> 'https'
  if (![ 'http', 'https' ].includes(protocol)) throw new Error('Supplied url is invalid');

  return new Promise((resolve, reject) => {
    // "http" and "https" expose the same request() API
    let request = require(protocol).request(target, { method: 'GET' }, response => {
      // Collect the http body in `chunks`
      let chunks = [];
      response.setEncoding('utf8');
      response.on('data', chunk => chunks.push(chunk));
      // Either reject with error, or resolve with full http body
      response.on('error', reject);
      response.on('end', () => resolve(chunks.join('')));
    });
    request.on('error', reject);
    request.end();
  });
};

// Return the contents of the first <title> element as a single line.
// A regex parse is subject to errors, but should do here. `[\s\S]*?` lets the
// title span several lines and stops at the first closing tag.
const extractTitle = html => {
  let [ , title = null ] = html.match(/<title(?:\s[^>]*)?>([\s\S]*?)<[/]title>/i) || [];
  if (!title) throw new Error(`Response contained no title`);
  return title.split('\n').map(ln => ln.trim()).filter(Boolean).join(' ');
};

// GET /I/want/title?address=<absolute http(s) url>
// Replies with the page title, or 400 and a short reason on any failure.
app.get('/I/want/title', async (req, res) => {
  try {
    let html = await fetchHtml(req.query.address);
    res.send(extractTitle(html));
  } catch (err) {
    // Only the message is sent; the stack would expose server file paths.
    res.status(400).send(`Couldn't get title: ${err.message}`);
  }
  // No next(): the request has been answered on both paths above.
});

http.createServer(app).listen(3000);
I haven't tested this myself but I expect it to work and handle some more obvious edge-cases.
You can see that specifically making the http request is a real headache; you need to handle http and https somewhat separately. This also expects the "address" param to be a fully qualified domain name with protocol, and optional port and path.
You can try running the server and requesting http://localhost:3000/I/want/title?address=https://stackoverflow.com; should produce "Stack Overflow - Where Developers Learn, Share, & Build Careers". You could also try http://localhost:3000/I/want/title?address=http://chess2.fun, and you should see "CHESS2".

A simple req.originalUrl will get you what you are looking for, if I understand your question. Then you can write JavaScript string functions to get the part you want.
// Reads the request URL from `req.originalUrl`, answers with a fixed body and
// then passes control on.
app.get('/I/want/title/', function (req, res, next) {
  const requestedUrl = req.originalUrl;
  res.send('Sucess');
  next(); // hand the request over to the next middleware in line
});

Related

How do I make server-side fetch calls?

I have a React web application which currently does fetch calls client-side to update a dashboard with live information (let's say current weather, as an example), meaning that with an increase in users it will cause unnecessary traffic calls and could potentially crash this weather website.
What I am trying to understand is how can I make those fetch calls be server-side? I have looked into creating a Node.js Express server, but I am unsure if it has the functionality to make fetch calls to a remote host.
Here is my code with request-weather which does not really work, unfortunately.
const { response } = require('express');
const express = require('express');
const app = express();
var fetch = require('node-fetch');
const port = process.env.PORT || 5000;
app.use(express.json());

// This displays message that the server running and listening to specified port
app.listen(port, () => console.log(`Listening on port ${port}`));

// create a GET route
app.get('/request-info', (req, res) => {
  res.send({ information: 'information call successful' });
});

// GET /request-weather
// Fetches the weather JSON server-side and relays it to the client, so
// browsers never call the remote site directly.
app.get('/request-weather', (req, res) => {
  fetch('http://thisotherwebsite.com/weather-query-that-returns-json', {
    method: 'GET',
    // Header name must not contain whitespace (it was ' Accept' before).
    headers: { Accept: 'application/json' },
  })
    .then((upstream) => {
      if (!upstream.ok) {
        throw new Error(`Weather service responded with status ${upstream.status}`);
      }
      return upstream.json(); // extract the data
    })
    .then((data) => res.json(data)) // send it to the browser
    .catch((err) => res.status(502).json({ error: err.message }));
});
Couple things:
Your /request-weather handler makes the request to thisotherwebsite but doesn't do anything with the response.
Your .then(res => { return res; }) doesn't actually do anything. You're just taking what fetch already returns and returning it.
If you want to send the response back to the browser you might do something like this:
// Relay the remote JSON to the browser. `fetch(...)` is a placeholder for the
// real call; `res` is presumably the Express response of the enclosing route
// handler. NOTE(review): a rejected fetch is not handled in this fragment.
fetch(...) // make the request
.then(result => result.json()) // extract the data
.then(data => {
res.json(data); // send it to the browser
})
If you want to do additional processing you could await the fetch call and then do whatever else you need to do with it:
app.get('/request-weather', async (req, res) => { // make handler async
// get data from the other site
const data = await fetch(...)
.then(response => response.json());
// package it up with some other stuff
responseData = {
fromOtherSite: data,
myExpressStuff: {
foo: 1,
bar: 2,
}
}
// return it to the browser
res.json(responseData);
Reference:
fetch: response.json() - Extracting data from a fetch response
express response.json() - Sending json to the response (usually to the browser)

Downloading a file from a Node.JS API REST (express) with React.JS (Axios Get)

I have a React JS application that as a Backend has an API REST made with Node JS.
Currently, my objective is to be able to download files that are on the server.
The correct behavior should be that the user, after clicking on "Download file", should receive the file (Download with browser).
On the server-side, I have something like this (obviously, I'm gonna simplify it by removing JWT middleware, DB queries, etc..):
const express = require('express');
const router = express.Router();
const bodyParser = require("body-parser");
const cors = require("cors");
const app = express();
// Middleware: allow the front-end origin and parse JSON / form bodies.
const corsOptions = { origin: "http://localhost:3000" };
app.use(cors(corsOptions));
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({ extended: true }));

// GET /api/download — sends the file to the client as a download.
router.get('/download', (req, res) => {
  const filePath = "\\\\folder\\subfolder\\myfile.txt";
  res.download(filePath);
});
app.use('/api', router);

const PORT = 3001;
app.listen(PORT, () => {
  console.log("Server is running on port " + PORT);
});
Then, as I have said, I have a React JS application working as a Frontend:
apihelperdownload () {
return axios.get(API_URL + "download").then(function (response) {
return response;
})
}
.............
// Click handler: calls the download helper; both callbacks are still stubs.
function downloadFile() {
  const onResponse = (res) => {
    // Should I do something here with response?
  };
  const onError = (error) => {
  };
  apihelperdownload().then(onResponse, onError);
}
<button className="download" onClick={() => downloadFile()}>
Download File
</button>
I have two files on my server, one TXT and one JPG.
Both have the correct path (I'm not getting any "file/path not found" error) and I am receiving a "200 status OK" in both cases... But I can't download the files.
Also:
In the JPG case, in Network Tab, on preview sub-Tab I can see the image (so the browser is receiving the image).
And the response looks like this:
(ignore the params and the different url, it's just that here is not simplified)
- In the TXT case, in Network Tab, on preview sub-Tab I can just see a white page.
And the response looks like this:
As you can see, in this second case (.txt file), the data is "empty" ( "" )
Data is the correct text.. I didn't save the txt file.. So it was empty..
I have checked several related questions like this Download a file from NodeJS Server using Express
But unfortunately, I haven't found how to solve my issue.
1) What am I doing wrong on the server-side?
2) What I have to do with the response on client-side?
Thanks
I have found how to solve it without third-party libraries and in quite an "easy way".
First of all, I have changed the request to POST (since I just made GET because I thought it was the only way).
After that, on the Axios request, we have to indicate the responseType as blob:
/**
 * Request the file from the API as binary data.
 *
 * axios.post takes (url, data, config): the request body goes in the second
 * argument, while `headers` and `responseType` belong in the third. Passing
 * them inside the body (as before) meant `responseType: 'blob'` was never
 * applied.
 *
 * @returns {Promise} Resolves with the axios response; `response.data` is a Blob.
 */
function apihelperdownload () {
  return axios.post(
    API_URL + "download",
    { key: 'value' },
    { headers: authHeader(), responseType: 'blob' }
  );
}
Then, when we receive the response, we have to create an URL object as a Blob and a link element to download it.
/**
 * Download the file returned by apihelperdownload() and save it under
 * `filename` using the browser's download mechanism.
 *
 * @param {string} filename - Name suggested to the browser for the saved file.
 */
function downloadFile(filename) {
  apihelperdownload().then(
    (res) => {
      // Legacy IE/Edge expose their own save API; no object URL is needed there.
      if (typeof window.navigator.msSaveBlob === 'function') {
        window.navigator.msSaveBlob(
          res.data,
          filename
        );
        return;
      }

      // Point a temporary <a download> at an object URL and click it.
      const url = window.URL.createObjectURL(new Blob([res.data]));
      const link = document.createElement('a');
      link.href = url;
      link.setAttribute('download', filename);
      document.body.appendChild(link);
      link.click();

      // Clean up once the click has been dispatched; otherwise every download
      // leaves a detached link in the DOM and keeps the blob alive in memory.
      setTimeout(() => {
        document.body.removeChild(link);
        window.URL.revokeObjectURL(url);
      }, 0);
    },
    (error) => {
      console.error(error); // keep the cause visible for debugging
      alert("Something went wrong");
    }
  )
}
With this, we can download almost any kind of file very easily.
You can use js-file-download module.
const FileDownload = require('js-file-download');
// Fetch the file as a Blob so binary content (images, archives, …) is not
// mangled by text decoding, then hand it to js-file-download.
Axios.get(API_URL + "download", { responseType: 'blob' })
  .then((response) => {
    FileDownload(response.data, 'file.txt');
  })
  .catch((error) => {
    // do not leave the rejection unhandled
    console.error(error);
  });
Check this response for more: https://stackoverflow.com/a/41940307/6512445

How to know the url that I will be redirected to? [nodejs] [node-fetch]

I am trying to load a JSON file from a url in google cloud. I am using the node-fetch package and it works fine for a couple of hours. The problem is that google changes the redirected url frequently. How can I make a get request to the url I will be forwarded to? Or at least know what url I will be forwarded to? I see there is also a package called request, but its deprecated.
This is the code
var express = require('express');
var router = express.Router();
var fetch = require('node-fetch');
// GET /
// Loads config.json from cloud storage and returns it as `{ data }`.
// Replies 502 when the storage answers with a non-2xx status and 500 when
// the request or the JSON parsing fails.
router.get('/', async (req, res) => {
  const url = 'https://storage.cloud.google.com/blablabla/config.json';
  try {
    const upstream = await fetch(url);
    if (!upstream.ok) {
      // Previously this case fell through and produced `{ data: undefined }`
      // with a 200 status.
      return res
        .status(502)
        .send({ error: `Upstream responded with status ${upstream.status}` });
    }
    const data = await upstream.json();
    res.send({ data });
  } catch (err) {
    // Report failures with an error status instead of a 200 response.
    res.status(500).send({ error: err.message });
  }
});

module.exports = router;
You can look up the final URL in the response headers. In your case res.headers.get('location') should do the trick.
The Response object has an undocumented url property. So, let's say you call
// Let fetch follow the redirects itself; the final address can then be read
// from `response.url`.
const response = await fetch(url, {
redirect: 'follow', // follow redirects rather than returning the 3xx response
follow: 10, // presumably node-fetch's cap on the number of redirects followed — confirm against its docs
});
response.url will be the URL of the last redirect that was followed.

Express JS proxy to call web api

I have the following code. And a web api which returns string array,
const express = require('express');
const proxy = require('express-http-proxy');
// Mount the proxy under /proxy and start listening.
const app = express();
const valuesProxy = proxy('http://localhost:56660/api/values');
app.use('/proxy', valuesProxy);
app.listen(3000);
When I tried to do localhost:3000/proxy I do not get a response,
But when I use app.use('/proxy', proxy('www.google.com')); , it redirects to google web site.
Please suggest the best approach/solution:
I want to create a proxy server which gets url from browser (Application), modify the url, call the new url and send the response back to browser(Application).
You can get the URL to be proxied as a query parameter, modify it and then pass that URL to proxy, like this (use instead of app.use('/proxy', proxy('http://localhost:56660/api/values'));):
// GET /proxy?url=<target>
// Rewrites the requested target with modifyURL() and proxies the request to it.
app.get('/proxy', (req, res, next) => {
  const target = modifyURL(req.query.url);
  const forward = proxy(target);
  return forward(req, res, next);
});
You can call your server with a URL like this (GET method):
https://my.server.com/proxy?url=https://urltobeproxied.com
UPDATE:
I think this would work according to your needs:
// Rebuilds the URL the client asked for, rewrites it with modifyURL() and
// proxies the request to the result.
app.use('/proxy', (req, res, next) => {
  const scheme = req.protocol;
  const host = req.get('Host');
  const requestedUrl = scheme + '://' + host + req.url;
  const forward = proxy(modifyURL(requestedUrl));
  forward(req, res, next);
});
UPDATE2:
// Keeps the proxy target fixed and only rewrites the path of each request.
app.use('/proxy', proxy('http://localhost:56660/api/values', {
  // Returns the path component of the URL produced by modifyURL().
  proxyReqPathResolver(req) {
    const host = req.get('Host');
    const requestedUrl = `${req.protocol}://${host}${req.url}`;
    const { path } = require('url').parse(modifyURL(requestedUrl));
    return path;
  },
}));
UPDATE3:
An example of proxy modifying the response (extracted from the package docs);
// Proxies to the values API and adds a property to the JSON body before it
// is returned to the client.
app.use('/proxy', proxy('http://localhost:56660/api/values', {
  // proxyResData holds the proxied response body; return the replacement body.
  userResDecorator: function(proxyRes, proxyResData, userReq, userRes) {
    // Declared with const: the bare assignment created an implicit global
    // shared between requests (and throws in strict mode).
    const data = JSON.parse(proxyResData.toString('utf8'));
    data.newProperty = 'exciting data';
    return JSON.stringify(data);
  }
}))

Node.js with Express: how to redirect a POST request

I want to redirect from one URL request to another 'POST' request, like this:
const app = require('express')();

// GET / redirects the client to /test.
app.get('/', (req, res) => {
  res.redirect('/test');
});

// /test only answers POST requests.
app.post('/test', (req, res) => {
  res.send('/test page');
});

app.listen(3000, () => {
  console.log('listenning on port:3000');
});
However, I can't redirect to '/test' page because it is a POST request. So what should I do to make the redirection work, keeping the '/test' request POST?
You can do this:
// POST / answers with a 307 redirect to /test.
app.post('/', (req, res) => {
  const TEMPORARY_REDIRECT = 307;
  res.redirect(TEMPORARY_REDIRECT, '/test');
});
Which will preserve the send method.
For reference, the 307 http code spec is:
307 Temporary Redirect (since HTTP/1.1) In this occasion, the request
should be repeated with another URI, but future requests can still use
the original URI. In contrast to 303, the request method should not
be changed when reissuing the original request. For instance, a POST
request must be repeated using another POST request.
For more info, see: http://www.alanflavell.org.uk/www/post-redirect.html
Keep in mind the middleware architecture: Each handler may manipulate the context, and either respond - or - call next().
By this premise, the express router is basically a middleware function you may use after "correcting" the url.
(BTW, the request app is also a function, although I'm not sure if I recommend going back so early in the chain)
Here's a kind'a example:
// Router() is a plain factory; `new require('express').Router()` only worked
// because `new` was applied to `require` itself.
const router = require('express').Router()
const user = require('../model/user')
//assume user implements:
// user.byId(id) -> Promise<user>
// user.byMail(email) -> Promise<user>

// Builds a middleware that loads the user via `userPromise(req)` and stores
// it on `req.user`. Does nothing when `req.user` is already set. A rejected
// lookup is passed to next(err).
const reqUser = userPromise => (req, res, next) =>
  req.user
    ? next()
    : userPromise(req)
      .then(found => { req.user = found })
      .then(() => next(), next)
//assume the server that uses this router has a
//standard (err, req, res, next) handler in the end of the chain...
const byId = reqUser( req => user.byId(req.params.id) )
// The route below declares `:email`, so the value is in req.params.email
// (req.params.mail was always undefined).
const byMail = reqUser( req => user.byMail(req.params.email) )

router.post('/by-id/:id/friends',
  byId,
  (req, res) => res.render('user-friends', req.user)
)

router.post('/by-email/:email/friends',
  byMail,
  (req, res, next) => {
    // Rewrite the URL and re-enter this router so the /by-id route answers;
    // req.user is already loaded, so byId will skip its lookup.
    req.url = `/by-id/${req.user.id}/friends`
    next()
  },
  router
)
The only difference between 307 and 302 is that 307 guarantees that the method and the body will not be changed when the redirected request is made.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/307
I believe the question is that the node server is receiving a POST request but needs to redirect it to a different server as GET request. I recently had to deal with something similar. Here is how I solved it:
const proxy = require('express-http-proxy');

// Forwards requests received on the incoming path to the other server,
// switching the method to GET and deriving the path from the request body.
app.use('incomin/url', proxy('forwarding:server', {
  // proxyReqOptDecorator lets us adjust the outgoing request options,
  // including the request method.
  proxyReqOptDecorator(proxyReqOpts, srcReq) {
    proxyReqOpts.method = 'GET';
    return proxyReqOpts;
  },
  // proxyReqPathResolver computes the path to forward to; here it resolves
  // asynchronously after a 200 ms timer.
  proxyReqPathResolver(req) {
    return new Promise((resolve, reject) => {
      const resolvePath = () => {
        const key = req.body.key;
        const forwardPath = 'forwarding/url' + key;
        console.log(`Inside forward path. The resolved path is ${forwardPath}`);
        resolve(forwardPath);
      };
      setTimeout(resolvePath, 200);
    });
  },
}));
Keep in mind that the above proxyReqPathResolver is set up asynchronously. The synchronous version and more info on express-http-proxy are described here:
https://www.npmjs.com/package/express-http-proxy

Resources