NODE JS - request npm - manipulate url body - node.js

I'm working on a server-side (personal) project with Node.js for the first time, and I ran into some difficulties.
My goal is the following:
first part - Im using "/uploads/processData" URL in my server to get URL(s) from the user request.
Now i want to access the user request URL(s) and get their HTML(s) file(s), to do so i'm using the "request" npm package (code below).
second part - I want access the body that I get back from the request package (from the first part), so I'm using cheerio npm package to do so.
Now to my problem - lets say that i'm trying to get the body of the url:
https://www.amazon.com/NIKE-Mens-Lunarconverge-Running-Shoes/dp/B06VVFGZHL?pd_rd_wg=6humg&pd_rd_r=61904ea4-c78e-43b6-8b8d-6b5ee8417541&pd_rd_w=Tue7n&ref_=pd_gw_simh&pf_rd_r=VGMA24803GJEV6DY7458&pf_rd_p=a670abbe-a1ba-52d3-b360-3badcefeb448&th=1
For some reason that I can't understand (probably because of my lack of knowledge of web development), the body I get with my first-part code is not always the same as the one I see when I inspect the above page (link) using F12. Hence my cheerio extraction (the second part) sometimes works as I expect and sometimes does not (because some elements from the full/original HTML file are missing). At first I thought it might be a cache issue, so I added a middleware to set a "nocache" flag.
What am I missing here? Is the way I am trying to do this wrong? Is there any way to ensure I get the same full/original HTML every time?
Here is my code so far -
nocache middleware
/**
 * Express-style middleware that sets three cache-disabling headers on the
 * response and then passes control to the next handler.
 *
 * @param {object} req - incoming request (not used).
 * @param {object} res - response object; its header(name, value) method is called.
 * @param {Function} next - called with no arguments once the headers are set.
 */
function nocache(req, res, next) {
  // Order matters only for parity with the original: Cache-Control, Expires, Pragma.
  const noCacheHeaders = [
    ["Cache-Control", "private, no-cache, no-store, must-revalidate"],
    ["Expires", "-1"],
    ["Pragma", "no-cache"],
  ];
  for (const [field, value] of noCacheHeaders) {
    res.header(field, value);
  }
  next();
}
EDIT
// POST /processGoogleSearchData: reads the result items from req.body.googleSearchResult,
// requests every item's link and logs the text of "#priceblock_ourprice" found in each page.
uploadRoutes.post("/processGoogleSearchData", nocache, (req, res) => {
//Assuming getting in req.body the google result JSON as "googleSearchResult"
var itemsArr = [];
var linksArr = [];
var bodysArr = [];
itemsArr = req.body.googleSearchResult.items;
if (itemsArr.length === 0) {
//return appropriate message
return res.status(400).send({ message: "No data sent to server" });
}
var linksArr = itemsArr.map(item => item.link);
//Get the needed info from every link
linksArr.forEach(link => {
// request() only starts the HTTP call; the callback below runs later, once the response has arrived
request(link, (err, response, body) => {
if (!err && response.statusCode === 200) {
var $ = cheerio.load(body);
var tr = $(".a-lineitem").children();
var priceTd = tr.find(".a-span12");
var priceSpan = priceTd.find("#priceblock_ourprice");
console.log(priceSpan.text());
//when trying to build array of bodys the extraction doesnt work at all
bodysArr.push(body);
}
});
});
// NOTE: this runs as soon as forEach returns, i.e. before any request callback above has
// pushed a body, so bodysArr is still empty when it is sent.
res.send(bodysArr);
});
I changed my code to the above, and it seems like the data extraction works more often. Can anyone explain why the extraction still sometimes doesn't work?
I tried returning bodysArr for debugging purposes, but when I do that the extraction does not work at all and my route's response is always an empty array. Why is that?

The problem is that:
res.send(bodysArr);
is executed straight after the call to
linksArr.forEach(link => {
The callbacks
(err, response, body) => {
if (!err && response.statusCode === 200) {
var $ = cheerio.load(body);
var tr = $(".a-lineitem").children();
var priceTd = tr.find(".a-span12");
var priceSpan = priceTd.find("#priceblock_ourprice");
console.log(priceSpan.text());
//when trying to build array of bodys the extraction doesnt work at all
bodysArr.push(body);
}
won't be guaranteed to have fired yet. What you want is to ensure that res.send(bodysArr) runs only after all the requests have completed.
There are a few ways to handle this, one is with the excellent async library.
Hopefully you can get the gist of it with this example.
var array = [1, 2, 3];

// Stand-in for one asynchronous job: replace the timer with the real fetch
// and invoke `callback` once that job has finished.
function asyncRequest(input, callback) {
  // setTimeout is used purely as an example of work that completes later
  setTimeout(callback, 10);
}

// The third argument is the completion callback handed to async.each.
async.each(array, asyncRequest, (err) => {
  if (err) throw err;
  console.log("All Finished");
});
<script src="https://cdnjs.cloudflare.com/ajax/libs/async/2.6.1/async.min.js"></script>

After reviewing Sudsy explanation, I came across loops of asynchronous methods.
While playing with this subject I could not figure out whats wrong with my following code:
This works fine - so i ended up using it
/**
 * Starts one price lookup per item; all lookups run concurrently.
 *
 * @param {Array<{link: string}>} itemsArr - items whose `link` is fetched with axios.
 * @returns {Promise<Array<Promise<string>>>} an array of pending promises, one per
 *   item, in input order. Each settles to the text of "#priceblock_ourprice" or,
 *   if the lookup failed, to the error message. Pass the array to Promise.all
 *   to wait for the actual values.
 */
async function getItemsInfo(itemsArr) {
  return itemsArr.map(async (item) => {
    try {
      // Only the HTTP call is asynchronous; cheerio.load and the selector
      // calls below are synchronous, so they need no await.
      const response = await axios(item.link);
      const $ = cheerio.load(response.data);
      const priceSpan = $(".a-lineitem")
        .children()
        .find(".a-span12")
        .find("#priceblock_ourprice");
      return priceSpan.text();
    } catch (err) {
      // Failures are reported in-band so one bad link does not reject the whole batch
      return err.message;
    }
  });
}
// getItemsInfo resolves to an array of promises (one per item), so the first .then
// hands that array to Promise.all to wait for the individual results before logging them.
getItemsInfo(linksArr)
.then(res => Promise.all(res))
.then(res => console.log(res))
.catch(err => console.error(err));
Can someone explain to me what's wrong with the following codes?
// Variant 1: awaits Promise.all over the per-item lookups and chains .then/.catch onto it.
async function getItemsInfo(itemsArr) {
// NOTE: the awaited expression below is neither returned nor assigned; the function
// body contains no `return` of its own, so nothing from this chain reaches the caller.
await Promise.all(
itemsArr.map(async item => {
try {
var body = await axios(item.link);
var $ = await cheerio.load(body.data);
var tr = await $(".a-lineitem").children();
var priceTd = await tr.find(".a-span12");
var priceSpan = await priceTd.find("#priceblock_ourprice");
return priceSpan.text();
} catch (err) {
// throws the message string, not the Error object
throw err.message;
}
})
)
// NOTE: the parameter is spelled `resulst` but the body returns `results`,
// which is not declared anywhere in this function.
.then(resulst => {
return results;
})
.catch(err => {
throw err.message;
});
}
//the caller function
// NOTE: this try/catch only covers errors thrown synchronously while the chain is being
// set up. The promise chain itself has no .catch(), so a rejection of getItemsInfo
// does not reach the catch block below.
try {
getItemsInfo(linksArr).then(results => {
res.status(200).send(results);
});
} catch (err) {
res.status(400).send(err.message);
}
or
// Variant 2: builds the per-item promises first, then awaits Promise.all over them.
async function getItemsInfo(itemsArr) {
const promises = itemsArr.map(async item => {
try {
var body = await axios(item.link);
var $ = await cheerio.load(body.data);
var tr = await $(".a-lineitem").children();
var priceTd = await tr.find(".a-span12");
var priceSpan = await priceTd.find("#priceblock_ourprice");
return priceSpan.text();
} catch (err) {
// failures are turned into their message string, so these promises do not reject
return err.message;
}
});
// NOTE: `results` is assigned here but never returned, so the promise returned by
// getItemsInfo fulfils with undefined.
var results = await Promise.all(promises)
.then(results => {
return results;
})
.catch(err => {
return err.message;
});
}
//the caller function
// NOTE: this try/catch only covers errors thrown synchronously while the chain is being
// set up. The promise chain itself has no .catch(), so a rejection of getItemsInfo
// does not reach the catch block below.
try {
getItemsInfo(linksArr).then(results => {
res.status(200).send(results);
});
} catch (err) {
res.status(400).send(err.message);
}

Related

weather app: Error [ERR_HTTP_HEADERS_SENT]: Cannot set headers after they are sent to the client

I am trying to build a weather app using node that takes
{
"cities": [
"toronto",
"mumbai",
"london"
]
}
as input and returns
{
"weather": {
"toronto": "24C",
"mumbai": "34C",
"london": "14C"
}
}
this as output
app.post('/getWeather',(req,res)=>{
const city = req.body.city;
city.map(city=>{
const url=`http://api.openweathermap.org/data/2.5/weather?q=${city}&appid=${process.env.WEATHER_API_KEY}`;
request(url, function(err, response, body) {
// On return, check the json data fetched
if (err) {
res.render('index', { weather: null, error: 'Error, please try again' });
} else {
let weather = JSON.parse(body);
console.log(weather);
Since the question does not include all of the code, I cannot give a definite answer, but I assume that the code either tries to send a response for each city separately and/or does not wait for all the API calls to finish.
To fix the issue, the handler has to wait for every API call (for example with async/await and Promise.all, since the response depends on several calls), and the response must be sent only once, after it has been completely assembled.
An example based on the given code:
/**
 * POST /getWeather
 *
 * Request body:  { "cities": ["toronto", "mumbai", ...] }
 * Response body: { "weather": { "toronto": "24C", "mumbai": "34C", ... } }
 *
 * All cities are looked up in parallel and a single response is sent once every
 * lookup has finished. Cities the API answers with a non-200 status are left
 * out of the result; a transport or parse failure yields one 500 response.
 */
app.post("/getWeather", async (req, res) => {
  const cities = req.body.cities;
  if (!Array.isArray(cities)) {
    return res.status(400).json({ error: 'Expected a "cities" array in the request body' });
  }

  const reqs = cities.map((city) => {
    // units=metric makes the API report temperatures in Celsius
    const url = `http://api.openweathermap.org/data/2.5/weather?q=${encodeURIComponent(city)}&units=metric&appid=${process.env.WEATHER_API_KEY}`;
    return new Promise((resolve, reject) => {
      request(url, function (err, response, body) {
        if (err) {
          reject(err);
          return;
        }
        try {
          // Keep the requested city next to its payload so the result can be keyed by it.
          const ok = response.statusCode === 200;
          resolve({ city, ok, data: ok ? JSON.parse(body) : null });
        } catch (parseErr) {
          // JSON.parse runs inside the callback, so a throw must be routed to reject explicitly
          reject(parseErr);
        }
      });
    });
  });

  try {
    const responses = await Promise.all(reqs);
    const weather = {};
    for (const { city, ok, data } of responses) {
      if (ok) {
        weather[city] = `${Math.round(data.main.temp)}C`;
      }
    }
    res.json({ weather });
  } catch (err) {
    console.error(err);
    res.status(500).json({ error: "Error, please try again" });
  }
});

NodeJS table children are undefined

I've been working on a discord/roblox bot. I've gotten almost finished, but ran into a problem. I'm a noob at node, so please dont harp too much if the code is bad. What this does is I click a button in a roblox game, it sends a POST request to a heroku app, and then with the info provided it sends a message to a channel in my discord server.
Relevant bits of code:
const Discord = require('discord.js');
const request = require('request');
var express = require('express');
app = express();
bodyParser = require('body-parser');
path = require('path');
var bodyParser = require('body-parser');
var PORT = process.env.PORT || 5000;
const bot = new Discord.Client();
// Starts three requests for the given asset id (resale data, thumbnail, product info)
// and returns the `response` object their callbacks write rap / thumbnail / name into.
// NOTE: the callbacks run later, when each HTTP response arrives; `return response` at the
// bottom executes first, so the caller receives the object before any field has been set.
function GetData(id){
let url = "https://economy.roblox.com/v1/assets/"+id+"/resale-data";
let url2 = "https://thumbnails.roblox.com/v1/assets?assetIds="+id+"&size=150x150&format=Png";
let url3 = "http://api.roblox.com/marketplace/productinfo?assetId="+id
let options = {json: true};
var response = {};
// request 1: recent average price
request(url, options, (error, res, body) => {
if (error) {
return console.log(error)
};
if (!error && res.statusCode == 200) {
response.rap = body.recentAveragePrice
console.log(response)
};
});
// request 2: thumbnail image URL
request(url2, options, (error, res, body) => {
if (error) {
return console.log(error)
};
if(!error && res.statusCode == 200) {
response.thumbnail = body.data[0].imageUrl
console.log(response)
}
});
// request 3: asset name
request(url3, options, (error, res, body) => {
if (error) {
return console.log(error)
};
if(!error && res.statusCode == 200) {
response.name = body.Name
console.log(response)
}
});
// returned immediately, while all three requests are still in flight
return response
};
// POST /exec: acknowledges the request right away, then builds a Discord embed from
// GetData(req.body.id) and posts it to a fixed channel.
app.post('/exec', function(req, res) {
res.setHeader('Content-Type', 'application/json')
// the HTTP response is sent before any data has been fetched
res.send(JSON.stringify({
success: true
}));
console.log("working");
console.log(req.body)
// NOTE: GetData returns its object before its request callbacks have filled it in,
// so data.name / data.rap / data.thumbnail are still undefined when read below.
var data = GetData(req.body.id)
if (req.body.state === true){
var state = "New Projected"
} else {
var state = "No longer projected"
}
const msg = new Discord.RichEmbed()
.setDescription(data.name)
.setAuthor(state)
.setColor(0x1c90d9)
.addField("RAP:", data.rap)
.setImage(data.thumbnail)
bot.channels.get("655196831835226133").send(msg);
});
However, this is the message that is sent in the discord server:
everything is undefined and there is no thumbnail
Help is greatly appreciated. Thanks a lot.
Some reading here: https://www.promisejs.org/
Asynchronous functions, like making a request to a server, take time to return and do so in a callback. You have to keep track of the paths of the callbacks to make sure you do not do the next step before a previous step has completed otherwise you will skip over it entirely. It is not like calling a regular function that does something in order.
When you do return response at the end of GetData, there is nothing in the object because you need to wait for the three responses to complete within their respective callbacks. You are seeing things logged to the console because they are happening after the fact in the background. If you were to put timestamps and additional logging, you would see that it is not all happening in the order of how you wrote it, but rather in order of when the requests themselves complete.
There are many different ways to write and use promises, but a very simple one here would be to nest the requests so they happen one after the other, and then finally resolve the promise at the very end when your response object has all the data. With Promises you resolve which is the same as returning, and reject when you want to raise an error.
Without going too deep into it, here is an example of what you can do. This is untested, as I am not running your application or trying to rewrite it (that is not what Stack Overflow is for), but hopefully it can send you in the right direction. Note that I call reject on the errors to tell the promise to stop running.
/**
 * Collects resale price, thumbnail URL and name for an asset.
 *
 * The three requests run one after the other; the returned promise settles
 * exactly once in every case.
 *
 * @param {string|number} id - asset id inserted into the three request URLs.
 * @returns {Promise<{rap: *, thumbnail: *, name: *}>} resolves with the assembled
 *   object once all three requests succeeded. Rejects with the request error,
 *   with an Error naming the URL when a response status is not 200, or with the
 *   error thrown while reading a response body.
 */
function GetData(id){
  let url = "https://economy.roblox.com/v1/assets/"+id+"/resale-data";
  let url2 = "https://thumbnails.roblox.com/v1/assets?assetIds="+id+"&size=150x150&format=Png";
  let url3 = "http://api.roblox.com/marketplace/productinfo?assetId="+id
  let options = {json: true};
  var response = {};

  // Promise wrapper around one callback-style request(); resolves with the parsed body.
  const fetchJson = (target) => new Promise((resolve, reject) => {
    request(target, options, (error, res, body) => {
      if (error) {
        reject(error);
        return; // stop here: nothing below applies once the request failed
      }
      if (res.statusCode != 200) {
        // Without this branch a non-200 answer would leave the promise pending forever
        reject(new Error("Request to " + target + " failed with status " + res.statusCode));
        return;
      }
      resolve(body);
    });
  });

  // Reading the bodies inside .then() means a malformed body (e.g. an empty
  // `data` array) rejects the promise instead of throwing inside a callback.
  return fetchJson(url)
    .then((body1) => {
      response.rap = body1.recentAveragePrice
      return fetchJson(url2);
    })
    .then((body2) => {
      response.thumbnail = body2.data[0].imageUrl
      return fetchJson(url3);
    })
    .then((body3) => {
      response.name = body3.Name
      // finally, hand back the completed object
      return response;
    });
};
Now to the calling function. You still call GetData(), but you use the then() function to get what was resolved, or catch() to catch any errors. I would also suggest putting your res.send() calls at the end, unless you absolutely need to respond right away as you originally wrote it.
// POST /exec: waits for GetData(req.body.id), posts a Discord embed built from the
// result to a fixed channel, then answers {"success": true}. If GetData rejects (or
// building/sending the embed throws synchronously) the error is logged and the
// answer is {"success": false}.
app.post('/exec', function(req, res) {
  res.setHeader('Content-Type', 'application/json')
  console.log("working");
  console.log(req.body)
  // get all the data as a Promise
  GetData(req.body.id)
    .then((data) => {
      // now you have your data!
      const state = req.body.state === true ? "New Projected" : "No longer projected";
      const msg = new Discord.RichEmbed()
        .setDescription(data.name)
        .setAuthor(state)
        .setColor(0x1c90d9)
        .addField("RAP:", data.rap)
        .setImage(data.thumbnail)
      bot.channels.get("655196831835226133").send(msg);
      res.send(JSON.stringify({
        success: true
      }));
    })
    .catch((err) => {
      // Log the cause instead of discarding it; the client still gets a plain failure flag
      console.error(err);
      res.send(JSON.stringify({
        success: false
      }));
    })
});

Assign value to variable outside mongo query in nodejs

Right now i have this code
// GET /export: starts four independent list() queries and tries to copy each result
// into a variable declared at the top of the handler.
router.get('/export', function(req, res, next) {
var postData, eventData, messageData, userData
// Each .then callback below runs later, when its query resolves; the handler function
// itself has already reached its end by then.
Posts.list().then(data=> {
var jsonOutput=JSON.stringify(data)
postData=jsonOutput //this doesnt work
})
.catch(erro => res.status(500).send('error'))
Events.list().then(data=> {
var jsonOutput=JSON.stringify(data)
eventData=jsonOutput //this doesnt work
})
.catch(erro => res.status(500).send('error'))
Messages.list().then(data=> {
var jsonOutput=JSON.stringify(data)
messageData=jsonOutput //this doesnt work
})
.catch(erro => res.status(500).send('error'))
Users.list().then(data=> {
var jsonOutput=JSON.stringify(data)
userData=jsonOutput //this doesnt work
})
.catch(erro => res.status(500).send('error'))
// NOTE: each of the four chains has its own .catch that sends a 500, so more than one
// failing query would attempt to send more than one response.
//Then when all data from colections is retrieve i want to use the 4 variables that i created in the beggining
});
So basically I'm trying to retrieve the data from my Mongo database and then assign the results to the four variables that I created, but I'm not having any success.
For what i´ve been seeing i have to use async but im having some trouble doing it.
I don't much like mrlanlee's solution. This is a typical situation where using async/await really makes sense. Hugo's solution (the second one, with async/await) does work, but it will run the four queries in sequence, one after another. If you want a clean, working and parallel solution, check this:
// GET /export: runs the four list() queries in parallel and answers with their
// results as one JSON array, or with a 500 if any of them fails.
router.get('/export', async function(req, res, next) {
  try {
    // The queries are started inside the try so that a synchronous throw is caught as well.
    const pendingLists = [
      Posts.list(),
      Events.list(),
      Messages.list(),
      Users.list(),
    ];
    // Resolved array keeps the order above: [posts, events, messages, users]
    const data = await Promise.all(pendingLists);
    res.status(200).json(data);
  } catch (e) {
    res.status(500).send('error');
  }
});
The other answer from Sashi is on the right track, but you will probably run into errors. Each promise has its own catch that sends a 500, and a response can only be sent once: if more than one query fails, the second attempt to send will throw an error instead of sending another 500.
See below.
// GET /export: runs the four list() queries in parallel, JSON-stringifies each result
// and sends them as { response: { postData, eventData, messageData, userData } }.
// Any failing query produces exactly one 500 response.
router.get('/export', async function(req, res, next) {
  try {
    // The promises are awaited INSIDE the try block: a plain try/catch around
    // un-awaited promises never sees their rejections.
    const lists = await Promise.all([
      Posts.list(),
      Events.list(),
      Messages.list(),
      Users.list(),
    ]);
    const [postData, eventData, messageData, userData] = lists.map(data => JSON.stringify(data));
    // this part is optional, i wasn't sure if you were planning
    // on returning all the data back in an object
    const response = {
      postData,
      eventData,
      messageData,
      userData,
    };
    return res.status(200).send({ response })
  } catch (err) {
    // a single catch covers all 4 queries above, so only one error response is sent
    return res.status(500).send('error')
  }
});
For explanation of why you weren't able to mutate the variables, see Sashi's answer as he explains it.
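The variables defined outside the async code are in scope inside the callbacks, but the callbacks only run later, after the rest of the handler has already executed. Hence, at the point where you want to use those variables, they have not been assigned yet; you have to wait for the promises to resolve before using the values.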
This should work.
// GET /export (promise-chain version): waits for all four list() queries, then
// sends their JSON-stringified results in one object. A single .catch guarantees
// that at most one 500 is sent, however many queries fail.
router.get('/export', function(req, res, next) {
  Promise.all([
    Posts.list(),
    Events.list(),
    Messages.list(),
    Users.list(),
  ])
    .then(results => {
      // results keeps the order of the array above
      var [postData, eventData, messageData, userData] = results.map(data => JSON.stringify(data));
      // all four values are available from here on
      res.status(200).send({ postData, eventData, messageData, userData });
    })
    .catch(erro => res.status(500).send('error'));
});
Using Async/Await is a much neater solution.
// GET /export (async/await version): fetches the four lists one after another and
// answers with them as one JSON object, or with a 500 if any query fails.
router.get('/export', async function(req, res, next) {
  var postData, eventData, messageData, userData;
  try {
    // Each await finishes before the next query starts, so these run sequentially.
    postData = await Posts.list();
    eventData = await Events.list();
    messageData = await Messages.list();
    userData = await Users.list();
    // All four variables are populated here; without a response the request would never complete.
    res.status(200).json({ postData, eventData, messageData, userData });
  } catch (e) {
    res.status(500).send('error');
  }
});

Asynchronous Results Assigned to a Sequential Array

I'm doing Exercise 9 of LearnYouNode. The goal of the exercise is to print the contents of the HTTP results in the order of the arguments given on the command line. Everything seems to be working correctly, but they are not staying in order. I realize that having the jobId inside the callback is wrong because it won't execute until it completes, but I'm still completely blocked on how to make it behave as intended. Just a FYI, I'm trying to do this without using Async or any other libraries for educational purposes. Also, any other tips based on my coding not related to my problem would be appreciated!
const http = require('http');
urls = process.argv.slice(2, process.argv.length);
// Issues an HTTP GET for every URL, accumulates each response body as a string and
// calls callback(null, results) once as many responses have ended as urlList has entries.
function multiGetter (urlList, callback) {
var results = new Array(urlList.length);
var current = 0;
var completed = 0;
var hasErrors = false;
// Counts finished responses; reports an error immediately, or the results when all are in.
function done(err) {
if(err) {
hasErrors = true;
return callback(err);
}
if(++completed === urlList.length && !hasErrors) {
callback(null, results);
}
}
// NOTE: this iterates the outer `urls` variable, not the `urlList` parameter.
urls.forEach( (url) => {
http.get(url, (res) => {
// NOTE: jobId is taken when the response callback fires, so slots are handed out in the
// order responses start arriving, not in the order the URLs were given.
let jobId = current;
current++;
results[jobId] = '';
res.setEncoding('utf8')
.on('error', (e) => { console.error(e.message); })
.on('data', (data) => { results[jobId] += data; })
.on('end', () => { done(null); });
}).on('error', console.error);
});
}
// Prints every collected body, one per console.log call.
multiGetter(urls, (err, contents) => {
if (err) {
// NOTE: `console.error;` only references the function; it is never called, so nothing is printed.
console.error;
}
// NOTE: execution continues here even when err is set; done(err) invokes the callback with
// a single argument, so `contents` would be undefined in that case.
contents.forEach(result => {
console.log(result);
});
});
One way of doing this could be the following:
change the results variable into an object instead of an array: var results = {};
assign to jobId the value of url instead of current (you can get rid of the current variable)
Finally, in your callback at the bottom, change the iteration to:
// Walks the original urls array and looks each body up by its URL key, so the output
// follows the order of the command-line arguments rather than the order of arrival.
urls.forEach(url => {
console.log(contents[url]);
});

How to properly assign payload to GET function using express.js

I am currently learning to build a crawler using Node + Express + cheerio.
In the route I put this:
[index.js]
app.get('/api/crawler/android', crawlerController.android);
which calls into controller
[crawler-controller.js]
var androidCrawler = require('../crawlers/android')
module.exports.android = androidCrawler.androidget;
then I invoke the crawler (based on cheerio)
[crawler.js]
var request = require('request');
var cheerio = require('cheerio');
// Runs once, when this module is loaded: requests the page and builds a list of
// { Title, Link, Image } entries, one per <article> element.
// NOTE: androidget is assigned whatever request() itself returns; the `result` object is
// built inside the callback and `return result` only returns from that callback.
var androidget =request('https://www.developer-tech.com/categories/Android/', function (error, response, html){
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
var result = {result:[]};
$('article').each(function (i, element) {
var Title = $(this).find("h2").text();
var Link = $(this).find("a").attr("href");
var Image = $(this).find("img").attr("src");
var payload = {
"Title":Title,
"Link":Link,
"Image":Image
};
result['result'].push(payload);
});
console.log("aaa", result);
console.log(typeof result);
// returned to request's internals, not to any code in this file
return result;
}});
// NOTE: the only exported property is `getAndroid`, while the controller shown earlier reads
// `androidCrawler.androidget`. Also, `result` is declared with var inside the request
// callback above, so it is not in scope inside this function.
module.exports = {
getAndroid: function (androidget, res) {
res.send(JSON.stringify(result));
}
}
When I console log directly to crawler.js via terminal it return JSON object properly, but I think the way I export the function to be invoked by app.get is where I'm wrong and I can't figure it out.
Perhaps somebody could help me to properly invoke the crawler in my case?
There is no point in returning a result from a callback function: the return value goes back to the library that invoked the callback, not to your code, so it is simply discarded.
What you can do is wrap your request in a function and call a callback that you create :
// file.js
/**
 * Requests `url` and hands the value built from the response to `callback`.
 *
 * @param {string} url - address passed to request().
 * @param {Function} callback - invoked as callback(result) once the response
 *   has been processed.
 */
const wrapFunction = (url, callback) => {
  request(url, (error, response, html) => {
    // ... build `result` from `html` here (e.g. the cheerio parsing from the question)
    callback(result);
  });
};
and then use it :
// just an example
wrapFunction(yourUrl, (result) => {
// deal with your result
})
When you have that, you can export it and then use it in your middleware / controller :
// file.js
module.exports = wrapFunction;
// index.js
// The leading "./" makes require() load the local file; a bare 'file.js' would be
// looked up as an installed package instead.
const wrapFunction = require('./file.js'); // here is your function
// Responds with the JSON-serialized value produced by wrapFunction for yourUrl.
app.get('/yourRoute', (req, res) => {
  wrapFunction(yourUrl, (result) => {
    res.send(JSON.stringify(result));
  });
});
You can also use Promises :
/**
 * Promise-based variant: requests `url` and settles with the value built from
 * the response.
 *
 * @param {string} url - address passed to request().
 * @returns {Promise<*>} resolves with `result`; rejects with the request error.
 */
const wrapFunction = (url) => {
  return new Promise((resolve, reject) => {
    request(url, (error, response, html) => {
      if (error) {
        reject(error);
        return; // do not fall through to resolve() after a failure
      }
      // ... build `result` from `html` here (e.g. the cheerio parsing from the question)
      resolve(result);
    });
  });
};
And then :
wrapFunction(yourUrl).then(result => {
// deal with your result ...
}).catch(error => {
// deal with your error ...
});
Hope it helps,
Best regards

Resources