How to properly assign payload to GET function using express.js - node.js

I am currently learning to build a crawler using Node + Express + cheerio.
In the route I put this:
[index.js]
app.get('/api/crawler/android', crawlerController.android);
which calls into controller
[crawler-controller.js]
var androidCrawler = require('../crawlers/android')
module.exports.android = androidCrawler.androidget;
then I invoke the crawler (based on cheerio)
[crawler.js]
var request = require('request');
var cheerio = require('cheerio');
var androidget =request('https://www.developer-tech.com/categories/Android/', function (error, response, html){
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
var result = {result:[]};
$('article').each(function (i, element) {
var Title = $(this).find("h2").text();
var Link = $(this).find("a").attr("href");
var Image = $(this).find("img").attr("src");
var payload = {
"Title":Title,
"Link":Link,
"Image":Image
};
result['result'].push(payload);
});
console.log("aaa", result);
console.log(typeof result);
return result;
}});
module.exports = {
getAndroid: function (androidget, res) {
res.send(JSON.stringify(result));
}
}
When I run crawler.js directly from the terminal and console.log the result, it prints the JSON object properly, but I think the way I export the function to be invoked by app.get is where I'm going wrong, and I can't figure it out.
Perhaps somebody could help me to properly invoke the crawler in my case?

There is no point in returning a result from a callback function: the caller (here, request) ignores the callback's return value, so the result never reaches your code.
What you can do is wrap your request in a function and call a callback that you create :
// file.js
/**
 * Requests `url` and hands the scraped payload to `callback`.
 *
 * @param {string} url - Address passed straight through to `request`.
 * @param {function(*): void} callback - Invoked with `result` once the
 *   response handler has run.
 */
const wrapFunction = (url, callback) => {
  request(url, (error, response, html) => {
    // ... build `result` from `html` here (e.g. the cheerio parsing from the question)
    // NOTE(review): `error` is not inspected or forwarded in this example.
    callback(result);
  });
};
and then use it :
// just an example
wrapFunction(yourUrl, (result) => {
// deal with your result
})
When you have that, you can export it and then use it in your middleware / controller :
// file.js
module.exports = wrapFunction;
// index.js
const wrapFunction = require('file.js'); // here is your function
app.get('/yourRoute', (req, res) => {
wrapFunction(yourUrl, (result) => {
res.send(JSON.stringify(result));
});
})
You can also use Promises :
/**
 * Promise-returning variant: requests `url` and settles with the outcome.
 *
 * @param {string} url - Address passed straight through to `request`.
 * @returns {Promise<*>} Resolves with `result`; rejects with the error that
 *   `request` reported.
 */
const wrapFunction = (url) => {
  return new Promise((resolve, reject) => {
    request(url, (error, response, html) => {
      if (error) {
        // Stop here so the success path below never runs after a failure.
        reject(error);
        return;
      }
      // ... build `result` from `html` here (e.g. the cheerio parsing from the question)
      resolve(result);
    });
  });
};
And then :
wrapFunction(yourUrl).then(result => {
// deal with your result ...
}).catch(error => {
// deal with your error ...
});
Hope it helps,
Best regards

Related

REST API integration to 3rd party

I'm trying to create REST API. My API should return a list of users taken from a 3rd party (after some manipulations) and return it.
Here is my code:
function getUsersFrom3rdParty(options) {
https.get(options, (resp) => {
let data ='';
// A chunk of data has been received.
resp.on('data', (chunk) => {
data += chunk;
});
// The whole response has been received. Print out the result.
resp.on('end', () => {
console.log(JSON.parse(data));
});
}).on("error", (err) => {
console.log("Error: " + err.message);
});
}
exports.getUsers = (req, res, next) => {
var data = getUsersFrom3rdParty();
//do the manilupations and return to the api
};
I don't get the data in getUsers function.
I'd suggest using something like axios - npmjs - for making asynchronous calls to a 3rd party API:
const axios = require('axios')
function getUsersFrom3rdParty(options) {
const processResponse = (response) => {
const processedResponse = ...
// do whatever you need to do, then return
return processedResponse
}
return axios.get('/example.com')
.then(processResponse)
}
// then, you can use `getUsersFrom3rdParty` as a promise
exports.getUsers = (req, res, next) => {
const handleResponse = (data) => {
res.json({ data }) // or whatever you need to do
}
const handleError = (err) => {
res.json({ error: 'Something went wrong!' }) // or whatever you need to do
}
getUsersFrom3rdParty(...)
.then(handleResponse)
.catch(handleError)
}
This way, you're waiting for your API call to finish before you render something and/or return a response.
You are not passing options variable when you are calling getUsersFrom3rdParty function
var data = getUsersFrom3rdParty(options);
You have to pass options to make it work, and I suggest using the request module. It is easier to work with than the https module.
Here is your code using request
const request = require("request");
/**
 * Fetches the user list from the 3rd-party service and reports the outcome.
 *
 * The HTTP call is asynchronous, so the data cannot be returned directly;
 * it is delivered through an optional Node-style callback instead.
 *
 * @param {*} options - Passed unchanged to `request`.
 * @param {function(?Error, *=): void} [callback] - Called with `(error)` on a
 *   transport error, a non-200 status or an unparsable body, and with
 *   `(null, users)` on success. When omitted, the parsed body is only logged.
 */
function getUsersFrom3rdParty(options, callback) {
  // Callers that pass no callback keep the previous fire-and-forget behaviour.
  const done = typeof callback === "function" ? callback : () => {};
  request(options, (error, response, body) => {
    if (error) {
      done(error);
      return;
    }
    if (response.statusCode !== 200) {
      done(new Error(`Unexpected status code: ${response.statusCode}`));
      return;
    }
    let users;
    try {
      users = JSON.parse(body);
    } catch (parseError) {
      // An invalid body must not throw inside the request callback.
      done(parseError);
      return;
    }
    //Returned data
    console.log(users);
    done(null, users);
  });
}
exports.getUsers = (req, res, next) => {
var data = getUsersFrom3rdParty(options);
};

return value not getting logged from module exports

I'm writing a code that uses a library(jsforce) to query on Salesforce and get the records.
Currently, to keep the code clean, I'm separating the index and rest calls file. Here is my code.
var jsforce = require('jsforce');
const uName = 'myId';
const pwd = 'myPwd';
const servKey = 'myKey';
var conn = new jsforce.Connection();
var login = conn.login(uName, pwd, servKey, function (err, res) {
if (err) { return false; }
return true;
});
module.exports = {
myCases: () => {
console.log(`I'm called`);
login.then(() => conn.query(`Select ID, Subject from Case where status='new'`, function (err, openCases) {
if (err) { return console.error(err); }
return openCases;
}));
}
}
and my index file is as below.
const restServices = require('./restServices');
var test = function () {
restServices.myCases((err, data, response) => {
console.log('err')
console.log(err)
console.log('data');
console.log(data);
console.log('response');
console.log(response);
});
}
test();
When I run it, my log prints only I'm called (from restServices.js). but none of the data from my index.js is printed.
also when I add a console.log(openCases), it prints exactly the required data.
please let me know on where am I going wrong in returning the data and how can I fix this.
Thanks
Pass a callback to myCases and call it with the result.
Example in service.js
Mycase(callback) =>{
// response, err from db then
Callback(err, response)
}
In index.js
Service.mycase((err, response) =>{
Console.log (err, response)
}

NODE JS - request npm - manipulate url body

I'm working on a server side (self) project with node js (for the first time), and i ran into some difficulties.
My goal is the following:
first part - Im using "/uploads/processData" URL in my server to get URL(s) from the user request.
Now i want to access the user request URL(s) and get their HTML(s) file(s), to do so i'm using the "request" npm package (code below).
second part - I want access the body that I get back from the request package (from the first part), so I'm using cheerio npm package to do so.
Now to my problem - lets say that i'm trying to get the body of the url:
https://www.amazon.com/NIKE-Mens-Lunarconverge-Running-Shoes/dp/B06VVFGZHL?pd_rd_wg=6humg&pd_rd_r=61904ea4-c78e-43b6-8b8d-6b5ee8417541&pd_rd_w=Tue7n&ref_=pd_gw_simh&pf_rd_r=VGMA24803GJEV6DY7458&pf_rd_p=a670abbe-a1ba-52d3-b360-3badcefeb448&th=1
From some reason that i cant understand (probably because of lack of knowledge at web development), I dont always get the same body that i see when I review the above page (link) using F12, with my first part code. Hence sometimes my cheerio extraction (the second part) works as i expect and sometime does not (because some element from the full/original HTML file are missing). At first I thought it might be cache thing, so I added a middleware to set "nocache" flag.
What am I missing here? Does the way I try to operate wrong? Is there any way to ensure i get the same full/original HTML everytime?
Here is my code so far -
nocache middleware
/**
 * Middleware that sets anti-caching headers on the response and then passes
 * control on.
 *
 * @param {*} req - Incoming request (not read here).
 * @param {*} res - Response object; its `header(name, value)` is called once
 *   per header below, in order.
 * @param {function(): void} next - Called with no arguments after the headers
 *   are set.
 */
function nocache(req, res, next) {
  const antiCacheHeaders = [
    ["Cache-Control", "private, no-cache, no-store, must-revalidate"],
    ["Expires", "-1"],
    ["Pragma", "no-cache"],
  ];
  for (const [field, value] of antiCacheHeaders) {
    res.header(field, value);
  }
  next();
}
EDIT
uploadRoutes.post("/processGoogleSearchData", nocache, (req, res) => {
//Assuming getting in req.body the google result JSON as "googleSearchResult"
var itemsArr = [];
var linksArr = [];
var bodysArr = [];
itemsArr = req.body.googleSearchResult.items;
if (itemsArr.length === 0) {
//return appropriate message
return res.status(400).send({ message: "No data sent to server" });
}
var linksArr = itemsArr.map(item => item.link);
//Get the needed info from every link
linksArr.forEach(link => {
request(link, (err, response, body) => {
if (!err && response.statusCode === 200) {
var $ = cheerio.load(body);
var tr = $(".a-lineitem").children();
var priceTd = tr.find(".a-span12");
var priceSpan = priceTd.find("#priceblock_ourprice");
console.log(priceSpan.text());
//when trying to build array of bodys the extraction doesnt work at all
bodysArr.push(body);
}
});
});
res.send(bodysArr);
});
I changed my code to the above, and it seems like the data extraction works more often. Can anyone explain why the extraction still sometimes doesnt work?
I tried return bodysArr for debbug purposes but when i do that the extraction does not work at all and my path response is always an empty array, why is that?
The problem is that:
res.send(bodysArr);
is executed straight after the call to
linksArr.forEach(link => {
The callbacks
(err, response, body) => {
if (!err && response.statusCode === 200) {
var $ = cheerio.load(body);
var tr = $(".a-lineitem").children();
var priceTd = tr.find(".a-span12");
var priceSpan = priceTd.find("#priceblock_ourprice");
console.log(priceSpan.text());
//when trying to build array of bodys the extraction doesnt work at all
bodysArr.push(body);
}
are not guaranteed to have fired yet. What you want is to ensure that res.send(bodysArr) runs only after all the requests have completed.
There are a few ways to handle this, one is with the excellent async library.
Hopefully you can get the gist of it with this example.
var array = [1,2,3]
function asyncRequest(input, callback){
//Do your fetch request here and call callback when done
setTimeout(callback, 10); //using setTiemout as an example
}
async.each(array, asyncRequest, (err) => {
if(err){
throw err;
}
console.log("All Finished");
});
<script src="https://cdnjs.cloudflare.com/ajax/libs/async/2.6.1/async.min.js"></script>
After reviewing Sudsy explanation, I came across loops of asynchronous methods.
While playing with this subject I could not figure out whats wrong with my following code:
This works fine - so i ended up using it
/**
 * Fetches every item's page in parallel and extracts its price text.
 *
 * @param {Array<{link: string}>} itemsArr - Items whose `link` is requested
 *   with axios.
 * @returns {Promise<string[]>} One entry per item, in input order: the text of
 *   `#priceblock_ourprice`, or the error message if that item failed.
 */
async function getItemsInfo(itemsArr) {
  // Wait for all lookups here so callers receive finished values rather than
  // an array of pending promises. Passing the result through Promise.all
  // again, as existing callers do, remains harmless.
  return Promise.all(
    itemsArr.map(async (item) => {
      try {
        const response = await axios(item.link);
        // Only the HTTP call is asynchronous; cheerio's load/traversal calls
        // return their values directly, so they need no await.
        const $ = cheerio.load(response.data);
        return $(".a-lineitem")
          .children()
          .find(".a-span12")
          .find("#priceblock_ourprice")
          .text();
      } catch (err) {
        // A failure is reported in place so the other items still come back.
        return err.message;
      }
    })
  );
}
getItemsInfo(linksArr)
.then(res => Promise.all(res))
.then(res => console.log(res))
.catch(err => console.error(err));
Can someone explain to me what's wrong with the following codes?
async function getItemsInfo(itemsArr) {
await Promise.all(
itemsArr.map(async item => {
try {
var body = await axios(item.link);
var $ = await cheerio.load(body.data);
var tr = await $(".a-lineitem").children();
var priceTd = await tr.find(".a-span12");
var priceSpan = await priceTd.find("#priceblock_ourprice");
return priceSpan.text();
} catch (err) {
throw err.message;
}
})
)
.then(resulst => {
return results;
})
.catch(err => {
throw err.message;
});
}
//the caller function
try {
getItemsInfo(linksArr).then(results => {
res.status(200).send(results);
});
} catch (err) {
res.status(400).send(err.message);
}
or
async function getItemsInfo(itemsArr) {
const promises = itemsArr.map(async item => {
try {
var body = await axios(item.link);
var $ = await cheerio.load(body.data);
var tr = await $(".a-lineitem").children();
var priceTd = await tr.find(".a-span12");
var priceSpan = await priceTd.find("#priceblock_ourprice");
return priceSpan.text();
} catch (err) {
return err.message;
}
});
var results = await Promise.all(promises)
.then(results => {
return results;
})
.catch(err => {
return err.message;
});
}
//the caller function
try {
getItemsInfo(linksArr).then(results => {
res.status(200).send(results);
});
} catch (err) {
res.status(400).send(err.message);
}

NodeJS Download HTML with Request

Having quite a bit of trouble getting an HTML page to download using NodeJS. Here is my code snippet:
const request = require('request');
request('http://www.google.com', { json: true }, (err, res, body) => {
if (err) {
return console.log(err);
}
console.log(body.url);
console.log(body.explanation);
});
When I step through this it executes in about half a second. I get no errors back but I'm not getting any content logged to the console...
This works for me.
const request = require('request')
request('https://google.com', (err, res, body) => console.log(err ? err : body))
With request you can pipe the response body of a request directly to a WriteableStream
const fs = require('fs')
const request = require('request')
request('https://google.com').pipe(fs.createWriteStream('./google-index.html'))
Per the comments below, the following example illustrates how to wrap this request so it can be awaited and printed to the screen or written to a file.
const {promisify} = require('util')
const fs = require('fs')
const writeFile = promisify(fs.writeFile)
const request = require('request')
/**
 * Wraps the callback-style `request` call in a Promise.
 *
 * @returns {Promise<*>} Resolves with the response body of
 *   https://google.com, or rejects with the error `request` reported.
 */
const getGoogleIndexHTML = () =>
  new Promise((resolve, reject) => {
    const onResponse = (err, res, body) => {
      if (err) {
        return reject(err);
      }
      return resolve(body);
    };
    request('https://google.com', onResponse);
  });
const printAndWriteGoogleIndex = async () => {
try {
let googleIndexHTML = await getGoogleIndexHTML()
console.log(googleIndexHTML)
await writeFile('./google-index.html', googleIndexHTML, 'utf8')
console.log('google-index.html written.')
} catch(err) {
console.log(err)
}
}
printAndWriteGoogleIndex()

Rendering parts of another site with Express

I have a basic Express application with one function that uses nodejs request and takes some divs using selectors. After that, I want to render this with jade.
var express = require('express');
var voc = require('vocabulaire');
var async = require('async');
var router = express.Router();
router.get('/', function (req, res) {
res.render('index', {title: 'Espace de la diffusion'});
});
var result;
router.get('/search/:mot', function (req, res) {
async.series([
function () {
result = main(['conj', req.params.mot]);
console.log('in 1');
},
function () {
res.render('index', {title: 'Espace de la diffusion', data: result});
res.send(html);
console.log('in 2');
},
]);
});
module.exports = router;
var request = require('request')
, cheerio = require('cheerio');
function doit(verbe, result) {
var url = 'http://www.babla.ru/%D1%81%D0%BF%D1%80%D1%8F%D0%B6%D0%B5%D0%BD%D0%B8%D1%8F/%D1%84%D1%80%D0%B0%D0%BD%D1%86%D1%83%D0%B7%D1%81%D0%BA%D0%B8%D0%B9/' + verbe;
request(url, function (err, resp, body) {
$ = cheerio.load(body);
var temps = $('.span4.result-left h5');
if (temps.length == 0) {
console.log('results not found');
}
else {
console.log('result found');
debugger;
return $('.span4.result-left');
}
});
}
function main(arg) {
switch (arg[0]) {
case 'conj':
return doit(arg[1]);
break;
default:
console.log('unknown parameter');
break;
}
}
I used async library for be sure that my result is ready to be rendered but in console I see next:
GET /search/est - - ms - -
in 1
result found
and debugger followed me to nodejs function makeTick()..
I don't know what to do.. help me please.
Your async.series() functions are missing the callback parameter that you need to call in order for the next function to execute. However, you don't really need async to just do a single async task:
main(['conj', req.params.mot], function(err, result) {
res.render('index', {title: 'Espace de la diffusion', err: err, data: result});
});
// ...
/**
 * Requests the conjugation page for `verbe` and reports the matching nodes.
 *
 * @param {string} verbe - Verb appended to the base URL.
 * @param {*} result - Unused placeholder kept for signature compatibility. If
 *   a function is passed here and no `callback` is given, it is used as the
 *   callback (this is how `main` calls `doit(verbe, callback)`).
 * @param {function(?Error=, *=): void} [callback] - Called with `(err)` on a
 *   request error, with no arguments when nothing was found, and with
 *   `(null, nodes)` when the result block is present.
 */
function doit(verbe, result, callback) {
  // Accept both doit(verbe, callback) and doit(verbe, result, callback);
  // previously the two-argument form silently dropped the callback.
  var done = typeof callback === 'function'
    ? callback
    : (typeof result === 'function' ? result : function () {});
  var url = 'http://www.babla.ru/%D1%81%D0%BF%D1%80%D1%8F%D0%B6%D0%B5%D0%BD%D0%B8%D1%8F/%D1%84%D1%80%D0%B0%D0%BD%D1%86%D1%83%D0%B7%D1%81%D0%BA%D0%B8%D0%B9/' + verbe;
  request(url, function (err, resp, body) {
    if (err) {
      return done(err);
    }
    // Declared locally: assigning to an undeclared `$` leaks a global and
    // throws in strict mode.
    var $ = cheerio.load(body);
    var temps = $('.span4.result-left h5');
    if (temps.length === 0) {
      done();
    } else {
      done(null, $('.span4.result-left'));
    }
  });
}
/**
 * Dispatches a command described by `arg`.
 *
 * @param {Array} arg - `arg[0]` is the command name; for 'conj', `arg[1]` is
 *   forwarded to `doit`.
 * @param {function(?Error=, *=): void} [callback] - Handed to `doit` for
 *   'conj'; otherwise called with an Error for an unknown command.
 */
function main(arg, callback) {
  if (arg[0] === 'conj') {
    doit(arg[1], callback);
    return;
  }
  // Any other command is unsupported; report it only if someone is listening.
  if (callback) {
    callback(new Error('unknown parameter'));
  }
}

Resources