Get data from webpage with cheerio - node.js

I am trying to get the API response time from a website with cheerio. The site fetches that time itself, so I first have to wait for it to do so, but I am not sure how exactly to do that. Here is what I have tried. At the moment it does not work because it doesn't wait for the website to fetch the time.
// Download the page, then for every `.metrics-container` match log the
// `children` property of the second node found three levels beneath it.
request.get("site", function (err, res, body) {
  if (err) {
    // A failed request is skipped without any output.
    return;
  }
  var $ = cheerio.load(body);
  $('.metrics-container').filter(function () {
    var container = $(this);
    // Three successive children() calls walk down to the third nesting level.
    var thirdLevel = container.children().children().children();
    var response_time_api = thirdLevel[1].children;
    console.log(response_time_api)
  });
});
Here is an image of the source I am fetching:

Related

Https request in Node.js

I'm using the request library in Node.js to do a https request to get data from another service. This is called asynchronously, right? So my code keeps running before all of the data is there, correct?
My problem is that the data is needed right afterwards to calculate some things. My code throws an error during that calculation because the data from the service is undefined...
Could it be possible that the data is just not there yet? And if so, what do you do against that?
Here is a copy of the request:
const request = require('request');
request(someUrl, {"Accept": "application/json"}, (err, res, body) => {
if (err)
handleError(err);
body = JSON.parse(body);
return body;
});
This kind of situation is pretty common in React/Angular/Vue web apps: sometimes you need the data right away, but it is not available yet and only becomes available after a REST call (or something similar) completes.
So, the simplest solution?
Just add a check, for example:
// Placeholder calculation that does nothing while its input is still falsy
// (for example, not loaded yet).
const calculate = (someVal) => {
  if (someVal) {
    // a value is available: do the calculation here
  }
};
There are plenty of other ways, mostly by making the calculation async. For your function, you can do this:
/**
 * Request `someUrl` and deliver the parsed JSON body through a Promise.
 *
 * @returns {Promise<*>} Resolves with the value parsed from the response
 *   body. Rejects with the request error, or with the SyntaxError thrown by
 *   JSON.parse when the body is not valid JSON.
 */
const promOp = function () {
  return new Promise((resolve, reject) => {
    request(someUrl, { headers: { Accept: 'application/json' } }, (err, res, body) => {
      if (err) {
        // Return so we never try to parse the body of a failed request.
        reject(err);
        return;
      }
      try {
        resolve(JSON.parse(body));
      } catch (parseErr) {
        // This callback runs asynchronously, so a throw here would not be
        // turned into a rejection by the Promise constructor.
        reject(parseErr);
      }
    });
  });
};
//then
promOp()
  .then((body) => {
    //calculate here
  })
  .catch((err) => {
    // Handle the rejection so a failed request or calculation is reported
    // instead of becoming an unhandled promise rejection.
    console.error(err);
  });
//or can use the `Async/Await` syntax instead of then
// Async/await form of the same flow. promOp must be CALLED: awaiting the
// function object itself would just yield the function, not the response.
const op = async () => {
  const body = await promOp();
  //calculate here
};

effective way of sending the body as a callback

In my app.js
const employees = require('../models/employees');
// Load the employee named by the route parameter and print their first name.
employees.read(req.params.id, (employee) => {
  console.log(employee.firstName);
});
in my models/employees
var request = require('request');
var employees = {
  /**
   * Fetch one employee record from the API and hand it to `callback`.
   *
   * @param {string|number} id - Employee identifier; URL-encoded before it
   *   is appended to the endpoint path.
   * @param {function(*, Error=)} callback - Called as `callback(body)` with
   *   the parsed JSON on success. On failure it is called as
   *   `callback(undefined, error)`; existing one-argument callbacks keep
   *   working and can opt in to the second argument.
   */
  read: function (id, callback) {
    var url = 'http://api.mysite.com/employees/' + encodeURIComponent(id);
    request.get(url, function (error, response, body) {
      if (error) {
        // No body to parse when the request itself failed.
        return callback(undefined, error);
      }
      var parsed;
      try {
        parsed = JSON.parse(body);
      } catch (parseError) {
        // The API answered with something that is not JSON.
        return callback(undefined, parseError);
      }
      return callback(parsed);
    });
  },
};
module.exports = employees;
This works (it returns the employee name correctly), but I'm not sure if this is the correct (async) way of getting data from an API and displaying it.
thank you!
Node.js by default is asynchronous so you don't have to 'make' it work in an async manner.
For future use though, once you have more requests, there may be times when you have to wait for a certain request to finish before you can fire the next one off, i.e. run tasks sequentially. In that case you'll have to use something like http://caolan.github.io/async/ and queue function calls in a waterfall/series model.

Node.js: Request + Cheerio, website is unresponsive

I've been trying to scrape this url unsuccessfully and retrieve the "Date of Organization in Massachusetts." I suspect I might just be mislabeling the DOM, but have already tried a series of ids and classes. Any suggestions - I'm using cheerio and request.
var url = 'http://corp.sec.state.ma.us/CorpWeb/CorpSearch/CorpSummary.aspx?FEIN=800829800&SEARCH_TYPE=1';
// Download the corporation summary page and log the text of the
// organisation-date element.
request(url, function (err, resp, body) {
  if (err) {
    throw err;
  }
  // Declare `$` locally: assigning to an undeclared name creates an implicit
  // global, and throws a ReferenceError in strict mode / ES modules.
  const $ = cheerio.load(body);
  // NOTE(review): every ancestor in this selector must match the page markup
  // exactly; an id selector on its own would normally be enough - confirm
  // against the actual HTML.
  const orgdate = $('#MainContent_tblOrg .p1 td #MainContent_lblOrganisationDate').text();
  console.log(orgdate);
});

Using request within routes node / express

I'm playing around with using Node.js as a custom front end for Drupal, and I'm trying to come up with a way to match the backend menu system, blocks and views with the routing in Express.
example route
module.exports = {
'/work': function(req, res){
//get view json for this page
request('http://site.api/casestudies', function(err, response, body){
views_body = JSON.parse(body);
//get node id from alias
request('http://site.api/alias-to-nid' + req.url, function(err, response, body){
body = JSON.parse(body);
var reqUrl = 'http://site.api/rest/api/' + body.path;
request(reqUrl, function(err, response, body){
body = JSON.parse(body);
//get the data we need
var node_title = body.title,
node_body = body.body.und[0].safe_value,
pageclass = 'not-front section-work';
res.render('work', {title: node_title, class:pageclass, node_title:node_title, node_body:node_body, views_body:views_body});
});
});
});
}
}
So, I hit /work and grab the JSON for the casestudies view that should exist on that page, then I look up the node id from the /work alias using another request, and finally use the node id in yet another nested request call to grab the rest of the JSON for the page before finally sending it on to the template.
Now - I have a feeling that this is a terrible way to go about this. What should I be doing instead!?

nodejs & mongodb refresh bug?

I am new to nodejs but I did get something to work last night with mongodb on a IIS server with iisnode. :)
But I am wondering over one thing which seems to be a refresh bug or something.
When I go to "http://localhost/mongo.js" in my browser, the result will just be "[]" the first time. If I hit refresh, the result will be what I expected (a JSON array of persons). Doesn't this seem wrong?
Let's say now I do it with a query, "http://localhost/mongo.js?name=Daniel", and get all Persons with the name Daniel. The response the first time will be "all the persons" because that's what we asked for above, and when I hit refresh the results will be all the Persons named Daniel. Why is this happening?
It seems that the server cached the queries I've made, and I don't want to hit refresh every time to get the correct results.
This is my code I am using: (also available here http://pastebin.com/PnVfrQmh)
/* SERVER SETTINGS
----------------------------------------------------------------------*/
//load http module to create an http server.
var http = require('http');
var url = require('url');
//load the mongodb driver once, not on every request
var mongo = require('mongodb');

//configure to respond http server with message
http.createServer(function (request, response) {
  //request name parameter
  var url_parts = url.parse(request.url, true);
  var query = url_parts.query;

  //documents collected for THIS request only; a module-level array would be
  //shared by every request and leak one request's results into the next
  var results = [];
  var finished = false;

  //send exactly one response and release the database connection
  function finish(status, payload) {
    if (finished) {
      return;
    }
    finished = true;
    db.close();
    //write what type of response
    response.writeHead(status, {'Content-Type': 'application/json;charset=utf-8'});
    //return data json array
    response.end(JSON.stringify(payload));
  }

  //do the mongo
  var db = new mongo.Db('nodedb', new mongo.Server('localhost', 27017, {}), {});
  db.open(function (err) {
    if (err) {
      finish(500, {error: String(err)});
      return;
    }
    db.collection('Persons', function (err, collection) {
      if (err) {
        finish(500, {error: String(err)});
        return;
      }
      var cursor = collection.find(query);
      cursor.each(function (err, doc) {
        if (err) {
          finish(500, {error: String(err)});
          return;
        }
        if (doc) {
          results.push(doc);
          return;
        }
        //a null doc means the cursor is exhausted: only now is the result
        //complete, so the response is sent here and not right after
        //db.open() was merely scheduled
        finish(200, results);
      });
    });
  });
}).listen(process.env.PORT);
You have to remember that you are always in an async world with node.js. This caught me out as well coming from a more sync background.
What is happening here is your response is returning before the logic is run. This is because when you call db.open(function() { this goes on the event loop and returns. This is non-blocking so the next line of code that runs is response.writeHead(200, {'Content-Type': 'application/json;charset=utf-8'});. Then the callback for db.open is called, sometime in the future.
To correct this, send the response after the cursor.each(function(err, doc) { loop has finished.
Hope this helps.

Resources