Sort scraped Data into table on server page - node.js

Hi, I'm working on a scraper script. So far I've been able to scrape 2 elements. At this testing stage I don't have a database set up yet, so I thought I'd just write the results straight to my server page. This is my working code:
var http = require('http');
var request = require('request');
var cheerio = require('cheerio');
// Minimal HTTP server: every incoming request triggers a fresh scrape of the
// xscores soccer page and answers with an HTML table built from the scraped cells.
http.createServer(function (req, res) {
  request('http://www.xscores.com/soccer', function (error, response, page) {
    // Always answer the client: previously a failed scrape (network error or
    // non-200 status) sent nothing at all and left the browser request hanging.
    if (error || response.statusCode != 200) {
      console.error(error ? error.message : 'Unexpected status code: ' + response.statusCode);
      res.writeHead(502, {
        'Content-Type': 'text/html'
      });
      res.end('<b>Could not fetch scores</b>');
      return;
    }
    var $ = cheerio.load(page);
    var HomeTeam = "";
    var AwayTeam = "";
    // All home-team cells are collected first ...
    $('div.score_home_txt.score_cell.wrap').each(function (i, element) {
      var home = $(this).text().toLowerCase();
      HomeTeam += "<tr><td>" + home + "</td>";
    });
    // ... and all away-team cells afterwards, so home and away cells are
    // concatenated as two separate runs rather than paired row by row.
    $('div.score_away_txt.score_cell.wrap').each(function (i, element) {
      var away = $(this).text().toLowerCase();
      AwayTeam += "<td>" + away + "</td><tr>";
    });
    var table = "<table><th>" + "HomeTeam</th><th>AwayTeam</th>" + HomeTeam + AwayTeam + "</table>";
    res.writeHead(200, {
      'Content-Type': 'text/html'
    });
    res.end(table);
  });
}).listen(8080);
console.log('Server is running at http://178.62.253.206:8080/');
The plan was to arrange this in a table with 2 columns, Home in column A and Away in column B, but I'm a little unsure how to write this so the cells end up in the right rows.
The code above puts everything into a single row. I've tried a few different approaches but haven't figured out the correct way yet :/
Any help would be much appreciated
Frederik

You need to find a common parent element. Looking at the website you are scraping, .score_line looks like a reasonable option:
// NOTE(review): res.set / res.status / res.send are Express-style response helpers,
// so this fragment presumably lives inside an Express route handler - confirm.
// assume we're always going to return html
res.set('Content-Type', 'text/html');
// hit API
request('http://www.xscores.com/soccer', (err, response, html) => {
  if (err || response.statusCode !== 200) {
    // log error internally
    console.error(err ? err.message : `API status code: ${response.statusCode}`);
    // return client response
    return res.status(500).send('<b>Internal Server Error</b>');
  }
  const $ = cheerio.load(html);
  const rows = [];
  // find each row
  $('.score_line').each((i, el) => {
    // `el` is the raw DOM node, which has no .find(); wrap it with $() to get
    // a cheerio selection scoped to this row.
    const row = $(el);
    // extract each column
    const homeScore = row.find('.score_home.score_cell.wrap').text().toLowerCase();
    const awayScore = row.find('.score_away.score_cell.wrap').text().toLowerCase();
    // build row
    rows.push(`<tr><td>${homeScore}</td><td>${awayScore}</td></tr>`);
  });
  // build & send table
  res.send(`<table>${rows.join('')}</table>`);
});

Related

Multipurpose variables in Node.js

I run game servers and wrote a Node.js bot to send/receive chat messages from/to Discord to enable communication between them.
Now I want to extend its functionality so that it also relays the latest logs from the game to Discord.
I have managed to make it work for a single player but now I want to make it work for several players.
I have three variables: channelID, server, playerID.
So, my question is what's the best method to have multiple arrays of ChannelID which will contain multiple server values and playerID values to be processed by a single function?
My current working code:
// Single-player setup: the Discord channel that receives the log, and the
// server/player pair whose last log line is read.
var channelID = "channel1";
var server = "server1";
var playerID = "player1";
//Last log message is stored in Webdis
var webdisURL = "http://127.0.0.1:7379/GET/" + server + playerID;
// Every 3 seconds: fetch the stored value and post it to the channel.
setInterval(() => lastLog(webdisURL, function(body) {
  // `result` used to be assigned without any declaration, which creates an
  // implicit global (and throws in strict mode / ES modules).
  const values = Object.values(body);
  if (values.length === 0) {
    // Nothing stored: do not post "undefined" to the channel.
    return;
  }
  // Same value the original loop ended on: the last property of the reply.
  const result = `${values[values.length - 1]}`;
  client.channels.cache.get(channelID).send(result);
}), 3000);
/**
 * Requests `webdisURL` as JSON and hands the parsed body to `callback`.
 *
 * The callback is invoked only for a successful (HTTP 200) reply. Failures are
 * now logged instead of being dropped silently; the callback is still not
 * invoked for them, so existing callers behave as before.
 *
 * @param {string} webdisURL - URL to request.
 * @param {function(Object): void} callback - Receives the parsed JSON body.
 */
function lastLog(webdisURL, callback) {
  request({
    url: webdisURL,
    json: true
  }, function(error, response, body) {
    if (error) {
      console.error("lastLog: request to " + webdisURL + " failed: " + error.message);
      return;
    }
    if (response.statusCode !== 200) {
      console.error("lastLog: " + webdisURL + " answered with status " + response.statusCode);
      return;
    }
    callback(body);
  });
}
So I have managed to do it like this:
var webdisURL = "http://127.0.0.1:7379/GET/";
/**
 * Relays the latest log line of a group of players to one Discord channel.
 *
 * Constructing an instance starts one 3-second polling timer per player; a line
 * is posted only when it differs from the line last posted for that player.
 */
class Players {
  /**
   * @param {string} players - Comma-separated "playerName:playerSRV:playerID" entries.
   * @param {string} channelID - Id of the Discord channel that receives the lines.
   */
  constructor(players, channelID) {
    Players.parse(players).forEach((player) => {
      const getLogURL = webdisURL + player.server + player.id;
      let previousResult;
      setInterval(() => Players.fetchLastLog(getLogURL, (body) => {
        const values = Object.values(body || {});
        if (values.length === 0) {
          // Nothing stored for this player (yet).
          return;
        }
        // Same value the original key loop ended on: the last property of the reply.
        const result = `${values[values.length - 1]}`;
        if (result === previousResult) {
          return;
        }
        const channel = client.channels.cache.get(channelID);
        if (!channel) {
          // Keep previousResult untouched so the line is retried on the next tick.
          console.error("Players: channel " + channelID + " not found in cache");
          return;
        }
        channel.send("[" + player.name + "] - [" + player.server + "]: " + result);
        previousResult = result;
      }), 3000);
    });
  }

  /**
   * Splits the roster string into player records.
   *
   * Empty entries (e.g. from a trailing comma) are skipped and surrounding
   * whitespace is trimmed. An entry without all three parts throws, because it
   * would otherwise be polled with "undefined" inside the URL.
   *
   * @param {string} players - Comma-separated "name:server:id" entries.
   * @returns {{name: string, server: string, id: string}[]}
   * @throws {Error} When an entry lacks a name, server or id.
   */
  static parse(players) {
    return players
      .split(',')
      .map((entry) => entry.trim())
      .filter((entry) => entry !== '')
      .map((entry) => {
        // Split once instead of three times.
        const [name, server, id] = entry.split(':').map((part) => part.trim());
        if (!name || !server || !id) {
          throw new Error('Invalid player entry "' + entry + '", expected "name:server:id"');
        }
        return { name, server, id };
      });
  }

  /**
   * Requests `getLogURL` as JSON and passes the body to `callback` on HTTP 200.
   * Failures are logged; the callback is not invoked for them.
   *
   * @param {string} getLogURL - URL to request.
   * @param {function(Object): void} callback - Receives the parsed JSON body.
   */
  static fetchLastLog(getLogURL, callback) {
    request({
      url: getLogURL,
      json: true
    }, function (error, response, body) {
      if (error || response.statusCode !== 200) {
        console.error("Players: could not read " + getLogURL + ": " +
          (error ? error.message : "status " + response.statusCode));
        return;
      }
      callback(body);
    });
  }
}
// "playerName1:playerSRV1:playerID1,playerName2:playerSRV2:playerID2", "channelID"
let playerTeam1 = new Players("Perseus:myServer1:ID1", "channel1");
let playerTeam2 = new Players("Another Player:myServer2:ID1,Another Player2:myServer3:ID1", "channel2");
Example Output:
[Perseus] - [myServer1]: Day 153, 11:59:43: Your 'Campfire' was auto-decay destroyed!
[Another Player] - [myServer2]: Day 153, 11:59:43: Your 'Campfire' was auto-decay destroyed!
[Another Player2] - [myServer3]: Day 153, 11:59:43: Your 'Campfire' was auto-decay destroyed!
Does it look OK? I'll appreciate any suggestions for improvement.

Get express/node to loop through request sent to NOAA API

I am building a kind of API middleware for my company that grabs information from the NOAA API and then stores it in my database. It does more than that, but that is a separate part. As it stands it works: it gets the information and stores it in my SQL database perfectly. The issue is that the information I get is based on ZIP code, and one request returns the information for one ZIP code. I need to be able to "loop" through a list of ZIP codes one at a time and store the information for each in the database, and I am not sure how to get that to work properly. I have tested a couple of approaches without success, so if someone can point me in the right direction it would be appreciated.
Sorry in advance my code is not cleaned up.
Everything below apiRequest.end() has little function for the question. I keep it for context.
let mysql = require('mysql');
let config = require('./config.js');
var https = require("https");
var express = require("express");
var app = express();
const port = 3000;
var fs= require('fs');
var csv = require('fast-csv');
//last test
//array will replace this zip variable
let zip = '90012';
api(zip);
/**
 * Registers the "/" route and starts the Express server.
 *
 * On each GET "/" the handler requests daily TMAX readings (2019-01-01 to
 * 2020-01-01) for the given ZIP code from the NOAA CDO API, sends the parsed JSON
 * back to the client and stores the readings in the `test1` table. An empty
 * reply is recorded as a single "No Data" row instead.
 *
 * Because app.listen() is called in here, this function can only be called once
 * per process.
 *
 * @param {string} zips - ZIP code to query (currently a single code).
 */
function api(zips){
  //All of the parts for building the get requests url
  app.get("/", function(req, response) {
    // FIXME(security): the API token is hard-coded in source; it should be loaded
    // from configuration or the environment and this one rotated.
    var apiKey = "gPaEVizejLlbRVbXexyWtXYkfkWkoBhd";
    let webapi = 'https://www.ncdc.noaa.gov/cdo-web/api/v2/data?';
    let datasetid="datasetid=GHCND";
    let datatypeid="&datatypeid=TMAX";
    let location="&locationid=ZIP:";
    const zipcode = zips;
    let startdate="&startdate=2019-01-01";
    let enddate="&enddate=2020-01-01";
    let units = "&units=standard";
    let limit="&limit=1000";
    let url = webapi + datasetid + datatypeid + location + zipcode + startdate + enddate + units + limit;
    var options = {
      port: 443,
      method: "GET",
      headers: {
        "token": apiKey
      }
    };
    let data = "";
    //request to grab from NOAA api
    let apiRequest = https.request(url, options, function(res) {
      console.log("Connected");
      //grabing all data
      res.on("data", chunk => {
        data += chunk;
      });
      res.on("end", () => {
        console.log("data collected");
        // Parse once; a non-JSON reply must not crash the process.
        let getData;
        try {
          getData = JSON.parse(data);
        } catch (err) {
          console.error("Could not parse NOAA response: " + err.message);
          response.status(502).send("Invalid response from NOAA API");
          return;
        }
        response.send(getData);
        if(isEmpty(getData)){
          // Record the placeholder row and stop: previously dataFormat() still
          // ran afterwards and attempted an INSERT with no rows.
          emptyCorrect();
          return;
        }
        dataFormat(getData);
      });
    });
    // Without this handler a connection failure is an unhandled 'error' event
    // and the client never gets an answer.
    apiRequest.on("error", err => {
      console.error("NOAA request failed: " + err.message);
      response.status(502).send("NOAA request failed");
    });
    apiRequest.end();
  });
  //fix format for date Can add more formating if needed here
  function dataFormat(formData){
    // `for(x in ...)` leaked `x` as an implicit global; iterate the readings directly.
    for(const reading of formData.results || []){
      // Keep only the first 10 characters of the date string.
      reading.date = reading.date.slice(0,10);
    }
    jsonToSQL(formData.results);
  }
  //test function is going to be used for inserting the zip
  // (not called anywhere in this function yet)
  function test(){
    console.log("your test worked see ***************");
    return "92507";
  }
  // Shared insert helper: opens a connection, bulk-inserts `values`
  // ([zipcode, date, temp] triples) into test1 and closes the connection.
  function insertRows(values){
    if (values.length === 0) {
      // A bulk INSERT with an empty VALUES list is a SQL error; nothing to do.
      return;
    }
    let connection = mysql.createConnection(config);
    // insert statment
    let stmt = `INSERT INTO test1(ZIPCODE,DATE, TEMP) VALUES ? `;
    // execute the insert statment
    connection.query(stmt, [values], (err, results, fields) => {
      if (err) {
        // Include the reason; a bare "error" is impossible to diagnose.
        return console.error("error: " + err.message);
      }
      // get inserted rows
      console.log('Row inserted:' + results.affectedRows);
    });
    // close the database connection
    connection.end();
  }
  //function to add grabed JSON data into the SQL database
  function jsonToSQL(datafin){
    var zipcode = zips;
    insertRows((datafin || []).map(reading => [zipcode, reading.date, reading.value]));
  }
  // Records a placeholder row for a ZIP code the API returned no data for.
  function emptyCorrect(){
    console.log("Eror correction");
    var zipcode = zips;
    insertRows([[zipcode,"0","No Data"]]);
  }
  // True when `obj` is null/undefined or has no own enumerable properties.
  function isEmpty(obj) {
    return obj == null || Object.keys(obj).length === 0;
  }
  app.listen(port, () => console.log(`Example app listening on port ${port}!`))
}
As I understand your problem can roughly be summarized as "How to loop through asynchronous evaluations in Nodejs".
There are some options for you. I would recommend wrapping call to the NOAA API with a promise and then chain those promises. This can be done as follows:
// Route handler that queries the NOAA API once per ZIP code, strictly one
// request after the other, by chaining one promise per ZIP code.
app.get('/', async function(req, response) {
  var apiKey = 'some value';
  let webapi = 'https://www.ncdc.noaa.gov/cdo-web/api/v2/data?';
  let datasetid = 'datasetid=GHCND';
  let datatypeid = '&datatypeid=TMAX';
  let location = '&locationid=ZIP:';
  let startdate = '&startdate=2019-01-01';
  let enddate = '&enddate=2020-01-01';
  let units = '&units=standard';
  let limit = '&limit=1000';
  var options = {
    port: 443,
    method: 'GET',
    headers: {
      token: apiKey
    }
  };
  const zipCodes = ['90012', '90013']; // Place a call to your function for fetching zip codes here
  // Raw response bodies, one entry per ZIP code, in request order.
  let datas = [];
  // Declared locally: a bare `prom = ...` creates an implicit global that
  // concurrent requests to this route would share.
  let prom = Promise.resolve();
  zipCodes.forEach(zipcode => {
    // Each link starts its request only after the previous one has resolved.
    prom = prom.then(() =>
      new Promise((resolve, reject) => {
        let url =
          webapi +
          datasetid +
          datatypeid +
          location +
          zipcode +
          startdate +
          enddate +
          units +
          limit;
        let apiRequest = https.request(url, options, function(res) {
          console.log('Connected');
          let data = '';
          res.on('data', chunk => {
            data += chunk;
          });
          res.on('end', () => {
            console.log('data collected for zip ' + zipcode);
            datas.push(data);
            resolve();
          });
        });
        // Reject on connection errors; otherwise the promise never settles,
        // the chain stalls and the client waits forever.
        apiRequest.on('error', reject);
        apiRequest.end();
      })
    );
  });
  prom
    .then(() => {
      // All requests have now been handled sequentially
      response.send(/* You'll need to figure out what to do here */);
    })
    .catch(err => {
      console.error('NOAA request failed: ' + err.message);
      response.status(502).send('NOAA request failed');
    });
});
An alternative is to use something like the async library for dealing with sequentially calling callbacks. The async library (https://github.com/caolan/async) describes itself as:
Async is a utility module which provides straight-forward, powerful functions for working with asynchronous JavaScript.
See e.g. "Node.js: How do you handle callbacks in a loop?" for a similar problem (not about calling an API, but about dealing with asynchronous functions in a loop).

Trouble making my script print None in case there is no result to display

I've created a script in node using request and cheerio to fetch the title of different posts and their concerning links from a webpage. My script can fetch them in the right way.
The thing is, the variables item and item_link defined within my script may not always have the desired results, and in those cases the script will throw an error.
How can I implement try/except clause or something similar within my script so that the variables item and item_link will store None or "" (for some posts) in case there is no result to display?
I've tried so far (working errorlessly as the titles and links are always present):
var request = require('request');
var cheerio = require('cheerio');
const url = 'https://stackoverflow.com/questions/tagged/web-scraping';
// Fetch the tag page and log the title and link of every question summary.
request(url, (error, response, html) => {
  // Bail out unless the page was fetched successfully.
  if (error || response.statusCode != 200) {
    return;
  }
  const $ = cheerio.load(html);
  $('.summary').each((index, summary) => {
    // Title text and href both come from the same anchor inside the summary.
    const anchor = $(summary).find('.question-hyperlink');
    const item = anchor.text();
    const item_link = anchor.attr("href");
    console.log({ item, item_link });
  });
});
If I try like the following (used a wrong selector in item_link):
// Same scrape, but each lookup is wrapped in try/catch with "" as the fallback.
// The link lookup deliberately uses a selector that matches nothing.
request(url, (error, response, html) => {
  if (error || response.statusCode != 200) {
    return;
  }
  const $ = cheerio.load(html);
  $('.summary').each((index, summary) => {
    let item;
    let item_link;
    try {
      item = $(summary).find('.question-hyperlink').text();
    } catch (err) {
      item = "";
    }
    try {
      item_link = $(summary).find('.question-hyperlin').attr("href");
    } catch (err) {
      item_link = "";
    }
    console.log({ item, item_link });
  });
});
The output I expected "" but I got undefined in item_link:
{ item: 'Trouble making my script print None in case there is no result to display',
item_link: undefined }
Try it like this:
// Log title and link per summary, falling back to 'None' when the anchor is missing.
$('.summary').each(function () {
  // First matching anchor as a raw element, or undefined when nothing matched.
  const anchor = $(this).find('.question-hyperlink').get(0);
  let item = 'None';
  let item_link = 'None';
  if (anchor) {
    item = $(anchor).text();
    item_link = $(anchor).attr('href');
  }
  console.log({ item: item, item_link: item_link });
});
You want to avoid try/catch for things like this.
Try this :
// Log title and link per summary; any falsy lookup result is reported as "".
$('.summary').each((index, summary) => {
  const node = $(summary);
  let item = "";
  let item_link = "";
  try {
    item = node.find('.question-hyperlink').text();
  } catch (err) {
    item = "";
  }
  try {
    item_link = node.find('.question-hyperlin').attr("href");
  } catch (err) {
    item_link = "";
  }
  // The `||` turns an undefined attribute into an empty string.
  console.log({
    item: item ? item : "",
    item_link: item_link ? item_link : ""
  });
});

scraping a page that redirects

I am trying to scrape a simple page (using cheerio and request):
https://www.ishares.com/uk/individual/en/products/251824/
The code fails. I believe it is because, in order to get to the above, users are prompted on previous page for "individual" or "institutional" so are being redirected.
I have tried different variations of the url, but all fail.
how can i get the raw HTML using node.js ?
here is the code:
var express = require('express');
var path = require('path');
var request = require('request');
var cheerio = require('cheerio'); // fast flexible implement of jQuery for server.
var fs = require('fs');
var app = express();
var port = 8000;
var timeLog = []; // for dl to measure the time of events.
timeLog[0] = Date.now();
console.log('program initiated at time: '+new Date());
// example 1: pull the webpage and print to console
// Three URL variants were tried; only the last assignment is actually requested.
var url ="https://www.ishares.com/uk/individual/en/products/251824/ishares-jp-morgan-emerging-markets-bond-ucits-etf";
url = "https://www.ishares.com/uk/individual/en/products/251824/";
url="https://www.ishares.com/uk/individual/en/products/251824/ishares-jp-morgan-emerging-markets-bond-ucits-etf?siteEntryPassthrough=true&locale=en_GB&userType=individual";
request(url,function functionName(err,resp,body) {
  // Check the error first: the body was previously handed to cheerio before
  // `err` was looked at, so a failed request was parsed anyway.
  if (err) {
    console.log(err);
    return;
  }
  var $ = cheerio.load(body);
  var distYield = $('.col-distYield');
  var distYieldText = distYield.text();
  console.log('we got to line 24');
  console.log(distYieldText);
  // Elapsed time from program start until the page was parsed.
  timeLog[2] = Date.now();
  console.log('data capture time: '+(timeLog[2] - timeLog[0])/1000+' seconds');
  console.log('the body was written: success');
});
// example 2: download webpage and save file
// The 'error' handlers are attached to the request streams as well: pipe() does
// not forward source errors, so a failed download would otherwise be unhandled.
var destination = fs.createWriteStream('./downloads/iSharesSEMB.html');
request(url)
  .on('error', function (err) {
    console.log(err);
  })
  .pipe(destination);
// example 3:
// Separate variable instead of redeclaring `destination`.
var destination2 = fs.createWriteStream('./downloads/iSharesSEMB2.html');
request(url)
  .on('error', function (err) {
    console.log(err);
  })
  .pipe(destination2)
  .on("finish",function () {
    console.log('done');
  })
  .on('error',function (err) {
    console.log(err);
  });
// Recorded right after the requests are started, not after they complete.
timeLog[1] = Date.now();
console.log('program completed at time: '+new Date());
console.log('Asynchronous program run time: '+(timeLog[1] - timeLog[0])/1000+' seconds');
Alright, I got it to work. I enabled cookie support for request but then got into a redirect loop. Adding a promise worked it out. Here's only the relevant HTML request part:
const request = require('request'),
cheerio = require('cheerio');
const url = "https://www.ishares.com/uk/individual/en/products/251824/ishares-jp-morgan-emerging-markets-bond-ucits-etf?siteEntryPassthrough=true&locale=en_GB&userType=individual";
// Declared with const: a bare `options = {...}` is an implicit global and a
// ReferenceError in strict mode / ES modules.
// jar: true enables cookie support for the request.
const options = {
  jar: true
};
/**
 * Requests `url` (using the shared `options`) and resolves with the cheerio
 * selection for `.col-distYield`.
 *
 * @param {string} url - Page to fetch.
 * @returns {Promise<Object>} Resolves with the selection; rejects with the request error.
 */
const getDistYield = url => {
  return new Promise((resolve, reject) => {
    request(url, options, function(err,resp,body) {
      if (err) {
        // Stop here: without the return, the missing body was still passed to
        // cheerio after the promise had already been rejected.
        reject(err);
        return;
      }
      let $ = cheerio.load(body);
      resolve($('.col-distYield'));
    });
  });
};
// Run the scrape: print the element's text on success, the error otherwise.
getDistYield(url)
  .then(function (tag) {
    const text = tag.text();
    console.log(text);
  })
  .catch(function (e) {
    console.error(e);
  });
Outputs:
Distribution Yield
The distribution yield represents the ratio of distributed income over the last 12 months to the fund’s current NAV.
as of 20-Feb-2018
4.82
Also, notice I've used the last URL you provided.
I hope this works it out for you :)
I have amended the resolve part to get just the value (and not the surrounding text), which sits in a nested element:
resolve($('.col-distYield > span:nth-child(2)'));

I am unable to convert http.get image into base 64

// Downloads a fixed avatar PNG via Meteor.http.get and returns it as a
// "data:image/png;base64,..." string (also logged to the console).
app.getImage = function() {
  var image = Meteor.http.get("https://turtlerock-discourse.global.ssl.fastly.net/user_avatar/talk.turtlerockstudios.com/takran/45/879.png", {
  });
  var prefix = "data:image/png;base64,";
  // Buffer.from replaces the deprecated `new Buffer(...)` constructor.
  // NOTE(review): assumes image.content is a binary-encoded string - confirm.
  var imagebase64 = Buffer.from(image.content, 'binary').toString('base64');
  imagebase64 = prefix + imagebase64;
  console.log(imagebase64);
  return imagebase64;
}
but I am not seeing results,
any help?
This is a dummy text for the error.
a pure Meteor solutions:
// encoding: null is passed through to the underlying request options so the
// body is not decoded as text before it is base64-encoded.
var response = HTTP.call('GET', url,{npmRequestOptions: { encoding: null }})
// Buffer.from replaces the deprecated `new Buffer(...)` constructor.
var data = "data:" + response.headers["content-type"] + ";base64," + Buffer.from(response.content).toString('base64');
This is how I fixed this issue.
// Fetches an image with `request` (encoding: null) and returns it as a data URI
// built from the response's content-type header. Returns null when the request
// fails or the status is not 200. The Future makes the call block until the
// callback has run.
app.getImage = function(){
  var myrequest = request.defaults({ encoding: null });
  var fut = new Future();
  var options = {
    "url" : "https://any.domain.com/any.png"
  }
  myrequest.get(options, function (error, response, body) {
    if (!error && response.statusCode == 200) {
      // Buffer.from replaces the deprecated `new Buffer(...)` constructor.
      var data = "data:" + response.headers["content-type"] + ";base64," + Buffer.from(body).toString('base64');
      fut.return(data);
    }
    else
      fut.return(null);
  });
  return fut.wait();
}
I was assuming this solution should have come with meteor code itself,
but it doesn't,
I had to use nodejs way to do it.
I will still be waiting for someone to post an answer based on pure meteor way.

Resources