Node.js https request stuck

I am a node.js beginner and I'm trying to check the status of domains I read from a csv file. All works fine; however, when the script reaches the end it simply hangs and does not quit. Below is the code I'm working with. What am I missing here? Any help would be greatly appreciated!
```
const fs = require('fs');
const request = require('request');
let https = require('https');
const input_path = 'Path_to_csv'

function FetchUrls (callback) {
  fs.readFile(input_path, 'utf8', function (err, data) {
    let dataArray = data.split(/\r?\n/);
    console.log(`loaded ${dataArray.length} items`)
    callback(dataArray)
  });
}

function getData (dataArray) {
  let urls = dataArray
  for (let url of urls) {
    https.get(url, function(res) {
      if (res.statusCode == 500) {
        console.log("Domain Down")
      } else {
        console.log("Domain Up")
      }
    }).on('error', function(e) {
      console.log(e)
      process.exit()
    });
  }
}

FetchUrls(function(dataArray) {
  getData(dataArray)
})
```

You need to add a return after the for loop, but I would wrap it in a promise, like:
FetchUrls(input_path, function(dataArray) {
  let promises = []
  for (let domain of dataArray) {
    promises.push(new Promise((resolve, reject) => {
      // ... do the https.get-ing things, and then resolve() where you think it should end
    }))
  }
  Promise.all(promises)
    .then(results => results)
    .catch(error => error)
})
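To make that concrete, here is a minimal sketch of the promise-wrapped version (my own filling-in of the `// ...` placeholder, not the answerer's code). One likely reason the original script never exits is that the response bodies are never consumed; calling res.resume() drains them so the sockets can close and the event loop can empty:
```
const fs = require('fs');
const https = require('https');

function checkUrl(url) {
  return new Promise((resolve) => {
    https.get(url, (res) => {
      // Drain the response body; an unread body keeps the socket (and the process) alive.
      res.resume();
      resolve(res.statusCode === 500 ? `${url}: Domain Down` : `${url}: Domain Up`);
    }).on('error', (e) => resolve(`${url}: ${e.message}`));
  });
}

fs.readFile('Path_to_csv', 'utf8', (err, data) => {
  if (err) throw err;
  // filter(Boolean) drops empty lines, e.g. from a trailing newline in the csv
  const urls = data.split(/\r?\n/).filter(Boolean);
  Promise.all(urls.map(checkUrl)).then((results) => results.forEach((r) => console.log(r)));
});
```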


How do I store axios response so that I can use it in different functions

I have a Node.js project and I need to store the response data of this axios request:
let px2resp;

async function sendPx2() {
  try {
    let data = await axios.post('https://collector-pxajdckzhd.px-cloud.net/api/v2/collector', qs.stringify(PX2data), {
      headers: PX2headers
    });
    px2resp = data;
    return px2resp;
  } catch (e) {
    return e;
  }
}
The way that I'm doing it right now is:
let testing = async () => {
  var a = await sendPx2();
  console.log(a)
}
testing();
But the issue with this is that it makes the request every time I want to use the data, which is not ideal. Is there any way for me to store this response data and use it without having to make the request multiple times?
(Assuming you don't want to store data in a file)
This may be the approach you are looking for:
let alreadyFetched = false;
let cachedData = null;

async function sendPx2() {
  // Check whether the data was fetched already (to avoid making the same request multiple times);
  // if so, return the previously fetched data.
  if (alreadyFetched) return cachedData;
  // Otherwise fetch the data.
  let data = await axios.post(
    'https://collector-pxajdckzhd.px-cloud.net/api/v2/collector',
    qs.stringify(PX2data),
    {
      headers: PX2headers,
    }
  ).catch((e) => console.log(e)); // This is optional (use try/catch if you prefer)
  // Set the cache variables.
  alreadyFetched = true;
  cachedData = data;
  return data;
}
And you can still use your existing code as normal, but this time it will not fetch the data every time if it was already fetched before.
let testing = async () => {
  var a = await sendPx2();
  console.log(a)
}
testing();
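A small variation worth noting (my suggestion, not part of the answer above): caching the promise instead of the resolved data also collapses concurrent callers onto a single in-flight request, so two calls made before the first response arrives still trigger only one POST. This reuses the question's axios/qs/PX2data setup:
```
let px2promise = null;

function sendPx2() {
  // Every caller gets the same promise, so the request fires at most once.
  if (!px2promise) {
    px2promise = axios.post(
      'https://collector-pxajdckzhd.px-cloud.net/api/v2/collector',
      qs.stringify(PX2data),
      { headers: PX2headers }
    );
  }
  return px2promise;
}

// Both calls await the same underlying request:
sendPx2().then((a) => console.log(a.status));
sendPx2().then((b) => console.log(b.status));
```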
You can store the data inside a JSON file, using the writeFile function in Node.js :)
fs = require('fs');
// fs.writeFile expects a string or Buffer, and the full axios response contains
// circular references, so serialize the .data payload rather than px2resp itself.
fs.writeFile('helloworld.json', JSON.stringify(px2resp.data), function (err) {
  if (err) return console.log(err);
  console.log('Data Written');
});
Then read it from the file to use in your functions.
fs = require('fs')
fs.readFile('helloworld.json', 'utf8', function (err, data) {
  if (err) {
    return console.log(err);
  }
  console.log(data);
});
Make sure to pass 'utf8', so readFile gives you a string and not Buffer data; JSON.parse that string to get the object back.
You can store the data in a file, with an expiry so the cache refreshes itself:
var fs = require("fs");
const util = require('util');
const readFile = util.promisify(fs.readFile);

const expiry = 10 * 60 * 1000 // 10 minutes

function cachedFunction(fn) {
  return async (...params) => {
    // One cache file per function name + argument combination.
    let path = `${fn.name}:${params.join(":")}.json`
    if (fs.existsSync(path)) {
      let rawdata = await readFile(path, 'utf-8');
      let response = JSON.parse(rawdata);
      if (response.created_at + expiry > Date.now()) {
        console.log("getting from cache")
        return response // return the parsed object, not the raw JSON string
      }
    }
    const data = await fn(...params)
    if (data && typeof data === 'object') {
      const stringifiedData = JSON.stringify({ ...data, created_at: Date.now() })
      fs.writeFileSync(path, stringifiedData);
    }
    return data
  }
}

async function hello() {
  return { a: 1, b: 2 }
}

async function test() {
  console.log(await cachedFunction(hello)("x"))
}

test()
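To tie this back to the question (hypothetical wiring on my part, not from the answer): wrap sendPx2 once and call the wrapped version everywhere, so repeated calls within the expiry window are served from the cache file instead of the network. The one caveat is that the wrapped function must resolve to a plain serializable object (e.g. return data.data from axios), since the result goes through JSON.stringify:
```
// sendPx2 is the question's axios function; cachedFunction is defined above.
const cachedSendPx2 = cachedFunction(sendPx2);

(async () => {
  const first = await cachedSendPx2();  // performs the POST and writes the cache file
  const second = await cachedSendPx2(); // served from the cache file until expiry
  console.log(first, second);
})();
```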

Nodejs request async problem: for loop does not work

I'm a beginner with Node.js, and async behavior confuses me.
I want my code to run sequentially, or it breaks.
I have a for loop, and it simply doesn't work...
Here is all the code:
const util = require('util');
const request = require('request');
const cheerio = require('cheerio');

var host = "http://www.nicotv.me";
var url = "http://www.nicotv.me/video/play/57838-1-%s.html";
var len = 99;
var tab = /-(\d)-/.exec(url);
tab = tab[1] // '1' not '-1-'

function getLen(url) {
  // you can ignore this function, it gives len=2
  request(url, function (err, response, html) {
    if (err) {
      console.log('url:', url);
      console.log('error:', err);
      console.log('statusCode:', response && response.statusCode);
    } else {
      var $ = cheerio.load(html);
      var cls = '.ff-playurl-dropdown-%s';
      $(util.format(cls, tab)).filter(function () {
        var data = $(this);
        len = data.html().match(/<a href=/g).length;
        console.log("episode:", len);
      });
      getLink(len, function () {
      });
    }
  });
}
getLen(util.format(url, 1)); //len = 2

var getLink = function (lengths, callback) {
  for (let i = 1; i <= lengths; i++) {
    var tmp = util.format(url, i);
    try {
      request(tmp, function (err, res, html) {
        console.log('url:', tmp);
        if (err) {
          console.log("error:", err);
          console.log("statusCode:", res && res.statusCode);
        } else {
          var reg = /src="(\/player.php?.{1,})"/;
          var result = reg.exec(html);
          console.log(result[1]);
        }
      });
      callback();
    } catch (error) {
      console.log(error);
      break;
    }
  }
}
Here is my output:
episode: 2
url: http://www.nicotv.me/video/play/57838-1-2.html
/player.php?u=aHR0cDovL3R5angyLmtpbmdzbnVnLmNuLzM2MHl1bi0xNS5waHA/dmlkPTE1NzkxMzU2MzEyNDAwNTQ5&p=360biaofan&c=0&j=aHR0cDovL2ppZXhpLmtpbmdzbnVnLmNuLzM2MGJpYW9mYW4ucGhwP3VybD0=&x=10&y=&z=
url: http://www.nicotv.me/video/play/57838-1-2.html
/player.php?u=aHR0cDovL3R5angyLmtpbmdzbnVnLmNuLzM2MHl1bi0xNS5waHA/dmlkPTE1Nzg1MDQyMDYyNDAwNTgx&p=360biaofan&c=0&j=aHR0cDovL2ppZXhpLmtpbmdzbnVnLmNuLzM2MGJpYW9mYW4ucGhwP3VybD0=&x=10&y=&z=aHR0cDovL3R5angyLmtpbmdzbnVnLmNuLzM2MHl1bi0xNS5waHA/dmlkPTE1NzkxMzU2MzEyNDAwNTQ5
The first problem is that these two /player*** links are from 57838-1-1.html, and one of them is not complete.
The second problem is that the url output shows 57838-1-2.html twice.
Thanks for your kind help.
Yesterday I had the same problem, and I solved it by:
Using request-promise
Replacing the loop method arrTitles.Each with for (const jt of arrTitles)
Here is a sample:
const request = require('request-promise');
const cheerio = require('cheerio');

var getUrlData =
  async function (url) {
    console.log(url);
    try {
      return await request.get(url);
    }
    catch (err) {
      console.error(`${err}: ${url}`);
    }
    return;
  };

var run =
  async function (pageUrl) {
    var arrData =
      await getUrlData(pageUrl)
        .then(response => readTable(response));
    console.log(arrData);
  };

var readTable =
  function (document) {
    var $;
    let arrData = [];
    try {
      $ = cheerio.load(document);
      $('table tr')
        .each(
          function (trN) {
            $(this)
              .children('td')
              .each(
                function (tdN) {
                  arrData.push($(this).text().trim());
                }
              )
          });
    }
    catch (e) { }
    return arrData;
  };

run('https://example.com'); // pass the page URL you want to scrape
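Applied to the question's getLink, the same idea looks roughly like this (a sketch, assuming request-promise; untested against the actual site). Awaiting each request inside for...of keeps the iterations sequential, and declaring tmp with let inside the loop means each logged url matches the page that was actually fetched:
```
const util = require('util');
const rp = require('request-promise');

var url = "http://www.nicotv.me/video/play/57838-1-%s.html";

async function getLink(lengths) {
  for (let i = 1; i <= lengths; i++) {
    let tmp = util.format(url, i); // block-scoped: each iteration keeps its own URL
    try {
      const html = await rp(tmp);  // finish this page before requesting the next
      console.log('url:', tmp);
      var result = /src="(\/player.php?.{1,})"/.exec(html);
      if (result) console.log(result[1]);
    } catch (error) {
      console.log(error);
      break;
    }
  }
}

getLink(2);
```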

Unable to make use of links to fetch different titles

I've created a script in Node using promises in combination with request and cheerio to parse the links under the Province column from this webpage, then reuse those links to scrape all the URLs under the Office column from each of those pages, and finally make use of these links to collect the title from each of the target pages, as in Cairos main Post Office on this page.
My current script gets stuck most of the time. However, sometimes it throws this error: UnhandledPromiseRejectionWarning: TypeError: Cannot read property 'parent' of undefined. I've checked each of the functions and found that they all work correctly individually.
Although the script looks a bit big, it is built upon a very simple logic: make use of each link from the landing page until it reaches the title of the target page.
This is my try so far:
const request = require('request');
const cheerio = require('cheerio');

const link = 'https://www.egyptcodebase.com/en/p/all';
const base_link = 'https://www.egyptcodebase.com/en/';

const items = [];
const nitems = [];

let getLinks = () => {
  return new Promise((resolve, reject) => {
    request(link, function(error, response, html) {
      let $ = cheerio.load(html);
      if (error) return reject(error);
      try {
        $('.table tbody tr').each(function() {
          items.push(base_link + $(this).find("a").attr("href"));
        });
        resolve(items);
      } catch (e) {
        reject(e);
      }
    });
  });
};

let getData = (links) => {
  const promises = links
    .map(nurl => new Promise((resolve, reject) => {
      request(nurl, function(error, response, html) {
        let $ = cheerio.load(html);
        if (error) return reject(error);
        try {
          $('.table tbody tr').each(function() {
            nitems.push(base_link + $(this).find("a").attr("href"));
          });
          resolve(nitems);
        } catch (e) {
          reject(e);
        }
      })
    }))
  return Promise.all(promises)
}

let FetchData = (links) => {
  const promises = links
    .map(nurl => new Promise((resolve, reject) => {
      request(nurl, function(error, response, html) {
        let $ = cheerio.load(html);
        if (error) return reject(error);
        try {
          resolve($(".home-title > h2").eq(0).text());
        } catch (e) {
          reject(e);
        }
      })
    }))
  return Promise.all(promises)
}

getLinks().then(resultList => {
  getData(resultList).then(resultSet => {
    FetchData(resultSet).then(title => {
      console.log(title);
    })
  })
})
How can I scrape the titles from the target pages, making use of all the links from the landing pages?
It would be much easier to ask the website owner for the data which you need.
He might understand your request and give it to you for free, instead of you scraping his site.
P.S: I was surprised to find a question about how to scrape my own website.
P.S2: If you just need all the post office titles, I could have given them to you for free :D
P.S3: Your error may be happening because sometimes the page doesn't have the element which you are trying to parse using cheerio.
So the issue is with a 2D array. If you go carefully through your getData function, you'll see that you're returning a 2D array: map returns an array, and within that map you're resolving another array, nitems.
Here's the working code:
const request = require('request');
const cheerio = require('cheerio');

const base_link = 'https://www.egyptcodebase.com/en/';

// helper wrapper, DRY
const getHtmls = (url) => {
  return new Promise((resolve, reject) => {
    request({ uri: url, method: 'GET', followAllRedirects: true }, function(error, response, html) {
      if (error) reject(error);
      else resolve(html);
    });
  })
}

let getLinks = async () => {
  const link = 'https://www.egyptcodebase.com/en/p/all';
  const items = [];
  try {
    const html = await getHtmls(link);
    let $ = cheerio.load(html);
    $('.table tbody tr').each(function() {
      items.push(base_link + $(this).find("a").attr("href"));
    });
  } catch (e) {
    // handling the error here so execution can continue for the good eggs
    console.error(e.message)
  }
  return items;
};

let getData = async (links) => {
  const out = [];
  try {
    const promises = links.map(nurl => getHtmls(nurl));
    const htmls = await Promise.all(promises);
    htmls.forEach(html => {
      let $ = cheerio.load(html);
      $('.table tbody tr').each(function() {
        out.push(base_link + $(this).find("a").attr("href"));
      });
    })
  } catch (e) {
    // handling the error here so execution can continue for the good eggs
    console.error(e.message)
  }
  return out;
}

let FetchData = async (links) => {
  const out = [];
  try {
    const promises = links.map(nurl => getHtmls(nurl));
    const htmls = await Promise.all(promises)
    htmls.forEach(html => {
      try {
        let $ = cheerio.load(html);
        out.push($(".home-title > h2").eq(0).text());
      } catch (e) {
        // handling the error here so execution can continue for the good eggs
        console.error(e.message)
      }
    })
  } catch (e) {
    // handling the error here so execution can continue for the good eggs
    console.error(e.message)
  }
  return out;
}

getLinks().then(resultList => {
  getData(resultList).then(resultSet => {
    FetchData(resultSet).then(title => {
      console.log(title);
    })
  })
})
Note: Instead of writing your own Promise wrapper, you could use request-promise package
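For illustration, the getHtmls wrapper above shrinks to roughly this with request-promise (a sketch, assuming the package is installed):
```
const rp = require('request-promise');

// Resolves with the response body (the HTML string) and rejects on request errors.
const getHtmls = (url) => rp({ uri: url, method: 'GET', followAllRedirects: true });
```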
The issue with your code is in the FetchData function: you are passing links and then using map over them.
But if you look inside that map function and check the value of the nurl variable, it is an array of links, and its data type is object.
According to the semantics of the request function, its first param should be a string, so if you iterate over the nurl variable to get the individual values, it will work.
My code snippet for one url from the array
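As a sketch of that fix (my illustration, not the answerer's missing snippet): flatten the nested array that getData resolves, so every element handed to FetchData is a plain string URL:
```
// resultSet is an array of arrays of links; one level of flattening
// gives FetchData the flat list of string URLs it expects.
const flatten = (nested) => [].concat(...nested);

getLinks().then(resultList => {
  getData(resultList).then(resultSet => {
    FetchData(flatten(resultSet)).then(titles => {
      console.log(titles);
    })
  })
})
```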

Unable to get callback from function

Trying to get the variable random_song from the function functions.random_song
Function:
functions.random_song = () => {
  fs.readFile('auto_playlist.txt', 'utf8', function(err, data) {
    if (err) {
      console.log(`${`ERR`.red} || ${`Error fetching song from auto playlist ${err}`.red}`);
    }
    let songs = data.split('\n');
    songs.splice(-1, 1);
    var random = Math.floor(Math.random()*songs.length);
    let random_song = (songs[random]);
    return random_song;
  })
}
Attempting to get the random song back via a callback:
functions.random_song(song => {
  console.log(song)
})
The code just returns undefined. Ideas?
Your function:
functions.random_song = (callback) => {
  fs.readFile('auto_playlist.txt', 'utf8', function(err, data) {
    if (err) {
      // Do stuff
      return callback(err);
    }
    // Do stuff
    callback(null, random_song);
  })
}
And then call it:
functions.random_song((err, song) => {
  // Do stuff
});
You may want to read more about using Promise/async-await instead of callback.
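For instance, a minimal async/await version using fs.promises (my sketch, assuming a Node version recent enough to ship fs.promises):
```
const fs = require('fs');
const functions = {};

functions.random_song = async () => {
  const data = await fs.promises.readFile('auto_playlist.txt', 'utf8');
  const songs = data.split('\n');
  songs.splice(-1, 1); // drop the empty entry left by the trailing newline
  return songs[Math.floor(Math.random() * songs.length)];
};

functions.random_song()
  .then(song => console.log(song))
  .catch(err => console.error(err));
```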
functions.random_song = () => {
  let data = fs.readFileSync('auto_playlist.txt', 'utf8');
  let songs = data.split('\n');
  songs.splice(-1, 1);
  var random = Math.floor(Math.random()*songs.length);
  let rand_song = (songs[random]);
  return rand_song;
}
console.log(functions.random_song())
got it working, thanks for the help <3
Following is code which uses a Promise to handle the file read and data processing asynchronously:
const fs = require('fs')
const functions = {}

functions.random_song = () => {
  return new Promise((resolve, reject) => {
    fs.readFile('auto_playlist.txt', 'utf8', function(err, data) {
      if (err) {
        console.log(err)
        return reject(err) // return here, so we don't fall through and crash on data.split
      }
      let songs = data.split('\n');
      songs.splice(-1, 1);
      var random = Math.floor(Math.random()*songs.length);
      let random_song = (songs[random]);
      resolve(random_song);
    })
  })
}

functions.random_song()
  .then(song => console.log('Song Name', song))
  .catch(err => console.error('Error fetching songs', err))

console.log('It will be executed before the promise is resolved')
fs.readFile is asynchronous, so your function returns before the file has been read. Use fs.readFileSync instead. Like this:
functions.random_song = () => {
  const data = fs.readFileSync('auto_playlist.txt', 'utf8');
  let songs = data.split('\n');
  songs.splice(-1, 1);
  var random = Math.floor(Math.random()*songs.length);
  let random_song = (songs[random]);
  return random_song;
}

nodejs: check until URL gets online

I need a "forever-watchdog" for checking if an URL is reachable or not. My case is that I have to see if a media stream gets online, and if, I would need to run a shell-script. If it's not reachable (404'd), I would keep trying again in x seconds.
Could anyone please guide me into the right directions, in terms on modules, or flowing? I had problems with resolving the destination with the
isReachable = require('is-reachable');
module.
Thanks for helping!
Try this; it should solve your task:
'use strict';
const co = require('co');
const request = require('request');
const exec = require('child_process').exec;

let getRemoteSourceStatusPromise = function(urlForCheck) {
  return new Promise(function (resolve, reject) {
    request(urlForCheck, function(error, response) {
      console.log('send request...');
      if (error)
        return reject(error);
      let result = false;
      console.log('Resource response statusCode ' + response.statusCode);
      if (200 === response.statusCode) {
        result = true;
      }
      return resolve(result);
    });
  });
};

let callShScriptPromise = function() {
  let shScriptPath = './script.sh';
  return new Promise(function (resolve, reject) {
    console.log('Calling sh...');
    exec(shScriptPath,
      function (error) {
        if (error)
          return reject(error);
        resolve(true);
      });
  });
};

let sleep = function (sleepInterval) {
  return new Promise(function (resolve) {
    setTimeout(function () {
      console.log('sleep...');
      resolve();
    }, sleepInterval);
  });
};

let loopGenerator = function* (urlForCheck, sleepInterval) {
  let bShScriptStarted = false;
  while (true) {
    let bSourceStatus = yield getRemoteSourceStatusPromise(urlForCheck);
    if (bSourceStatus === true) {
      if (!bShScriptStarted)
        bShScriptStarted = yield callShScriptPromise();
    } else {
      bShScriptStarted = false;
    }
    yield sleep(sleepInterval);
  }
};

// Starting loop
co(loopGenerator('http://google.com', 30000))
  .catch(function (error) {
    console.log(error);
  })
const rp = require('request-promise');

const checkIfOnline = (url) => {
  return new Promise((resolve, reject) => {
    rp(url).then(response => {
      // this is where you run your script
      resolve();
    }).catch(error => {
      // not reachable yet: try again in 5 seconds
      setTimeout(() => resolve(checkIfOnline(url)), 5000);
    });
  });
};

checkIfOnline('https://example.com'); // placeholder URL
You can send requests to the URL repeatedly. Something like this:
// I don't know how you create your nodejs server, so I just leave out that part.
// I use the request module. sudo npm install --save request
var request = require('request');
var URL = 'https://www.google.com';
var X = 5000; // 5 seconds

var sendRequest = function() {
  request(URL, function(error, response, body) {
    if (response.statusCode === 200) { // If good
      runScript(); // This is the function you write to run the sh script
    }
    setTimeout(sendRequest, X); // run the function again no matter what
  });
}
sendRequest();
If you want a better forever loop, I suggest you use promises.
var rp = require('request-promise');
var options = {
  uri: 'https://www.google.com',
  json: true,
  resolveWithFullResponse: true, // resolve with the full response so statusCode is available
  simple: false // don't reject on non-2xx, so a 404 just means "not up yet"
};
var X = 5000; // 5 seconds; the actual time is 5 seconds plus the time to resolve

var sendRequest_promise = function() {
  // return a request promise
  return rp(options).then(function(response) {
    return response;
  });
};

var sendRequest = function() {
  sendRequest_promise().then(function(response) {
    if (response.statusCode === 200) {
      runScript();
    }
    setTimeout(sendRequest, X); // pass the function; don't call it immediately
  });
};
sendRequest();
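The same loop reads more plainly with async/await (a sketch under the same request-promise assumption; runScript again stands in for whatever launches your shell script):
```
const rp = require('request-promise');

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function watch(url, intervalMs) {
  // Poll forever; run the script whenever the URL responds with 200.
  while (true) {
    try {
      const response = await rp({ uri: url, resolveWithFullResponse: true, simple: false });
      if (response.statusCode === 200) {
        runScript(); // hypothetical: your shell-script launcher
      }
    } catch (e) {
      console.log('request failed:', e.message); // network error; keep polling
    }
    await sleep(intervalMs);
  }
}

watch('https://www.google.com', 5000);
```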
