Wait for something in node.js - node.js

I'm trying to make a webscraper, but I can't get my function to wait for the second request to fill the name key on my object. It always return undefined.
const request = require('request');
const cheerio = require('cheerio');
const base_url = 'https://www.supremenewyork.com';
const shop_url = 'https://www.supremenewyork.com/shop/';
function getItems(category) {
var items = [];
return new Promise(function(resolve, reject) {
request.get(shop_url + category, function(err, res, body) {
if(err) {
reject(err);
} else {
var $ = cheerio.load(body);
$('a', '.inner-article').each(function(i, el) {
var url = base_url + $(this).attr('href');
var isSoldout = false;
var name;
if($(this).find('div').attr('class', 'sold_out_tag').length === 1)
isSoldout = true;
request.get(url, function(err, res, html) {
var $ = cheerio.load(html);
name = $('h1', 'div').text();
})
items.push({name: name, url: url, isSoldout: isSoldout});
})
resolve(items);
}
})
})
}
I expect the name key to be fill but no, i get undefined

Use the request-promise package which wraps request in Promise. Then you can use async/await to wait for result like:
const rp = require('request-promise');
const cheerio = require('cheerio');
const base_url = 'https://www.supremenewyork.com';
const shop_url = 'https://www.supremenewyork.com/shop/';
// notice async keyword
async function getItems(category) {
var items = [];
try {
// using await to wait for promise to resolve
const body = await rp.get(shop_url + category)
var $ = cheerio.load(body);
$('a', '.inner-article').each(function(i, el) {
var url = base_url + $(this).attr('href');
var isSoldout = false;
var name;
if($(this).find('div').attr('class', 'sold_out_tag').length === 1)
isSoldout = true;
try {
const html = await rp.get(url)
var $ = cheerio.load(html);
name = $('h1', 'div').text();
items.push({name: name, url: url, isSoldout: isSoldout});
} catch (err) {
throw err;
}
})
} catch (e) {
throw e;
}
return items;
}
More about async/await at MDN

Related

How do I await a nodejs request-promise object

I'm trying to return the results of an http request from a function and thought request-promise was supposed to return after an await. I obviously don't have this right. I get "undefined" returned from the function.
var s = getUrl();
console.log('result: ' + s)
function getUrl() {
var rp = require('request-promise');
var url = 'http://myserver.com?param=xxx';
rp(url)
.then(function (data) {
return data;
})
.catch(function (err) {
console.log(err);
});
}
How do I await the call so getUrl returns the data?
EDIT: After Kevin's comment. I tried to put this into a module and call it but it's returning [object Promise].
function getUrl(url, params) {
var rp = require('request-promise');
return rp(url + '?' + params)
.then(function (data) {
return data;
})
.catch(function (err) {
console.log(err);
});
}
async function getUpdate(o) {
var url = 'http://myserver.com';
var params = 'param=xxx';
var s = await getUrl(url, params);
return s;
}
exports.askVelo = (o) => {
var sRtn = '';
console.log(o.code);
switch (o.code) {
case 'g':
sRtn = getUpdate(o);
break;
}
console.log('heres rtn: ' + sRtn); // sRtn is [object Promise]
return sRtn;
}
getUpdate is now just returning [object Promise]... Why is that not working now?
I really needed to get the data in a synchronous fashion, so I finally gave up and used sync-request and skipped all the await/promise stuff.
sr = require('sync-request');
var url = 'http://myserver';
var qstring = 'param=xxx';
var res = sr('GET', url + '?' + qstring);
var str = res.body.toString();

Nodejs request async problem for loop not work

I'm a beginner of nodejs, async bothers me.
I want my code run sequencely or it will breaks.
I have a for loop, and it simply doesn't work...
Here are all the codes:
const util = require('util');
const request = require('request');
const cheerio = require('cheerio');
var host = "http://www.nicotv.me";
var url = "http://www.nicotv.me/video/play/57838-1-%s.html";
var len = 99;
var tab = /-(\d)-/.exec(url);
tab = tab[1] // '1' not '-1-'
function getLen(url) {
//you can ignore this function, it gives len=2
request(url, function (err, response, html) {
if (err) {
console.log('url:', url);
console.log('error:', err);
console.log('statusCode:', response && response.statusCode);
}
else{
var $ = cheerio.load(html);
var cls = '.ff-playurl-dropdown-%s';
$(util.format(cls, tab)).filter(function (){
var data = $(this);
len = data.html().match(/<a href=/g).length;
console.log("episode:", len);
});
getLink(len, function(){
});
}
});
}
getLen(util.format(url, 1)); //len = 2
var getLink = function(lengths, callback){
for (let i = 1; i <= lengths; i++) {
var tmp = util.format(url, i);
try {
request(tmp, function (err, res, html){
console.log('url:', tmp);
if(err){
console.log("error:", err);
console.log("statusCode:", res && res.statusCode);
}else{
var reg = /src="(\/player.php?.{1,})"/;
var result = reg.exec(html);
console.log(result[1]);
}
});
callback();
} catch (error) {
console.log(error);
break;
}
}
}
here is my output:
episode: 2
url: http://www.nicotv.me/video/play/57838-1-2.html
/player.php?u=aHR0cDovL3R5angyLmtpbmdzbnVnLmNuLzM2MHl1bi0xNS5waHA/dmlkPTE1NzkxMzU2MzEyNDAwNTQ5&p=360biaofan&c=0&j=aHR0cDovL2ppZXhpLmtpbmdzbnVnLmNuLzM2MGJpYW9mYW4ucGhwP3VybD0=&x=10&y=&z=
url: http://www.nicotv.me/video/play/57838-1-2.html
/player.php?u=aHR0cDovL3R5angyLmtpbmdzbnVnLmNuLzM2MHl1bi0xNS5waHA/dmlkPTE1Nzg1MDQyMDYyNDAwNTgx&p=360biaofan&c=0&j=aHR0cDovL2ppZXhpLmtpbmdzbnVnLmNuLzM2MGJpYW9mYW4ucGhwP3VybD0=&x=10&y=&z=aHR0cDovL3R5angyLmtpbmdzbnVnLmNuLzM2MHl1bi0xNS5waHA/dmlkPTE1NzkxMzU2MzEyNDAwNTQ5
First problem is these two /player*** link are from 57838-1-1.html
And one of them are not complete.
Second problem is the url output shows 57838-1-2.html twice.
Thanks for your kindly help.
Yesterday had the same problem, so I solved with:
Using request-promise
Replace the loop method arrTitles.Each with for (const jt of arrTitles)
Here a sample:
const request = require('request-promise');
const cheerio = require('cheerio');
var getUrlData =
async function (url) {
console.log(url);
try {
return await request.get(url);
}
catch (err) {
console.error(`${err}: ${url}`);
}
return;
};
var run =
async function (pageUrl) {
var arrData =
await fn.getUrlData(pageUrl)
.then(response => readTable(response));
console.log(arrData);
};
var readTable =
function (document) {
var $;
let arrData = [];
try {
$ = cheerio.load(document);
$('table tr')
.each(
function (trN) {
$(this)
.children('td')
.each(
function (tdN) {
arrData.push($(this).text().trim());
}
)
});
}
catch { }
return arrData;
};
run();

Synchronous/sequential REST calls in loop

I'm trying to call a REST API in a "for" loop, however, the results aren't what I'm expecting.
I've attempted to wrap everything in a promise, but the order of operations is still off, executing it asynchronously rather than synchronously.
var https = require('https');
var zlib = require("zlib");
var axios = require('axios');
const cheerio = require('cheerio');
var page = 1;
var hasMore = "true";
function delay() {
return new Promise(resolve => setTimeout(resolve, 300));
}
async function getLocation(page) {
// notice that we can await a function
// that returns a promise
await delay();
var apiUrl = 'https://my.api.com/search/advanced?page=' + page +
'&pagesize=5';
https.get(apiUrl, function(response) {
console.log("headers: ", response.headers);
console.log(response.statusCode)
if (response.statusCode == 200) {
var gunzip = zlib.createGunzip();
var jsonString = '';
response.pipe(gunzip);
gunzip.on('data', function(chunk) {
jsonString += chunk;
});
gunzip.on('end', function() {
obj = JSON.parse(jsonString);
var url = obj.items[0].owner.link;
axios(url)
.then(response => {
const html = response.data;
const $ = cheerio.load(html);
//OUTPUT LOCATION
console.log($('h3.location').text().trim());
})
.catch(console.error);
});
gunzip.on('error', function(e) {
console.log(e);
});
} else {
console.log("Error");
}
});
}
async function startGetLocation() {
var page = 1;
var hasMore = "true";
do {
//OUTPUT PAGE NUMBER
console.log(page.toString());
await getLocation(page);
page = page + 1;
} while (page < 3);
}
startGetLocation();
Based on the sample code, I would have expected the below to output:
1
New York
2
However, it's outputting:
1
2
New York
The problem is that the callback function that you passed to the https.get() function gets executed asynchronously and that the getLocation function does not wait until this part resolves.
So you could simply wrap the https.get() call and the unzipping part in a promise, wait for it to resolve and then do the axios-part.
async function getLocation(page) {
await delay();
var apiUrl = 'https://my.api.com/search/advanced?page=' + page +
'&pagesize=5';
const fetchAndUnzipPromise = new Promise((resolve, reject) => {
https.get(apiUrl, function (response) {
console.log("headers: ", response.headers);
console.log(response.statusCode)
if (response.statusCode == 200) {
var gunzip = zlib.createGunzip();
var jsonString = '';
response.pipe(gunzip);
gunzip.on('data', function (chunk) {
jsonString += chunk;
});
gunzip.on('end', function () {
obj = JSON.parse(jsonString);
var url = obj.items[0].owner.link;
resolve(url);
});
gunzip.on('error', function (e) {
reject(e);
});
} else {
reject(new Error("Statuscode not as exepcted"));
}
});
});
return fetchAndUnzipPromise.then(url => {
return axios(url)
.then(response => {
const html = response.data;
const $ = cheerio.load(html);
//OUTPUT LOCATION
console.log($('h3.location').text().trim());
})
.catch(console.error);
})
}

Synchronous request in Node.js with async.retry

nodejs version v10.14.2
OS win7 sp1 64bit
I need the following four lines of code to execute in order
var res2 = await fetchData(url_2, file_path_2);
await console.log('step one.');
var res1 = await fetchData(url_1, file_path_1);
await console.log('step two.');
However, the actual order of their execution is
step one.
step two.
........
........
How can we achieve synchronous execution of these four lines of codes?
var https = require('https');
var fs = require('fs');
var request = require('request');
var async = require("async");
(async() => {
var url_1 = 'https://aaa.bbb.ccc.com';
var url_2 = 'https://library.harvard.edu/';
var file_path_1 = fs.createWriteStream('./intelcenter/aaa.bbb.ccc.txt');
var file_path_2 = fs.createWriteStream('./intelcenter/harvard.txt');
var res2 = await fetchData(url_2, file_path_2);
await console.log('step one.');
var res1 = await fetchData(url_1, file_path_1);
await console.log('step two.');
})();
async function fetchData(url,locpath) {
return new Promise(function (resolve) {
async.retry(request(url).pipe(locpath), function(err, result) {
resolve(result);
});
});
}
Synchronous Code
I have added callback function for request module and passing the result via next method.
By Default async.retry will make 5 retry.
var fs = require("fs");
var request = require("request");
var async = require("async");
(async () => {
var url_1 = "https://google.com";
var url_2 = "https://library.harvard.edu/";
var file_path_1 = fs.createWriteStream("./intelcenter/aaa.bbb.ccc.txt");
var file_path_2 = fs.createWriteStream("./intelcenter/harvard.txt");
var res2 = await fetchData(url_2, file_path_2);
console.log("step one." + res2);
var res1 = await fetchData(url_1, file_path_1);
console.log("step two." + res1);
})();
function fetchData(url, locpath) {
console.log("inside fetchData");
return new Promise(function(resolve, reject) {
async.retry(
function(next) {
request(url, function(err, result) {
if (err) {
console.log(err);
}
console.log("inside request");
next(result);
}).pipe(locpath);
},
function(result) {
resolve(result);
}
);
});
}
To know more about async.
Synchronous Code without async.retry
var fs = require("fs");
var request = require("request");
(async () => {
var url_1 = "https://google.com";
var url_2 = "https://library.harvard.edu/";
var file_path_1 = fs.createWriteStream("./intelcenter/aaa.bbb.ccc.txt");
var file_path_2 = fs.createWriteStream("./intelcenter/harvard.txt");
var res2 = await fetchData(url_2, file_path_2);
console.log("step one.");
var res1 = await fetchData(url_1, file_path_1);
console.log("step two.");
})();
function fetchData(url, locpath) {
console.log("inside fetchData");
return new Promise(function(resolve, reject) {
request(url, function(err, result) {
if (err) {
console.log(err);
}
console.log("got result");
result=JSON.stringify(result);
locpath.write(result);
resolve(result);
});
});
}

Nodejs http get and return statusCode

I want to run through a list of urls and check if they are valid if no 404 status code is returned. My code is as below. But isValid key in my data object doesn't seem to be populated accordingly.
var https = require('https');
const check = async url => {
let result = false;
try {
const request = await https.get(url);
result = request.statusCode !== 404;
} catch (err) {
console.log(err);
}
return result;
};
// and in another function, I am doing this
const data = [];
const urls = [/* urls here */];
urls.forEach(url => {
data.push({
url,
isValid: await isValidUrl(url)
})
});
You are calling the different function, Use Promise.all to get data.
var https = require('https');
const check = async (url) => {
let isValid = false;
try {
const request = await https.get(url);
isValid = request.statusCode !== 404;
} catch (err) {
console.log(err);
}
return {
isValid,
url
};
};
// and in another function, I am doing this
const data = [];
const urls = [ /* urls here */ ];
const result = Promise.all(urls.map(url => check(url)))

Resources