Nodejs request async problem for loop not work - node.js

I'm a beginner with Node.js, and asynchronous code is giving me trouble.
I want my code to run sequentially, otherwise it breaks.
I have a for loop, and it simply doesn't work...
Here is all of the code:
const util = require('util');
const request = require('request');
const cheerio = require('cheerio');
// Site root; not referenced by the templates below, which embed it directly.
var host = "http://www.nicotv.me";
// Episode page template: %s is filled with the episode number via util.format.
var url = "http://www.nicotv.me/video/play/57838-1-%s.html";
// Placeholder episode count; getLen overwrites it once the first page is parsed.
var len = 99;
// Playlist tab id: the single digit between dashes in the template ('1', not '-1-').
var tab = /-(\d)-/.exec(url)[1];
// Requests `url`, counts the `<a href=` occurrences inside the element matching
// `.ff-playurl-dropdown-<tab>`, stores that count in the outer `len`, and then
// hands `len` to getLink. Nothing is returned; all results are logged.
function getLen(url) {
//you can ignore this function, it gives len=2
request(url, function (err, response, html) {
if (err) {
console.log('url:', url);
console.log('error:', err);
console.log('statusCode:', response && response.statusCode);
}
else{
var $ = cheerio.load(html);
var cls = '.ff-playurl-dropdown-%s';
// filter() is used here only to visit each matched element; its result is discarded.
$(util.format(cls, tab)).filter(function (){
var data = $(this);
// NOTE(review): match() returns null when no `<a href=` is present, so .length would throw.
len = data.html().match(/<a href=/g).length;
console.log("episode:", len);
});
// Called from inside the response callback, so `len` has been updated (if an element matched)
// before getLink runs. The callback passed to getLink is an empty function.
getLink(len, function(){
});
}
});
}
// NOTE(review): getLink is a `var` assigned below this call; this presumably works only because
// the request callback fires after the rest of the script has run — confirm.
getLen(util.format(url, 1)); //len = 2
// Starts one request per episode page (1..lengths) and logs the first `src="/player.php..."`
// match found in each response. `callback` is invoked once per loop iteration.
var getLink = function(lengths, callback){
for (let i = 1; i <= lengths; i++) {
// NOTE(review): `var` is function-scoped, so every response callback below reads the same
// `tmp`; once the loop has finished it holds the last URL, which matches the duplicated
// "url:" line in the reported output. A block-scoped declaration would give each iteration its own value.
var tmp = util.format(url, i);
try {
// The requests are only started here; nothing waits for one to finish before the next begins.
request(tmp, function (err, res, html){
console.log('url:', tmp);
if(err){
console.log("error:", err);
console.log("statusCode:", res && res.statusCode);
}else{
// NOTE(review): in this pattern `php?` makes the final "p" optional (the "?" is not literal),
// and `.{1,}` is greedy, so the capture runs to the last `"` on the line rather than the first.
var reg = /src="(\/player.php?.{1,})"/;
var result = reg.exec(html);
// NOTE(review): exec() returns null when nothing matches, so result[1] would throw here.
console.log(result[1]);
}
});
// Runs immediately after the request is started, not after its response arrives.
callback();
} catch (error) {
// Only synchronous throws from request()/callback() reach this handler; an exception raised
// later inside the response callback is outside this try block.
console.log(error);
break;
}
}
}
here is my output:
episode: 2
url: http://www.nicotv.me/video/play/57838-1-2.html
/player.php?u=aHR0cDovL3R5angyLmtpbmdzbnVnLmNuLzM2MHl1bi0xNS5waHA/dmlkPTE1NzkxMzU2MzEyNDAwNTQ5&p=360biaofan&c=0&j=aHR0cDovL2ppZXhpLmtpbmdzbnVnLmNuLzM2MGJpYW9mYW4ucGhwP3VybD0=&x=10&y=&z=
url: http://www.nicotv.me/video/play/57838-1-2.html
/player.php?u=aHR0cDovL3R5angyLmtpbmdzbnVnLmNuLzM2MHl1bi0xNS5waHA/dmlkPTE1Nzg1MDQyMDYyNDAwNTgx&p=360biaofan&c=0&j=aHR0cDovL2ppZXhpLmtpbmdzbnVnLmNuLzM2MGJpYW9mYW4ucGhwP3VybD0=&x=10&y=&z=aHR0cDovL3R5angyLmtpbmdzbnVnLmNuLzM2MHl1bi0xNS5waHA/dmlkPTE1NzkxMzU2MzEyNDAwNTQ5
The first problem is that both of these /player*** links are from 57838-1-1.html,
and one of them is not complete.
The second problem is that the url output shows 57838-1-2.html twice.
Thanks for your kind help.

I had the same problem yesterday, and I solved it by:
Using request-promise
Replacing the loop method arrTitles.Each with for (const jt of arrTitles)
Here is a sample:
const request = require('request-promise');
const cheerio = require('cheerio');
/**
 * Log `url`, then resolve with whatever `request.get(url)` resolves to.
 * If the request rejects, the failure is written to console.error together
 * with the url and the function resolves with undefined instead of rejecting.
 *
 * @param {string} url - address to download.
 * @returns {Promise<*>} the request result, or undefined after a logged failure.
 */
var getUrlData = async function (url) {
  console.log(url);
  let body;
  try {
    body = await request.get(url);
  } catch (failure) {
    console.error(`${failure}: ${url}`);
    return undefined;
  }
  return body;
};
/**
 * Download `pageUrl` and print the trimmed text of every table cell on it.
 *
 * @param {string} pageUrl - address of the page to scrape.
 * @returns {Promise<void>} settles after the cell list has been logged.
 */
var run = async function (pageUrl) {
  // Call the sibling helper directly: no `fn` object exists in this snippet,
  // so `fn.getUrlData(...)` threw a ReferenceError before any request was made.
  const html = await getUrlData(pageUrl);
  const arrData = readTable(html);
  console.log(arrData);
};
/**
 * Collect the trimmed text of every `td` found under `table tr`, in document order.
 *
 * Parsing is best-effort: if cheerio cannot load or walk the document, the
 * failure is reported on console.error and whatever was collected so far
 * (usually an empty array) is returned instead of throwing.
 *
 * @param {string} document - HTML markup to parse.
 * @returns {string[]} cell texts.
 */
var readTable = function (document) {
  const arrData = [];
  try {
    const $ = cheerio.load(document);
    // cheerio's each() passes (index, element); using the element argument
    // avoids depending on `this` inside the callbacks.
    $('table tr').each((rowIndex, row) => {
      $(row)
        .children('td')
        .each((cellIndex, cell) => {
          arrData.push($(cell).text().trim());
        });
    });
  } catch (err) {
    // Previously swallowed silently, which made an unparsable page look like an empty table.
    console.error(`readTable: could not parse document: ${err}`);
  }
  return arrData;
};
// NOTE(review): run is declared with a pageUrl parameter but is invoked without one here,
// so pageUrl is undefined inside it — pass the address of the page to scrape.
run();

Related

How do I await a nodejs request-promise object

I'm trying to return the results of an http request from a function and thought request-promise was supposed to return after an await. I obviously don't have this right. I get "undefined" returned from the function.
// `s` receives getUrl's own return value, which is undefined (see the note inside getUrl).
var s = getUrl();
console.log('result: ' + s)
// Starts a request-promise call and logs any error, but returns nothing to the caller.
function getUrl() {
var rp = require('request-promise');
var url = 'http://myserver.com?param=xxx';
// The promise chain is neither returned nor awaited, so getUrl finishes before it settles.
rp(url)
.then(function (data) {
// This returns from the inner callback only; it becomes the chain's resolved value, not getUrl's result.
return data;
})
.catch(function (err) {
console.log(err);
});
}
How do I await the call so getUrl returns the data?
EDIT: After Kevin's comment. I tried to put this into a module and call it but it's returning [object Promise].
function getUrl(url, params) {
var rp = require('request-promise');
return rp(url + '?' + params)
.then(function (data) {
return data;
})
.catch(function (err) {
console.log(err);
});
}
/**
 * Fetch the update payload from the fixed server address.
 *
 * @param {*} o - request descriptor; not read by this function.
 * @returns {Promise<*>} whatever getUrl resolves with. Being async, this
 *   function always returns a promise, so callers must await it.
 */
async function getUpdate(o) {
  const endpoint = 'http://myserver.com';
  const query = 'param=xxx';
  return await getUrl(endpoint, query);
}
// Dispatches on o.code and returns the result: '' for unknown codes, or for code 'g'
// the value returned by getUpdate(o).
exports.askVelo = (o) => {
var sRtn = '';
console.log(o.code);
switch (o.code) {
case 'g':
// getUpdate is an async function, so this assigns a pending Promise, not the data.
// NOTE(review): nothing here awaits it; the caller has to await/then the returned value.
sRtn = getUpdate(o);
break;
}
// String concatenation with a Promise yields "[object Promise]", which is what gets logged.
console.log('heres rtn: ' + sRtn); // sRtn is [object Promise]
return sRtn;
}
getUpdate is now just returning [object Promise]... Why is that not working now?
I really needed to get the data in a synchronous fashion, so I finally gave up and used sync-request and skipped all the await/promise stuff.
// Blocking alternative described above: issue the GET through sync-request and
// read the body as a string. NOTE(review): presumably this stalls the event loop
// for the duration of the request — confirm before using it in a server.
// `sr` is now declared; the bare assignment created an implicit global and
// throws a ReferenceError in strict mode / ES modules.
const sr = require('sync-request');
const url = 'http://myserver';
const qstring = 'param=xxx';
const res = sr('GET', url + '?' + qstring);
const str = res.body.toString();

Synchronous/sequential REST calls in loop

I'm trying to call a REST API in a "for" loop, however, the results aren't what I'm expecting.
I've attempted to wrap everything in a promise, but the order of operations is still off, executing it asynchronously rather than synchronously.
var https = require('https');
var zlib = require("zlib");
var axios = require('axios');
const cheerio = require('cheerio');
// Module-level paging state; startGetLocation declares its own locals with the same names.
var page = 1;
var hasMore = "true";

/**
 * Pause helper.
 * @returns {Promise<void>} a promise that resolves after a 300 ms timer fires.
 */
function delay() {
  return new Promise((done) => {
    setTimeout(done, 300);
  });
}
// Waits for delay(), requests one page of search results over HTTPS, gunzips and parses the
// JSON, then fetches the first item's owner link with axios and logs its `h3.location` text.
// NOTE(review): the promise returned by this async function resolves right after https.get()
// has been *started*; nothing ties it to the response handlers below, so a caller that awaits
// getLocation does not wait for the location to be logged.
async function getLocation(page) {
// notice that we can await a function
// that returns a promise
await delay();
var apiUrl = 'https://my.api.com/search/advanced?page=' + page +
'&pagesize=5';
// Callback-style API: the function below runs later, when the response arrives.
https.get(apiUrl, function(response) {
console.log("headers: ", response.headers);
console.log(response.statusCode)
if (response.statusCode == 200) {
var gunzip = zlib.createGunzip();
var jsonString = '';
response.pipe(gunzip);
// Accumulate the decompressed body chunk by chunk.
gunzip.on('data', function(chunk) {
jsonString += chunk;
});
gunzip.on('end', function() {
// NOTE(review): `obj` is assigned without a declaration (implicit global).
obj = JSON.parse(jsonString);
var url = obj.items[0].owner.link;
axios(url)
.then(response => {
const html = response.data;
const $ = cheerio.load(html);
//OUTPUT LOCATION
console.log($('h3.location').text().trim());
})
.catch(console.error);
});
gunzip.on('error', function(e) {
console.log(e);
});
} else {
console.log("Error");
}
});
}
/**
 * Walk pages 1 and 2 in order: log the page number, then await getLocation
 * for that page before moving on to the next one.
 *
 * @returns {Promise<void>}
 */
async function startGetLocation() {
  for (let page = 1; page < 3; page += 1) {
    // OUTPUT PAGE NUMBER
    console.log(String(page));
    await getLocation(page);
  }
}
// Kick off the paging run. The promise returned by the async function is not awaited
// and has no catch handler attached here.
startGetLocation();
Based on the sample code, I would have expected the below to output:
1
New York
2
However, it's outputting:
1
2
New York
The problem is that the callback function that you passed to the https.get() function gets executed asynchronously and that the getLocation function does not wait until this part resolves.
So you could simply wrap the https.get() call and the unzipping part in a promise, wait for it to resolve and then do the axios-part.
/**
 * Look up one page of search results and log the location of the first hit.
 *
 * Steps: wait for delay(), GET the gzip-compressed JSON for `page`, take
 * `items[0].owner.link` from it, download that link with axios and log the
 * trimmed text of its `h3.location` element.
 *
 * @param {number} page - page number placed in the API query string.
 * @returns {Promise<void>} resolves once the location has been logged (or the
 *   axios/cheerio step has failed and been reported on console.error).
 *   Rejects when the API request fails, answers with a non-200 status, cannot
 *   be decompressed, or does not contain the expected JSON.
 */
async function getLocation(page) {
  await delay();
  const apiUrl = 'https://my.api.com/search/advanced?page=' + page +
    '&pagesize=5';

  // Adapt the callback/stream API to a promise so the caller's `await`
  // really waits for the response body.
  const url = await new Promise((resolve, reject) => {
    const request = https.get(apiUrl, (response) => {
      console.log("headers: ", response.headers);
      console.log(response.statusCode);

      if (response.statusCode !== 200) {
        // Drain the unused body so the socket can be released.
        response.resume();
        reject(new Error("Statuscode not as exepcted"));
        return;
      }

      const gunzip = zlib.createGunzip();
      // Decode as text inside the stream so multi-byte characters split
      // across chunks are not corrupted by string concatenation.
      gunzip.setEncoding('utf8');
      let jsonString = '';

      response.on('error', reject);
      response.pipe(gunzip);
      gunzip.on('data', (chunk) => {
        jsonString += chunk;
      });
      gunzip.on('end', () => {
        // A throw inside an event handler would escape the promise entirely,
        // so malformed or unexpected JSON is turned into a rejection here.
        try {
          const obj = JSON.parse(jsonString);
          resolve(obj.items[0].owner.link);
        } catch (e) {
          reject(e);
        }
      });
      gunzip.on('error', reject);
    });
    // Connection-level failures (DNS, refused, reset) are emitted on the request object.
    request.on('error', reject);
  });

  try {
    const response = await axios(url);
    const $ = cheerio.load(response.data);
    //OUTPUT LOCATION
    console.log($('h3.location').text().trim());
  } catch (err) {
    // Same best-effort behaviour as before: report and carry on with the next page.
    console.error(err);
  }
}

Wait for something in node.js

I'm trying to make a web scraper, but I can't get my function to wait for the second request to fill the name key on my object. It always returns undefined.
const request = require('request');
const cheerio = require('cheerio');
// Root of the site and of its shop section; product hrefs are appended to base_url.
const base_url = 'https://www.supremenewyork.com';
const shop_url = 'https://www.supremenewyork.com/shop/';
// Loads one shop category page and resolves with one {name, url, isSoldout} object per
// product link found under `.inner-article`. Rejects if the category request fails.
function getItems(category) {
var items = [];
return new Promise(function(resolve, reject) {
request.get(shop_url + category, function(err, res, body) {
if(err) {
reject(err);
} else {
var $ = cheerio.load(body);
$('a', '.inner-article').each(function(i, el) {
var url = base_url + $(this).attr('href');
var isSoldout = false;
var name;
// NOTE(review): attr() called with two arguments looks like cheerio's setter form, which would
// assign the class instead of testing for it — confirm against the cheerio docs.
if($(this).find('div').attr('class', 'sold_out_tag').length === 1)
isSoldout = true;
// This product request is only started here; its callback assigns `name` later.
request.get(url, function(err, res, html) {
var $ = cheerio.load(html);
name = $('h1', 'div').text();
})
// Runs before the callback above has fired, so `name` is still undefined when it is copied
// into the item.
items.push({name: name, url: url, isSoldout: isSoldout});
})
// Resolves as soon as the loop ends, without waiting for any of the product requests.
resolve(items);
}
})
})
}
I expect the name key to be filled, but instead I get undefined.
Use the request-promise package which wraps request in Promise. Then you can use async/await to wait for result like:
const rp = require('request-promise');
const cheerio = require('cheerio');
// Root of the site and of its shop section; product hrefs are appended to base_url.
const base_url = 'https://www.supremenewyork.com';
const shop_url = 'https://www.supremenewyork.com/shop/';

/**
 * Scrape one shop category and the detail page of every product linked from it.
 *
 * Product pages are requested one after another, so the returned list keeps
 * the order of the links on the category page.
 *
 * @param {string} category - path segment appended to shop_url.
 * @returns {Promise<Array<{name: string, url: string, isSoldout: boolean}>>}
 *   one entry per product link. Rejects if any request fails.
 */
// notice async keyword
async function getItems(category) {
  const items = [];
  // using await to wait for promise to resolve
  const body = await rp.get(shop_url + category);
  const $ = cheerio.load(body);

  // `await` is only valid directly inside an async function, so the links are
  // walked with for...of rather than inside a plain .each() callback (which
  // was a SyntaxError and could not have waited anyway).
  for (const link of $('a', '.inner-article').toArray()) {
    const url = base_url + $(link).attr('href');
    // Test for the sold-out marker with a selector; .attr(name, value) is the
    // setter form and would overwrite the class instead of checking it.
    const isSoldout = $(link).find('div.sold_out_tag').length > 0;

    const html = await rp.get(url);
    // Separate name so the category page's `$` is not shadowed.
    const product$ = cheerio.load(html);
    const name = product$('h1', 'div').text();
    items.push({ name, url, isSoldout });
  }
  return items;
}
More about async/await at MDN

Polling a URL until certain value is set in JSON response : Mocha, Integration testing

I am working on automating an End to end scenario using Mocha.
I have a url endpoint which is to be polled until a certain value is obtained in the resulting response. Is there any way to do it ?
Example with request and callback approach:
const request = require('request');
describe('example', () => {
  // Polls the URL up to 10 times, 100 ms apart, and passes as soon as the
  // response body equals the expected value; fails once the attempts run out.
  it('polling', function (done) {
    // A regular function is required here: `this` must be the Mocha test context.
    this.timeout(5000);
    const expectedValue = '42';
    const delayBetweenRequest = 100;
    let attemptsLeft = 10;

    // One polling round; schedules the next round itself when needed.
    const check = () => {
      request('http://www.google.com', (error, response, body) => {
        if (body === expectedValue) {
          done();
          return;
        }
        attemptsLeft -= 1;
        if (attemptsLeft === 0) {
          done(new Error('All attempts used'));
          return;
        }
        setTimeout(check, delayBetweenRequest);
      });
    };

    check();
  });
});
Example with got and async/await approach:
const utils = require('util');
const got = require('got');
// Promise-returning sleep: resolves after the given number of milliseconds.
const wait = utils.promisify(setTimeout);
describe('example', () => {
  // Polls the URL up to 10 times, 100 ms apart; passes when the body equals
  // the expected value and fails (by throwing) when the attempts run out.
  //
  // The test is async, so it must NOT also accept `done`: Mocha treats a test
  // that both takes a callback and returns a promise as overspecified. The
  // outcome is signalled through the returned promise instead.
  it('polling', async function () {
    this.timeout(5000);
    const expectedValue = '42';
    const delayBetweenRequest = 100;
    for (let attemptsLeft = 10; attemptsLeft > 0; attemptsLeft -= 1) {
      const resp = await got.get('http://www.google.com');
      if (resp.body === expectedValue) return;
      await wait(delayBetweenRequest);
    }
    throw new Error('All attempts used');
  });
});
This is how I was able to do it with WebdriverIO and Mocha
describe("wait for value in content of page", () => {
  // Reloads the page once per second until its body parses to the expected
  // value, giving up after max_seconds_to_wait retries.
  // NOTE(review): written as blocking calls, so this presumably relies on
  // WebdriverIO's synchronous mode — confirm for the runner version in use.
  it("should be able to wait to value in url", () => {
    const max_seconds_to_wait = 10;
    let seconds_counter = 0;
    while (true) {
      browser.url('http://your.url.com');
      const response = JSON.parse(browser.getText("body"));
      console.log(response);
      // Stop immediately on success; previously the loop still paused a full
      // second and could hit the timeout check after the value was found.
      if (response === 'something') {
        return;
      }
      browser.pause(1000);
      seconds_counter++;
      if (seconds_counter > max_seconds_to_wait) {
        // Throw a real Error so the failure carries a stack trace.
        throw new Error('Waiting for json from url timeout error');
      }
    }
  });
});

Node.js : Call function using value from callback or async

I have written the .js file below, which calls the function defined further down.
objectRepositoryLoader.readObjectRepository() returns a hashmap whose values I need to use in the enterUserName(), enterPassword() and clickLoginButton() functions.
var path = require('path');
// Module-level map; it is read by enterUserName below but never assigned in this snippet.
var elementRepoMap = {}
// Page object: starts loading the element repository named after this file and exposes the
// login actions as instance methods.
var LandingPage = function(){
var fileName = path.basename(module.filename, path.extname(module.filename))
// The callback runs later, once the loader has produced the map.
objectRepositoryLoader.readObjectRepository(fileName+'.xml' , function(elementRepo){
console.log(elementRepo) //values are being printed here
// NOTE(review): inside this plain function `this` is not necessarily the LandingPage instance,
// and the property written here is not the module-level `elementRepoMap` variable read below.
this.elementRepoMap = elementRepo
});
this.enterUserName = function(value){
// Reads the module-level variable, which still holds the initial empty object.
console.log(elementRepoMap) //values are not being printed here
//Some Code
};
this.enterPassword = function(value){
//Some Code
};
this.clickLoginButton = function(){
//Some Code
};
};
// The instance is exported immediately, before the repository callback has had a chance to run.
module.exports = new LandingPage();
The objectRepositoryLoader.readObjectRepository() function defined in another file is as below:
// Loader that reads an XML element repository and passes a name -> {locatorType: locatorValue}
// map to a callback.
// NOTE(review): as pasted, the closing braces of objectRepoLoader and of this constructor are
// missing, so xmlObjectRepositoryLoader ends up nested inside objectRepoLoader — confirm
// against the real file.
var ObjectRepositoryLoader = function() {
// Builds the path under ./elementRepository/ and forwards the loaded map to `callback`.
// There is no return statement here, so callers receive undefined from this method.
this.readObjectRepository = function(fileName, callback) {
var filePath = './elementRepository/'+fileName;
// NOTE(review): objectRepoLoader has no return statement either, so this.loadedMap is assigned undefined.
this.loadedMap = this.objectRepoLoader(filePath, function(loadedMap){
return callback(loadedMap);
});
}
// Dispatches on the file type; only paths containing ".xml" are handled.
this.objectRepoLoader = function(filePath, callback){
if (filePath.includes(".xml")) {
this.xmlObjectRepositoryLoader(filePath, function(loadedMap){
return callback(loadedMap);
});
}
// Reads the XML file and, for every <A1 name="..."> element, collects the type/value
// attributes of its <AS> children into an inner map.
this.xmlObjectRepositoryLoader = function (xmlPath, callback){
var innerMap = {};
var elementName;
fs.readFile(xmlPath, "utf-8",function(err, data) {
if(err){
console.log('File not found!!')
}
else{
var doc = domparser.parseFromString(data,"text/xml");
var elements = doc.getElementsByTagName("A1");
for(var i =0 ; i< elements.length;i++){
var elm = elements[i];
elementName = elm.getAttribute("name");
var params = elm.getElementsByTagName("AS");
innerMap = {};
for(var j =0 ; j< params.length;j++){
var param = params[j];
var locatorType = param.getAttribute("type");
var locatorValue = param.getAttribute("value");
innerMap[locatorType] = locatorValue;
}
// NOTE(review): `loadedMap` is not declared anywhere in this function as shown; the
// identically named parameters above belong to other callbacks.
loadedMap[elementName] = innerMap;
innerMap={};
};
}
// Invoked on both the success and the error path.
return callback(loadedMap);
});
};
How can I call the enterUserName(), enterPassword() and clickLoginButton() functions from my spec.js file? Also, is there any way to avoid callbacks and use async.js instead, while still calling those functions from spec.js?
EDIT
I have modified my file like below:
// Promise-based variant: reads `<xmlPath>.xml`, fills `map` with one
// {locatorType: locatorValue} object per <Element name="...">, and returns a promise.
this.xmlObjectRepositoryLoader = function (xmlPath){
var innerMap = {};
var elementName;
// The ".xml" extension is appended here, so callers pass the path without it.
var filePath = xmlPath+'.xml'
// Keep a reference to the instance for use inside the promise executor below.
var self = this
return new Promise(
function(resolve, reject){
console.log("In xmlObjectRepositoryLoader : "+filePath)
self.readFilePromisified(filePath)
.then(text => {
var doc = domparser.parseFromString(text,"text/xml");
var elements = doc.getElementsByTagName("Element");
for(var i =0 ; i< elements.length;i++){
var elm = elements[i];
elementName = elm.getAttribute("name");
var params = elm.getElementsByTagName("param");
innerMap = {};
for(var j =0 ; j< params.length;j++){
var param = params[j];
var locatorType = param.getAttribute("type");
var locatorValue = param.getAttribute("value");
innerMap[locatorType] = locatorValue;
}
// NOTE(review): `map` is not declared in this function; presumably it lives in an enclosing
// scope that is not shown — confirm.
map[elementName] = innerMap;
innerMap={};
}
console.log(map) // prints the map
// NOTE(review): the promise resolves with the raw file text, not with the map built above.
resolve(text)
})
.catch(error => {
reject(error)
});
});
}
// Promise wrapper around fs.readFile: logs the file name, then resolves with the
// file's contents decoded as UTF-8, or rejects with the read error.
this.readFilePromisified = function (filename) {
  console.log("In readFilePromisified : " + filename);
  const options = { encoding: 'utf8' };
  return new Promise((resolve, reject) => {
    fs.readFile(filename, options, (error, data) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(data);
    });
  });
}
I am calling above function from another file as below:
// Consumes readObjectRepository as if it returned a promise.
// NOTE(review): the readObjectRepository shown earlier has no return statement; if that is
// still the version in use, this call yields undefined and `.then` fails as reported below.
objectRepositoryLoader.readObjectRepository(fileName)
.then(text => {
console.log(text);
})
.catch(error => {
console.log(error);
});
But it gives me error as
.then(text => { ^
TypeError: Cannot read property 'then' of undefined
In this case, how can I use a promise to call another promise-returning function, use its result in yet another promise-returning function, and return the computed value to the caller so that it can be used in other functions? I may sound a bit confused. Please help.
You can use async.waterfall and async.parallel to perform this task.
See the async library documentation for reference.
I tried to get your code working, and I explained the implementation in the comments.
// Load the element repository first, then register the page actions.
async.waterfall([
  function (next) {
    // Pass `next` as the callback parameter in the function definition; after
    // building the map, readObjectRepository must report it as next(null, result).
    objectRepositoryLoader.readObjectRepository(fileName + '.xml', next);
  },
], function (err, result) {
  if (err) {
    // Previously ignored: a failed load left the page object silently unusable.
    console.log(err);
    return;
  }
  // Do whatever you want with result
  // NOTE(review): `this` inside the task functions below depends on how the async
  // library invokes them and is probably not the page object — confirm before relying on it.
  async.parallel([
    function (callback) {
      this.enterUserName = function (value) {
        console.log(elementRepoMap);
        // Some Code
      };
      // Each task has to signal completion, otherwise the final callback never runs.
      callback(null);
    },
    function (callback) {
      this.enterPassword = function (value) {
        // Some Code
      };
      callback(null);
    },
    function (callback) {
      this.clickLoginButton = function () {
        // Some Code
      };
      callback(null);
    },
  ], function (err, results) {
    // optional callback
  });
});

Resources