I'm creating a scraper using Express and Request.
An array of URL's is passed to request which is then iterated through a for loop. Once all the data is parsed and all data is resolved the callback calls res.send.
I'm trying to convert this into promises but I believe the for loop I am using will not allow me. If the loop is causing the issue, is there another way I can code this and achieve the same result?
Callback Method
function scrape(callback){
for(var i = 0; i < urls.length; i++){
request(urls[i], function(error, response, html){
if(!error && response.statusCode == 200){
// LOAD Cherio (jQuery) on the webpage
var $ = cheerio.load(html);
try{
var name = $(".name").text();
var mpn = $(".specs.block").contents().get(6).nodeValue.trim();
var jsontemp = {"MPN": "", "Name": "", "PriceList": {}};
jsontemp.MPN = mpn;
jsontemp.Name = name;
// Traverse the DOM to get tr tags and extract info
$(".wide-table tbody tr").each(function (i, row) {
var $row = $(row),
merchant = $row. attr("class").trim(),
total = $row.children(".total").text();
jsontemp.PriceList[merchant] = merchant;
jsontemp.PriceList[merchant] = total;
});
}
catch(err){
console.log('Error occured during data scraping:');
}
list.push(jsontemp);
}
else{
console.log(error);
}
count++;
callback();
});
}
}
});
scrape(() => {
console.log(count);
if(count == urls.length){res.send(list)}
});
Promise Implementation Attempt
var urls = [
"http://test.com/",
"http://test.com/2"
];
var list = [];
var count = 0;
scrape().then((data) => {
list.push(data)
if(count == urls.length){res.send(list)}
})
.catch(error => console.log(error))
function scrape(){
for(var i = 0; i < urls.length; i++){
return new Promise(function (resolve, reject) {
request(urls[i], function(error, response, html){
if(!error && response.statusCode == 200){
var $ = cheerio.load(html);
try{
var name = $(".name").text();
var mpn = $(".specs.block").contents().get(6).nodeValue.trim();
var jsontemp = {"MPN": "", "Name": "", "PriceList": {}};
jsontemp.MPN = mpn;
jsontemp.Name = name;
// TRAVERSING DOM FOR DATA //
$(".wide-table tbody tr").each(function (i, row) {
var $row = $(row),
merchant = $row. attr("class").trim(),
total = $row.children(".total").text();
jsontemp.PriceList[merchant] = merchant;
jsontemp.PriceList[merchant] = total;
});
}
catch(err){
console.log('Error occured during data scraping:');
}
resolve(jsontemp);
}
else{
console.log(error);
return reject(error);
}
count++;
});
}
}
You need to store these promises in a list, and then call Promise.all to get a single promise for everything:
function scrape() {
var promises = []; // array of promises
for(var i = 0; i < urls.length; i++) {
var url = urls[i];
var promise = new Promise(function(resolve, reject) {
// ...
};
// add to array
promises.push(promise);
}
// return a single promise with an array of the results
// by using Promise.all
return Promise.all(promises);
}
Also, don't use loop variables (like i) inside a function inside a loop when using var. Instead, you should declare a url variable outside the promise callback function, or replace var with the newer let.
Related
I am trying to finish my script, but for some reason i don't know, it refuses to execute in the order i put it in.
I've tried placing a 'wait' function between the JoinRequest update function and the following code, but when run, it acts as if the function call and wait function were the other way round, countering the point of the wait().
const Roblox = require('noblox.js')
var fs = require('fs');
var joinRequests = []
...
function wait(ms) {
var d = new Date();
var d2 = null;
do { d2 = new Date(); }
while(d2-d < ms*1000);
};
...
function updateJReqs() {
Roblox.getJoinRequests(4745601).then((array) => {
var i;
var final = [];
for(i = 0; i < array.length; i++) {
final.push(array[i].username);
};
if(final === '') {
final = '-None';
};
joinRequests = final
console.log('Updated join requests.')
});
}
function check() {
setTimeout(() => {
fs.readFile('Request.txt',encoding = 'utf-8', function(err, data) {
if (err) {
check();
} else {
updateJReqs(); //for some reason this function is executed alongside the below, not before it.
// Tried putting wait(x) in here.
console.log('Request received: ' + data)
var solution = joinRequests
console.log('Fuffiling request with ' + solution)
fufillRequest(solution)
fs.unlink('Request.txt', function(err) {
if(err) throw err;
});
check();
}
});
}, 400)
}
check();
The script is supposed to wait until a file is created (accomplished), update the list of join requests (accomplished) and then create a new file with the list of join requests in(not accomplished).
if I understand your code you work with async code, you need to return a promise in updateJReqs and add a condition of leaving from the function because you have an infinite recursion
function updateJReqs() {
return new Promise(resolve => {
Roblox.getJoinRequests(4745601).then((array) => {
var i;
var final = [];
for(i = 0; i < array.length; i++) {
final.push(array[i].username);
};
if(final === '') {
final = '-None';
};
joinRequests = final
console.log('Updated join requests.')
resolve();
});
}
}
async function check() {
setTimeout(() => {
fs.readFile('Request.txt',encoding = 'utf-8', function(err, data) {
if (err) {
await check();
} else {
await updateJReqs();
// Tried putting wait(x) in here.
console.log('Request received: ' + data)
var solution = joinRequests
console.log('Fuffiling request with ' + solution)
fufillRequest(solution)
fs.unlink('Request.txt', function(err) {
if(err) throw err;
});
// you dont have an exit from your function check();
return 'Success';
}
});
}, 400)
}
check().then(res => console.log(res));
I currently have two promises, whereas the child is dependent on the parents success. I want to Resolve/reject the parent promise from the child promises "then".
const UserApplicaiton = require('../applications/user'), User = new UserApplicaiton();
class CheckParams {
constructor() { }
required(params, required_params) {
return new Promise(function(resolve, reject, onCancel) {
// set i
var i;
// set missed_required_params
var missed_required_params = [];
// check for userCredentials if user_id is required param, convert from credentials to user_id
if(required_params.includes("user_id")){
// set as const
const user_key = String(params.userCredentials.user_key);
const user_secret = String(params.userCredentials.user_secret);
// check in database
User.info(user_key, user_secret).then((data) => {
// if data
if(data) {
// add user_id to params
params.user_id = data[0]._id;
// loop params
for(i = 0; i < required_params.length; i++){
// if params that's required is there, else add to array
if(!(required_params[i] in params)){
missed_required_params.push(required_params[i]);
}
}
if(missed_required_params.length !== 0){
reject("Missed parameters: " + missed_required_params);
}else{
resolve(params);
}
}
}).catch((err) => {
reject(err);
});
}else{
// loop params
for(i = 0; i < required_params.length; i++){
// if params that's required is there, else add to array
if(!(required_params[i] in params)){
missed_required_params.push(required_params[i]);
}
}
if(missed_required_params.length !== 0){
console.log("hello");
return reject("Missed parameters: " + missed_required_params);
}else{
console.log("hello2");
resolve(1);
}
}
});
}
}
module.exports = CheckParams;
The goal for the second promise is to add to an object based on the response, and then resolve the parent promise, which will be used later in the code.
This doesn't work at all. Async doesn't really help.
Your problem appears to be that if(data) { is missing an else clause where you would settle the promise as well. Avoiding the Promise constructor antipattern helps to avoid such mistakes as well.
required(params, required_params) {
var promise;
if (required_params.includes("user_id")) {
const user_key = String(params.userCredentials.user_key);
const user_secret = String(params.userCredentials.user_secret);
promise = User.info(user_key, user_secret).then((data) => {
if (data) {
params.user_id = data[0]._id;
}
// else
// throw error? keep user_id undefined?
})
} else {
promise = Promise.resolve();
}
return promise.then(() => {
var missed_required_params = [];
for (var i = 0; i < required_params.length; i++) {
if (!(required_params[i] in params)) {
missed_required_params.push(required_params[i]);
}
}
if (missed_required_params.length !== 0) {
throw new Error("Missed parameters: " + missed_required_params);
} else {
return params;
}
});
}
when we console.log(restaurantdata) inside the .on('end') function return log but outside function log cant give value return undefined.
var restaurantdata=[];
fs.createReadStream('restaurantsa9126b3.csv')
.pipe(csv())
.on('data', (data) => restaurantdata.push(data))
.on('end', () => {
return restaurantdata;
})
console.log(restaurantdata);
yield this.render('home',{
restaurantdata:restaurantdata,
});
You can try this...
function convert(data) {
data = data.split('\n');
var heading = data[0].split(',');
var arr = [];
var line,words,err;
for(var i=1;i<data.length-1 ;++i){
var obj = {};
line=data[i];
words = line.split(',');
var k =0;
for(var j=0;j<words.length;++j){
if (words[j].indexOf('\"')==-1){
obj[heading[k]] = words[j];
k++;
} else{
err = [];
err.push(words[j].substring(1,words[j].length));
j++;
while (words[j].indexOf('\"')==-1) {
err.push(words[j]); j++;
}
err.push(words[j].substring(0,words[j].length-1));
obj[heading[k]] = err;
k++;
}
}
arr.push(obj);
}
return arr;
}
Hope, This may help you...
I'm doing middleware module that will extract data from redis and put to req.my_session.[here]
This is function that call inside app.use();
function parse_cookies(req){
if(req.headers.cookie != null){
var result = req.headers.cookie.match(new RegExp('m:[^=]*=[^; ]*', 'ig'));
if(result != null){
for(var i = 0; i < result.length; i++){
var result1 = result[i].split('=');
req.my_session[result1[0].substr(2)] = result1[1];
// get from redis value
client.get('sess:'+result1[1], function(err, reply) {
// reply is null when the key is missing
console.log(reply);
let li = i;
req.my_session[result1[0].substr(2)] = reply;
console.log('li = ' + li);
console.log('result1.lenght= ' + result.length);
if(i == result.length){
console.log('call the next');
}
});
}
}
}
} // parse_cookies
in console i outputs always 3, how can I get all data from database using redis.get and on last data call next() function for get out from my function?
problem it's get data from database in my middleware, I can't because redis has callback function
client.get("missingkey", function(err, reply) {
// reply is null when the key is missing
console.log(reply);
});
I think the issue is becuase of async in loop you can try the following
function parse_cookies(req){
if(req.headers.cookie != null){
var result = req.headers.cookie.match(new RegExp('m:[^=]*=[^; ]*', 'ig'));
if(result != null){
var promises = [];
for(var i = 0; i < result.length; i++){
var result1 = result[i].split('=');
promises.push(getFromRd(req, result1));
}
return Promise.all(promises)
.then(() => {
return next()
})
.catch((e) => {
return next({error: e})
})
}
}
} // parse_cookies
function getFromRd(req, result1) {
req.my_session[result1[0].substr(2)] = result1[1];
// get from redis value
return client.get('sess:'+result1[1], function(err, reply) {
if (err) {
throw Error(' failed to find ' + 'sess:' + result1[1])
}
// reply is null when the key is missing
console.log(reply);
let li = i;
req.my_session[result1[0].substr(2)] = reply;
console.log('li = ' + li);
console.log('result1.lenght= ' + result.length);
return {success:true}
});
}
I need to return a json object to my api. To do this I have a module that does some requests and should return the results.
My problem is grasping the promise concept and implementing it.
server.js
app.get('/users', function(req, res){
request.getUsers()
.then(function(users){
console.log(users);
res.contentType('application/json');
res.send(JSON.stringify(users));
})
.catch(function(){
console.log(users);
});
});
module.js
exports.getUsers = function(){
var params = {search_string:""};
var users = [];
return new Promise(function(resolve, reject){
var result = connection.Users.get(params, function(error,response)
{
var user = [];
for(let i = 0; i < response.data.length; i++)
{
user = response.data;
}
users.push({user});
});
if(result != null)
{
console.log(result);
resolve(result);
}
else
{
reject(new Error('Try Again'));
}
});
}
When I run the server I get the typeError: expecting a function but got [object object]
I did not really get what is wrong.
How could I return an array from my module to my API using promises?
EDIT:
app.get('/users', function(req, res){
request.getUsers()
.then(function(users){
console.log(users);
res.contentType('application/json');
res.send(JSON.stringify(users));
})
.catch(function(){
console.log("not resolved");
});
});
My problem now is actually that I am getting the .catch even before any request is made the at /users endpoint and I dont know why.
In module.js you used new Promise() constructor but the input parameter should be a function and not an object, so to fix that use:
return new Promise(function(resolve, reject) {
var result = connection.Users.get(params, function(error,response)
...
});
Notice its not new Promise({function(...) but new Promise(function(...)) ...
Read more here:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
Edit:
I have modified your code to work to fix the second problem:
exports.getUsers = function(){
var params = {search_string:""};
var users = [];
return new Promise(function(resolve, reject){
var result = connection.Users.get(params, function(error,response) {
if(error || !response)
{
// report error
reject(new Error('Try Again'));
}
else
{
//process response
var user = [];
for(let i = 0; i < response.data.length; i++)
{
user = response.data;
}
users.push({user});
// report success
resolve(users);
}
});
}
You need to call resolve or reject inside connection.Users.get(params, function(error,response) {
Modify your module.js code as below. You passed an object instead of a function.
register.getUsers = function () {
var params = { search_string: "" };
var users = [];
return new Promise(function (resolve, reject) {
var result = connection.Users.get(params, function (error, response) {
var user = [];
for (let i = 0; i < response.data.length; i++) {
user = response.data;
}
users.push({ user });
});
if (result != null) {
console.log(result);
resolve(result);
}
else {
reject(new Error('Try Again'));
}
});
};
you declared user variable as an array and inside the for loop isn't useful because the user variable is always equals to response.data
if response.data is array of JSON object you can push it to users array inside loop
for (let i = 0; i < response.data.length; i++) {
users.push(response.data[i]);
}
I guess you want to return the array of objects
also I recommend you to use bluebird module to return promises
and also you can use Promise.mapSeries instead of for loop like:
return Promise.mapSeries(response.data, item => {
users.push(item)
})