Requesting URL's one by one - node.js

I'm trying to get some data from a lot of URLs using 'request', but I can't manage to do it one url at a time.
I've tried to understand async/promises in order to make it work, but with no success. Trial and error didn't work.
I've seen other methods using different modules, but this requires rewriting most of my code and I believe there is an easier way to adapt it to my current code.
Here is a minimized version of the code :
const request = require('request');
const fs = require('fs');
// Load the product ids: prod.txt holds them as a single "|"-separated string.
const prod = fs.readFileSync('prod.txt', "utf8");
const prodid = prod.split("|");
// `i` is one variable shared by the loop and by every callback created in it.
var i;
// Collects Name/ISOnr/GTIN of every product fetched so far; it is never reset.
var summary=[];
// The loop only *starts* the requests: each callback runs later, when its
// response arrives, so all requests are in flight together and complete in
// arrival order rather than in URL order.
for (i=0;i<prodid.length;i++){
request('https://www.api.example.com/id='+prodid[i], { json: true }, (err, res, body) => {
// A failed request is logged and otherwise ignored.
if (err) { return console.log(err); }
if (body == 'NULL') {
// The callbacks run after the loop has finished, so `i` already equals
// prodid.length here; that is why every message shows the last page number.
console.log("Page " + i + " out of " + prodid.length + " is NULL!");
} else {
summary.push(body.items[0].Name);
summary.push(body.items[0].ISOnr);
summary.push(body.items[0].GTIN);
console.log("Page " + i + " out of " + prodid.length + " is done!");
// Appends the whole accumulated summary (not only this product) to data.txt.
fs.appendFileSync('data.txt',JSON.stringify(summary));
}
});
}
There is no async/promise involved in the example above, just the requests inside a loop.
From what I've seen, the results come back in no particular order (probably in the order in which the requests finish).
In the console, I always see page 500 out of 500, not 1/500, 2/500, etc.
What I'm trying to achieve is to make the requests one at a time, in the order of the URLs (preferably with a 1000ms delay between them).

You can promisify your request and `await` each call in turn (note that `await` has to run inside an `async` function):
// Runs the requests strictly one after another, pausing between them.
// Uses `request`, `fs`, `prodid` and `summary` from the script in the question.

// Pause between two consecutive requests, in milliseconds.
const REQUEST_DELAY_MS = 1000;

// Resolves after `ms` milliseconds.
const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

// Promise wrapper around the callback-style request() call.
// Resolves with the parsed body, or with null when the API answers 'NULL'.
// The promise must always settle, otherwise the loop below would wait forever.
// Rejects when the request itself fails.
const fetchProduct = (id) =>
  new Promise((resolve, reject) => {
    request(
      'https://www.api.example.com/id=' + id,
      { json: true },
      (err, res, body) => {
        if (err) {
          reject(err);
          return;
        }
        resolve(body === 'NULL' ? null : body);
      }
    );
  });

// `await` is only valid inside an async function, so the loop lives in one.
(async () => {
  for (let i = 0; i < prodid.length; i++) {
    // 1-based page number for the progress messages.
    const page = i + 1;
    try {
      const result = await fetchProduct(prodid[i]);
      if (result === null) {
        console.log('Page ' + page + ' out of ' + prodid.length + ' is NULL!');
      } else {
        summary.push(result.items[0].Name);
        summary.push(result.items[0].ISOnr);
        summary.push(result.items[0].GTIN);
        console.log('Page ' + page + ' out of ' + prodid.length + ' is done!');
        fs.appendFileSync('data.txt', JSON.stringify(summary));
      }
    } catch (err) {
      // Log the failure (request error or unexpected body shape) and continue
      // with the next id, as the callback version in the question did.
      console.log(err);
    }
    // No need to wait after the last request.
    if (page < prodid.length) {
      await wait(REQUEST_DELAY_MS);
    }
  }
})().catch((err) => console.log(err));

Related

Why is my variable not altering after changing once

I am making a simple web application that fetches the definition of a word using an API. There's definitely nothing wrong with the API keys, because the application semi-works: the first time I search for a word it gives back a result; however, from the second try onwards the definition doesn't change (it matches the first word instead of the latest one).
I'm thinking it has something to do with scopes and constants but I tried that already (Change var to let or const). I've read online about that callback functions are Asynchronous but I don't think that's the issue at all.
// GET /definition/:word: requests the definition of the word and renders the
// 'definition' view, or redirects to /error.
// `word`, `options`, `url`, `language_code` and `definition` are not declared
// in this handler; presumably they are module-level variables shared by all
// requests (verify against the full index.js).
app.get('/definition/:word', (req, res) => {
// Only updates (and lower-cases) the shared `word` when it differs from the route parameter.
if (word !== req.params.word) {
console.log("If statement");
word = req.params.word;
console.log(word);
word = word.toLowerCase();
}
// `+=` appends to whatever earlier requests left in options.path, so the path
// keeps growing from one request to the next.
options.path += '/' + language_code + '/' + word + '?fields=definitions' +
'&strictMatch=true';
// The same accumulation happens to `url`.
url += options.path;
console.log(options.path);
request(url, options, (error, response, body) => {
if (error) {
res.redirect('/error');
} else {
let statusCode = (response.statusCode);
console.log(statusCode);
if (statusCode === 200) {
let data = JSON.parse(body);
// Logs the value left over from the previous request, before it is replaced below.
console.log(definition);
// Only the first sense of the first entry is used; String() flattens the definitions array.
definition = String(data.results[0].lexicalEntries[0].entries[0].senses[0].definitions);
console.log(definition);
res.render('definition', {
wordTitle: word,
definitions: definition
});
} else {
// Any non-200 answer is treated as an error.
res.redirect('/error');
}
}
});
});
// POST /: reads the submitted word from the form body and redirects to its
// definition page.
app.post('/', (req, res) => {
  console.log("Post");
  word = String(req.body.Word);
  word = word.toLowerCase();
  console.log(word);
  // Encode the word so that spaces, "/" or "?" in the input cannot break or
  // alter the redirect path; Express decodes it again for req.params.word.
  res.redirect('/definition/' + encodeURIComponent(word));
});
EDIT 1:
full index.js source code: https://github.com/NikodemBieniek/dictionary/blob/myfeature/server/index.js
I think the issue is with below line of code
options.path += '/' + language_code + '/' + word + '?fields=definitions' +
'&strictMatch=true';
So the first time you make the API call and pass 'XYZ' as the param,
the value in options.path will end with
/en-gb/xyz?fields=definitions&strictMatch=true
The second time you make the API call and pass 'PQR' as the param, the value in options.path will end with
/en-gb/xyz?fields=definitions&strictMatch=true/en-gb/pqr?fields=definitions&strictMatch=true
because you are doing string concatenation on a variable that outlives the request.
The possible fix can be
// GET /definition/:word: looks the word up in the Oxford Dictionaries API and
// renders the 'definition' view, or redirects to /error when the lookup fails.
// All request state is local to the handler, so nothing carries over from one
// request to the next. `language_code` still comes from the enclosing module.
app.get('/definition/:word', (req, res) => {
  const word = req.params.word.toLowerCase();
  // Built from scratch on every request; the word is encoded so that spaces or
  // reserved characters cannot corrupt the path or the query string.
  const path = '/api/v2/entries/' + language_code + '/' + encodeURIComponent(word) +
    '?fields=definitions' + '&strictMatch=true';
  const url = 'https://od-api.oxforddictionaries.com' + path;
  // The full URL is passed separately, so only method and headers are needed here.
  const options = {
    method: "GET",
    headers: {
      'app_id': process.env.APP_ID,
      'app_key': process.env.APP_KEY,
    }
  };
  request(url, options, (error, response, body) => {
    if (error) {
      res.redirect('/error');
      return;
    }
    const statusCode = response.statusCode;
    console.log(statusCode);
    if (statusCode !== 200) {
      res.redirect('/error');
      return;
    }
    let definition;
    try {
      // Only the first sense of the first entry is shown.
      const data = JSON.parse(body);
      definition = String(data.results[0].lexicalEntries[0].entries[0].senses[0].definitions);
    } catch (parseError) {
      // Malformed JSON or an unexpected response shape must not crash the server.
      console.log(parseError);
      res.redirect('/error');
      return;
    }
    res.render('definition', {
      wordTitle: word,
      definitions: definition
    });
  });
});
Remove the `options` and `url` variables declared on lines 12 and 26 of index.js, since they are now created inside the handler.

Node.js promises and bookshelf.js

I'm new into Promises.
I use Bookshelf.js as ORM.
I fetch a number of webpages, get person info (about actors) from those pages and add them into my database if they don't exist.
But there's a problem, even though console.log(name) returns actor names in the right order, my query checks for only one actor, the latest one, which is 9.
What's wrong here?
var entities = require("entities");
var request = require('request');
var cheerio = require('cheerio');
// Promise-returning wrapper around the callback-based request():
// fulfils with the response body, rejects with the request error.
function requestPromise(options) {
  return new Promise((fulfil, fail) => {
    request(options, (error, response, payload) => {
      if (error) {
        fail(error);
        return;
      }
      fulfil(payload);
    });
  });
}
var person = require('./models').person;
// GET /fetch: downloads nine person pages, scrapes the name from each and
// checks whether a person row exists for the loop index.
app.get('/fetch', function (req, res) {
var promises = [];
var headers = {
'User-Agent': req.headers['user-agent'],
'Content-Type': 'application/json; charset=utf-8'
};
// Start one request per page (person/1 .. person/9); nothing is awaited here.
for (var i = 1; i < 10; i++) {
promises.push(requestPromise({url: "http://www.example.com/person/" + i + "/personname.html", headers: headers}));
}
Promise.all(promises).then(function (data) {
// iterate through all the data here
// `var i` is function-scoped, so this is one variable shared by all callbacks
// created in the loop. data[i] is 0-based, while the pages were requested as
// person/1 .. person/9.
for (var i = 0; i < data.length; i++) {
// `$` is assigned without being declared in this snippet.
if ($ = cheerio.load(data[i])) {
var links = $("#container");
var name = links.find('span[itemprop="name"]').html(); // name
if (name == null) {
console.log("null name returned, do nothing");
} else {
name = entities.decodeHTML(name);
console.log(name); // returns names in the right order
// does this person exist in the database?
// fetch() is asynchronous: the callback below runs after the for loop has
// finished, so every callback sees the final value of `i`.
person.where('id', i).fetch().then(function (result) {
if (result) {
console.log(i + "exists");
} else {
console.log(i + " doesn't exist");
// returns "9 doesn't exists" 9 times instead of
// checking each ID individually, why?
}
});
}
} else {
console.log("can't open");
}
}
}, function (err) {
// error occurred here
console.log(err);
});
});
EDIT #2
Now the order is broken and my ID's aren't the same with the site's I fetch data from. I see ID's like 11 and 13 even though I iterate from 1 to 5 and it seems to overrule something since it adds duplicate entries.
Here's what I'm trying to do in a nutshell. "Visit these urls in order and add the data you fetch (e.g. names) in the same order (id1 = name1; id2 = name2, etc) to the database".
// Second attempt at GET /fetch: forEach gives every callback its own `i`, and
// rows that are not found are inserted.
app.get('/fetch', function (req, res) {
var promises = [];
var headers = {
'User-Agent': req.headers['user-agent'],
'Content-Type': 'application/json; charset=utf-8'
};
for (var i = 1; i < 5; i++) {
// NOTE(review): the closing quote after "person/" is missing, so as posted
// this line does not parse.
promises.push(requestPromise({url: "http://example.com/person/ + i + "/personname.html", headers: headers}));
}
Promise.all(promises).then(function (data) {
// iterate through all the data here
// `i` is forEach's 0-based index, while the pages were requested as
// person/1 .. person/4, so the id used below is one less than the number in
// the page's URL.
data.forEach(function (item, i) {
var $ = cheerio.load(item);
var name = $("#container span[itemprop='name']").text();
if (!name) {
console.log("null name returned, do nothing");
} else {
// name exists
// This catch receives every rejection of the fetch, so any failure leads to
// an insert attempt, not only a missing row.
person.where('id', i).fetch({require: true}).then(function (p) {
console.log(i + " exists");
}).catch(function () {
console.log(i + " does not exist");
// A rejection of save() is not handled here.
new person({id: i, name: name}).save(null, {method: 'insert'}).then(function () {
console.log("success" + i);
});
});
}
// NOTE(review): this function is passed as forEach's second argument (its
// thisArg), not to .then(), so it never runs as an error handler.
}, function (err) {
// error occurred here
console.log(err);
});
});
});
When you run your code through jshint, you will see a warning that says
Don't make functions within a loop.
In this piece of code the callback inside then does not run in sync with the enclosing for loop. It runs whenever the database has fetched your result.
// Excerpt from the question: the function passed to then() runs later, once the
// database has answered, and reads `i` at that moment rather than when the
// query was issued.
person.where('id', i).fetch().then(function (result) {
if (result) {
console.log(i + "exists");
} else {
console.log(i + " doesn't exist");
}
});
Therefore, when that callback runs eventually, the loop has long finished. Your callback function holds a reference to the loop counter i - which, by now, has the value 9.
It's better to use a function that accepts a parameter than to refer to a loop counter.
Luckily node makes this easy, you can use the forEach array function:
// Scrape the name from every fetched page and check whether the matching
// person row exists. forEach calls the function once per element, so each
// callback has its own `index` and `id`.
data.forEach(function (item, index) {
  // data[index] is the page that was requested as person/(index + 1): the
  // fetch loop in the question starts at 1, while forEach counts from 0.
  var id = index + 1;
  var $ = cheerio.load(item);
  var name = $("#container span[itemprop='name']").text();
  if (!name) {
    console.log("null name returned, do nothing");
    return;
  }
  console.log("successfully scraped name: " + name);
  person.where('id', id).fetch({require: true}).then(function (p) {
    console.log(id + " exists");
  }).catch(function (err) {
    // With {require: true} a missing row rejects with NotFoundError; anything
    // else (e.g. a connection problem) is a real error and is reported as such.
    if (err instanceof person.NotFoundError) {
      console.log(id + " does not exist");
    } else {
      console.log(err);
    }
  });
});
Note that you can make Bookshelf.js throw instead of silently passing over non-existing records with {require: true}.
More generally speaking, I don't see a real connection between scraping the name from a website and retrieving a model from the database. These two things should probably be done in separate functions that each return an individual promise for the respective thing. That way, requests to the database can run in parallel with requests to the web server.
It looks like you need a closure for person.where('id', i).fetch().
Also, consider using node-fetch instead of hand-rolling a promisified request.

Iteration of http requests in nodejs

I am trying to make multiple http get requests in nodeJs. I need to continue getting records until there are no more available. I am having problems trying to do the iteration of a chunk of code. I had been reading about the async module but I am not understanding how to apply it. Please help.
Here is the New code 11/25/2014:
The step I want to repeat is GetUrl(newBlock); it should run again and again until there are no more blocks. I determine that there are no more blocks when the result.length of the request response is less than 5,000.
Is there a way of getting this parameter back from the newBlock function and then stopping the loop if I use whilst from the async module?
// JS Script: GPSAPIClient
// Code Description: API client code to retrieve GPS data for customer with account number: 47631
// This script requires Node.Js environment installed with Request() and Node-crontab modules (through NPM).
// It will run (as a backend script) every minute and retrieve the GPS available.
//
// Author : Vanessa Torres, Professional Services
var request = require("request");
var fs = require('fs');
var async = require('async');
var crontab = require('node-crontab');
var csvFile = "";
var debug = false;
// Switch to debug mode when any command-line argument equals "debug"
// (compared case-insensitively).
for (var argIndex = 0; argIndex < process.argv.length; argIndex++) {
  if (process.argv[argIndex].toLowerCase() !== 'debug') {
    continue;
  }
  debug = true;
  console.log('\n\n\n******************DEBUG MODE ACTIVE(Do not run in prod)******************\n\n\n');
}
console.log('waiting to start running');
// Schedules the job with the cron expression '* * * * *'. Each run prepares the
// date parts, builds the request options and issues the first request.
var jobId = crontab.scheduleJob('* * * * *', function() {
// Getting the system date and adding leading zeros to single digits. I need this to build the get request with date filters.
var d = new Date();
var nday = d.getDate();
// getMonth() is 0-based; 1 is added further down.
var nmonth = d.getMonth();
var nhour = d.getHours();
var nmin = d.getMinutes();
var nfullyear = d.getFullYear();
if (nday < 10) {
nday = '0' + nday;
};
// 1 minute subtraction, except when the minute is zero (the value would be negative).
if (nmin != 0) {
var nmin = nmin - 1;
};
if (nmin < 10) {
nmin = '0' + nmin;
};
// added because TLC Plumbing is 2 hours behind us. (MST)
// NOTE(review): when getHours() is 0 or 1 this becomes negative, and the
// padding below then yields '0-2' / '0-1'.
nhour = nhour - 2;
if (nhour < 10) {
nhour = '0' + nhour;
};
var nmonth = nmonth + 1;
if (nmonth < 10) {
nmonth = '0' + nmonth;
};
// The date-based url is commented out, so the n* values computed above are
// currently unused and every run requests the same fixed date range.
var options = {
//url: 'https://credentials#api.comettracker.com/v1/gpsdata' + '?fromdate=' + nfullyear + '-' + nmonth + '-' + nday + 'T' + nhour + '%3a' + nmin + '%3a' + '00',
url: 'https://credentials#api.comettracker.com/v1/gpsdata?fromdate=2015-11-23T17%3a00%3a00&todate=2015-11-24T10%3a00%3a00',
method: 'GET',
rejectUnauthorized: !debug
};
// Repeats the request for `options`, reads the href of the third entry of the
// response's 'links' array and passes it to `callback`.
function GetUrl(callback) {
return request(options, function(error, response, body) {
console.log('request for links');
if (error) throw new Error(error);
var result = JSON.parse(body)['links'];
var urlnext = result[2].href;
console.log('UrlNext: ' + urlnext);
console.log(result[2].rel);
//moreBlocks = newBlock(urlnext);
// `moreBlocks` is not declared in this snippet; it receives whatever the
// callback returns.
moreBlocks = callback(urlnext);
});
// request to get the next block of records (maximum 5,000)
};
// HTTP get request - 1st request
request(options, function(error, response, body) {
if (error) throw new Error(error);
var result = JSON.parse(body)['gps-recs'];
console.log(result.length);
//console.log(result);
//create .csv file if result.length is > = 1 (If we received GPS records)
if (result.length >= 1 && result.length < 5000) {
console.log('entered first if, result.length: ' + result.length);
buildCSV(result);
};
//add the subsequent request when result.length = 5000
if (result.length == 5000) {
console.log('entered second if, result.length: ' + result.length);
buildCSV(result);
console.log('I came back from buildCSV. result.length is : ' + result.length);
// The ** markers are emphasis from the original post, not JavaScript; the
// intended statement is GetUrl(newBlock);
**GetUrl(newBlock)**;
//console.log('moreblocks back from newblock: ' + moreBlocks);
};
});
});
// Requests the block of GPS records that `urlnext` points to and appends the
// records to the CSV file. The outcome is reported through `moreBlocks`, which
// is not declared in this function; nothing is returned to the caller.
function newBlock(urlnext) {
console.log('URL passed to newBlock: ' + urlnext);
// option 2 - variables needed to built the url (like the first request)
var n = urlnext.length;
// Keeps everything from character 40 onwards; presumably 40 is the length of
// the URL prefix that is replaced below (TODO confirm).
var rest = urlnext.substr(40, n);
console.log('Rest: ' + rest);
var options = {
url: 'https://credentials#api.comettracker.com/v1/gpsdata/' + rest,
method: 'GET',
rejectUnauthorized: !debug
};
request(options, function(error, response, body) {
console.log('nextblock request');
if (error) throw new Error(error);
var result = JSON.parse(body)['gps-recs'];
if (result.length == 0) {
//no records for filter
console.log('first if' + result.length);
moreBlocks = false;
};
if (result.length < 5000 && result.length > 0) {
//last block appends record and ends
appendCSV(result);
moreBlocks = false;
console.log('second if' + result.length);
};
// A full block leaves moreBlocks untouched, and nothing in this function asks
// for the block after it.
if (result.length == 5000) {
//appends records but has to keep running
appendCSV(result);
console.log('third if- moreBlocks' + result.length);
};
console.log('moreBlocks: ' + moreBlocks);
});
};
// Writes the GPS records to file.csv as a header line followed by one line per
// record, replacing any existing file. Lines end with \r\n.
// `result` is an array of records with UserInfo.UserNumber, UserTimeTag,
// Latitude, Longitude, SpeedMph, Heading and Status. A null or undefined field
// becomes an empty cell. The write is asynchronous; a write error is thrown
// from the callback.
function buildCSV(result) {
  var header = ['UserNumber', 'UserTimeTag', 'Latitude', 'Longitude', 'SpeedMph', 'Heading', 'Status'];
  var lines = [header.join(',')].concat(result.map(function (record) {
    return [
      record.UserInfo.UserNumber,
      record.UserTimeTag,
      record.Latitude,
      record.Longitude,
      record.SpeedMph,
      record.Heading,
      record.Status
    ].join(',');
  }));
  // No seed string: starting from " " put a stray space in front of the header.
  var csvFile = lines.join('\r\n') + '\r\n';
  fs.writeFile('file.csv', csvFile, function(err) {
    if (err) throw err;
    console.log('file saved');
  });
}
// Appends an additional block of GPS records to file.csv, one \r\n-terminated
// line per record and no header.
// `result` is an array of records with UserInfo.UserNumber, UserTimeTag,
// Latitude, Longitude, SpeedMph, Heading and Status. A null or undefined field
// becomes an empty cell. The append is asynchronous; an error is thrown from
// the callback.
function appendCSV(result) {
  // No seed string: starting from " " put a stray space in front of the first
  // appended record.
  var csvString = result.map(function (record) {
    return [
      record.UserInfo.UserNumber,
      record.UserTimeTag,
      record.Latitude,
      record.Longitude,
      record.SpeedMph,
      record.Heading,
      record.Status
    ].join(',') + '\r\n';
  }).join('');
  fs.appendFile('file.csv', csvString, function(err) {
    if (err) throw err;
    console.log('Data appended');
  });
}
It is difficult to see whether moreBlocks is in the same scope as the while. On the other hand, a while statement would keep starting requests indefinitely until that flag changes, and that might be too late. You need to make sure you do the subsequent requests one at a time.
You could use async's `whilst` function:
// Repeats the links request, one run at a time, for as long as moreBlocks is true.
// The test function is synchronous, as in the async releases of that time;
// async v3 instead hands the test a callback: function (cb) { cb(null, moreBlocks === true); }
var moreBlocks = true;
async.whilst(function () {
  return moreBlocks === true;
}, function (callback) {
  request(options, function (err, res, body) {
    console.log('request for links');
    // Report failures through the callback; throwing here would bypass whilst
    // and crash the process.
    if (err) return callback(err);
    var urlnext;
    try {
      var result = JSON.parse(body)['links'];
      urlnext = result[2].href;
      console.log('UrlNext: ' + urlnext);
      console.log(result[2].rel);
    } catch (parseErr) {
      return callback(parseErr);
    }
    // You NEED to make sure you update moreBlocks.
    // This form only works when newBlock returns the flag synchronously.
    moreBlocks = newBlock(urlnext);
    // Go to next step. The first argument is the error slot, so pass null on success.
    callback(null);
  });
}, function (err) {
  // You are done! `err` is set when one of the runs failed.
  // Call any function you'd like.
  if (err) console.log(err);
});
Check the docs here!
Edit, if newBlock is asynchronous:
// Repeats the links request, one run at a time, for as long as moreBlocks is
// true; newBlock reports through its callback whether more blocks are expected.
// The test function is synchronous, as in the async releases of that time;
// async v3 instead hands the test a callback: function (cb) { cb(null, moreBlocks === true); }
var moreBlocks = true;
async.whilst(function () {
  return moreBlocks === true;
}, function (callback) {
  request(options, function (err, res, body) {
    console.log('request for links');
    // Report failures through the callback; throwing here would bypass whilst
    // and crash the process.
    if (err) return callback(err);
    var urlnext;
    try {
      var result = JSON.parse(body)['links'];
      urlnext = result[2].href;
      console.log('UrlNext: ' + urlnext);
      console.log(result[2].rel);
    } catch (parseErr) {
      return callback(parseErr);
    }
    // Now status has the info about moreBlocks.
    newBlock(urlnext, function (status) {
      moreBlocks = status;
      // Go to next step. The first argument is the error slot, so pass null on success.
      callback(null);
    });
  });
}, function (err) {
  // You are done! `err` is set when one of the runs failed.
  // Call any function you'd like.
  if (err) console.log(err);
});
Somewhere on newBlock:
// Fetches the next block of records and tells the caller, through
// callback(status), whether another block should be requested afterwards.
function newBlock(urlnext, callback) {
  // some code.. (build `options` for the next block from urlnext, as in the question)
  return request(options, function (error, response, body) {
    if (error) throw new Error(error);
    var result = JSON.parse(body)['gps-recs'];
    if (result.length > 0) {
      appendCSV(result);
    }
    // here status decides if we are done: only a full block of 5,000 records
    // means that more may follow.
    var status = result.length === 5000;
    return callback(status);
  });
}

NODE.JS function callback to render page

I have a function getthem() that checks a mongo db for listings and returns name,streamurl for it.
I pass those as var stream to the renderme that renders the /dashboard page.
My problem is that i get the console.log("END OF FIND:"+JSON.stringify(stream))
to show my test input, but nothing goes to the render.
im using ejs to render. How can i get the result passed to the page ?
// GET /dashboard: loads the listings and renders the dashboard view.
// NOTE(review): as posted, the snippet ends without the closing "});" of
// router.get.
router.get('/dashboard', function (req, res) {
// getthem() has no return statement of its own (the `return stream` further
// down belongs to the find callback), so foo is undefined.
var foo = getthem();
// Queries all listings (fields name and streamurl) and builds a
// name -> streamurl map in the find callback.
function getthem() {
var group = "tesint";
console.log('geting for group : ' + group);
var mdlListings = require('../models/listings.js');
var myresult = "tet";
mdlListings.find({}, "name streamurl", function (err, data) {
if (err) {
console.error(err);
return;
}
if (data === null) {
console.log("No results");
return;
}
var stream = { };
data.forEach(function (streams) {
console.log("Got " + streams.name + " " + streams.streamurl);
stream[streams.name] = streams.streamurl;
// stream += 'name: '+streams.name+'},{streamurl: '+streams.streamurl;
// console.log("stram arry "+stream[streams.name] )
console.log("END OF FIND:"+JSON.stringify(stream))
// renderme(stream) is an argument expression, so it is called before forEach
// starts iterating, while `stream` is still empty; its return value is what
// forEach receives as second argument.
}, renderme(stream));
// console.log("Result:", votes);
//var myresult = Object.keys(stream).map(function (name) {
// return { name: name, url: stream[name] };
//})
console.log("before return stream "+stream);
// Returns from the find callback only; the value does not reach getthem()'s caller.
return stream;
});
}
// Renders the dashboard view with `resa` serialised to a JSON string.
function renderme(resa) {
console.log("RESA"+JSON.stringify(resa))
var resa = JSON.stringify(resa);
res.render('dashboard', {
title: 'Dashboard',
user: req.user,
listing: resa
}
)
}
You're passing the result of renderme(stream) as a second argument to forEach(). renderme(stream) is then evaluated immediately before your forEach() callback is called, when stream is still an empty object. My guess is you want something like this:
// Fill the name -> streamurl map completely, then render once with the result.
var listingIndex;
var listing;
for (listingIndex = 0; listingIndex < data.length; listingIndex++) {
  listing = data[listingIndex];
  console.log("Got " + listing.name + " " + listing.streamurl);
  stream[listing.name] = listing.streamurl;
  console.log("END OF FIND:"+JSON.stringify(stream));
}
renderme(stream);
Actually, I figured: why pass the function at all, when I could just do whatever I need directly in there?
That worked perfectly, thanks for the tip.
// The name -> streamurl map is still filled here, but the view below is given
// `data`, not `stream`.
data.forEach(function (streams) {
console.log("Got " + streams.name + " " + streams.streamurl);
stream[streams.name] = streams.streamurl;
});
// Render directly instead of going through renderme(); `listing` is the query
// result itself.
res.render('dashboard', {
title: 'Dashboard',
user: req.user,
listing: data
}
)

azure mobile api variable scope

I have the following Azure Mobile Services API get function. I want to set the isValidUser flag based on if there is a record for the providerKey (from the twitter authentication) in the SQLMembership table (AspNetUserLogins).
While testing, I see the following in the log file, confirming that the flag is being set.
"setting isValidUser [object Object] Number of records = 1"
However, if(isValidUser) is not getting evaluated correctly. The result I get from the API is "Not a registered user." which is set from the else portion of the if(isValidUser) check.
Why is it that the value set inside the mssql.query() is not available outside of it?
// GET handler: intended to return the dbo.Church rows only to users that have
// a login record in dbo.AspNetUserLogins.
exports.get = function(request, response) {
// The user id is split at ":" into provider name and provider user id.
var authenticatedUserId = request.user.userId;
var providerName = authenticatedUserId.split(":")[0];
var providerUserId = authenticatedUserId.split(":")[1];
var isValidUser = false;
console.log('providerName = ' + providerName.trim());
console.log('providerUserId = ' + providerUserId.trim());
// The SQL text is built by concatenating the two values into the statement.
request.service.mssql.query(
"select userId from dbo.AspNetUserLogins where LoginProvider = '" + providerName + "' and ProviderKey = '" + providerUserId + "'",
{
// Runs later, once the query has completed.
success: function(results)
{
console.log('inside success AspNetUserLogins. ' + results + " Number of records = " + results.length);
if (results.length == 1) {
console.log('setting isValidUser ' + results + " Number of records = " + results.length);
isValidUser = true;
}
},
error : function(err)
{
console.log('inside error AspNetUserLogins. ' + err);
response.send(statusCodes.INTERNAL_SERVER_ERROR, { error: err });
}
}
);
// This check runs right after the query has been started, before either
// callback above has been called, so isValidUser is still false here.
if (isValidUser) {
request.service.mssql.query('select * from dbo.Church',
{
success: function(results)
{
console.log('inside success Church' + results);
response.json(statusCodes.OK, results);
},
error : function(err)
{
console.log('inside error : ' + err);
response.send(statusCodes.INTERNAL_SERVER_ERROR, { error: err });
}
}
);
} else {
response.send(statusCodes.INTERNAL_SERVER_ERROR, { error: "Not a registered user." + isValidUser });
}
// Runs unconditionally after the if/else above, i.e. in addition to any
// response already sent there.
response.send(statusCodes.INTERNAL_SERVER_ERROR, { error: "Unexpected end." });
};
The call to mssql.query is an asynchronous function (like many other functions in node.js); when it returns the callback (either the success or the error) hasn't been executed yet, so when you check the isValidUser flag, it still has the original value (false).
What you need to do is to move that code to inside the success callback, and that should work:
exports.get = function(request, response) {
var authenticatedUserId = request.user.userId;
var providerName = authenticatedUserId.split(":")[0];
var providerUserId = authenticatedUserId.split(":")[1];
var isValidUser = false;
console.log('providerName = ' + providerName.trim());
console.log('providerUserId = ' + providerUserId.trim());
var sql = "select userId from dbo.AspNetUserLogins where LoginProvider = ? and ProviderKey = ?";
var sqlParams = [providerName, providerUserId];
request.service.mssql.query(sql, sqlParams, {
success: function(results)
{
console.log('inside success AspNetUserLogins. ' + results + " Number of records = " + results.length);
if (results.length == 1) {
console.log('setting isValidUser ' + results + " Number of records = " + results.length);
isValidUser = true;
}
if (isValidUser) {
request.service.mssql.query('select * from dbo.Church', {
success: function(results)
{
console.log('inside success Church' + results);
response.send(statusCodes.OK, results);
},
error : function(err)
{
console.log('inside error : ' + err);
response.send(statusCodes.INTERNAL_SERVER_ERROR, { error: err });
}
});
} else {
response.send(statusCodes.INTERNAL_SERVER_ERROR, { error: "Not a registered user." + isValidUser });
}
},
error : function(err)
{
console.log('inside error AspNetUserLogins. ' + err);
response.send(statusCodes.INTERNAL_SERVER_ERROR, { error: err });
}
});
};
One more thing: do use parameters in the SQL (also shown above), instead of composing the query directly. In this case (you're using the user id) it shouldn't be a problem, but as a general rule you should use parameters whenever possible to prevent SQL injection attacks.

Resources