How could we access the request parameters from the response callback when using Request module?
For example, the following "dog" (and so on) value could be passed via looping a list:
var u = require('util');
var url = "http://example.com/animals/%s";
// Requests the URL built from the template and "dog"; the callback receives
// only (error, response, body), so the formatted-in value is not one of its arguments.
request.get({uri: u.format(url, "dog")}, function (error, response, body) {
  if (!error && response.statusCode == 200) {
    //how could we access the value 'dog' here?
    //something like this: console.log(uri.params.animal);
  }
});
You can use JavaScript closures: the callback can read any variable that is in scope where the callback is defined.
var u = require('util');
var url = "http://example.com/animals/%s";
var param = "dog";
// The callback is a closure over `param`, so the value used to build the URL
// is still readable when the response arrives.
// When issuing requests from a loop, give every iteration its own binding
// (`let`/`const`, or an array `forEach` callback parameter); a single shared
// `var` would hold only the last value by the time the callbacks run.
request.get({uri: u.format(url, param)}, function (error, response, body) {
  if (!error && response.statusCode == 200) {
    // `param` comes from the enclosing scope, not from the response.
    console.log(param);
  }
});
Related
I've written a script in node using two different functions, getPosts() and getContent(), supplying callbacks to them in order to print the result by calling a standalone function, getResult(). The selectors defined within my script are flawless.
However, when I execute my script, it prints nothing. It doesn't throw any error either. I tried to mimic the logic provided by Neil in this post.
How can I make it a go?
I've written so far:
var request = require('request');
var cheerio = require('cheerio');
const url = 'https://stackoverflow.com/questions/tagged/web-scraping';
/**
 * Requests the page at `url` and, for every element matching
 * `.summary .question-hyperlink`, calls `callback(text, href)`.
 * Nothing is reported when the request errors or the status is not 200.
 *
 * @param {Function} callback - invoked once per matched element with its text and its `href` attribute.
 */
function getPosts(callback) {
  request(url, (error, response, html) => {
    if (error || response.statusCode != 200) {
      return;
    }
    const $ = cheerio.load(html);
    $('.summary .question-hyperlink').each((index, element) => {
      const anchor = $(element);
      const title = anchor.text();
      const href = anchor.attr("href");
      callback(title, href);
    });
  });
}
/**
 * Requests `link` exactly as given and, on a 200 response, reports the `href`
 * of the first `.user-details > a` element together with the inputs.
 * The callback is not invoked when the request errors or the status is not 200.
 *
 * @param {*} item - passed through to the result unchanged.
 * @param {string} link - handed to `request` as-is.
 * @param {Function} callback - called with `{item, link, proLink}`.
 */
function getContent(item, link, callback) {
  request(link, (error, response, html) => {
    const succeeded = !error && response.statusCode == 200;
    if (!succeeded) {
      return;
    }
    const $ = cheerio.load(html);
    const firstProfileAnchor = $('.user-details > a').eq(0);
    const proLink = firstProfileAnchor.attr("href");
    callback({item, link, proLink});
  });
}
/**
 * Chains the two steps: every (item, link) pair reported by getPosts is passed
 * to getContent, and each result getContent reports is printed with console.log.
 */
function getResult() {
  const printOutput = (output) => {
    console.log(output);
  };
  const fetchDetails = (item, link) => {
    getContent(item, link, printOutput);
  };
  getPosts(fetchDetails);
}
getResult();
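The link value that you receive from getPosts is a relative link (a path without the host), which means that the request fails. You can extract the host into its own variable and create the full URL from the host + the relative link. Note that once url itself is made relative, as below, the listing request inside getPosts must also be sent to host + url.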
const host = 'https://stackoverflow.com';
const url = '/questions/tagged/web-scraping';
// ...
/**
 * Fetches the question page behind `link` and reports the `href` of the first
 * `.user-details > a` element together with the inputs.
 *
 * @param {*} item - passed through to the result unchanged.
 * @param {string} link - link taken from the listing page; may be relative to `host`.
 * @param {Function} callback - called with `{item, link, proLink}` on a 200 response.
 *   It is not called on failure; the failure is logged instead.
 */
function getContent(item, link, callback) {
  // Resolve against the host instead of concatenating strings: a relative link
  // gets the host prepended, while an already-absolute link is left intact.
  const absoluteUrl = new URL(link, host).href;
  request(absoluteUrl, function (error, response, html) {
    if (error || response.statusCode !== 200) {
      // Surface the failure; silently returning is what made the original
      // script print nothing at all.
      console.error('Request failed for ' + absoluteUrl, error || response.statusCode);
      return;
    }
    const $ = cheerio.load(html);
    const proLink = $('.user-details > a').eq(0).attr("href");
    callback({item, link, proLink});
  });
}
Here is the code
// Posts a form-encoded "sendMoney" request; on a 200 response the JSON body's
// `transactionBytes` field is read into a variable local to the callback.
request.post(
  {
    headers: {"content-type": "application/x-www-form-urlencoded"},
    url: "https://testardor.jelurida.com/nxt?",
    form: {requestType: "sendMoney"},
  },
  (error, response, body) => {
    if (error || response.statusCode != 200) {
      return;
    }
    const parsedBody = JSON.parse(response.body);
    const transactionBytes = parsedBody.transactionBytes;
  },
);
I would like to take transactionBytes and pass it into another API request after this. How do I make it a global variable? I tried global.transactionBytes and window.transactionBytes, and neither worked. I have also read that it is bad to declare a global variable like this; is there a better way to do this?
If it is just one file, you can create a global variable simply by declaring it outside of your function.
// File-level variable: visible to every function in this file.
var globalvariable = 0;

/**
 * Posts the form-encoded "sendMoney" request and, on a 200 response, stores
 * the `transactionBytes` field of the JSON body in `globalvariable`.
 * The assignment happens inside the response callback, so `globalvariable`
 * still holds its previous value until that callback has run.
 */
function ApiCall() {
  request.post(
    {
      headers: {"content-type": "application/x-www-form-urlencoded"},
      url: "https://testardor.jelurida.com/nxt?",
      form: {requestType: "sendMoney"},
    },
    function (error, response, body) {
      if (!error && response.statusCode == 200) {
        var transactionBytes = JSON.parse(response.body).transactionBytes;
        // Publish the value; without this line the parsed result is thrown
        // away when the callback returns.
        globalvariable = transactionBytes;
      }
    },
  );
}
If you want to use the same global variable in multiple files, you can create a helper file, for example:
help.js
// Export one shared object. Every require('./help') returns this same object,
// so a property written in one file can be read in another. Exporting the bare
// number would not work: each importer would only get a copy of the value, and
// reassigning its local variable would change nothing for the other files.
var globalVariable = { value: 0 };
module.exports = globalVariable;
firstfile.js
var globalVariable = require('./help');
/**
 * Posts the "sendMoney" request and, on a 200 response, stores the
 * `transactionBytes` field of the JSON body on the shared object.
 */
function ApiCall() {
  request.post(
    {
      headers: {"content-type": "application/x-www-form-urlencoded"},
      url: "https://testardor.jelurida.com/nxt?",
      form: {requestType: "sendMoney"},
    },
    function (error, response, body) {
      if (!error && response.statusCode == 200) {
        var transactionBytes = JSON.parse(response.body).transactionBytes;
        // Mutate the shared object; do not reassign `globalVariable` itself.
        globalVariable.value = transactionBytes;
      }
    },
  );
}
anotherfile.js
var globalVariable = require('./help');
function someotherthing() {
  // Holds the initial 0 until ApiCall's response callback has stored a value.
  console.log(globalVariable.value);
}
Using a global variable is bad practice unless it is really required; the other way is simply to pass the value on to the next function. Global variables can cause memory leaks if not used properly.
How do I call a callback function after all the functions have finished?
All functions must start at the same time, and when all of them have finished running, the callback should run.
/**
 * Calls the twenty site parsers parse1site() .. parse20site() one after another.
 * NOTE(review): `callback` is accepted but never invoked anywhere in this body.
 *
 * @param {Function} callback - currently unused.
 */
function step_one(callback){
parse1site();
parse2site();
parse3site();
parse4site();
parse5site();
parse6site();
parse7site();
parse8site();
parse9site();
parse10site();
parse11site();
parse12site();
parse13site();
parse14site();
parse15site();
parse16site();
parse17site();
parse18site();
parse19site();
parse20site();
}
Example function
/**
 * Fetches https://site1.com with needle. On a 200 response the body is stored
 * under data["site1"] and "OK" is logged; otherwise "error" is logged.
 */
function parse1site() {
  const siteUrl = "https://site1.com";
  needle.get(siteUrl, (error, response) => {
    const succeeded = !error && response.statusCode == 200;
    if (!succeeded) {
      console.log("error");
      return;
    }
    data["site1"] = response.body;
    console.log("OK");
  });
}
I would change your usage of needle to the promise API and then use Promise.all
// Calling needle as needle(method, url) without a callback yields a promise,
// so both requests are started right away and run concurrently.
var p1 = needle('get', 'https://server.com/posts/12');
var p2 = needle('get', 'https://server.com/posts/13');
//...
Promise.all([p1, p2])
  .then((data) => {
    // here you will get the response of all of your requests in array data
    // (same order as the promises above); this is the place to run the
    // callback that must wait for every request.
  })
  .catch((error) => {
    // Promise.all rejects as soon as any one request fails; handle it here
    // so the failure does not become an unhandled rejection.
    console.error(error);
  });
In my main code, I do the following:
// Deliberately not named `module`: inside a CommonJS file that identifier
// already refers to the current module object, and redeclaring it with `var`
// would overwrite it for the rest of the file.
var myModule = require('./module');
// FooB is handed over as the callback that FooA calls with the response body.
myModule.FooA(myModule.FooB);
module.js contains the next code:
var request = require('request'); //using of npm "request"
exports.FooB = function(data){ /*operations on data here*/ };
exports.FooA = function(callback){
var url = some_link;
request(url, function (error, response, body) {
if (!error && response.statusCode == 200) {
callback(body);
};
});
};
The issue is that apparently, callback(body) doesn't run even if the conditions meet. var result = request(url) followed by exports.FooB(result) does the job, but as far as I can see, obviously does not act like a callback, and would produce troubles.
What is the proper way of defining a callback function in such a case? Do I need at all, or it is actually synchronous and I missed to notice it?
Use error-first callbacks: make the error the first parameter of the callback. This is the default convention in Node.js core and is a good one for your own project's functions.
And, as @ShanSan commented, use console.log, console.error or console.trace for debugging.
Example:
var request = require('request'); //using of npm "request"
exports.FooB = function(error, data){ /*operations on data here*/ };
exports.FooA = function(callback){
var url = some_link;
request(url, function (error, response, body) {
if (error || response.statusCode != 200) {
// pass error to callback and if use return you dont need other code block bellow
console.error('Error in request', error);
return callback(error, null);
}
// here run if dont have errors
// if need more info use the console.log(request); or console.log(body);
// use error in first param in callback functions
callback(null, body);
});
};
I have the below code and it is working fine to get:
<troveUrl>http://trove.nla.gov.au/work/23043869</troveUrl>
But I would like to get the value after 'id' in the following from the same page and cannot get it!
<work id="23043869" url="/work/23043869">
here is the code that i currently have
var request = require ('request'),
cheerio = require ('cheerio');
// Fetches one search result from the Trove API and logs the text content of
// every `troveurl` element found in the response.
request('http://api.trove.nla.gov.au/result?key=6k6oagt6ott4ohno&zone=book&q-year1-date=2000&l-advformat=Thesis&l-australian=y&q-term2=&q-term3=&q-term0=&q-field1=title%3A&q-type2=all&q-field0=&q-term1=&q-type3=all&q-field3=subject%3A&q-type0=all&q-field2=creator%3A&q-type1=all&l-availability=y%2Ff&q=+date%3A[2000+TO+2014]&q-year2-date=2014&n=1', (error, response, html) => {
  if (error || response.statusCode != 200) {
    return;
  }
  const $ = cheerio.load(html);
  $('troveurl').each((i, element) => {
    const node = $(element);
    console.log(node.text());
  });
});
Any assistance appreciated.
You should pass xmlMode: true in the options object, then you can parse it as XML.
You can then grab the tag and data with $('tag').attr('attribute') and $('tag').text() to get the data between the tags as you've done.
var request = require('request'),
cheerio = require('cheerio');
// Fetches one search result from the Trove API, parses the response as XML
// and logs the `id` attribute of the `work` element.
request('http://api.trove.nla.gov.au/result?key=6k6oagt6ott4ohno&zone=book&q-year1-date=2000&l-advformat=Thesis&l-australian=y&q-term2=&q-term3=&q-term0=&q-field1=title%3A&q-type2=all&q-field0=&q-term1=&q-type3=all&q-field3=subject%3A&q-type0=all&q-field2=creator%3A&q-type1=all&l-availability=y%2Ff&q=+date%3A[2000+TO+2014]&q-year2-date=2014&n=1', function(error, response, html) {
  if (error || response.statusCode !== 200) {
    // Report the failure instead of finishing without any output.
    console.error('Request failed', error || response.statusCode);
    return;
  }
  var $ = cheerio.load(html, {
    xmlMode: true
  });
  // .attr() reads from the first matched element; the query asks for a
  // single result (n=1).
  console.log($('work').attr('id'));
});
The real issue lies in the syntax you used to get the value after 'id'. The following code will not console.log the id out.
// NOTE(review): presumably still inside the `.each` callback from the question, where `this` is the matched element.
var id = $(this);
// .text() yields the data between the tags, not the value of the `id` attribute.
console.log(id.text());
The correct syntax should be $('your element').attr('id') like is mentioned in Ben Fortune's answer above. However, passing xmlMode: true in the options object is not a necessity.
Passing xmlMode: false in the options will also work if you are using the correct syntax.