I'm working on an application which has to read Excel files and send each line to a server.
Now I would like the requests to run one after another: I want to get the result of server.postRequest(1) before sending the next request, server.postRequest(2).
for (let row = 2; row <= nrows; row++) {
    Excel.mapExcelLine(workbook.Sheets[firstSheetName], row, ncols)
        .then((data) => {
            let idExcel = data.id;
            // callback parameter renamed to syncRow so it no longer shadows the loop variable
            database.get("SELECT * FROM sync WHERE idExcel=?", [idExcel], async function (err, syncRow) {
                if (syncRow == undefined) {
                    // not synced yet: post the Excel id (syncRow is undefined here)
                    let status = await server.postRequest(idExcel);
                }
            });
        });
}
Here is the postRequest function:
postRequest(id) {
    return new Promise(function (resolve, reject) {
        options.body = id;
        request.post(options, function (error, response, body) {
            if (error) return reject(error);
            if (response.headers['msg'] == 'authrequired') {
                // re-authenticate, then retry once and forward the result to the outer promise
                request.post(authOptions, function (error, response, body) {
                    if (response.headers['msg'] == 'ok')
                        postRequest(id).then(resolve, reject);
                    else resolve(402);
                });
            } else if (response.headers['msg'] == 'ok') {
                logger.info('Demand ' + id + ' created');
                resolve(200);
            }
        });
    });
}
Now when I loop through this array, for example:
[1,2,3,4]
the messages from the postRequest function don't appear in the right order.
How can I wait for each request to finish before sending the next one?
The expected output is:
Demand 1 created
Demand 2 created
Demand 3 created
Demand 4 created
You need to await each request, so the next one is only sent after the previous one has resolved.
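For example, a minimal sketch (assuming server.postRequest returns a promise that resolves with the status code, as above): awaiting inside a for...of loop forces each request to finish before the next one starts.

async function postAll(ids) {
    for (const id of ids) {
        // each iteration pauses here until the previous request has resolved
        const status = await server.postRequest(id);
        console.log('postRequest(' + id + ') resolved with ' + status);
    }
}

postAll([1, 2, 3, 4]); // logs "Demand 1 created", "Demand 2 created", ... in order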
Related
In my Node.js application I need to select a set of data from the database with several queries. The number of queries is not known at the start, and after each query I need to check whether I need to make another one. The process looks like this:
var total_result = [];
var n = 0;
db.query('...query...', function (result, error) {
    // Callback with db data response
    total_result.push(...result);
    n += result.length;
    if (n < req_n) {
        // ... Here is the code to repeat the same query ...
        // ?????
    }
});
Thank you.
var total_result = [],
    n = 0;

async function query() {
    return new Promise((resolve, reject) => {
        db.query('...query...', (result, error) => {
            if (error) reject(error);
            // Callback with db data response
            total_result.push(...result);
            n += result.length;
            if (n < req_n) resolve(true);  // more rows needed: query again
            else resolve(false);           // enough rows: stop looping
        });
    });
}

// must run inside an async function (see the sketch below)
while (await query());
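Note that await is only valid inside an async function, so the loop needs an async wrapper to actually run; a minimal sketch:

async function fetchAll() {
    // Each call to query() settles before the next one is issued,
    // so the queries run strictly one after another.
    while (await query()) {}
    return total_result;
}

fetchAll()
    .then((rows) => console.log(rows.length + ' rows collected'))
    .catch((err) => console.log(err));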
After I perform a request to a .m3u8 Master Playlist Url like https://something.example.com/master.m3u8 with
request(input, function (error, response, body) {
    ...
});
and after some processing I get back a set of renditions. I store them in an object, along with some other prepared fields; the result looks like this:
var set = {
    input_url: 'https://something.example.com/master.m3u8',
    renditions: [
        {
            id: 0,
            url: 'https://something.example.com/master_264.m3u8',
            chunks: []
        }, {
            id: 1,
            url: 'https://something.example.com/master_578.m3u8',
            chunks: []
        }, {
            id: 2,
            url: 'https://something.example.com/master_928.m3u8',
            chunks: []
        }
    ]
};
So I have my set of renditions stored in order, from lowest to highest. Now I need to perform another request for each rendition and store the response in that rendition's chunks array.
Problem: After the first callback I get an error message TypeError: Cannot read property 'chunks' of undefined - Here is my code
for (var i = 0; i < set.renditions.length; i++) {
    var done = false;
    request(set.renditions[i].url, function (error, response, body) {
        // by the time this callback runs the loop has finished, so
        // i === set.renditions.length and set.renditions[i] is undefined
        set.renditions[i].chunks.push(body);
        completed_requests++;
        if (completed_requests == set.renditions.length) {
            return callback(null, set);
        }
    });
}
I believe this has something to do with scope? If I don't reference the original var set within the callback but push the responses to another array as they come in, it works, but then they get stored out of order, of course: first the response of the 5th rendition, then the 3rd, then the 4th, and so on (as it is an async operation).
Any ideas how I should solve this, maybe something completely different? Best practice? Thanks!
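(For reference, the TypeError itself comes from all callbacks sharing the single var i; a block-scoped index gives each callback its own copy. A minimal sketch:)

for (let i = 0; i < set.renditions.length; i++) { // let: a new binding per iteration
    request(set.renditions[i].url, function (error, response, body) {
        set.renditions[i].chunks.push(body); // i still holds this iteration's value here
    });
}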
=======================
First Try:
I tried using a forEach, but it seems I don't get any result from it...
function chunks(rendition_set, callback) {
    var request_complete = false;
    var completed_requests = 0;
    rendition_set.renditions.forEach(function (entry) {
        request(entry.url, function (error, response, body) {
            var split = body.split('\n');
            for (var i = 0; i < split.length; i++) {
                if (split[i].indexOf('.ts') != -1) {
                    entry.chunks.push(split[i]);
                }
            }
            completed_requests++;
        });
    });
    // this busy-wait blocks the event loop, so the request callbacks
    // above never get a chance to run and the loop spins forever
    while (request_complete == false) {
        if (completed_requests == rendition_set.renditions.length) {
            request_complete = true;
            return callback(null, rendition_set);
        }
    }
}
So I ended up using async, which handles all requests for all urls automatically, waits until everything is finished, and gives me the result back in one package, in the order the requests were made... which is pretty neat.
async.map(urls, httpGet, function (err, res) {
    if (err) return console.log(err);
    return callback(null, res);
});
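Here httpGet is just a small wrapper passed to async.map; roughly something like this (a sketch, assuming the request module is in scope):

function httpGet(url, cb) {
    // async.map calls this once per url and collects the bodies
    // in the same order as the input array
    request(url, function (error, response, body) {
        cb(error, body);
    });
}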
So this is the code I used to crawl my pages (I'm using the request and cheerio modules):
for (let j = 1; j < nbRequest; j++) {
    const currentPromise = new Promise((resolve, reject) => {
        request(
            `https://www.url${j}`,
            (error, response, body) => {
                if (error || !response) {
                    console.log("Error: " + error);
                    return reject(error); // bail out so we don't touch an undefined response
                }
                console.log("Status code: " + response.statusCode + ", Connected to the page");
                var $ = cheerio.load(body);
                let output = {
                    ranks: [],
                    names: [],
                    numbers: [],
                };
                $('td.rangCell').each(function (index) {
                    if ($(this).text().trim() != "Rang") {
                        output.ranks.push($(this).text().trim().slice(0, -1));
                        nbRanks = nbRanks + 1;
                    }
                });
                $('td.nameCell:has(label)').each(function (index) {
                    output.names.push($(this).find('label.nameValue > a').text().trim());
                });
                $('td.numberCell').each(function (index) {
                    if ($(this).text().trim() != "Nombre") {
                        output.numbers.push($(this).text().trim());
                    }
                });
                console.log("HERE 1");
                return resolve(output);
            }
        );
    });
    promises.push(currentPromise);
}
After that I'm parsing and saving the result in a CSV file using a node module.
At this point I've been able to crawl about 100 pages, but when it comes to much bigger numbers (1000+) I receive a 500 response, meaning I'm being kicked, I think.
So I think the best solution is to delay the requests, but I didn't find a solution.
Do you guys have any idea how the code would look?
What you are looking for is called "Control Flow"; you can achieve it by using async.queue, for example.
If you add every request to the queue, you can control the amount of parallel requests with the number of workers. And you can add a setTimeout to the final part of the request's callback to achieve the delaying of requests.
Additionally, I'd suggest using a "crawler" package (instead of building your own), e.g. npm-crawler, as they ship with built-in rate limiting and have already taken care of other things that you might face next :) e.g. a user-agent pool.
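As a rough sketch of the crawler route (option names as I recall them from the package's README, so treat them as assumptions to verify; rateLimit is the minimum delay in ms between requests):

const Crawler = require("crawler");

const c = new Crawler({
    maxConnections: 1, // one request at a time
    rateLimit: 1500,   // wait 1.5 seconds between requests
    callback: function (error, res, done) {
        if (error) {
            console.log(error);
        } else {
            const $ = res.$; // cheerio is preloaded on the response
            // ... extract ranks/names/numbers here, as in the code above ...
        }
        done(); // tell the crawler this task is finished
    }
});

c.queue("https://www.url1"); // same placeholder urls as above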
Update:
const async = require("async");
const delayTime = 1500; // wait 1.5 seconds after every request

function getRequestPromise(csvLine) {
    return new Promise(/* make your request here */);
}

const asyncQueue = async.queue(function (task, callback) {
    getRequestPromise(task).then(_ => {
        setTimeout(() => {
            callback(null);
        }, delayTime);
    });
}, 1); // 1 = one request at a time

// pseudo: push every csv line onto the queue
for (const line of csv) {
    asyncQueue.push(line, () => {});
}

asyncQueue.drain = () => {
    console.log("finished.");
};
Basically what I wanted to do is wait for the results of the first two functions and pass those values to a third function. Therefore, with the Node Q module, I tried the code below.
getAddressDetail("", 51.528308, -0.3817812).then(function (pickupLoc) {
return pickupLoc.location;
}).then(function (pickupLocation) {
var drop = getAddressDetail(, 51.528308, -0.3817812);
return [pickupLocation,drop.location];
})
.then(function (pickupLocation, dropLocation) {
console.log("#####" + pickupLocation +"$$$" + dropLocation)
})
.done();
EDIT
function getAddressDetail(location = "", lat, long) {
    var deferred = Q.defer();
    var getLocation = "";
    if (location == '' || location == 'undefined') {
        var url = 'https://maps.googleapis.com/maps/api/geocode/json?key={APIKEY}&latlng=' + lat + ',' + long + '&sensor=false';
        request({
            url: url,
            json: true
        }, function (error, response, body) {
            if (!error && response.statusCode === 200) {
                getLocation = body.results[0].formatted_address;
                deferred.resolve({
                    'location': getLocation
                });
                //console.log("*******" + getLocation);
            } else {
                deferred.reject(error); // otherwise the promise never settles on failure
            }
        });
    } else {
        getLocation = location;
        deferred.resolve({
            'location': getLocation
        });
    }
    return deferred.promise;
}
However, this code doesn't return the value defined in the second then block ("dropLocation"); it comes back as undefined. Do you see any issues here?
Thanks in advance.
There are a couple of dodgy things happening in your code.
return [pickupLocation, drop.location] will result in one parameter (an array) in the next handler, so instead of .then(function (pickupLocation, dropLocation) {}) you should use .then(function (results) {}).
var drop = getAddressDetail("", 51.528308, -0.3817812); looks like it isn't treated as a promise, because you immediately read the location from the result (return [pickupLocation, drop.location];). So why not fetch this value in the next handler?
If getAddressDetail() DOES return a promise, just compose an array of promises and use the .spread() operation, as the result of the first promise is not required in the second promise.
An example:
var promiseArray = [];
promiseArray.push(getAddressDetail("", 51.528308, -0.3817812));
promiseArray.push(getAddressDetail("", 51.528308, -0.3817812));
Q.spread(promiseArray, function (pickupLocation, drop) {
    // first parameter (pickupLocation) = result of first promise
    // second parameter (drop) = result of second promise
});
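And since the original goal was to pass both values to a third function, the spread callback is exactly the place to do that; a minimal sketch, where processLocations is a hypothetical consumer:

Q.spread(promiseArray, function (pickup, drop) {
    // both promises have resolved by the time we get here
    processLocations(pickup.location, drop.location); // hypothetical third function
}).catch(function (err) {
    console.log(err); // surfaces a rejected geocoding request
});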
I'm using cheerio, request and Node.js.
When I run the script below, it outputs the names in the wrong order. I believe that this is caused by its asynchronous nature; how can I make it work in the "right" order? Do I need to use a sync package, or is there a way to change it so it works in a sync way?
app.get('/returned', function (req, res) {
    for (var y = 0; y < 10; y++) {
        var url = "http://example.com" + y + "/person.html";
        request(url, function (err, resp, body) {
            $ = cheerio.load(body);
            var links = $('#container');
            var name = links.find('span[itemprop="name"]').html(); // name
            if (name == null) {
                console.log("returned null");
            } else {
                console.log(name);
            }
        });
    }
});
Promise makes this relatively easy:
app.get('/returned', function (req, res) {
    let urls = [];
    for (let y = 0; y < 10; y++) {
        urls.push('http://example.com' + y + '/person.html');
    }
    Promise.all(urls.map(function (url) {
        return new Promise(function (resolve, reject) {
            request(url, function (err, resp, body) {
                if (err) { return reject(err); }
                let $ = cheerio.load(body);
                let links = $('#container');
                let name = links.find('span[itemprop="name"]').html(); // name
                resolve({name: name, links: links, url: url});
            });
        });
    })).then(function (result) {
        result.forEach(function (obj) {
            if (obj.name == null) {
                console.log(obj.url, "returned null");
            } else {
                console.log(obj.url, obj.name);
            }
        });
    }).catch(function (err) {
        console.log(err);
    });
});
I started by creating an array of urls to get, then mapped that to an array of promises. When each request completes, I resolved its promise with the name, url, and links. When all promises were complete, I looped over the result, which will be in the original order. This runs in parallel.
Nope, you shouldn't have to use a sync package. IMO the cleanest way is to use a mature 3rd party library.
I'd recommend async.
The async.series method would execute all request functions in the order they are given, then allow you to register a callback to fire when all requests have been made, or when an error has occurred.
https://github.com/caolan/async#seriestasks-callback
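A minimal sketch of that approach, reusing the urls from the question (each task calls its callback when done, and async.series runs them strictly one after another):

var async = require('async');

var tasks = [];
for (var y = 0; y < 10; y++) {
    (function (url) {
        tasks.push(function (cb) {
            request(url, function (err, resp, body) {
                cb(err, body); // hand each body back in task order
            });
        });
    })('http://example.com' + y + '/person.html');
}

async.series(tasks, function (err, bodies) {
    if (err) { return console.log(err); }
    // bodies[0..9] arrive in the same order as the tasks above,
    // ready for the cheerio parsing shown earlier
});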