Request to API within while loop with accumulator - node.js

Many people have asked on this site how to loop through a list of URLs and make a GET request to each of them. That doesn't quite fit my case: the number of GET requests I make depends on the values I get back from the initial API request.
As a general outline of what I currently have:
var total = null;
var curr = 0;
while (total == null || curr < total) {
    request.get('https://host.com/skip=' + curr, function(error, response, body) {
        var data = JSON.parse(body);
        total = data['totalItems'];
        curr += data.items.length;
    });
}
Because Node.js makes these requests asynchronously, this gives me a forever loop: total and curr always stay null and 0 respectively. I'm not really sure how to rework this to use Promises and callbacks; can someone please help?

So there are a few ways to do this, but the easiest is probably to just recurse on the function that fetches the results.
It's not tested but should be in the ballpark:
function fetch(skip, accumulator, cb) {
    // do some input sanitization
    request.get('https://host.com/skip=' + skip, (err, res, body) => {
        // commonly you'd just call back with the error, but this also hands back
        // the results fetched so far, in case you got an error partway through
        if (err) return cb(err, accumulator);
        var data = JSON.parse(body);
        accumulator.total = data['totalItems'];
        accumulator.items = accumulator.items.concat(data.items);
        if (accumulator.items.length === accumulator.total) return cb(null, accumulator);
        return fetch(accumulator.items.length, accumulator, cb);
    });
}
fetch(0, { items: [] }, console.log);
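If you'd rather avoid explicit recursion, the same accumulation can be written with async/await. A rough sketch, assuming a promise-returning HTTP client such as node-fetch in place of request (https://host.com is the placeholder host from the question):
const nodeFetch = require('node-fetch');

async function fetchAll() {
    let total = null;
    let items = [];
    // the await makes each iteration wait for the previous response,
    // so total and items.length actually advance between loop tests
    while (total === null || items.length < total) {
        const res = await nodeFetch('https://host.com/skip=' + items.length);
        const data = await res.json();
        total = data.totalItems;
        items = items.concat(data.items);
    }
    return items;
}

fetchAll().then(console.log).catch(console.error);
This is essentially the original while loop, fixed: the loop condition is only re-tested after each response has arrived.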

In Node/Express, how to match or compare values in nested promises using csv-to-array

I have a Node/Express partial that is being called with AJAX, and is supposed to send a status update back to the view after 2 subsequent API calls are made. This workflow relies on the csv-to-array module to read a ship-orders.csv file and determine whether the second API call (POST to Shipments) has already occurred. It is supposed to do this by matching the OrderNumber in the csv file to the returned OrderNumber from the FindOrders endpoint (the first API).
The problem is that I am creating 2 arrays of order numbers to compare, but matching the first set of order numbers to the second set either always returns true or always returns false, when it very clearly should be "true" for the first record in the csv and "false" for every other.
Before getting into the bulk of the code, here's the promise that reads the csv file into an array:
csv-to-array:
var csvShipPromise = new Promise(function(resolve, reject){
    var csvColumns = ['ChannelName', 'OrderNumber', 'LineNumber', 'WarehouseCode', 'Qty', 'Carrier', 'TrackingNumber', 'Shipdate', 'ShipMethod'];
    var csvShipArr;
    csvArray({
        file: shipLog,
        columns: csvColumns
    }, function(err, array){
        if (err) return reject(err); // propagate csv read errors
        csvShipArr = array;
        resolve(csvShipArr);
    });
});
Next I have a long promise that gets executed when the request to the partial is made. The comparison between logged OrderNumbers and OrderNumbers that need to be posted to Shipments is the 5th "then" block (and it's commented in the code below).
router.get and chained promise:
router.get('/', function(req, res, next) {
    findPromise.then(function(findData){
        //Properly format xml string
        var foundData = replaceAll(findData, '&lt;', '<');
        foundData = replaceAll(foundData, '&gt;', '>');
        return foundData;
    }).then(function(foundData){
        //Parse xml to JSON and stringify
        var parsedFound;
        parseString(foundData, function(err, result){ //uses an xml to json module
            parsedFound = JSON.stringify(result);
        });
        return(parsedFound);
    }).then(function(parsedStr){
        //Parse JSON and return an array of objects
        var parsedJson = JSON.parse(parsedStr);
        var orders = parsedJson['soap:Envelope']['soap:Body'][0]['FindOrders'][0]['orders'][0]['order'];
        return orders;
    }).then(function(orders){
        //Get only orders with a tracking number.
        var trackArray = [];
        var ord;
        for(ord in orders){
            var postObj = orders[ord];
            if(postObj.TrackingNumber[0].length > 1){
                trackArray.push(postObj);
            }
        }
        return trackArray; //array of orders that contain tracking numbers
    }).then(function(trackArray){
        /**** This is the block that is causing problems. *****/
        var tItm;
        var loggedOrders = [];
        for(tItm in trackArray){
            var alreadyLogged = false;
            var trackedItm = trackArray[tItm];
            var trackedOrderNum = trackedItm.ReferenceNum;
            csvShipPromise.then(function(csvOrders){
                var csv;
                var loggedOrderArr = [];
                for (csv in csvOrders){
                    var csvItm = csvOrders[csv];
                    var csvOrderNum = csvItm.OrderNumber; //gets the OrderNumber as expected
                    loggedOrderArr.push(csvOrderNum);
                }
                return loggedOrderArr; //Return a simple array of all OrderNumbers
            }).then(function(loggedOrderArr){
                console.log(loggedOrderArr);
                console.log(trackedOrderNum);
                var ord;
                for (ord in loggedOrderArr){
                    if(trackedOrderNum == loggedOrderArr[ord]){
                        console.log('found');
                        alreadyLogged = true;
                    }
                    else {
                        console.log('not found');
                        alreadyLogged = false;
                    }
                }
                return loggedOrderArr; //Simply returning this value because the alreadyLogged test isn't working.
            });
            /* Here is where the test fails.
               It shouldn't, because there are, say, 4 OrderNumbers in the result of the first API call,
               and only 1 OrderNumber logged in the CSV.
               So it should be true once, and false 3 times.
               But it is true all the time. */
            if(alreadyLogged){
                console.log('found'); //Always logs true/found.
            } else {
                console.log('not found');
            }
        }
        return trackArray; //Just passing the array to the view, for now.
    }).then(function(obj){
        res.send(obj);
        return(obj);
    }).catch(function(err){
        console.log(err);
    });
});
When I console.log the values of trackArray and loggedOrderArr, I see that there should be an intersection between an array of 4 values and an array of 1 value, but for some reason the comparison, if(trackedOrderNum == loggedOrderArr[ord]), isn't working.
Alright, I'm gonna be honest: your code made my eyes swim. But as far as I can tell, a few things pop up:
move var alreadyLogged = false; to before the loop;
then reset alreadyLogged = false; after the if(alreadyLogged) statement.
I think it has to do with timing as much as scope. You are checking the boolean value of a var that has not changed yet, because your promises have not resolved at the point of if(alreadyLogged).
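To see why, here is a minimal illustration of that timing problem (somePromise is a stand-in for csvShipPromise):
var flag = false;
somePromise.then(function () {
    flag = true; // runs later, on a future turn of the event loop
});
console.log(flag); // still false: this synchronous line runs first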
Might I suggest a different approach?
Why not make use of Array.prototype.indexOf()?
Let's say you have two arrays to compare, arrA and arrB; you can see if an item exists like so:
var index = arrA.indexOf(arrB[0]);
if(index == -1){
    console.log('No Match');
}
else{
    console.log('Match found');
}
No need for any preset flags to see if one array contains an element.
Hope it helps.
A bit more context:
var index = loggedOrderArr.indexOf(trackedOrderNum);
if(index == -1){
    console.log('No Match');
    // -1 basically means that there is no instance of trackedOrderNum in loggedOrderArr
}
else{
    console.log('Match found');
}
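On any recent Node version (6+) you can also express the same check more directly with Array.prototype.includes() (ES2016):
if (loggedOrderArr.includes(trackedOrderNum)) {
    console.log('Match found');
} else {
    console.log('No Match');
}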
What you are attempting appears to be reasonably simple. You are just overwhelming yourself with awkward flow control and bulky code.
As it stands, asynchronous flow isn't quite right, chiefly because parseString() is not promisified. A value returned from a raw nodeback won't propagate down a .then chain.
In addition, asynchronous flow will improve with:
application of Promise.all() up front to aggregate the two essential data-delivering promises, csvShipPromise and findPromise;
the realisation that wholly synchronous steps in a promise chain can be merged with the next step.
And, the bulk of the synchronous code will reduce by employing several Array methods:
Array.prototype.filter()
Array.prototype.map()
Array.prototype.includes()
Boiling it down to somewhere near the bare minimum, I get the following router.get() expression:
router.get('/', function(req, res, next) {
    return Promise.all([csvShipPromise, findPromise])
    .then(([csvOrders, findData]) => { // destructuring
        let loggedOrderArr = csvOrders.map(order => order.OrderNumber);
        let foundData = replaceAll(findData, '&lt;', '<');
        foundData = replaceAll(foundData, '&gt;', '>');
        return new Promise((resolve, reject) => { // promisify parseString() on the fly
            parseString(foundData, (err, result) => {
                if(err) reject(err);
                else resolve(result['soap:Envelope']['soap:Body'][0].FindOrders[0].orders[0].order); // does this expression really return `orders` (plural)?
            });
        })
        .then(orders => {
            let trackArray = orders.filter(postObj => postObj.TrackingNumber[0].length > 1); // filter orders to eliminate those without a tracking number
            let loggedOrders = trackArray.filter(trackedItm => loggedOrderArr.includes(trackedItm.ReferenceNum));
            // let unloggedOrders = trackArray.filter(trackedItm => !loggedOrderArr.includes(trackedItm.ReferenceNum));
            res.send(loggedOrders); // or res.send(unloggedOrders), depending on what you want
        });
    })
    .catch(err => {
        console.log(err);
        res.status(500).send(err.message); // or similar
    });
});
untested - I may have made mistakes, though hopefully ones that are simple to correct
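As an aside, on Node 8+ you could promisify parseString once with the built-in util module instead of wrapping it inline. A sketch, assuming parseString has the usual (input, callback) nodeback signature:
const util = require('util');
const parseStringP = util.promisify(parseString);

// then, inside the chain:
// return parseStringP(foundData)
//     .then(result => result['soap:Envelope']['soap:Body'][0].FindOrders[0].orders[0].order);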

Concatenating API responses in the correct sequence asynchronously

So I'm building a simple wrapper around an API to fetch all results of a particular entity. The API method can only return up to 500 results at a time, but it's possible to retrieve all results using the skip parameter, which can be used to specify what index to start retrieving results from. The API also has a method which returns the number of results there are that exist in total.
I've spent some time battling with the request package, trying to come up with a way to concatenate all the results in order and then execute a callback that passes all the results through.
This is my code currently:
Donedone.prototype.getAllActiveIssues = function(callback){
    var url = this.url;
    request(url + `/issues/all_active.json?take=500`, function (error, response, body) {
        if (!error && response.statusCode == 200) {
            var data = JSON.parse(body);
            var totalIssues = data.total_issues;
            var issues = [];
            for (let i=0; i < totalIssues; i+=500){
                request(url + `/issues/all_active.json?skip=${i}&take=500`, function (error, response, body){
                    if (!error && response.statusCode == 200) {
                        console.log(JSON.parse(body).issues.length);
                        issues.concat(JSON.parse(body).issues);
                        console.log(issues); // returns [] on all occasions
                        //callback(issues);
                    } else{
                        console.log("AGHR");
                    }
                });
            }
        } else {
            console.log("ERROR IN GET ALL ACTIVE ISSUES");
        }
    });
};
So I'm starting off with an empty array, issues. I iterate through a for loop, each time increasing i by 500 and passing that as the skip param. As you can see, I'm logging how many issues each response contains before concatenating them with the main issues variable.
The output, from a total of 869 results, is this:
369
[]
500
[]
Why is my issues variable empty when I log it out? There are clearly results to concatenate with it.
A more general question: is this approach the best way to go about what I'm trying to achieve? I figured that even if my code did work, the nature of asynchronicity means it's entirely possible for the results to be concatenated in the wrong order.
Should I just use a synchronous request library?
Why is my issues variable empty when I log it out? There are clearly
results to concatenate with it.
A main problem here is that .concat() returns a new array. It doesn't add items onto the existing array.
You can change this:
issues.concat(JSON.parse(body).issues);
to this:
issues = issues.concat(JSON.parse(body).issues);
to make sure you are retaining the new concatenated array. This is a very common mistake.
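If you'd rather mutate the existing array in place, push with the spread operator does the same job (ES2015+):
issues.push(...JSON.parse(body).issues); // appends in place, no reassignment needed
One caveat: spreading a very large array into push can exceed engine argument-count limits, so for huge chunks the concat-and-reassign form is safer.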
You also potentially have sequencing issues in your array because you are running a for loop which is starting a whole bunch of requests at the same time and results may or may not arrive back in the proper order. You will still get the proper total number of issues, but they may not be in the order requested. I don't know if that is a problem for you or not. If that is a problem, we can also suggest a fix for that.
A more general question: is this approach the best way to go about
what I'm trying to achieve? I figured that even if my code did work,
the nature of asynchronicity means it's entirely possible for the
results to be concatenated in the wrong order.
Except for the ordering issue which can also be fixed, this is a reasonable way to do things. We would have to know more about your API to know if this is the most efficient way to use the API to get your results. Usually, you want to avoid making N repeated API calls to the same server and you'd rather make one API call to get all the results.
Should I just use a synchronous request library?
Absolutely not. node.js requires learning how to do asynchronous programming. It is a learning step for most people, but is how you get the best performance from node.js and should be learned and used.
Here's a way to collect all the results in reliable order using promises for synchronization and error propagation (which is hugely useful for async processing in node.js):
// promisify the request() function so it returns a promise
// whose fulfilled value is the request result
function requestP(url) {
    return new Promise(function(resolve, reject) {
        request(url, function(err, response, body) {
            if (err || response.statusCode !== 200) {
                reject({err: err, response: response});
            } else {
                resolve({response: response, body: body});
            }
        });
    });
}

Donedone.prototype.getAllActiveIssues = function() {
    var url = this.url;
    return requestP(url + `/issues/all_active.json?take=500`).then(function(results) {
        var data = JSON.parse(results.body);
        var totalIssues = data.total_issues;
        var promises = [];
        for (let i = 0; i < totalIssues; i += 500) {
            promises.push(requestP(url + `/issues/all_active.json?skip=${i}&take=500`).then(function(results) {
                return JSON.parse(results.body).issues;
            }));
        }
        return Promise.all(promises).then(function(results) {
            // results is an array of each chunk (which is itself an array) so we have an array of arrays
            // now concat all results in order
            return Array.prototype.concat.apply([], results);
        });
    });
};

xxx.getAllActiveIssues().then(function(issues) {
    // process issues here
}, function(err) {
    // process error here
});
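As a side note, the final flattening step has more modern equivalents. With results being the array of chunk arrays delivered by Promise.all(), all three of these produce the same ordered, flattened array:
var flatA = Array.prototype.concat.apply([], results); // works everywhere
var flatB = [].concat(...results);                     // ES2015 spread
var flatC = results.flat();                            // ES2019 (Node 11+)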

Node.js: async.map getting slower

Hello,
I use Node.js to provide an API for storing data on a MongoDB database.
I ran multiple tests on a read method, which takes ids and returns the corresponding documents. The point is that I must return these documents in the specified order. To ensure that, I use the following code:
// Sequentially fetch every element
function read(ids, callback) {
    var i = 0;
    var results = [];
    function next() {
        db.findOne(ids[i], function (err, doc) {
            results.push(err ? null : doc);
            if (ids.length > ++i) {
                return next();
            }
            callback(results);
        });
    }
    next();
}
This way, documents are fetched one-by-one, in the right order. It takes about 11s on my laptop to retrieve 27k documents.
However, I thought that it was possible to improve this method:
// Asynchronously map the whole array
var async = require('async');

function read(ids, callback) {
    async.map(ids, db.findOne.bind(db), callback);
}
After running a single test, I was quite satisfied seeing that the 27k documents were retrieved in only 8s using simpler code.
The problem happens when I repeat the same request: the response time keeps growing (proportionally to the number of elements retrieved): 9s 10s 11s 12s.... This problem does not happen in the sequential version.
I tried two versions of Node.js, v6.2.0 and v0.10.29. The problem is the same. What causes this latency and how could I suppress it?
Try async.mapLimit to prevent overload. You'll need some tests to tune the limit value for your environment.
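A sketch of what that looks like, keeping the read signature from the question (the limit of 10 is just a starting point to tune):
var async = require('async');

function read(ids, callback) {
    // at most 10 findOne calls in flight at any one time
    async.mapLimit(ids, 10, db.findOne.bind(db), callback);
}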
But find({_id: {$in: list}}) is always better, because it makes a single database request instead of many. I suggest you try restoring the original order client-side.
Something like this:
function read(ids, cb) {
    db.find(
        {_id: {$in: ids.map(id => mongoose.Types.ObjectId(id))}},
        process
    );
    function process(err, docs) {
        if (err) return cb(err);
        return cb(null, docs.sort(ordering));
    }
    function ordering(a, b) {
        // sort docs into the same order as the ids array
        return ids.indexOf(a._id.toString()) - ids.indexOf(b._id.toString());
    }
}
The find query may need adjusting; I can't know which exact MongoDB driver you use. This code is a first attempt, and more careful manual sorting can improve performance a lot, since [].indexOf is heavy too (O(n)). But I'm almost sure that even as-is, it will work much faster.
Possible ordering replacement:
var idHash = {};
for(var i = 0; i < ids.length; i++)
    idHash[ids[i]] = i;

function ordering(a, b) {
    return idHash[a._id.toString()] - idHash[b._id.toString()];
}
Any comparison sort is O(n log n) at best, but we already know the result position of each found document, so we can restore the original order in O(n):
var idHash = ids.reduce((c, id, i) => (c[id] = i, c), {});

function process(err, docs) {
    if (err) return cb(err);
    return cb(null,
        docs.reduce(
            (c, doc) => (c[idHash[doc._id.toString()]] = doc, c),
            ids.map(id => null))); // fill not-found docs with null
}
Functional style makes the code more flexible. For example, this code could easily be modified to use async.reduce so that it blocks the event loop less.

Perform arbitrary set of asynchronous tasks

My input is streamed from another source, which makes it difficult to use async.forEach. I am pulling data from an API endpoint, but I have a limit of 1000 objects per request, and I need to get hundreds of thousands of them (basically all of them). I will know they're finished when a response contains fewer than 1000 objects. Now, I have tried this approach:
/* List all deposits */
var depositsAll = [];
var depositsIteration = [];
async.doWhilst(this._post(endpoint_path, function (err, response) {
    // check err
    /* Loop through the data and gather only the deposits */
    for (var key in response) {
        //do some stuff
    }
    depositsAll += depositsIteration;
    return callback(null, depositsAll);
}, {limit: 1000, offset: 0, sort: 'desc'}),
response.length > 1000, function (err, depositsAll) {
    // check for err
    // return the complete result
    return callback(null, depositsAll);
});
With this code I get an async internal error that iterator is not a function. But in general I am almost sure the logic is not correct as well.
If it's not clear what I'm trying to achieve - I need to perform a request multiple times, and add the response data to a result that at the end contains all the results, so I can return it. And I need to perform requests until the response contains less than 1000 objects.
I also looked into async.queue but could not get the hang of it...
Any ideas?
You should be able to do it like that, but if that example is from your real code, you have misunderstood some of how async works. doWhilst takes three arguments, each of them being a function:
The function to be called by async. It gets a callback argument that must be called. In your case, you need to wrap this._post inside another function.
The test function. (You are passing the value of response.length > 1000, i.e. a boolean evaluated immediately, and response isn't even defined at that point.)
The final function, called once execution has stopped.
Example with each needed function separated for readability:
var depositsAll = [];
var responseLength = 1000;
var self = this;

var post = function(asyncCb) {
    self._post(endpoint_path, function(err, res) {
        ...
        responseLength = res.length;
        asyncCb(err, depositsAll);
    });
};

var check = function() {
    return responseLength >= 1000;
};

var done = function(err, deposits) {
    console.log(deposits);
};

async.doWhilst(post, check, done);
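One caveat: this matches the classic async v1/v2 API. In async 3.x the doWhilst test function is asynchronous and receives a callback instead of returning a boolean, so check would become:
var check = function(cb) {
    cb(null, responseLength >= 1000); // async v3 truth test
};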

What if I need synchronicity for users to have a chance to respond to output before giving input?

Imagine implementing an fdupes sort of scenario in Node.js. It seems impossible. What are people's suggestions for this?
Take the 'prompt' module on npm. This is roughly what my code looks like:
for(var i = 0; i < 50; i++) {
    log('shit up console with lots of messy and space-consuming output, pushing everything else off the screen.');
    prompt.get('foo', function(err, resp){
        doSomethingWith(resp.foo);
    });
}
Before the user even has time to enter their first response, 50 more sets of output have crowded the information they would need for their second response off the screen.
This seems like a major failure of Node's too-hip-for-synchronicity gimmick, does it not? Am I missing something?
This is another case where you can't (always) apply the same kind of coding pattern that you'd use when programming in a strictly synchronous language.
One way to solve the problem in Node:
function showPrompt( i ) {
    log('fill console with lots of messy and space-consuming output, pushing everything else off the screen.');
    prompt.get('foo', function(err, resp) {
        doSomethingWith(resp.foo);
        if( i < 50 ) {
            showPrompt( i + 1 );
        }
    });
}

showPrompt( 0 );
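Note that although showPrompt calls itself, the recursive call happens inside an asynchronous callback after the stack has unwound, so this pattern won't overflow the call stack no matter how many iterations you run.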
If you want to loop over some asynchronous functions, try async's timesSeries, which calls a function n times in series. If any call passes an error to its callback, the main error handler will be called.
Here is an example, using your code:
var async = require('async');

async.timesSeries(50, function (n, next) {
    prompt.get('foo', function (err, res) {
        if (err) return next(err); // propagate prompt errors
        var value = doSomethingWith(res.foo);
        if (value !== 'bar') next(new Error('value !== bar'));
        else next(null, value);
    });
}, function (err, res) {
    // either err !== null,
    // or res is an array with 50 elements
});
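On modern Node (8+) you can also get fully sequential prompts with the built-in readline module and async/await, with no extra dependencies. A minimal sketch, reusing doSomethingWith from the question:
const readline = require('readline');

const rl = readline.createInterface({ input: process.stdin, output: process.stdout });

// wrap rl.question in a promise so it can be awaited
function ask(question) {
    return new Promise(resolve => rl.question(question, resolve));
}

(async () => {
    for (let i = 0; i < 50; i++) {
        const answer = await ask('foo: ');
        doSomethingWith(answer); // each prompt waits for the previous answer
    }
    rl.close();
})();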
