Batching and Queuing API calls in Node.js

I am hitting an API that takes in addresses and gives me back GPS coordinates. The API only accepts a single address per request, but it can handle 50 live connections at any given time. I am trying to build a function that will send 50 requests, wait until they all return, and then send 50 more. Or: send 50 requests and send the next one as each previous one returns. Below is the code I have been working with, but I am stuck.
One issue is in batchFunc. The for loop sends all the API calls, doesn't wait for them to come back, then runs the if statement before returned is updated. This makes sense given the asynchronicity of Node. I tried to put an await on the API call, but that seemingly stops all the async processing (does anyone have clarification on this?) and effectively makes it send the requests one at a time.
Any advice on adapting this code or on finding a better way of batching and queuing API requests?
const array = ['address1', 'address2', 'address3', 'address4', '...', 'addressN']

function batchFunc(array) {
  return new Promise(function (resolve, reject) {
    var returned = 1
    for (let ele of array) {
      apiCall(ele).then(resp => { // if I put an await here it will send one at a time
        console.log(resp)
        returned++
      })
    }
    if (returned == array.length) {
      resolve(returned);
    }
  })
}
async function batchCall(array) {
  while (array.length > 0) {
    let batchArray = []
    if (array.length > 50) {
      for (let i = 0; i < 50; i++) {
        batchArray.push(array[0])
        array.splice(0, 1)
      }
    } else {
      batchArray = array
      array = []
    }
    let result = await batchFunc(batchArray);
    console.log(result);
  }
}
batchCall(array)
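For the first variant (send 50, wait for them all, then send 50 more), one fix is to let Promise.all do the counting instead of the returned counter. A minimal sketch, assuming apiCall returns a promise as above:

// Resolve once every call in the current batch has finished.
function batchFunc(batch) {
  return Promise.all(batch.map(ele => apiCall(ele)));
}

async function batchCall(array) {
  const results = [];
  // Walk the input in slices of 50; await pauses until the whole slice settles.
  for (let i = 0; i < array.length; i += 50) {
    results.push(...await batchFunc(array.slice(i, i + 50)));
  }
  return results;
}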

I ended up using async.queue, but I am still very interested in any other solutions.
const array = ['address1', 'address2', 'address3', 'address4', 'address5', 'address6']

function asyncTime(value) {
  return new Promise(function (resolve, reject) {
    apiCall(value).then(resp => {
      resolve(resp)
    })
  })
}
function test(array) {
  var q = async.queue(async function (task, callback) {
    console.log(await asyncTime(task))
    if (callback) callback()
  }, 3);
  q.push(array, function (err) {
    if (err) {
      console.log(err)
      return
    }
    console.log('finished processing item');
  });
}
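For the second variant (keep up to 50 requests in flight and start the next one as each previous one returns), a plain-promise alternative to async.queue is a small worker pool. Again a sketch, assuming apiCall returns a promise:

async function pool(items, limit) {
  const results = new Array(items.length);
  let next = 0;
  // Each worker repeatedly claims the next unprocessed index.
  async function worker() {
    while (next < items.length) {
      const i = next++;
      results[i] = await apiCall(items[i]);
    }
  }
  // Start `limit` workers; a new call begins the moment a previous one finishes.
  await Promise.all(Array.from({ length: Math.min(limit, items.length) }, worker));
  return results;
}

pool(array, 50).then(results => console.log(results));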

Related

Synchronous while-loop and array push

I'm new to NodeJS and I'm currently working with the node-soap module that can be found at https://github.com/vpulim/node-soap.
I'm calling a Web Service using the referenced module. I need to return the SOAP client's response to the user's web browser; however, the problem is that the response gets returned before it is fetched into an array. This is related to the asynchronous way of working.
The second problem is that I need to call the Web Service again until I get a specific number of results. Each result will be pushed to the same array. How do I do this? I need to return this array to the user but, as described before, it is always empty.
How would you use soap.createClientAsync and client.methodAsync in this case?
I have already tried writing a while-loop that continues until I get a specific number of results. I tried wrapping soap.createClient in a promise, as well as soap.method. Those promises are in different functions and I tried to call them in an async function which returns the array.
function createSoapClient() {
  return new Promise(function (resolve, reject) {
    var url = '...';
    soap.createClient(url, function (err, client) {
      if (err) {
        return reject(err);
      }
      resolve(client);
    });
  });
}

function fetchServiceCustomers(client) {
  return new Promise(function (resolve, reject) {
    var args = {...};
    client.method(args, function (error, result, rawResponse, soapHeader, rawRequest) {
      if (error) {
        return reject(error);
      }
      resolve(result);
    }, {timeout: 60 * 1000});
  });
}
exports.getServiceCustomers = async function () {
  let client = await createSoapClient();
  var results = 0,
    completeResult = [];
  while (results <= 0 || results >= 10000) {
    completeResult.push(await fetchServiceCustomers(client));
    results = completeResult[completeResult.length - 1];
    console.log(results);
  }
  return completeResult;
}
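To answer the createClientAsync/methodAsync part directly: node-soap also generates an Async variant of each SOAP method that returns a promise resolving to an array of [result, rawResponse, soapHeader, rawRequest], so the hand-rolled promise wrappers above are unnecessary. A sketch (the method name, args, and stop condition are placeholders to adapt):

const soap = require('soap');

exports.getServiceCustomers = async function () {
  const client = await soap.createClientAsync('...');
  const completeResult = [];
  let done = false;
  while (!done) {
    // methodAsync resolves to [result, rawResponse, soapHeader, rawRequest]
    const [result] = await client.methodAsync({ /* args */ }, { timeout: 60 * 1000 });
    completeResult.push(result);
    // stop once the desired number of results has been collected
    done = completeResult.length >= 10000;
  }
  return completeResult;
};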

Multiple and fast external POST requests

I have a node.js server making POST requests to an external API; each time I have to make ~10k requests (don't worry, I'm not abusing the API) and I need it to take around 2-3 minutes.
I'm using the request-promise library to make the requests, along with Promise.all() to wait for all the requests to resolve.
My problem is that the requests seem stuck and not running in parallel. I know that a promise executes as soon as it's created, but it seems that the resolve event can only listen to about 10 events at one time.
I tried updating maxListeners and also using es6-promise-pool (with a pool of 500), but no luck.
My next solution will probably be to use child_process with fork; does this seem like the best solution for my problem?
Thanks!
code:
async function send_msg(msg) {
  return new Promise(function (resolve, reject) {
    request.post(options, function (err, res, body) {
      if (err) {
        logger.error('error sending msg ' + err);
        resolve(null);
      } else {
        resolve(body);
      }
    })
  });
}
async function send_msgs() {
  let msgs = await OutgoingMessage.findAll();
  let promises = [];
  for (let i = 0; i < msgs.length; i++) {
    promises.push(send_msg(msgs[i]).then(async (result) => {
      if (result != null) {
        try {
          let sid = result['MessageSid'];
          let status = result['Status'];
          msgs[i].update({sid: sid, status: status});
        } catch (e) {
          logger.error(e + JSON.stringify(result));
          msgs[i].update({status: 'failed'});
        }
      }
    }));
  }
  return Promise.all(promises);
}
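Rather than forking child processes, it is usually enough to cap how many requests are in flight at once; both the local HTTP agent and the remote server throttle excessive parallelism. A sketch that reuses send_msg above and processes the messages in fixed-size chunks (the batch size of 200 is an assumption to tune):

async function send_msgs_batched(batchSize = 200) {
  const msgs = await OutgoingMessage.findAll();
  const results = [];
  // Fire batchSize requests, wait for the whole slice to settle, then continue.
  for (let i = 0; i < msgs.length; i += batchSize) {
    const slice = msgs.slice(i, i + batchSize);
    results.push(...await Promise.all(slice.map(send_msg)));
  }
  return results;
}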

Run parallel promises and discard rest when two of them resolved

I want to collect data from 10 different servers at the same time, and when I get a response from 2 of them, I want to ignore/skip the others. I solved this with Promise.all(): when I get 2 responses, I call reject instead of resolve and do my job in catch(). But this seems a bit tricky to me; is there a better way to do this?
Here is something similar to the Bluebird Promise.some method suggested by @Neverever:
function awaitSome(promises, count) {
  if (!Array.isArray(promises) || promises.length < count) {
    return Promise.reject();
  }
  return new Promise((resolve, reject) => {
    const results = [];
    const errors = [];
    let completed = false;
    promises.forEach((p) => {
      p.then((result) => {
        if (!completed) {
          results.push(result);
          if (results.length === count) {
            completed = true;
            resolve(results);
          }
        }
      }).catch((err) => {
        if (!completed) {
          errors.push(err);
          if (promises.length - errors.length < count) {
            completed = true;
            reject(errors);
          }
        }
      });
    });
  });
}
And the usage would be:
awaitSome(
  [
    Promise.resolve(1),
    Promise.reject(1),
    Promise.resolve(1),
    Promise.resolve(1)
  ],
  2
)
  .then((results) => {
    console.log(results);
  })
  .catch((errors) => {
    console.error(errors);
  });
You should take a look at Bluebird's Promise.some:
http://bluebirdjs.com/docs/api/promise.some.html
Given an Iterable(arrays are Iterable), or a promise of an Iterable, which produces promises (or a mix of promises and values), iterate over all the values in the Iterable into an array and return a promise that is fulfilled as soon as count promises are fulfilled in the array. The fulfillment value is an array with count values in the order they were fulfilled.
This example pings 4 nameservers, and logs the fastest 2 on console:
Promise.some([
  ping("ns1.example.com"),
  ping("ns2.example.com"),
  ping("ns3.example.com"),
  ping("ns4.example.com")
], 2).spread(function (first, second) {
  console.log(first, second);
});
You can construct two Promises and put their resolve functions into an array. When one response comes back, pop() one of the resolves and call it. Repeat for the second response. Further responses are ignored because the array is by then empty.
Then, you can call Promise.all on the two constructed Promises, which will resolve when both resolves have been called.
const api = () => new Promise(res => {
  const timeout = Math.random() * 3000;
  setTimeout(() => {
    console.log('resolving ' + timeout);
    res(timeout);
  }, timeout)
});

const resolves = [];
const prom1 = new Promise(resolve => resolves.push(resolve));
const prom2 = new Promise(resolve => resolves.push(resolve));

for (let i = 0; i < 10; i++) {
  api().then(res => {
    if (!resolves.length) return;
    resolves.pop()(res);
  });
}

Promise.all([prom1, prom2])
  .then(([res1, res2]) => {
    console.log('got 2 responses', res1, res2);
  });
As you can see in the code above, although the 3rd to 10th API calls also resolve eventually, Promise.all resolves as soon as the two quickest have come back.

Limiting number of parallel requests to Cassandra DB in Node.js

I am currently parsing a file and getting its data in order to push it into my db. To do that I made an array of queries, and I execute them through a loop.
The problem is that I'm limited to 2048 parallel requests.
This is the code I made:
index.js =>

const ImportClient = require("./scripts/import_client_leasing")
const InsertDb = require("./scripts/insertDb")
const cassandra = require('cassandra-driver');
const databaseConfig = require('./config/database.json');
const authProvider = new cassandra.auth.PlainTextAuthProvider(databaseConfig.cassandra.username, databaseConfig.cassandra.password);
const db = new cassandra.Client({
  contactPoints: databaseConfig.cassandra.contactPoints,
  authProvider: authProvider
});

ImportClient.clientLeasingImport().then(queries => { // this function parses the data and returns an array of queries
  return InsertDb.Clients(db, queries); // inserting in the database returns something when all the promises are done
}).then(result => {
  return db.shutdown(function (err, result) {});
}).then(result => {
  console.log(result);
}).catch(error => {
  console.log(error)
});
insertDb.js =>
module.exports = {
Clients: function (db, queries) {
DB = db;
return insertClients(queries);
}
}
function insertClients(queries) {
return new Promise((resolve, reject) => {
let promisesArray = [];
for (let i = 0; i < queries.length; i++) {
promisesArray.push(new Promise(function (resolve, reject) {
DB.execute(queries[i], function (err, result) {
if (err) {
reject(err)
} else {
resolve("success");
}
});
}));
}
Promise.all(promisesArray).then((result) => {
resolve("success");
}).catch((error) => {
resolve("error");
});
});
}
I tried multiple things, like adding an await function that sets a timeout in my for loop every x seconds (but it doesn't work because I'm already in a promise). I also tried p-queue and p-limit, but they don't seem to work either.
I'm kinda stuck here. I think I'm missing something trivial, but I don't really get what.
Thanks
When submitting several requests in parallel (execute() function uses asynchronous execution), you end up queueing at one of the different levels: on the driver side, on the network stack or on the server side. Excessive queueing affects the total time it takes each operation to complete. You should limit the amount of simultaneous requests at any time, also known as concurrency level, to get high throughput and low latency.
When thinking about implementing it in your code, you should consider launching a fixed amount of asynchronous executions, using your concurrency level as a cap and only adding new operations once executions within that cap completed.
Here is an example on how to limit the amount of concurrent executions when processing items in a loop: https://github.com/datastax/nodejs-driver/blob/master/examples/concurrent-executions/execute-in-loop.js
In a nutshell:
// Launch in parallel n async operations (n being the concurrency level)
for (let i = 0; i < concurrencyLevel; i++) {
  promises[i] = executeOneAtATime();
}
// ...

async function executeOneAtATime() {
  // ...
  // Execute queries asynchronously in sequence
  while (counter++ < totalLength) {
    await client.execute(query, params, options);
  }
}
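Fleshed out, that pattern might look like the sketch below. The concurrency level of 128 is an assumption to tune; sharing the index between workers is safe because Node is single-threaded and await yields between iterations:

async function insertClients(db, queries) {
  const concurrencyLevel = 128;
  let index = 0;

  // Each worker executes queries one at a time until the list is drained.
  async function executeOneAtATime() {
    while (index < queries.length) {
      const query = queries[index++];
      await db.execute(query);
    }
  }

  // Launch `concurrencyLevel` workers and wait for all of them to finish.
  const workers = [];
  for (let i = 0; i < Math.min(concurrencyLevel, queries.length); i++) {
    workers.push(executeOneAtATime());
  }
  await Promise.all(workers);
  return "success";
}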
Ok, so I found a workaround to reach my goal. I wrote all my queries to a file:

const fs = require('fs')
fs.appendFileSync('my_file.cql', queries[i] + "\n");

and then used

child_process.exec("cqlsh --file my_file.cql", function (err, stdout, stderr) {})

to insert all my queries into Cassandra.

Using promises to control flow is not working properly

I am trying to control the flow of execution in my code below, meaning I want it to be serial.
I am reading and updating data from and to my DB, and of course I want that to happen in the correct order. Below is the function where I call my DB; the query functions are wrapped in callbacks.
I am pretty new to promises, so perhaps the error is something silly I am overlooking. If you need to ask anything, please do so.
function my_function(array, array2)
{
  var array3 = [];
  return Promise.resolve(true)
    .then(function()
    {
      console.log("1")
      for (var i = 0; i < array.length; i++)
      {
        get(array[i], function(results) {
          console.log("2")
          array3.push(..);
        });
      }
      return array3;
    }).then(function()
    {
      console.log("3")
      for (var i = 0; i < array2.length; i += 2)
      {
        //...
        get(array2[i], function(results) {
          console.log("4")
          return array3.push(...);
        });
      }
      return array3;
    }).then(function(array3)
    {
      console.log("5")
      for (var i = 0; i < array3.length; i++)
      {
        get(array3[i], function(results) {
          console.log("6")
          update(.., function(callb_result) {
            return;
          });
        });
      }
    });
}
And here is the way I am calling the queries.
function get(array, callback)
{
  db.get(`SELECT .. FROM .. WHERE ..;`, function(error, row) {
    ...
    return callback(something);
  });
}

function update(.., callback)
{
  db.run(`UPDATE .. SET ...`);
  return callback("updated"); // I don't want to return anything
}
What's printed in the log:
1
3
5
2
4
6
I was thinking perhaps the way I am calling the queries is async and that's messing everything up.
You're using for loops to run asynchronous tasks and returning an array that is modified by them. But because they are asynchronous, the return happens before they are finished. Instead, you can create an array of promises, where each promise is one of the asynchronous tasks and resolves once the task is done. To wait until every task is done, you can call Promise.all with the array of promises, which returns a promise that resolves with an array of the resolved results.
For the first .then you can use Array.prototype.map to easily create an array of promises. Each item in the array is mapped to a new Promise that resolves with the result from the callback of get.
.then(function() {
  console.log("1");
  const promiseArray = array.map(function(item) {
    return new Promise(function(resolve) {
      get(item, function(result) {
        console.log("2");
        resolve(result);
      });
    });
  });
  return Promise.all(promiseArray);
})
As you return Promise.all, the next .then will be executed once all the promises in promiseArray are fulfilled. It will receive the array of results as the first parameter to its function, so you can use them there. The second .then is similar to the first one, except that you don't want to call get on every item. In this case map is not applicable, so the for loop simply creates a promise and adds it to the array of promises. Before, you used array3 to store the results that you wanted to update, but with promises you don't really need that; you can simply concatenate the results of both arrays.
.then(function(resultsArray) {
  console.log("3");
  const promiseArray2 = [];
  for (var i = 0; i < array2.length; i += 2) {
    const promise = new Promise(function(resolve) {
      get(array2[i], function(results) {
        console.log("4");
        resolve(results);
      });
    });
    promiseArray2.push(promise);
  }
  // Wait for all promises to be resolved
  // Then concatenate both arrays of results
  return Promise.all(promiseArray2).then(function(resultsArray2) {
    return resultsArray.concat(resultsArray2);
  });
})
This returns a promise that resolves with the concatenated array, so you will have all the results (from both .then calls) as an array, which is passed to the next .then function. In the third and final .then you simply call update on each element of the array. You don't need to call get again, as you've already done this and you passed on the results.
.then(function(finalResults) {
  console.log("5");
  for (var i = 0; i < finalResults.length; i++) {
    console.log("6");
    update(finalResults[i], function(result) {
      console.log(result);
    });
  }
});
Full runnable code (get uses a timeout to simulate asynchronous calls)
function myFunction(array, array2) {
  return Promise.resolve(true)
    .then(function() {
      console.log("1");
      const promiseArray = array.map(function(item) {
        return new Promise(function(resolve) {
          get(item, function(results) {
            console.log("2");
            resolve(results);
          });
        });
      });
      return Promise.all(promiseArray);
    })
    .then(function(resultsArray) {
      console.log("3");
      const promiseArray2 = [];
      for (var i = 0; i < array2.length; i += 2) {
        const promise = new Promise(function(resolve) {
          get(array2[i], function(results) {
            console.log("4");
            resolve(results);
          });
        });
        promiseArray2.push(promise);
      }
      return Promise.all(promiseArray2).then(function(resultsArray2) {
        return resultsArray.concat(resultsArray2);
      });
    })
    .then(function(finalResults) {
      console.log("5");
      for (var i = 0; i < finalResults.length; i++) {
        console.log("6");
        update(finalResults[i]);
      }
    });
}

function get(item, cb) {
  // Simply call the callback with the item after 1 second
  setTimeout(() => cb(item), 1000);
}

function update(item) {
  // Log what item is being updated
  console.log(`Updated ${item}`);
}

// Test data
const array = ["arr1item1", "arr1item2", "arr1item3"];
const array2 = ["arr2item1", "arr2item2", "arr2item3"];
myFunction(array, array2);
Improving the code
The code now works as expected, but there are many improvements that would make it a lot easier to understand and, conveniently, also shorter.
To simplify the code you can change your get function to return a promise. This makes things a lot easier, since you don't need to create a promise in every step. And update doesn't need to be a promise, nor does it need a callback, as it's synchronous.
function get(array) {
  return new Promise(function(resolve, reject) {
    db.get(`SELECT .. FROM .. WHERE ..;`, function(error, row) {
      if (error) {
        return reject(error);
      }
      resolve(something);
    });
  });
}
Now you can use get everywhere you used to create a new promise. Note: I added the reject case when there is an error, and you'll have to take care of them with a .catch on the promise.
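As a side note beyond the original answer: since db.get takes its callback last and the callback follows the Node (error, result) convention, util.promisify can build an equivalent wrapper. A sketch, with the row-to-result mapping left as a placeholder:

const { promisify } = require('util');

// dbGet(sql) resolves with the row, or rejects with the error.
const dbGet = promisify(db.get).bind(db);

async function get(item) {
  const row = await dbGet(`SELECT .. FROM .. WHERE ..;`);
  return row; // derive `something` from the row here, using `item` in the query
}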
There are still too many unnecessary .then calls. First of all Promise.resolve(true) is useless since you can just return the promise of the first .then call directly. All it did in your example was to automatically wrap the result of it in a promise.
You're also using two .then calls to create an array of the results. Not only that, but they perform exactly the same call, namely get. Currently you also wait until the first set has finished before you execute the second set, but they can all be executed at the same time. Instead you can create an array of all the get promises and then wait for all of them to finish.
function myFunction(array, array2) {
  // array.map(get) is equivalent to array.map(item => get(item))
  // which in turn is equivalent to:
  // array.map(function(item) {
  //   return get(item);
  // })
  const promiseArray = array.map(get);
  for (let i = 0; i < array2.length; i += 2) {
    promiseArray.push(get(array2[i]));
  }
  return Promise.all(promiseArray).then(results => results.forEach(update));
}
The myFunction body has been reduced from 32 lines of code (not counting the console.log("1") etc.) to 5.
Runnable Snippet
function myFunction(array, array2) {
  const promiseArray = array.map(get);
  for (let i = 0; i < array2.length; i += 2) {
    promiseArray.push(get(array2[i]));
  }
  return Promise.all(promiseArray).then(results => results.forEach(update));
}

function get(item) {
  console.log(`Starting get of ${item}`);
  return new Promise((resolve, reject) => {
    // Simply resolve with the item after 1 second
    setTimeout(() => resolve(item), 1000);
  });
}

function update(item) {
  // Log what item is being updated
  console.log(`Updated ${item}`);
}

// Test data
const testArr1 = ["arr1item1", "arr1item2", "arr1item3"];
const testArr2 = ["arr2item1", "arr2item2", "arr2item3"];
myFunction(testArr1, testArr2).then(() => console.log("Updated all items"));
