I am facing a peculiar situation.
I have a backend system (nodejs) which is being called by FE (pretty standard :) ). This endpoint (nodejs) needs to call another system (external) and get the data it produces and return them to the FE. Until now it all might seem pretty usual but here comes the catch.
The external system has async processing and therefore responds to my request immediately but is still processing data (saves them in a DB) and I have to get those data from DB and return them to the FE.
And here goes the question: what is the best (efficient) way of doing it? It usually takes a couple of seconds only and I am very hesitant of making a loop inside the function and for the data to appear in the DB.
Another way would be to have the external system call an endpoint at the end of the processing (if possible - would need to check that with the partner) and wait in the original function until that endpoint is called (not sure exactly how to implement that - so if there is any documentation, article, tutorial, ... would appreciate it very much if you could share guys)
thx for the ideas!
I can give you an example that checks the Database and waits for a while if it can't find a record. And I made a fake database connection for example to work.
// Mocking starts
ObjectID = () => {};
const db = {
collection: {
find: () => {
return new Promise((resolve, reject) => {
// Mock like no record found
setTimeout(() => { console.log('No record found!'); resolve(false) }, 1500);
});
}
}
}
// Mocking ends
const STANDBY_TIME = 1000; // 1 sec
const RETRY = 5; // Retry 5 times
const test = async () => {
let haveFound = false;
let i = 0;
while (i < RETRY && !haveFound) {
// Check the database
haveFound = await checkDb();
// If no record found, increment the loop count
i++
}
}
const checkDb = () => {
return new Promise((resolve) => {
setTimeout(async () => {
record = await db.collection.find({ _id: ObjectID("12345") });
// Check whether you've found or not the record
if (record) return resolve(true);
resolve(false);
}, STANDBY_TIME);
});
}
test();
Consider a promise-chained chunk of code for example:
return Promise.resolve()
.then(function () {
return createSomeData(...);
})
.then(function () {
return updateSomeData(...);
})
.then(function () {
return deleteSomeData(...);
})
.catch(function (error) {
return ohFishPerformRollbacks();
})
.then(function () {
return Promise.reject('something failed somewhere');
})
In the above code, let's say something went wrong in the function updateSomeData(...). Then one would have to revert the create operation that was executed before this.
In another case, if something went wrong in the function deleteSomeData(...), then one would want to revert the operations executed in createSomeData(...) and updateSomeData(...).
This would continue as long as all the blocks have some revert operations defined for themselves in case anything goes wrong.
Only if there was a way in either NodeJs or the database or somewhere in the middle, that would revert all the transactions happening under the same block of code.
One way I can think of this to happen is by flagging all the rows in database with a transactionId (ObjectID) and a wasTransactionSuccessful(boolean), so that CRUD operations could be clubbed together with their transactionIds, and in case something goes wrong, those transactions could be simply deleted from the database in the ending catch block.
I read about rolling back transactions in https://docs.mongodb.com/manual/tutorial/perform-two-phase-commits/. But I want to see if it can be done in a more simpler fashion and in a generic manner for NoSQL databases to adapt.
I am not sure if this would satisfy your use case, but I hope it would.
let indexArray = [1, 2, 3];
let promiseArray = [];
let sampleFunction = (index) => {
return new Promise((resolve, reject) => {
setTimeout(resolve, 100, index);
});
}
indexArray.map((element) => {
promiseArray.push(sampleFunction(element));
});
Promise.all(promiseArray).then((data) => {
// do whatever you want with the results
}).catch((err) => {
//Perform your entire rollback here
});
async.waterfall([
firstFunc,
secondFunc
], function (err, result) {
if (err) {
// delete the entire thing
}
});
Using the async library would give you a much elegant solution than going with chaining.
The gist of the problem is:
for (let i=0;i<list.length;i++) {
AsyncCall_1({'someProperty': list[i] })
.then((resp_1) => {
resp_1.doSomething();
resp_1.AsyncCall_2()
.then((resp_2) => {
resp_2.doSomething();
})
})
}
after last resp.AsyncCall_2.then(()=> {
//do something
})
I need to sequentially chain all the promises so that, the loop waits for the "resp.AsyncCall_2" function to be resolved for its next iteration. After last "resp.AsyncCall_2" call do something. (since all the promises will be resolved the)
Actual Problem:
for (var i=0;i<todo.assignTo.length;i++) {
Users.findOne({'username': todo.assignTo[i] })
.then((user) => {
user.assigned.push(todo.title);
user.notificationCount.assignedTodosCount++;
user.save()
.then((user) => {
console.log("todo is assigned to the user: " + user.username)
})
})
}
//to something at last call resloved (I know this is wrong way of doing this)
Users.find({})
.then((users)=> {
var promises = [];
for (var i=0;i<users.length;i++) {
users[i].notificationCount.totalTodosCount++;
promises.push(users[i].save());
}
Promise.all(promises)
.then(()=> {
res.statusCode = 200;
res.setHeader('Content-Type', 'application/json');
console.log("todo is successfully posted");
res.json({success : true, todo});
},(err) => next(err))
.catch((err) => next(err));
})
Thank You in Advance..
In modern versions of node.js, you can just use async/await and don't need to use Bluebird iteration functions:
async function someMiddlewareFunc(req, res, next) {
try {
for (let item of list) {
let resp_1 = await AsyncCall_1({'someProperty': item });
resp_1.doSomething();
let resp_2 = await resp_1.AsyncCall_2();
resp_2.doSomething();
}
// then do something here after the last iteration
// of the loop and its async operations are done
res.json(...);
} catch(err) {
next(err);
}
}
This will serialize the operations (which is what you asked for) so the 2nd iteration of the loop doesn't start until the async operations in the first iteration is done.
But, it doesn't appear in your real code that you actually need to serialize the individual operations and serializing things that don't have to be serialized usually makes the end-to-end time to complete them be longer. So, you could run all the items in your loop in parallel, collect all the results at the end and then send your response and Bluebird's Promise.map() would be quite useful for that because it combines a .map() and a Promise.all() into one function call:
function someMiddlewareFunc(req, res, next) {
Promise.map(list, (item) => {
return AsyncCall_1({'someProperty': item}).then(resp_1 => {
resp_1.doSomething();
return resp_1.AsyncCall_2();
}).then(resp_2 => {
return resp_2.doSomething();
});
}).then(results => {
// all done here
res.json(...);
}).catch(err => {
next(err);
});
}
FYI, when using res.json(...), you don't need to set these res.statusCode = 200; or res.setHeader('Content-Type', 'application/json'); as they will be done for you automatically.
Further notes about Bluebird's Promise.map(). It accepts a {concurrency: n} option that tells Bluebird how many operations are allowed to be "in flight" at the same time. By default, it runs them all in parallel at the same time, but you can pass any number you want as the concurrency option. If you pass 1, it will serialize things. Using this option can be particularly useful when parallel operation is permitted, but the array is very large and iterating all of them in parallel runs into either memory usage problems or overwhelms the target server. In that case, you can set the concurrency value to some intermediate value that still gives you some measure of parallel execution, but doesn't overwhelm the target (some number between 5 and 20 is often appropriate - it depends upon the target service). Sometimes, commercial services (like Google) also have limits about how many requests they will handle at the same time from the same IP address (to protect them from one account using too much of the service at once) and the concurrency value can be useful for that reason too.
Have you tried Promise.each?
const users = todo.assignTo.map(function(user) {
return Users.findOne({'username': assigned_to });
}
Promise.each(users, function(user) {
user.assigned.push(todo.title);
user.notificationCount.assignedTodosCount++;
user.save()
.then((user) => {
console.log("todo is assigned to the user: " + user.username)
})
}
I'm trying to update a tool that was created a while ago which uses nodejs (I am not a JS developer, so I'm trying to piece the code together) and am getting stuck at the last hurdle.
The new functionality will take in a swagger .json definition, compare the endpoints against the matching API Gateway on the AWS Service, using the 'aws-sdk' SDK for JS and then updates the Gateway accordingly.
The code runs fine on a small definition file (about 15 endpoints) but as soon as I give it a bigger one, I start getting tons of TooManyRequestsException errors.
I understand that this is due to my calls to the API Gateway service being too quick and a delay / pause is needed. This is where I am stuck
I have tried adding;
a delay() to each promise being returned
running a setTimeout() in each promise
adding a delay to the Promise.all and Promise.mapSeries
Currently my code loops through each endpoint within the definition and then adds the response of each promise to a promise array:
promises.push(getMethodResponse(resourceMethod, value, apiName, resourcePath));
Once the loop is finished I run this:
return Promise.all(promises)
.catch((err) => {
winston.error(err);
})
I have tried the same with a mapSeries (no luck).
It looks like the functions within the (getMethodResponse promise) are run immediately and hence, no matter what type of delay I add they all still just execute. My suspicious is that the I need to make (getMethodResponse) return a function and then use mapSeries but I cant get this to work either.
Code I tried:
Wrapped the getMethodResponse in this:
return function(value){}
Then added this after the loop (and within the loop - no difference):
Promise.mapSeries(function (promises) {
return 'a'();
}).then(function (results) {
console.log('result', results);
});
Also tried many other suggestions:
Here
Here
Any suggestions please?
EDIT
As request, some additional code to try pin-point the issue.
The code currently working with a small set of endpoints (within the Swagger file):
module.exports = (apiName, externalUrl) => {
return getSwaggerFromHttp(externalUrl)
.then((swagger) => {
let paths = swagger.paths;
let resourcePath = '';
let resourceMethod = '';
let promises = [];
_.each(paths, function (value, key) {
resourcePath = key;
_.each(value, function (value, key) {
resourceMethod = key;
let statusList = [];
_.each(value.responses, function (value, key) {
if (key >= 200 && key <= 204) {
statusList.push(key)
}
});
_.each(statusList, function (value, key) { //Only for 200-201 range
//Working with small set
promises.push(getMethodResponse(resourceMethod, value, apiName, resourcePath))
});
});
});
//Working with small set
return Promise.all(promises)
.catch((err) => {
winston.error(err);
})
})
.catch((err) => {
winston.error(err);
});
};
I have since tried adding this in place of the return Promise.all():
Promise.map(promises, function() {
// Promise.map awaits for returned promises as well.
console.log('X');
},{concurrency: 5})
.then(function() {
return console.log("y");
});
Results of this spits out something like this (it's the same for each endpoint, there are many):
Error: TooManyRequestsException: Too Many Requests
X
Error: TooManyRequestsException: Too Many Requests
X
Error: TooManyRequestsException: Too Many Requests
The AWS SDK is being called 3 times within each promise, the functions of which are (get initiated from the getMethodResponse() function):
apigateway.getRestApisAsync()
return apigateway.getResourcesAsync(resourceParams)
apigateway.getMethodAsync(params, function (err, data) {}
The typical AWS SDK documentation state that this is typical behaviour for when too many consecutive calls are made (too fast). I've had a similar issue in the past which was resolved by simply adding a .delay(500) into the code being called;
Something like:
return apigateway.updateModelAsync(updateModelParams)
.tap(() => logger.verbose(`Updated model ${updatedModel.name}`))
.tap(() => bar.tick())
.delay(500)
EDIT #2
I thought in the name of thorough-ness, to include my entire .js file.
'use strict';
const AWS = require('aws-sdk');
let apigateway, lambda;
const Promise = require('bluebird');
const R = require('ramda');
const logger = require('../logger');
const config = require('../config/default');
const helpers = require('../library/helpers');
const winston = require('winston');
const request = require('request');
const _ = require('lodash');
const region = 'ap-southeast-2';
const methodLib = require('../aws/methods');
const emitter = require('../library/emitter');
emitter.on('updateRegion', (region) => {
region = region;
AWS.config.update({ region: region });
apigateway = new AWS.APIGateway({ apiVersion: '2015-07-09' });
Promise.promisifyAll(apigateway);
});
function getSwaggerFromHttp(externalUrl) {
return new Promise((resolve, reject) => {
request.get({
url: externalUrl,
header: {
"content-type": "application/json"
}
}, (err, res, body) => {
if (err) {
winston.error(err);
reject(err);
}
let result = JSON.parse(body);
resolve(result);
})
});
}
/*
Deletes a method response
*/
function deleteMethodResponse(httpMethod, resourceId, restApiId, statusCode, resourcePath) {
let methodResponseParams = {
httpMethod: httpMethod,
resourceId: resourceId,
restApiId: restApiId,
statusCode: statusCode
};
return apigateway.deleteMethodResponseAsync(methodResponseParams)
.delay(1200)
.tap(() => logger.verbose(`Method response ${statusCode} deleted for path: ${resourcePath}`))
.error((e) => {
return console.log(`Error deleting Method Response ${httpMethod} not found on resource path: ${resourcePath} (resourceId: ${resourceId})`); // an error occurred
logger.error('Error: ' + e.stack)
});
}
/*
Deletes an integration response
*/
function deleteIntegrationResponse(httpMethod, resourceId, restApiId, statusCode, resourcePath) {
let methodResponseParams = {
httpMethod: httpMethod,
resourceId: resourceId,
restApiId: restApiId,
statusCode: statusCode
};
return apigateway.deleteIntegrationResponseAsync(methodResponseParams)
.delay(1200)
.tap(() => logger.verbose(`Integration response ${statusCode} deleted for path ${resourcePath}`))
.error((e) => {
return console.log(`Error deleting Integration Response ${httpMethod} not found on resource path: ${resourcePath} (resourceId: ${resourceId})`); // an error occurred
logger.error('Error: ' + e.stack)
});
}
/*
Get Resource
*/
function getMethodResponse(httpMethod, statusCode, apiName, resourcePath) {
let params = {
httpMethod: httpMethod.toUpperCase(),
resourceId: '',
restApiId: ''
}
return getResourceDetails(apiName, resourcePath)
.error((e) => {
logger.unimportant('Error: ' + e.stack)
})
.then((result) => {
//Only run the comparrison of models if the resourceId (from the url passed in) is found within the AWS Gateway
if (result) {
params.resourceId = result.resourceId
params.restApiId = result.apiId
var awsMethodResponses = [];
try {
apigateway.getMethodAsync(params, function (err, data) {
if (err) {
if (err.statusCode == 404) {
return console.log(`Method ${params.httpMethod} not found on resource path: ${resourcePath} (resourceId: ${params.resourceId})`); // an error occurred
}
console.log(err, err.stack); // an error occurred
}
else {
if (data) {
_.each(data.methodResponses, function (value, key) {
if (key >= 200 && key <= 204) {
awsMethodResponses.push(key)
}
});
awsMethodResponses = _.pull(awsMethodResponses, statusCode); //List of items not found within the Gateway - to be removed.
_.each(awsMethodResponses, function (value, key) {
if (data.methodResponses[value].responseModels) {
var existingModel = data.methodResponses[value].responseModels['application/json']; //Check if there is currently a model attached to the resource / method about to be deleted
methodLib.updateResponseAssociation(params.httpMethod, params.resourceId, params.restApiId, statusCode, existingModel); //Associate this model to the same resource / method, under the new response status
}
deleteMethodResponse(params.httpMethod, params.resourceId, params.restApiId, value, resourcePath)
.delay(1200)
.done();
deleteIntegrationResponse(params.httpMethod, params.resourceId, params.restApiId, value, resourcePath)
.delay(1200)
.done();
})
}
}
})
.catch(err => {
console.log(`Error: ${err}`);
});
}
catch (e) {
console.log(`getMethodAsync failed, Error: ${e}`);
}
}
})
};
function getResourceDetails(apiName, resourcePath) {
let resourceExpr = new RegExp(resourcePath + '$', 'i');
let result = {
apiId: '',
resourceId: '',
path: ''
}
return helpers.apiByName(apiName, AWS.config.region)
.delay(1200)
.then(apiId => {
result.apiId = apiId;
let resourceParams = {
restApiId: apiId,
limit: config.awsGetResourceLimit,
};
return apigateway.getResourcesAsync(resourceParams)
})
.then(R.prop('items'))
.filter(R.pipe(R.prop('path'), R.test(resourceExpr)))
.tap(helpers.handleNotFound('resource'))
.then(R.head)
.then([R.prop('path'), R.prop('id')])
.then(returnedObj => {
if (returnedObj.id) {
result.path = returnedObj.path;
result.resourceId = returnedObj.id;
logger.unimportant(`ApiId: ${result.apiId} | ResourceId: ${result.resourceId} | Path: ${result.path}`);
return result;
}
})
.catch(err => {
console.log(`Error: ${err} on API: ${apiName} Resource: ${resourcePath}`);
});
};
function delay(t) {
return new Promise(function(resolve) {
setTimeout(resolve, t)
});
}
module.exports = (apiName, externalUrl) => {
return getSwaggerFromHttp(externalUrl)
.then((swagger) => {
let paths = swagger.paths;
let resourcePath = '';
let resourceMethod = '';
let promises = [];
_.each(paths, function (value, key) {
resourcePath = key;
_.each(value, function (value, key) {
resourceMethod = key;
let statusList = [];
_.each(value.responses, function (value, key) {
if (key >= 200 && key <= 204) {
statusList.push(key)
}
});
_.each(statusList, function (value, key) { //Only for 200-201 range
promises.push(getMethodResponse(resourceMethod, value, apiName, resourcePath))
});
});
});
//Working with small set
return Promise.all(promises)
.catch((err) => {
winston.error(err);
})
})
.catch((err) => {
winston.error(err);
});
};
You apparently have a misunderstanding about what Promise.all() and Promise.map() do.
All Promise.all() does is keep track of a whole array of promises to tell you when the async operations they represent are all done (or one returns an error). When you pass it an array of promises (as you are doing), ALL those async operations have already been started in parallel. So, if you're trying to limit how many async operations are in flight at the same time, it's already too late at that point. So, Promise.all() by itself won't help you control how many are running at once in any way.
I've also noticed since, that it seems this line promises.push(getMethodResponse(resourceMethod, value, apiName, resourcePath)) is actually executing promises and not simply adding them to the array. Seems like the last Promise.all() doesn't actually do much.
Yep, when you execute promises.push(getMethodResponse()), you are calling getMethodResponse() immediately right then. That starts the async operation immediately. That function then returns a promise and Promise.all() will monitor that promise (along with all the other ones you put in the array) to tell you when they are all done. That's all Promise.all() does. It monitors operations you've already started. To keep the max number of requests in flight at the same time below some threshold, you have to NOT START the async operations all at once like you are doing. Promise.all() does not do that for you.
For Bluebird's Promise.map() to help you at all, you have to pass it an array of DATA, not promises. When you pass it an array of promises that represent async operations that you've already started, it can do no more than Promise.all() can do. But, if you pass it an array of data and a callback function that can then initiate an async operation for each element of data in the array, THEN it can help you when you use the concurrency option.
Your code is pretty complex so I will illustrate with a simple web scraper that wants to read a large list of URLs, but for memory considerations, only process 20 at a time.
const rp = require('request-promise');
let urls = [...]; // large array of URLs to process
Promise.map(urls, function(url) {
return rp(url).then(function(data) {
// process scraped data here
return someValue;
});
}, {concurrency: 20}).then(function(results) {
// process array of results here
}).catch(function(err) {
// error here
});
In this example, hopefully you can see that an array of data items are being passed into Promise.map() (not an array of promises). This, then allows Promise.map() to manage how/when the array is processed and, in this case, it will use the concurrency: 20 setting to make sure that no more than 20 requests are in flight at the same time.
Your effort to use Promise.map() was passing an array of promises, which does not help you since the promises represent async operations that have already been started:
Promise.map(promises, function() {
...
});
Then, in addition, you really need to figure out what exactly causes the TooManyRequestsException error by either reading documentation on the target API that exhibits this or by doing a whole bunch of testing because there can be a variety of things that might cause this and without knowing exactly what you need to control, it just takes a lot of wild guesses to try to figure out what might work. The most common things that an API might detect are:
Simultaneous requests from the same account or source.
Requests per unit of time from the same account or source (such as request per second).
The concurrency operation in Promise.map() will easily help you with the first option, but will not necessarily help you with the second option as you can limit to a low number of simultaneous requests and still exceed a requests per second limit. The second needs some actual time control. Inserting delay() statements will sometimes work, but even that is not a very direct method of managing it and will either lead to inconsistent control (something that works sometimes, but not other times) or sub-optimal control (limiting yourself to something far below what you can actually use).
To manage to a request per second limit, you need some actual time control with a rate limiting library or actual rate limiting logic in your own code.
Here's an example of a scheme for limiting the number of requests per second you are making: How to Manage Requests to Stay Below Rate Limiting.
Complete NodeJS testing noob here. Trying to individually test functions that are called through my API (meaning, rather than make an http request to a specific endpoint, which usually invokes several functions, which in turn make requests to different third party APIs, I want to test the functions themselves separately). The way they're called is I've built a class for each data source (data source = third party API), each class contains the same functions with the same exact signatures - getData and convertData, and return a callback with the results.
I've also created a module that creates many user mocks, since each user context returns different data (meaning, a user object is fed into getData, which uses certain user properties in order to determine what data should be returned).
The way I wanted to test this was to create numerous mocks, then run the functions for each. This is what I've got so far:
// Data sources to iterate over. Each is a class instance acquired through "require".
var dataSources = [
source1,
source2,
source3,
source4
];
describe('getData', function() {
this.timeout(10000);
describe('per data source,', function() {
context('standard call', function() {
// Associative array to hold the data returned, a key for each data source.
var finalResults = {};
// Iterate over all data sources
_.forEach(dataSources, function(dataSource) {
// Generate user mocks
var users = userMocks(10);
// Iterate over all users.
_.forEach(users, function (user) {
// Call each data source with each of the users.
// Numbers of calls to make - (users * data-sources), so in this case - 10*4.
dataSource.getData(user, function (err, data) {
if (err) return done(err);
// Convert the data returned to my format
dataSource.convertData(data, function (err, processedData) {
if (err) return done(err);
// Populate finalResults with converted data from each source
if (finalResults[dataSource.sourceName]) {
finalResults[dataSource.sourceName] = finalResults[dataSource.sourceName].concat(processedData);
} else {
finalResults[dataSource.sourceName] = processedData;
}
});
});
});
});
it('should return something', function(done) {
_.forEach(finalResults.keys, function(key) {
expect(finalResults[key]).to.not.be.empty;
expect(finalResults[key].length).to.be.greaterThan(0);
});
setTimeout(function() {
done();
}, 10000);
})
});
});
});
});`
This works (or at least the test passes when the query is valid, which is what I wanted), but it's cumbersome and (so very) far from elegant or effective, specifically the usage of timeout rather than using promises, async of some sort, or maybe a different alternative I'm not yet familiar with.
Since most of the resources I found (http://alanhollis.com/node-js-testing-a-node-js-api-with-mocha-async-and-should/, https://developmentnow.com/2015/02/05/make-your-node-js-api-bulletproof-how-to-test-with-mocha-chai-and-supertest/, https://justinbellamy.com/testing-async-code-with-mocha/, just to name a few) discuss direct API testing rather than specific async functions, I would love to get some input/best practices tips from more experienced Noders.
You need to know when bunch of asynchronous operations complete. Elegant way to test that is to use promises and promise aggregation:
Promise.all([ promise1, promise2, promise3 ]).then(function(results) {
// all my promises are fulfilled here, and results is an array of results
});
Wrap your dataSources into a promises using bluebird. You don't need to modify tested code self, bluebird provides convenience method:
var Promise = require('bluebird')
var dataSources = [
source1,
source2,
source3,
source4
].map(Promise.promisifyAll);
Use newly promisified functions to create promise for each call:
context('standard call', function() {
var finalResults = {};
var promiseOfResults = datasources.map(function(dataSource) {
var users = userMocks(10);
// Promise.all will take an array of promises and return a promise that is fulfilled then all of promises are
return Promise.all( users.map(function(user) {
// *Async functions are generated by bluebird, via Promise.promisifyAll
return dataSource.getDataAsync(user)
.then(dataSource.convertDataAsync)
.then(function(processedData) {
if (finalResults[dataSource.sourceName]) {
finalResults[dataSource.sourceName] = finalResults[dataSource.sourceName].concat(processedData);
} else {
finalResults[dataSource.sourceName] = processedData;
}
});
});
});
// promiseOfResults consists now of array of agregated promises
it('should return something', function(done) {
// Promise.all agregates all od your 'datasource' promises and is fulfilled when all of them are
// You don't need the promise result here, since you agegated finalResults yourself
return Promise.all( promiseOfResults ).then(function() {
_.forEach(finalResults.keys, function(key) {
expect(finalResults[key]).to.not.be.empty;
expect(finalResults[key].length).to.be.greaterThan(0);
});
done();
});
});
Rest of your test should use same Promise.all( promiseOfResults ), unless you need new set of results.