I'm having trouble to surf correctly between objects that are been returned by an API request.
Basically, I have an Array (tickerArr with 25 elements that I get from another request) which I use for a forEach loop for another request.
Here's what the code looks like:
var request = require('request');
var async = require('async');
var fs = require('fs')
var getTickerList = require('./getTickerList').getTickerList;
var unixTimeStamp = new Date().toISOString();
async function getTickerInfo() {
var tickerArr = await getTickerList;
var arrLength = tickerArr.length;
var init = 0;
console.log(`${arrLength} to process to the batchfile...`);
var getTickerInfo = new Promise((resolve, reject) => {
async.forEach(tickerArr, (item, callback) => {
request({
url:`https://api.kraken.com/0/public/Ticker?pair=${item}`,
json: true
}, (error, response, body) => {
var tickerInfo = JSON.stringify(body.result)
fs.appendFile(`./modules/batches/${unixTimeStamp}batch.json`, tickerInfo, (err) => {
if (err) throw err;
init ++;
var progress = Math.round(init / arrLength * 100);
console.log(`[${progress}%] - ${item} successfully added!`);
});
resolve();
})
});
});
}
getTickerInfo()
Unfortunately, and even if the request works correctly, the objects being returned have a specific path:
Error: []
Result:
Pair(x):
a: [1, 2, 3]
b: [1, 2, 3] ect...
You can find an exact example of the information being returned from the request here:
{
"error": [],
"result": {
"XXBTZEUR": {
"a": ["2212.31000", "3", "3.000"],
"b": ["2212.21100", "1", "1.000"],
"c": ["2212.31000", "0.15800000"],
"v": ["4999.06419498", "9993.55448658"],
"p": ["2206.04624", "2181.36028"],
"t": [15065, 29524],
"l": ["2167.00000", "2122.92000"],
"h": ["2239.00000", "2239.00000"],
"o": "2184.99000"
}
}
}
The only problem is: I would like to 'transform' the object I get from the request, to another object (just arranging stuff around, most of all: put the 'Pair(x)' attribute as a value for a name key) but since I don't know the pair in advance (the values in my tickerArray are enough to make the request but do not apparently correspond to reference the object), I can't access the information contained within result to manipulate it.
Anybody have any idea?
Thanks in advance!
Your question has multiple levels and maybe it's best to break up the task into its constituent parts first.
Calling the Kraken REST API and unwrapping the response
Creating small, composable functions
Transforming an object into a new layout
Writing the results to a file
0. Preface
Since you want to work with await, the first thing to know is that this is syntactical sugar for "wait for promise resolution". async is the keyword that enables await semantics inside a function.
So given this promise-returning function:
function foo() {
return Promise.resolve('some value');
}
this:
async function worker() {
var fooResult = await foo();
console.log(fooResult);
}
worker();
and this:
foo().then(function worker(fooResult) {
console.log(fooResult);
});
are effectively the same thing. (As always, it's a little more complicated than that, but that's the gist of it.)
Since we are working with promises here, the most sensible thing to do is to use libraries that also use promises. request has the request-promise counter-part, so let's use that.
var request = require('request-promise');
1. Calling the Kraken REST API and unwrapping the response
Making an HTTP request and breaking up the response (err and result parts) works the same for all API endpoints, so we can write a function that handles this task.
function getFromApi(endpoint, params) {
return request({
url: "https://api.kraken.com/0/public/" + endpoint,
qs: params,
json: true
}).then(function (data) {
if (data.error && data.error.length) {
throw new Error(`API error xyz for ${endpoint} with params ...`);
}
return data.result;
});
}
Users of this function will not have to deal with err at all and have access to result directly.
2. Creating small, composable functions
Our getFromApi() function returns a promise, therefore we can re-use it in wrappers for various API endpoints:
function getTickerList() {
return getFromApi("TickerList"); // ...or something like that
}
function getSingleTicker(pair) {
return getFromApi("Ticker", {pair: pair}).then(transformTicker);
}
function getTickerInfo(tickerList) {
return Promise.all(tickerList.map(getSingleTicker));
}
Note that, like in getFromApi(), we can use Promise#then to modify the overall output of an operation.
getTickerInfo() accepts an array of ticker names. It uses Array#map to run all API requests in parallel, and Promise#all to allow awaiting the overall result. The fairly complex operation of fetching and transforming multiple results in parallel composes into a pretty straight-forward one-liner.
3. Transforming an object into a new layout
transformTicker() is meant to accept an object in form {"XXBTZEUR": {"a": [...], ... }} and return a transformed variant of it.
function transformTicker(ticker) {
var result = {};
// ...massage ticker into desired form
Object.keys(ticker).forEach( pair => {
Object.keys(ticker[pair]).forEach( k => {
result[k] = { /* ... */ };
});
});
return result;
}
4. Writing the results to a file
Appending to a JSON file does not work. JSON can only be read and written as a whole. Let's fetch a list of ticker names, the associated tickers and write the results to a file.
async function worker() {
var tickerList = await getTickerList();
var tickerInfo = await getTickerInfo(tickerList);
var filename = `./modules/batches/${new Date().toISOString()}batch.json`;
fs.writeFile(filename, JSON.stringify(tickerInfo), (err) => {
if (err) throw err;
console.log("Done");
});
}
worker();
You can switch use a promisified version (see Bluebird#promisifyAll) of the fs module, since the plain version breaks the nice async/await semantics again by requiring a continuation callback.
With a promisified fs module, the worker could look like this:
async function worker() {
var tickerList = await getTickerList();
var tickerInfo = await getTickerInfo(tickerList);
var filename = `./modules/batches/${new Date().toISOString()}batch.json`;
await fs.writeFileAsync(filename, JSON.stringify(tickerInfo));
console.log("Done");
}
Related
I'm scanning all items from a DynamoDB table - within a Lambda function - with DocumentClient. I'm then looping through each item and extracting the payload that I need. I'll use that item from the payload as a parameter with ExpressionAttributeValues in a new query.
Everything works dandy independently. The issue is with the use of the asynchronous function queryItems when nested within an array forEach() method. I getting a parsing error with the function queryItems. I can query the table when I call the function outside of the loop but how else am I going to query each item independently?
I'm not sure how to handle this.
'use strict';
const aws = require('aws-sdk');
const docClient = new aws.DynamoDB.DocumentClient();
var paramsAll = {
TableName: 'MyTable',
Select: "ALL_ATTRIBUTES"
};
exports.handler = async (event, context) => {
try {
let arr = [];
let sequence = '';
//scan all items in table
docClient.scan(paramsAll, function(err, data) {
if (err) {
//handle error
}
else {
//Loop through each item in the table:
let items = (data.Items);
items.forEach(function(Item) {
let p = (Item.payload);
//Extract sequence from the payload
sequence = (p.seq);
arr.push(sequence);
//perform other function with this array (not listed for brevity)
});
//Here is where I'm having the issue:
arr.forEach(function(Item) {
//Pass these items as a paramater within queryItems function but getting Parsing Error: unexpected token queryItems
const results = await queryItems(Item);
//do something with the results...
})
}
});
}
catch (err) {
return { error: err };
}
};
async function queryItems(p) {
try {
var params = {
TableName: 'MyTable',
KeyConditionExpression: '#seq = :value',
ExpressionAttributeValues: { ':value': p },
ExpressionAttributeNames: { '#seq': 'seq' }
};
const data = await docClient.query(params).promise();
return data;
}
catch (err) {
return err;
}
}
I've definitely run into a similar issue. What I believe is happening is just a Javascript syntax issue, where awaiting queryItems inside the synchronous function provided to forEach will produce an error. (Although, when running the code, I do get the specific error "SyntaxError: await is only valid in async functions and the top level bodies of modules", so there might be something else going on.)
I see nothing wrong with the DynamoDB queries, but hoangdv's suggestions are spot on. Specifically, I'd also suggest using the promise style for scan, and while a for...loop will definitely work, using Promise.all and map will be a lot quicker to complete all the queries. Here's how I'd modify the code:
'use strict';
const aws = require('aws-sdk');
const docClient = new aws.DynamoDB.DocumentClient();
// avoid var unless you specifically require it's hoisting behavior.
const paramsAll = {
TableName: 'MyTable',
Select: "ALL_ATTRIBUTES" // most likely not needed, I'd review this section of the docs: https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_Scan.html#DDB-Scan-request-Select
};
exports.handler = async (event, context) => {
try {
// unless you need to assign a new array to this variable, it is better practice to use const instead.
const arr = [];
// let sequence = ''; // see line 24 for why I commented this out.
// scan all items in table.
// Destructure Items out of the response.
// You may also need to continue scanning with the LastEvaluatedKey depending on the size of your table, and/or your use case.
// You'd continue scanning in a while loop, for example.
const { Items, LastEvaluatedKey } = await docClient.scan(paramsAll).promise();
// push the sequence to the arr.
// There is most likely a reason you omitted for brevity to have sequence defined above,
// but since this example doesn't need it above, I've omitted it entirely
Items.forEach(Item => {
const p = Item.payload;
arr.push(p.seq);
});
// use a for loop or map here instead. forEach will return undefined, which cannot be await'ed.
// instead, map will return a new array of Promises (since the callback is async).
// Then, you can use Promise.all to await until each Promise in the array is resolved.
// Keep in mind, depending on how many items you are iterating through, you may run into DynamoDB's ThrottlingException.
// You would have to batch the queries (in other words, split the arr into pieces, and iterate over each piece), which would have to be done before using map. Then, sleep for a few milliseconds before starting on the next piece.
// I doubt the queries will be quick enough to cause this when using a for loop, though.
await Promise.all(arr.map(async Item => {
const results = await queryItems(Item);
// do something with the results...
}));
}
catch (err) {
// Again, not sure what the use case is, but just FYI this is not a valid return value if this lambda function is intended for use with using API Gateway.
// See here :) https://docs.aws.amazon.com/lambda/latest/dg/services-apigateway.html#apigateway-types-transforms
return { error: err };
}
};
// Presumably, MyTable has a partitionKey of seq, otherwise this KeyConditionExpression is invalid.
async function queryItems(p) {
try {
var params = {
TableName: 'MyTable',
KeyConditionExpression: '#seq = :value',
ExpressionAttributeValues: { ':value': p },
ExpressionAttributeNames: { '#seq': 'seq' }
};
const data = await docClient.query(params).promise();
return data;
}
catch (err) {
return err;
}
}
Your issue is how you await on the for loop, its best to use Promise.all() with a map to await inside of a loop:
await Promise.all(arr.map(async Item => {
const results = await queryItems(Item);
// do something with the results...
}));
However, I cannot seem to understand your logic really well.
You Scan a table called MyTable, but you do not paginate, meaning you are only getting up to 1MB worth of data.
With the results, you strip out the seq value and then once again read every item from MyTable this time using a Query and seq as the key?
Ive spent a bit of time trying to understand this. I hope the answer is obvious and just show my lack of experience
My goal is to send API request to steam for various IDs of game mods and find the time_updated for each one, to put these all into an array and then find out which one most the most recently updated
I have got the code below, but its not quite doing what I want, I think I am just getting muddled over timings
My plan was to have a few different values in arrMODID = [], and to loop through each one, get the time_updated, push that to an array and for const result = await myfunction(); to be able to access the data in the modinfoArray
however that is returning an array with just [{test},{}] in it and is being fired before the function has put any data into the array
can anyone give me a shove in the right direction please
thank you
import request from 'request';
const myfunction = async function(x, y) {
var arrMODID = ["2016338122"];
var modinfoArray = []
var timeUpdated
for (const element of arrMODID) {
request.post({
headers: {'content-type' : 'application/x-www-form-urlencoded'},
url: 'http://api.steampowered.com/ISteamRemoteStorage/GetPublishedFileDetails/v1',
body: 'itemcount=1&publishedfileids[0]=2016338122',
},
function(error, response, body){
var response = JSON.parse(body);
var myResponse = response.response.publishedfiledetails
myResponse.forEach(function(arrayItem) {
//console.log(arrayItem.time_updated)
timeUpdated = arrayItem.time_updated
//console.log(timeUpdated)
modinfoArray.push({"2016338122":arrayItem.time_updated})
console.log(modinfoArray) // only this log returns the added items
})
});
}
return ["test", modinfoArray];
};
// Start function
const start = async function(a, b) {
const result = await myfunction();
console.log(result); // this returns the empty array
}
// Call start
start();
You need to use an http request library that supports promises so you can await that inside your function. You cannot successfully mix promises and asynchronous operations like request.post() that uses plain callbacks because you can manage the control flow in a promise-like way with plain callbacks.
I'd suggest using the got() library. Also, the request() library has been deprecated and is not recommended for new code. If you absolutely wanted to stay with the request() library, you could use the request-promise module instead, but keep in mind that the request() library is in maintenance mode only (no new feature development) whereas this list of alternatives are all being actively developed.
Here's a runnable implementation using the got() library:
import got from 'got';
const myfunction = async function() {
const arrMODID = ["2016338122"];
const modinfoArray = [];
for (const element of arrMODID) {
const response = await got.post({
headers: { 'content-type': 'application/x-www-form-urlencoded' },
url: 'http://api.steampowered.com/ISteamRemoteStorage/GetPublishedFileDetails/v1',
body: 'itemcount=1&publishedfileids[0]=2016338122',
}).json();
const myResponse = response.response.publishedfiledetails;
for (const arrayItem of myResponse) {
modinfoArray.push({ "2016338122": arrayItem.time_updated });
}
}
return ["test", modinfoArray];
};
// Start function
const start = async function() {
const result = await myfunction();
console.log(result);
return result;
}
// Call start
start().then(result => {
console.log("done");
}).catch(err => {
console.log(err);
});
I'm trying to update a tool that was created a while ago which uses nodejs (I am not a JS developer, so I'm trying to piece the code together) and am getting stuck at the last hurdle.
The new functionality will take in a swagger .json definition, compare the endpoints against the matching API Gateway on the AWS Service, using the 'aws-sdk' SDK for JS and then updates the Gateway accordingly.
The code runs fine on a small definition file (about 15 endpoints) but as soon as I give it a bigger one, I start getting tons of TooManyRequestsException errors.
I understand that this is due to my calls to the API Gateway service being too quick and a delay / pause is needed. This is where I am stuck
I have tried adding;
a delay() to each promise being returned
running a setTimeout() in each promise
adding a delay to the Promise.all and Promise.mapSeries
Currently my code loops through each endpoint within the definition and then adds the response of each promise to a promise array:
promises.push(getMethodResponse(resourceMethod, value, apiName, resourcePath));
Once the loop is finished I run this:
return Promise.all(promises)
.catch((err) => {
winston.error(err);
})
I have tried the same with a mapSeries (no luck).
It looks like the functions within the (getMethodResponse promise) are run immediately and hence, no matter what type of delay I add they all still just execute. My suspicious is that the I need to make (getMethodResponse) return a function and then use mapSeries but I cant get this to work either.
Code I tried:
Wrapped the getMethodResponse in this:
return function(value){}
Then added this after the loop (and within the loop - no difference):
Promise.mapSeries(function (promises) {
return 'a'();
}).then(function (results) {
console.log('result', results);
});
Also tried many other suggestions:
Here
Here
Any suggestions please?
EDIT
As request, some additional code to try pin-point the issue.
The code currently working with a small set of endpoints (within the Swagger file):
module.exports = (apiName, externalUrl) => {
return getSwaggerFromHttp(externalUrl)
.then((swagger) => {
let paths = swagger.paths;
let resourcePath = '';
let resourceMethod = '';
let promises = [];
_.each(paths, function (value, key) {
resourcePath = key;
_.each(value, function (value, key) {
resourceMethod = key;
let statusList = [];
_.each(value.responses, function (value, key) {
if (key >= 200 && key <= 204) {
statusList.push(key)
}
});
_.each(statusList, function (value, key) { //Only for 200-201 range
//Working with small set
promises.push(getMethodResponse(resourceMethod, value, apiName, resourcePath))
});
});
});
//Working with small set
return Promise.all(promises)
.catch((err) => {
winston.error(err);
})
})
.catch((err) => {
winston.error(err);
});
};
I have since tried adding this in place of the return Promise.all():
Promise.map(promises, function() {
// Promise.map awaits for returned promises as well.
console.log('X');
},{concurrency: 5})
.then(function() {
return console.log("y");
});
Results of this spits out something like this (it's the same for each endpoint, there are many):
Error: TooManyRequestsException: Too Many Requests
X
Error: TooManyRequestsException: Too Many Requests
X
Error: TooManyRequestsException: Too Many Requests
The AWS SDK is being called 3 times within each promise, the functions of which are (get initiated from the getMethodResponse() function):
apigateway.getRestApisAsync()
return apigateway.getResourcesAsync(resourceParams)
apigateway.getMethodAsync(params, function (err, data) {}
The typical AWS SDK documentation state that this is typical behaviour for when too many consecutive calls are made (too fast). I've had a similar issue in the past which was resolved by simply adding a .delay(500) into the code being called;
Something like:
return apigateway.updateModelAsync(updateModelParams)
.tap(() => logger.verbose(`Updated model ${updatedModel.name}`))
.tap(() => bar.tick())
.delay(500)
EDIT #2
I thought in the name of thorough-ness, to include my entire .js file.
'use strict';
const AWS = require('aws-sdk');
let apigateway, lambda;
const Promise = require('bluebird');
const R = require('ramda');
const logger = require('../logger');
const config = require('../config/default');
const helpers = require('../library/helpers');
const winston = require('winston');
const request = require('request');
const _ = require('lodash');
const region = 'ap-southeast-2';
const methodLib = require('../aws/methods');
const emitter = require('../library/emitter');
emitter.on('updateRegion', (region) => {
region = region;
AWS.config.update({ region: region });
apigateway = new AWS.APIGateway({ apiVersion: '2015-07-09' });
Promise.promisifyAll(apigateway);
});
function getSwaggerFromHttp(externalUrl) {
return new Promise((resolve, reject) => {
request.get({
url: externalUrl,
header: {
"content-type": "application/json"
}
}, (err, res, body) => {
if (err) {
winston.error(err);
reject(err);
}
let result = JSON.parse(body);
resolve(result);
})
});
}
/*
Deletes a method response
*/
function deleteMethodResponse(httpMethod, resourceId, restApiId, statusCode, resourcePath) {
let methodResponseParams = {
httpMethod: httpMethod,
resourceId: resourceId,
restApiId: restApiId,
statusCode: statusCode
};
return apigateway.deleteMethodResponseAsync(methodResponseParams)
.delay(1200)
.tap(() => logger.verbose(`Method response ${statusCode} deleted for path: ${resourcePath}`))
.error((e) => {
return console.log(`Error deleting Method Response ${httpMethod} not found on resource path: ${resourcePath} (resourceId: ${resourceId})`); // an error occurred
logger.error('Error: ' + e.stack)
});
}
/*
Deletes an integration response
*/
function deleteIntegrationResponse(httpMethod, resourceId, restApiId, statusCode, resourcePath) {
let methodResponseParams = {
httpMethod: httpMethod,
resourceId: resourceId,
restApiId: restApiId,
statusCode: statusCode
};
return apigateway.deleteIntegrationResponseAsync(methodResponseParams)
.delay(1200)
.tap(() => logger.verbose(`Integration response ${statusCode} deleted for path ${resourcePath}`))
.error((e) => {
return console.log(`Error deleting Integration Response ${httpMethod} not found on resource path: ${resourcePath} (resourceId: ${resourceId})`); // an error occurred
logger.error('Error: ' + e.stack)
});
}
/*
Get Resource
*/
function getMethodResponse(httpMethod, statusCode, apiName, resourcePath) {
let params = {
httpMethod: httpMethod.toUpperCase(),
resourceId: '',
restApiId: ''
}
return getResourceDetails(apiName, resourcePath)
.error((e) => {
logger.unimportant('Error: ' + e.stack)
})
.then((result) => {
//Only run the comparrison of models if the resourceId (from the url passed in) is found within the AWS Gateway
if (result) {
params.resourceId = result.resourceId
params.restApiId = result.apiId
var awsMethodResponses = [];
try {
apigateway.getMethodAsync(params, function (err, data) {
if (err) {
if (err.statusCode == 404) {
return console.log(`Method ${params.httpMethod} not found on resource path: ${resourcePath} (resourceId: ${params.resourceId})`); // an error occurred
}
console.log(err, err.stack); // an error occurred
}
else {
if (data) {
_.each(data.methodResponses, function (value, key) {
if (key >= 200 && key <= 204) {
awsMethodResponses.push(key)
}
});
awsMethodResponses = _.pull(awsMethodResponses, statusCode); //List of items not found within the Gateway - to be removed.
_.each(awsMethodResponses, function (value, key) {
if (data.methodResponses[value].responseModels) {
var existingModel = data.methodResponses[value].responseModels['application/json']; //Check if there is currently a model attached to the resource / method about to be deleted
methodLib.updateResponseAssociation(params.httpMethod, params.resourceId, params.restApiId, statusCode, existingModel); //Associate this model to the same resource / method, under the new response status
}
deleteMethodResponse(params.httpMethod, params.resourceId, params.restApiId, value, resourcePath)
.delay(1200)
.done();
deleteIntegrationResponse(params.httpMethod, params.resourceId, params.restApiId, value, resourcePath)
.delay(1200)
.done();
})
}
}
})
.catch(err => {
console.log(`Error: ${err}`);
});
}
catch (e) {
console.log(`getMethodAsync failed, Error: ${e}`);
}
}
})
};
function getResourceDetails(apiName, resourcePath) {
let resourceExpr = new RegExp(resourcePath + '$', 'i');
let result = {
apiId: '',
resourceId: '',
path: ''
}
return helpers.apiByName(apiName, AWS.config.region)
.delay(1200)
.then(apiId => {
result.apiId = apiId;
let resourceParams = {
restApiId: apiId,
limit: config.awsGetResourceLimit,
};
return apigateway.getResourcesAsync(resourceParams)
})
.then(R.prop('items'))
.filter(R.pipe(R.prop('path'), R.test(resourceExpr)))
.tap(helpers.handleNotFound('resource'))
.then(R.head)
.then([R.prop('path'), R.prop('id')])
.then(returnedObj => {
if (returnedObj.id) {
result.path = returnedObj.path;
result.resourceId = returnedObj.id;
logger.unimportant(`ApiId: ${result.apiId} | ResourceId: ${result.resourceId} | Path: ${result.path}`);
return result;
}
})
.catch(err => {
console.log(`Error: ${err} on API: ${apiName} Resource: ${resourcePath}`);
});
};
function delay(t) {
return new Promise(function(resolve) {
setTimeout(resolve, t)
});
}
module.exports = (apiName, externalUrl) => {
return getSwaggerFromHttp(externalUrl)
.then((swagger) => {
let paths = swagger.paths;
let resourcePath = '';
let resourceMethod = '';
let promises = [];
_.each(paths, function (value, key) {
resourcePath = key;
_.each(value, function (value, key) {
resourceMethod = key;
let statusList = [];
_.each(value.responses, function (value, key) {
if (key >= 200 && key <= 204) {
statusList.push(key)
}
});
_.each(statusList, function (value, key) { //Only for 200-201 range
promises.push(getMethodResponse(resourceMethod, value, apiName, resourcePath))
});
});
});
//Working with small set
return Promise.all(promises)
.catch((err) => {
winston.error(err);
})
})
.catch((err) => {
winston.error(err);
});
};
You apparently have a misunderstanding about what Promise.all() and Promise.map() do.
All Promise.all() does is keep track of a whole array of promises to tell you when the async operations they represent are all done (or one returns an error). When you pass it an array of promises (as you are doing), ALL those async operations have already been started in parallel. So, if you're trying to limit how many async operations are in flight at the same time, it's already too late at that point. So, Promise.all() by itself won't help you control how many are running at once in any way.
I've also noticed since, that it seems this line promises.push(getMethodResponse(resourceMethod, value, apiName, resourcePath)) is actually executing promises and not simply adding them to the array. Seems like the last Promise.all() doesn't actually do much.
Yep, when you execute promises.push(getMethodResponse()), you are calling getMethodResponse() immediately right then. That starts the async operation immediately. That function then returns a promise and Promise.all() will monitor that promise (along with all the other ones you put in the array) to tell you when they are all done. That's all Promise.all() does. It monitors operations you've already started. To keep the max number of requests in flight at the same time below some threshold, you have to NOT START the async operations all at once like you are doing. Promise.all() does not do that for you.
For Bluebird's Promise.map() to help you at all, you have to pass it an array of DATA, not promises. When you pass it an array of promises that represent async operations that you've already started, it can do no more than Promise.all() can do. But, if you pass it an array of data and a callback function that can then initiate an async operation for each element of data in the array, THEN it can help you when you use the concurrency option.
Your code is pretty complex so I will illustrate with a simple web scraper that wants to read a large list of URLs, but for memory considerations, only process 20 at a time.
const rp = require('request-promise');
let urls = [...]; // large array of URLs to process
Promise.map(urls, function(url) {
return rp(url).then(function(data) {
// process scraped data here
return someValue;
});
}, {concurrency: 20}).then(function(results) {
// process array of results here
}).catch(function(err) {
// error here
});
In this example, hopefully you can see that an array of data items are being passed into Promise.map() (not an array of promises). This, then allows Promise.map() to manage how/when the array is processed and, in this case, it will use the concurrency: 20 setting to make sure that no more than 20 requests are in flight at the same time.
Your effort to use Promise.map() was passing an array of promises, which does not help you since the promises represent async operations that have already been started:
Promise.map(promises, function() {
...
});
Then, in addition, you really need to figure out what exactly causes the TooManyRequestsException error by either reading documentation on the target API that exhibits this or by doing a whole bunch of testing because there can be a variety of things that might cause this and without knowing exactly what you need to control, it just takes a lot of wild guesses to try to figure out what might work. The most common things that an API might detect are:
Simultaneous requests from the same account or source.
Requests per unit of time from the same account or source (such as request per second).
The concurrency operation in Promise.map() will easily help you with the first option, but will not necessarily help you with the second option as you can limit to a low number of simultaneous requests and still exceed a requests per second limit. The second needs some actual time control. Inserting delay() statements will sometimes work, but even that is not a very direct method of managing it and will either lead to inconsistent control (something that works sometimes, but not other times) or sub-optimal control (limiting yourself to something far below what you can actually use).
To manage to a request per second limit, you need some actual time control with a rate limiting library or actual rate limiting logic in your own code.
Here's an example of a scheme for limiting the number of requests per second you are making: How to Manage Requests to Stay Below Rate Limiting.
I'm writing a function that's returning and array of values. Some of the values are calculated in a callback. But I don't know how to make the program asynchronious so all of my results are in the array, and not added after they're returned.
let array = []
for (stuff : stuffs) {
if (condition) {
array.add(stuff)
} else {
api.compute(stuff, callback(resp) {
array.add(resp.stuff)
}
}
}
res.json({ "stuff": array })
In this example the array is written to the response before the async calls have finished.
How can I make this work asynchronously?
You have to use one of the approaches:
async library
Promise.all
coroutines/generators
async/await
The most cool yet, I think, is async/await. First we modify your function, so it returns a promise:
const compute = function(stuff) {
return new Promise( (resolve, reject) => {
api.compute(stuff, callback(resp){
resolve(resp.stuff)
});
});
};
Then we modify your route with async handler:
app.get('/', async function(req, res, next) {
const array = [];
for (const stuff of stuffs) {
if (condition) {
array.add(stuff);
} else {
const stuff = await compute(stuff);
array.push(stuff);
}
}
res.json({ stuff: array });
});
Note: You might need to update node version to latest.
UPDATE:
Those who are not awared, how event loop works, execute this snippet, and finish with that:
const sleep = async function(ms) {
console.log(`Sleeping ${ms}ms`);
return new Promise( resolve => setTimeout(resolve, ms));
};
async function job() {
console.log('start');
for (let t = 0; t < 10; t++) {
await sleep(100);
}
}
job();
console.log('oops did not expect that oO');
You will be surprised.
Here is an answer without package using callbacks
Create a function that's gonna recursively treat all your stuffs.
getArray(stuffs, callback, index = 0, array = []) {
// Did we treat all stuffs?
if (stuffs.length >= index) {
return callback(array);
}
// Treat one stuff
if (condition) {
array.add(stuffs[index]);
// Call next
return getArray(stuffs, callback, index + 1, array);
}
// Get a stuff asynchronously
return api.compute(stuffs[index], (resp) => {
array.add(resp.stuff);
// Call next
return getArray(stuffs, callback, index + 1, array);
});
}
How to call it?
getArray(stuffs, (array) => {
// Here you have your array
// ...
});
EDIT: more explanation
What we want to do to transform the loop you had into a loop that handle asynchronous function call.
The purpose is that one getArray call gonna treat one index of your stuffs array.
After treating one index, the function will call itself again to treat the next index, until all get treated.
-> Treat index 0 -> Treat index 1 -> Treat index 2 -> Return all result
We are using parameters to pass the infos through the process. Index to know which array part we have to treat, and array to keep a tract of what we did calculate.
EDIT: Improvement to 100% asynchronous soluce
What we have done here it's a simple transposition of your initial for loop into an asynchronous code. it can be improved so by making it totally asynchronous, which make it better but slightly more difficult.
For example :
// Where we store the results
const array = [];
const calculationIsDone = (array) => {
// Here our calculation is done
// ---
};
// Function that's gonna aggregate the results coming asynchronously
// When we did gather all results, we call a function
const gatherCalculResult = (newResult) => {
array.push(newResult);
if (array.length === stuffs.length) {
callback(array);
}
};
// Function that makes the calculation for one stuff
const makeCalculation = (oneStuff) => {
if (condition) {
return gatherCalculResult(oneStuff);
}
// Get a stuff asynchronously
return api.compute(oneStuff, (resp) => {
gatherCalculResult(resp.stuff);
});
};
// We trigger all calculation
stuffs.forEach(x => x.makeCalculation(x));
I have an array of URLs and I want to loop through them and fetch thr content. After I have looped through them and fetched thr content I want a callback function to be called.
I know I can do this via async library but I want to do this without using any library.
Sample of what kind of code I want is below
['yahoo.com', 'gmail.com'].each(function(item){
//code to fetch URL content
},someCallbackFunctionToBeExecutedAtTheEndOfLoop);
This is typically the type of thing you do using promises (But you would need a library), with a code like:
var ops = [];
urls.forEach(function(url) {
ops.push(fetchUrl(url));
});
P.all(ops).then(callback);
function fetchUrl(url) {
var defer = P.defer();
//do stuff
// call defer.resolve(result);
return defer.promise;
}
If you don't want to use promises, you can use a counter of operations, like:
var ops = urls.length;
urls.forEach(function(url) {
// do stuff
ops--;
if (ops === 0) {
callback();
}
});
If you chose the promises, I advice to use p-promise module, which is far more optimized than Q.
If you want to do it without any sort of library like async, then you have to write your own counter to keep track of when all the async responses have been completed:
var request = require('request');
function loadAll(list, fn) {
var cnt = list.length;
var responses = [];
list.forEach(function(url, index) {
request(url, function(error, response, body) {
if (error) {
fn(error);
} else {
responses[index] = response;
--cnt;
if (cnt === 0) {
fn(0, responses);
}
}
});
})
}
loadAll(['http://www.yahoo.com', 'http://www.gmail.com'], function(err, results) {
if (!err) {
// process results array here
}
});
If you're going to be doing many async operations in node.js, then getting a promise library like Bluebird will save you a lot of time. For example, I think you could do the above in something like this (untested):
var Promise = require("bluebird");
var requestP = Promise.promisfy(require("request"));
Promise.map(['http://www.yahoo.com', 'http://www.gmail.com'], requestP).then(function(results) {
// process the array of results here
});