Node.js. What is the proper way to make things run repeatedly? - node.js

My goal is to make a simple JSON parser which runs continuously.
As I imagined it - an infinity loop makes a request and stores data in my DB. Simple.
But there always were memory leaks.
What is the proper way to make things run repeatedly over and over again in Node.js? What actually is a memory leak? Why and how does it happen? I have tried to do it with setInterval, setTimeout, process.nextTick, setImmediate and promises, but there is always the same result! I am obviously missing something.
What I get now:
// Fetches one page from foo.com and, on a successful (HTTP 200) response,
// hands its results_html to `callback` after a 1s pause — the pause is what
// paces the endless polling loop driven by series() below.
function getItems(callback) {
request({
url: 'http://foo.com',
json: true
}, function (error, response, body) {
if (!error && response.statusCode == 200 && body) {
var total_count = body.total_count;
// BUG FIX: the original redeclared `body` (shadowing the parameter);
// use a distinct name for the extracted field instead.
var html = body.results_html;
// ... (rest of the original processing elided in the question)
// BUG FIX: the original read `callback(body}` — missing closing paren.
setTimeout(function() {callback(html);}, 1000);
}
// NOTE(review): on error / non-200 the callback is never invoked, so the
// series() loop silently stops — presumably intentional in the question.
});
}
// Endless polling driver: fetch, log, then immediately schedule the next
// round (getItems itself supplies the 1s pause between iterations).
// Passing a falsy value stops the loop.
function series(item) {
if (!item) {
return;
}
getItems(function (result) {
console.log(result);
return series(true);
});
}
series(true);

Related

nodejs express search in request parallel single response

i need to query multiple pages from another api and return a object from the page if a specified value matches.
I guess the problem is that the loop runs asynchronously, because I always get "not found" first, and later I get "Cannot set headers after they are sent to the client" if the loop found the object.
I solved this by calling it recursively, but I need more speed because there are many pages with many entries. If possible, the requests should run in parallel, but "not found" should only be sent after all loops have finished.
// NOTE(review): this handler has the two bugs discussed in the question:
// 1) `request` is asynchronous, so the `if (!foundObj)` check below runs
//    before any HTTP callback has fired — "not found" is always sent first.
// 2) if two pages match, res.send() is executed twice, which raises
//    "Cannot set headers after they are sent to the client".
router.post('/search', function (req, res) {
var foundObj = false;
// Fires req.body.cubesize - 1 requests; the loop does NOT wait for them.
for (var page = 1; page < req.body.cubesize; page++) {
request({
method: 'GET',
uri: 'http://localhost:8080/api/v1/getpage/json/' + page
},
function (error, response, body) {
if (!error) {
var result = JSON.parse(body);
// for...in iterates the parsed object's keys.
for (var obj in result) {
console.log(result[obj]);
if (result[obj].hasOwnProperty(req.body.field)) {
if (result[obj][req.body.field] == req.body.value) {
foundObj = true;
// May execute once per matching page -> duplicate send error.
return res.status(200).send(result[obj]);
}
}
}
}
});
}
// Runs synchronously, before any callback above -> always sends "not found".
if(!foundObj){
return res.status(404).send("not found");
}
});
anyone got an idea how to fast loop all pages with all entries but wait for calling not found?
As long as you have a res.send() inside a for loop and at least two matches occurs, two (at least) res.send() calls will be executed and an error will rise.
How to run in parallel ?
router.post('/search', function (req, res) {
// Promisified page fetch: resolves with the matching object, rejects (with
// null) when the page errors, fails to parse, or contains no match.
const callApi = (page) => new Promise((resolve, reject) => {
request({
method: 'GET',
uri: `http://localhost:8080/api/v1/getpage/json/${page}`,
},
function (error, response, body) {
// BUG FIX: the original called reject() and then fell through to
// JSON.parse(body) anyway — return immediately on error.
if (error) return reject(null);
let result;
try {
result = JSON.parse(body);
} catch (parseError) {
// BUG FIX: a malformed body used to throw out of the callback.
return reject(null);
}
for (var obj in result) {
console.log(result[obj]);
if (result[obj].hasOwnProperty(req.body.field)) {
if (result[obj][req.body.field] == req.body.value)
return resolve(result[obj]);
}
}
return reject(null);
});
});
const promisesArr = [];
for (let page = 1; page < req.body.cubesize; page++) {
promisesArr.push(callApi(page));
}
// allSettled never rejects; each entry is {status, value} or {status, reason}.
Promise.allSettled(promisesArr).then((resArr) => {
// BUG FIX: the original filtered on truthiness, but every settled
// descriptor object is truthy — keep only the fulfilled ones.
const resolvedArray = resArr
.filter((val) => val.status === 'fulfilled')
.map((val) => val.value);
if (resolvedArray.length === 0) return res.status(404).send("not found");
// BUG FIX: the original referenced the undefined names resolvedArr / obj.
if (resolvedArray.length === 1) return res.status(200).send(resolvedArray[0]);
// It is not clear in the question what to do when more than one resolves.
return res.status(500).send("Too many matches");
});
});
Some explanation about the code.
The idea is to promisify request and run in parallel
To run in parallel, Promise object allows four methods:
Promise.all, Promise.race, Promise.allSettled and Promise.any. The last two, Promise.allSettled and Promise.any, are newer additions and are not supported in older environments, so keep this in mind.
Once you have the array and run it in parallel, Promise.all and Promise.allSettled return an array of results. The array is filtered, and if some value matches, it responds with that; otherwise, it responds with a 404.
Further information about promises will be required to select the right one for your specific case. You can read about it here: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
Unfortunately my code is not tested, so please review it and refactor to adapt to your specific case.

JSON variable returns Undefined

Sorry for the inconvenience, I am a newbie in Node. I am trying to store a json in "usersData" variable so I want to use it later in another functions. The problem is that if I test the variable with console.log inside the "if" it returns to me results, but when trying to show the variable outside the request subfunction, it comes out 'undefined'. I have declared the global usersData variable as shown below. Thank you.
// Demonstrates the question's problem: `usersData` is assigned inside an
// asynchronous callback, so reading it right after request() returns still
// yields undefined.
var usersData;
function getAllUsers(){
// request() returns immediately; its callback runs only when the HTTP
// response arrives, some time after getAllUsers() has finished.
request({url, json: true}, function (error, response, body) {
if (!error && response.statusCode == 200) {
usersData = body
//console.log(usersData) //Here returns a value
}
});
// Runs synchronously, before the callback above has fired.
console.log(usersData) //here returns undefined
}
request is an asynchronous method, so if you want to use its result later in other functions, you should handle that in its second parameter, the callback. i.e.
// Cached copy of the most recent user list (set once the request completes).
var usersData;
// Consumer hook; replace with whatever should run once the data arrives.
var handleUserData = function() {};
// Kicks off the (asynchronous) fetch; results are delivered through
// handleUserData rather than through a return value.
function getAllUsers() {
request({url, json: true}, function (error, response, body) {
if (error || response.statusCode != 200) {
return; // nothing to hand over on failure
}
usersData = body;
// use results in another function
handleUserData(body);
});
}
or use Promise
// Promise-based variant: resolves with the body on HTTP 200 (also caching it
// in `usersData`), rejects otherwise; the caller decides when to consume it.
function getAllUsers() {
return new Promise(function(resolve, reject) {
request({url, json: true}, function (error, response, body) {
if (error || response.statusCode != 200) {
reject(error);
return;
}
usersData = body;
resolve(body);
});
});
}
// Consume `usersData` only after the fetch promise has resolved.
getAllUsers().then(function (body) {
handleUserData(body);
});
Here are some stuffs you need to know,
request is an asynchronous function, which means it runs in the background without blocking the main thread; the callback function is executed only after the request completes. That is why the value of body is assigned to usersData and, when printed inside the callback, gives some value.
But when printing outside of the callback function, it gives undefined, because request, being async, takes time to complete while running in the background. So the console statement runs before body is assigned to usersData, which is why it prints undefined.

Waiting for a callback

I'm working with callbacks to improve my program's efficiency. I would like to wait for my 'a' variable to get the value from callback if it hasn't already at some point. My code looks like this:
// Wraps `request` in a node-style callback: callback(err, null) on failure,
// callback(null, normalizedBody) on HTTP 200.
function myFunction(callback){
request("url", function(error, response, body) {
if (!error && response.statusCode == 200)
{
// NOTE(review): `result` is never declared — implicit global in sloppy
// mode, ReferenceError in strict mode.
result = JSON.stringify(JSON.parse(body));
return callback(null, result);
}
else
{
return callback(error, null);
}
});
}
// NOTE(review): myFunction itself has no return statement, so `a` is always
// undefined — the inner callback's return value goes nowhere. This is the
// crux of the question.
var a = myFunction(function(err, data){
if(!err)
{
return(data);
}
else
{
return(err);
}
});
//A CHUNK OF CODE EXECUTES HERE
//I'D LIKE TO CHECK IF THE a VARIABLE GOT THE VALUE (if the callback was executed), note that I don't want to nest the chunk inside the callback, but rather I'd like to check that outside function callback via IF statement/loop or some similar alternative
//MORE CODE CONNECTED TO THE CALLBACK
Any ideas on how I can wait for the variable to get the value if it didn't get it already by the time that the chunk of code is executed? I know that the sole intention of using callbacks is to not wait but in this instance it is necessary for me so just don't bother being annoying please :)
I'd suggest trying async/await and the request-promise-native module, this keeps the syntax very simple. If the call fails for some reason an error will be thrown, this is easy to catch:
For example:
const rp = require('request-promise-native');
// Start the request first, interleave other work, then await the result;
// any network/HTTP failure lands in the catch block instead of escaping.
async function testAsyncRequest() {
try {
const pending = rp('https://my-json-server.typicode.com/typicode/demo/posts');
// Do some stuff here...
const result = await pending;
console.log("Result: ", result);
} catch (err) {
console.error(`Something went wrong: ${err.message}`);
}
}
testAsyncRequest();
So I've ended up making a little compromise (had to nest), but the code works as intended:
// Will hold the callback's data; doStuff() polls it to detect completion.
var something;
// Same wrapper as above: delivers the normalized body (or the error) to
// `callback` when the request finishes.
function myFunction(callback){
request("url", function(error, response, body) {
if (!error && response.statusCode == 200)
{
// NOTE(review): `result` is never declared — implicit global.
result = JSON.stringify(JSON.parse(body));
return callback(null, result);
}
else
{
return callback(error, null);
}
});
}
// `a` is always undefined (myFunction returns nothing); the side effect that
// matters here is the assignment to `something` inside the callback.
var a = myFunction(function(err, data){
if(!err)
{
something = data;
return(data);
}
else
{
return(err);
}
});
//A CHUNK OF CODE EXECUTES HERE
// Snapshot taken before the callback has (necessarily) fired.
var something_cachedValue=something;
doStuff();
// Re-schedules itself every 10ms until `something` differs from the cached
// value, i.e. until the request callback has delivered data.
function doStuff() {
if(something===something_cachedValue) {
setTimeout(doStuff, 10);
return;
}
something_cachedValue = something;
//MORE CODE CONNECTED TO THE CALLBACK
}
So basically I'm checking if the callback has completed every 10ms by re-calling the function. When it's completed, the "//MORE CODE CONNECTED TO THE CALLBACK" executes. Not the cleanest and most fancy way to do it but it does the job.

Node JS Api request in loop

I'm trying my damndest to avoid callback hell with my Node JS. But I'm trying to make a large number of api-requests and insert these into my database.
My issue here (of course) is that my for-loop runs and increments i before I finish my request and database insertion.
// NOTE(review): the loop fires all requests immediately; `i` has already
// advanced before any response/insert completes — the problem described
// in the question.
for(var i = 0; i <= 1 ; i++){
// `apiRequest` is never declared — implicit global shared by iterations.
apiRequest = data[i];
apicall(apiRequest);
}
// Issues one HTTP GET and inserts the parsed JSON body on a 200 response.
function apicall(urlApi){
request((urlApi), function(error, response, body){
if(error){
console.log("error");
} else if(!error && response.statusCode == 200){
var myobj = JSON.parse(body);
dbInsert(myobj);
}
});
}
function dbInsert(obj) {
//insert into database
}
If someone else would come by this question I can truly recommend this blogpost which I found after reading the response by joshvermaire:
http://www.sebastianseilund.com/nodejs-async-in-practice
There are a number of ways to approach this type of problem. Firstly, if you can run all the API calls in parallel (all in flight at the same time) and it doesn't matter what order they are inserted in your database, then you can get a result a lot faster by doing that (vs. serializing them in order).
In all the options below, you would use this code:
const rp = require('request-promise');
// Fetch urlApi (parsed as JSON) and hand the payload to dbInsert; the
// returned promise settles once the database insertion has finished.
function apicall(urlApi){
const fetched = rp({url: urlApi, json: true});
return fetched.then((obj) => dbInsert(obj));
}
// Placeholder: insert into database and return a promise that resolves
// when the database insertion is done.
function dbInsert(obj) {
}
Parallel Using ES6 Standard Promises
// Start every apicall immediately and wait for all of them to finish.
let promises = [];
// BUG FIX: the original loop ran to i <= data.length, which also pushed
// apicall(undefined) for the out-of-range index.
for (let i = 0; i < data.length; i++) {
promises.push(apicall(data[i]));
}
Promise.all(promises).then(() => {
// all done here
}).catch(err => {
// error here
});
Parallel using Bluebird Promise Library
With the Bluebird Promise library, you can use Promise.map() to iterate your array and you can pass it the concurrency option to control how many async calls are in flight at the same time which might keep from overwhelming either the database or the target API host and might help control max memory usage.
// Requires the Bluebird promise library: Promise.map is not part of the
// standard Promise API. `concurrency` caps how many calls are in flight
// at once.
Promise.map(data, apiCall, {concurrency: 10}).then(() => {
// all done here
}).catch(err => {
// error here
});
In Series using Standard ES6 Promises
If you have to serialize them for some reason such as inserting into the database in order, then you can do that like this. The .reduce() pattern shown below is a classic way to serialize promise operations on an array using standard ES6:
// Serialize the calls: each apicall starts only after the previous one's
// promise has resolved.
// BUG FIX: Array.prototype.reduce takes (callback, initialValue) — the
// original passed `data` itself as a spurious first argument.
data.reduce((p, item) => {
return p.then(() => {
return apicall(item);
});
}, Promise.resolve()).then(() => {
// all done here
}).catch(err => {
// error here
});
In Series Using Bluebird Promises
Bluebird has a Promise.mapSeries() that iterates an array in series, calling a function that returns a promise on each item in the array which is a little simpler than doing it manually.
// Bluebird-only: Promise.mapSeries processes `data` one item at a time,
// waiting for each apiCall's promise before starting the next.
Promise.mapSeries(data, apiCall).then(() => {
// all done here
}).catch(err => {
// error here
});
I'd recommend using something like async.each. Then you could do:
// async.each (from the `async` library) runs apicall for every element and
// invokes the final callback once every item has called cb (or on first
// error).
async.each(data, function(apiRequest, cb) {
apicall(apiRequest, cb);
}, function(err) {
// do something after all api requests have been made
});
// Fetch one url and insert the parsed body; `cb` is invoked exactly once —
// with an error on any failure, or via dbInsert on success. Calling cb on
// every path is what lets async.each know this item is finished.
function apicall(urlApi, cb){
request((urlApi), function(error, response, body){
if(error){
console.log("error");
cb(error);
} else if(response.statusCode == 200){
var myobj;
try {
myobj = JSON.parse(body);
} catch (parseError) {
// BUG FIX: malformed JSON used to throw out of the callback.
return cb(parseError);
}
dbInsert(myobj, cb);
} else {
// BUG FIX: the original never called cb on a non-200 response, so
// async.each would wait forever for this item.
cb(new Error("Unexpected status code: " + response.statusCode));
}
});
}
// Thin delegation; doDBInsert must call cb when the insert completes.
function dbInsert(obj, cb) {
doDBInsert(obj, cb);
}
When the dbInsert method completes, make sure the cb callback is called. If you need to do this in a series, look at async.eachSeries.

Function with async request in Node js

I have a loop, which iterates over array and in every iteration I have to do a http request, like this:
var httpsRequest = require('request')
// NOTE(review): illustrates the question's problem — `result` is assigned
// inside an asynchronous callback, so getData returns (undefined) before
// the assignment ever happens.
var getData = function(id) {
var result;
httpsRequest({
url: 'https://link/'+id,
}, (error, resp, body) => {
if(resp.statusCode == 200) {
result = JSON.parse(body);
}
});
//here I would like to wait for a result
// (getData implicitly returns undefined at this point)
}
var data = [];
// NOTE(review): for...in iterates keys, so `row` is an index string here —
// `row.ID` is presumably a mistake for rows[row].ID; verify against caller.
for(row in rows) {
// pushes undefined for every row; see the promise-based answers below
data.push(getData(row.ID))
}
resp.send(JSON.stringify(data)) //I send data back to the client
I cannot do the rest of the for loop in callback, I have to wait for a result which will be returned from a function getData and move to the next iteration.
How to handle this?
PS I know I could use callback function but what if after the last iteration program will send the response (last line above) before the last getData execution finish?
Regards
As stated in the answer by Johannes, the use of promises is a good idea. Since you're using request I'd like to propose an alternative method by using request-promise which is a promisified version of 'request' using bluebird.
The requests will in this case return a promise, and by using .map() you can create an array of promises that you can await using Promise.all(). When all promises are resolved, the response can be sent! This also differs from the use of .reduce(), which only will start to execute the next request as soon as the previous one is done. By using an array of promises, you can start all the requests at the same time.
var httpsRequest = require('request-promise')
// Returns a promise for the parsed JSON body of https://link/<id>.
// BUG FIX: the original passed a request-style callback to request-promise
// and returned JSON.parse(body) from it — but that return value is
// discarded, so the promise still resolved with the raw body string.
// Parse in .then() instead. Non-2xx responses reject automatically
// (request-promise's default `simple: true`), replacing the statusCode
// check, and a parse failure also rejects the chain (reaching .catch()).
var getData = function(id) {
return httpsRequest({
url: 'https://link/' + id,
}).then(function (body) {
return JSON.parse(body);
});
}
// One promise per row; every request is started immediately (in parallel).
var promises = rows.map((row) => getData(row.ID));
Promise.all(promises)
.then((results) => {
// All requests are done! `results` preserves the order in which the
// requests were issued.
resp.send(JSON.stringify(results));
})
.catch((error) => {
// Handle the error thrown in the 'getData' function.
});
If you need to wait for each iteration to be done before starting another one, you can use Promises and reduce. If you only want to wait for all requests to be finished it's better to use map + Promise.all as explained in Daniel Bs answer.
// Assumes `rows` is an array (the question iterates over one).
const results = [];
// Chain the calls so each getData starts only after the previous finished;
// do whatever you want with each result inside the inner .then().
rows.reduce(
(chain, row) =>
chain.then(() =>
getData(row.ID).then((result) => results.push(result))
),
Promise.resolve()
).then(() => resp.send(JSON.stringify(results)));
// Promise wrapper around httpsRequest: resolves with the parsed JSON body
// on a 200, resolves with undefined for any other status code, and rejects
// on transport errors.
const getData = (id) => new Promise((resolve, reject) => {
httpsRequest({
url: 'https://link/'+id,
}, (error, resp, body) => {
if (error) {
reject(error);
return;
}
if (resp.statusCode == 200) {
resolve(JSON.parse(body));
return;
}
// Non-200 passes through as an empty resolution — adjust here if you
// need different handling.
resolve();
});
});

Resources