Node.js async - build object from loop, then do something with object - node.js

I'm trying to run a function and once that function is complete, then run another function. The first function reads a CSV file, makes a GET request, and builds an object. The second function uses that newly created object to create a new CSV file.
The problem I'm having is that the new CSV file is being created prior to the GET requests finishing.
I'm using async.parallel to set the flow, but not able to get the logic right.
I'd love to know what I'm doing wrong and better understand how node thinks about these tasks.
// Require
var request = require('request');
var fs = require('fs');
var json2csv = require('json2csv');
var csv = require('csv');
var async = require('async');
// Params
// Script overview: reads file.csv, asks the Email Hunter API for e-mail addresses for up
// to five rows, and writes whatever has been collected to export.csv.
// This is the listing the question is about; the notes below mark where the ordering
// goes wrong.
var emailHunter_apiKey = '0000';
// Filled with {email: ...} objects by the request() callbacks below.
var emails = [];
// Column list handed to json2csv.
var fields = ['email'];
// Counts the rows visited by the for...in loop; rows are only processed while i < 5.
var i = 0;
// Start
async.parallel([
// Single task: after a 200 ms timer, parse the CSV and start one GET request per row.
function(callback){
setTimeout(function(){
var file = fs.readFileSync('file.csv');
csv.parse(file, {delimiter: ','}, function (err, data) {
// NOTE: `err` from csv.parse is not inspected.
for (var key in data) {
if (i < 5) {
if (data.hasOwnProperty(key)) {
var h = data[key];
// Column index 5 is used as the URL/domain of the row.
if (h[5] != '') {
var url = h[5];
// Each replace() call removes only the first occurrence of its pattern.
url = url.replace('//', '');
url = url.replace('www.', '');
request('https://api.emailhunter.co/v1/search?domain=' + url + '&api_key=' + emailHunter_apiKey + '', function (error, response, body) {
// Failed requests and non-200 responses are skipped without any message.
if (!error && response.statusCode == 200) {
var json = JSON.parse(body);
// Walks every top-level key of the response but only acts on 'emails'.
for (var subObj in json) {
if (json.hasOwnProperty(subObj) && subObj == 'emails') {
var emailObj = json[subObj];
// NOTE: this `var key` is the same function-scoped variable as the outer loop's `key`.
for (var key in emailObj) {
var email = {
'email': emailObj[key]['value']
};
emails.push(email);
}
}
}
}
});
}
}
}
// Incremented once per visited row, whether or not a request was started for it.
i++;
}
});
// NOTE: this runs right after csv.parse() has been called and is not inside any of the
// request() callbacks, so async is told the task is finished before those callbacks
// have pushed anything into `emails`.
callback(null, emails);
}, 200);
// Runs as soon as the timer has been scheduled, i.e. before the timer callback above.
console.log(emails);
}
],
// Final callback of async.parallel: converts `results` to CSV and writes export.csv.
// NOTE(review): `results` is presumably an array with one entry per task (here: the
// `emails` array), not the flat list of e-mail objects - confirm against the async docs.
function(err, results){
json2csv({data: results, fields: fields}, function (err, csv) {
// A conversion error is only logged; the write below is still attempted.
if (err) console.log(err);
fs.writeFile('export.csv', csv, function (err) {
if (err) throw err;
console.log('file saved');
});
});
console.log(results);
});

As laggingreflex mentioned, you're using async incorrectly.
First you should build an array of functions that you want to execute in parallel, and then use async to execute them.
Furthermore, your callback was getting executed immediately because csv.parse() is an async function. Therefore node fires it immediately and then executes callback(). You need to move the callback inside of parse().
Try this...
// Params
var emailHunter_apiKey = '0000';
var emails = [];
var fields = ['email'];
// Only the first five CSV rows are looked up.
var MAX_ROWS = 5;
var functionsToRunAsync = [];
var file = fs.readFileSync('file.csv');

/**
 * Builds one task for async.parallel that looks up the e-mail addresses of a domain.
 *
 * The domain is passed in as an argument so that every task keeps its own value
 * (a `var` declared inside the row loop would be shared by all tasks).
 * The task calls its callback exactly once, with the list of {email: ...} objects
 * it found. A failed lookup is logged and yields an empty list, so one bad domain
 * does not abort the whole export.
 *
 * @param {string} domain - domain to search for
 * @returns {function(function(Error|null, Array))} task suitable for async.parallel
 */
function makeLookupTask(domain) {
  return function (callback) {
    var requestUrl = 'https://api.emailhunter.co/v1/search?domain=' +
      encodeURIComponent(domain) + '&api_key=' + emailHunter_apiKey;
    request(requestUrl, function (error, response, body) {
      if (error || response.statusCode != 200) {
        console.log('lookup failed for ' + domain);
        return callback(null, []);
      }
      var found;
      try {
        found = JSON.parse(body).emails || {};
      } catch (parseError) {
        console.log('unreadable response for ' + domain);
        return callback(null, []);
      }
      var entries = Object.keys(found).map(function (key) {
        return {'email': found[key]['value']};
      });
      // Tell async this task is complete - once, after all entries are collected.
      callback(null, entries);
    });
  };
}

csv.parse(file, {delimiter: ','}, function (err, data) {
  if (err) {
    console.log(err);
    return;
  }
  data.slice(0, MAX_ROWS).forEach(function (row) {
    var url = row[5];
    if (!url) {
      return; // row has no domain column value
    }
    url = url.replace('//', '');
    url = url.replace('www.', '');
    // Add a new function to the array, to be executed later.
    functionsToRunAsync.push(makeLookupTask(url));
  });
  // Now that we have all of the functions in an array, we run them in parallel.
  async.parallel(functionsToRunAsync, function (err, results) {
    // All async functions are complete; `results` holds one list per task.
    if (err) {
      console.log(err);
      return;
    }
    results.forEach(function (entries) {
      emails = emails.concat(entries);
    });
    json2csv({data: emails, fields: fields}, function (err, csvText) {
      if (err) {
        console.log(err);
        return; // nothing sensible to write
      }
      fs.writeFile('export.csv', csvText, function (err) {
        if (err) throw err;
        console.log('file saved');
      });
    });
    console.log(emails);
  });
});

Related

Modify the value of a variable outside callback with the callback inside loop

I am new to Node.js and I am facing a problem: modifying the value of a variable declared outside a callback from inside that callback, when the callback is registered inside a loop.
I am coding an online-judge project; this is my function to check the output of a program against the answers from the database. I created a result object to store the number of correct test cases.
// Compares the output of the submitted executable with the expected outputs stored in
// the "sample" collection and reports {correct, total} through `callback`.
// This is the listing from the question: `result` is handed to `callback` before any
// 'data' handler has had a chance to update it.
// NOTE: the closing brace of the function itself is missing from this listing.
function compareResult(fileName, problem, timeLimit, callback) {
const cp = require('child_process');
const exePath = 'submit\\' + fileName + '.exe';
// A single child process is spawned up front and shared by every test case below.
const child = cp.spawn(exePath, ['--from=markdown', '--to=html'], {timeout: timeLimit});
MongoClient.connect(uri, function(err, db) {
if (err) throw err;
var dbo = db.db(dbName);
// Selects the documents of this problem whose is_eg field is the string "false".
var query = { id_problem: problem, is_eg: "false" };
var proj = { projection: {input: 1, output: 1} };
dbo.collection("sample").find(query, proj).toArray(function(err, arr) {
if (err) throw err;
if (arr != null) {
var result = {
correct: 0,
total: arr.length
};
for (const json of arr) {
const answer = json['output'];
child.stdin.write(json['input']);
// A new 'data' listener is added on the same child for every test case.
child.stdout.on('data', function(data) {
if (data == answer) {
result.correct += 1; // I want to modify result object here.
}
});
// stdin is ended inside the loop, i.e. already after the first test case.
child.stdin.end();
};
// Both statements run as soon as the loop has registered its listeners.
console.log(result);
callback(result);
}
});
});
I want to modify result object in that place. How will I do it?
/**
 * Runs the submitted executable once per stored test case and reports how many
 * runs printed exactly the expected output.
 *
 * Each test case gets its own child process: a process whose stdin has been
 * ended cannot be fed a second input, and sharing one stdout between cases makes
 * it impossible to tell which output belongs to which case. Cases run one after
 * another so that only one submission process exists at a time.
 *
 * @param {string} fileName - base name of the executable inside `submit\`
 * @param {*} problem - value matched against `id_problem` in the "sample" collection
 * @param {number} timeLimit - passed to spawn() as its `timeout` option
 * @param {function({correct: number, total: number})} callback - called exactly
 *   once, after every test case has finished (immediately when there are none)
 */
function compareResult(fileName, problem, timeLimit, callback) {
  const cp = require('child_process');
  const exePath = 'submit\\' + fileName + '.exe';

  // Runs the executable once with `input` on stdin and reports its complete
  // stdout as a string, or null when the process could not be run.
  function runOnce(input, onDone) {
    const child = cp.spawn(exePath, ['--from=markdown', '--to=html'], {timeout: timeLimit});
    let output = '';
    let settled = false;
    const settle = function(text) {
      if (settled) return; // 'error' and 'close' can both fire for one child
      settled = true;
      onDone(text);
    };
    child.stdout.setEncoding('utf8');
    child.stdout.on('data', function(data) {
      // Output may arrive in several chunks; compare only the complete text.
      output += data;
    });
    child.on('error', function() {
      settle(null);
    });
    child.on('close', function() {
      settle(output);
    });
    // A child that exits without reading its input makes the write fail;
    // that run is still judged by its output when 'close' fires.
    child.stdin.on('error', function() {});
    child.stdin.end(input);
  }

  MongoClient.connect(uri, function(err, db) {
    if (err) throw err;
    var dbo = db.db(dbName);
    var query = { id_problem: problem, is_eg: "false" };
    var proj = { projection: {input: 1, output: 1} };
    dbo.collection("sample").find(query, proj).toArray(function(err, arr) {
      // All documents are in memory now, so the connection can be released.
      db.close();
      if (err) throw err;
      var cases = arr || [];
      var result = {
        correct: 0,
        total: cases.length
      };
      (function runNext(index) {
        if (index === cases.length) {
          // Every test case has been judged (or there were none).
          callback(result);
          return;
        }
        runOnce(cases[index]['input'], function(output) {
          if (output !== null && output === String(cases[index]['output'])) {
            result.correct += 1;
          }
          runNext(index + 1);
        });
      })(0);
    });
  });
}

How to get code to execute in order in node.js

I am trying to finish my script, but for some reason I don't know, it refuses to execute in the order I put it in.
I've tried placing a 'wait' function between the JoinRequest update function and the following code, but when run, it acts as if the function call and wait function were the other way round, countering the point of the wait().
const Roblox = require('noblox.js')
var fs = require('fs');
var joinRequests = []
...
// Synchronous busy-wait: loops, creating Date objects, until the elapsed time reaches
// ms*1000 milliseconds. Despite the parameter name, the argument therefore acts as a
// number of seconds. Returns nothing.
function wait(ms) {
var d = new Date();
var d2 = null;
do { d2 = new Date(); }
// d2-d is the elapsed time in milliseconds.
while(d2-d < ms*1000);
};
...
// Fetches the join requests of group 4745601 and stores their usernames in the outer
// `joinRequests` variable once the promise returned by getJoinRequests resolves.
// NOTE: the function itself returns nothing, so a caller has no way of knowing when
// `joinRequests` has been updated.
function updateJReqs() {
Roblox.getJoinRequests(4745601).then((array) => {
var i;
var final = [];
for(i = 0; i < array.length; i++) {
final.push(array[i].username);
};
// NOTE: `final` is an array here, so this strict comparison with '' is never true.
if(final === '') {
final = '-None';
};
joinRequests = final
console.log('Updated join requests.')
});
}
// Every 400 ms, tries to read Request.txt. While reading fails it simply schedules
// itself again; once the file can be read it refreshes the join requests, fulfils the
// request, deletes the file and schedules itself again.
function check() {
setTimeout(() => {
// NOTE: `encoding = 'utf-8'` is an assignment to an undeclared variable named
// `encoding`; the assigned string is what gets passed as the second argument.
fs.readFile('Request.txt',encoding = 'utf-8', function(err, data) {
if (err) {
check();
} else {
// updateJReqs() returns immediately; `joinRequests` is only assigned later, inside
// its .then() handler, so the lines below still see the previous value.
updateJReqs(); //for some reason this function is executed alongside the below, not before it.
// Tried putting wait(x) in here.
console.log('Request received: ' + data)
var solution = joinRequests
console.log('Fuffiling request with ' + solution)
fufillRequest(solution)
fs.unlink('Request.txt', function(err) {
if(err) throw err;
});
// Starts the next polling round without waiting for the unlink above to finish.
check();
}
});
}, 400)
}
check();
The script is supposed to wait until a file is created (accomplished), update the list of join requests (accomplished) and then create a new file with the list of join requests in(not accomplished).
If I understand your code correctly, you are working with asynchronous code: you need to return a promise from updateJReqs and wait for it in check. You also need an exit condition in check, because as written it recurses forever.
/**
 * Refreshes the outer `joinRequests` variable with the usernames of everyone who
 * has asked to join group 4745601.
 *
 * `joinRequests` becomes the array of usernames, or the string '-None' when
 * there are no pending requests.
 *
 * @returns {Promise<void>} settles once `joinRequests` has been updated; rejects
 *   when the lookup itself fails
 */
function updateJReqs() {
  // Return the library's promise directly: wrapping it in `new Promise` would
  // leave the caller waiting forever if getJoinRequests rejects.
  return Roblox.getJoinRequests(4745601).then((array) => {
    var final = array.map((entry) => entry.username);
    if (final.length === 0) {
      final = '-None';
    }
    joinRequests = final;
    console.log('Updated join requests.');
  });
}
/**
 * Polls every 400 ms until Request.txt can be read, then refreshes the join
 * requests, fulfils the request with them and deletes the file.
 *
 * The timer and fs callbacks are not async functions, so `await` cannot be used
 * inside them; the promise is settled explicitly instead.
 *
 * @returns {Promise<string>} resolves with 'Success' after one request has been
 *   handled and Request.txt has been removed; rejects when updating the join
 *   requests or deleting the file fails
 */
function check() {
  return new Promise((resolve, reject) => {
    const poll = () => {
      setTimeout(() => {
        fs.readFile('Request.txt', 'utf-8', (err, data) => {
          if (err) {
            // The file could not be read (yet): try again on the next tick of the timer.
            poll();
            return;
          }
          updateJReqs()
            .then(() => {
              console.log('Request received: ' + data);
              var solution = joinRequests;
              console.log('Fuffiling request with ' + solution);
              fufillRequest(solution);
              fs.unlink('Request.txt', (unlinkErr) => {
                if (unlinkErr) {
                  reject(unlinkErr);
                  return;
                }
                // Exit point: one request handled, nothing is rescheduled.
                resolve('Success');
              });
            })
            .catch(reject);
        });
      }, 400);
    };
    poll();
  });
}
check()
  .then(res => console.log(res))
  .catch(err => console.error(err));

Run two AWS methods in the same function of AWS Lambda code

I am creating an AWS Lambda script to retrieve information about my AWS account. At present I am having an issue in function 3: I want to retrieve the users that have a policy, so I call the method that lists the users and then, for each user, call the method that retrieves that user's attached policies, collecting the results in an array that is passed to the callback. The issue is that the array passed to the callback is empty. I have tested with console.log(userpolicies); inside the second method's callback the array is populated, but outside of it userpolicies is empty.
I understand that the issue is because I have two AWS methods in the same function, but I don't know how to solve it.
Do you have any suggestions?
Thanks in advance.
var AWS = require('aws-sdk');
var async = require('async');
// Lambda handler: runs three IAM queries through async.series and passes the outcome
// to the Lambda `callback`, which is used directly as the final callback of the series.
var stackoverflow = (event, context, callback) => {
var iam = new AWS.IAM();
async.series([
//function1
// Lists the IAM groups and forwards either the error or the data.
function(callback) {
iam.listGroups(function(err, data) {
if (err) {
callback(err);
} else
callback(null, data);
});
},
//function2
// Lists the IAM policies and forwards either the error or the data.
function(callback) {
iam.listPolicies(function(err, data) {
if (err) {
callback(err);
} else
callback(null, data);
});
},
//function3
// Lists the users and starts one listAttachedUserPolicies call per user; the policy
// names are pushed into `userpolicies` from inside those inner callbacks.
function(callback) {
var userpolicies = [];
iam.listUsers(function(err,data) {
// An error is only logged; the callback further down is still called with the
// (empty) array.
if (err)
console.log(err);
else
for (var i = 0; i < data.Users.length; i++) {
var params = {UserName: data.Users[i].UserName};
iam.listAttachedUserPolicies(params, function(err1, data1){
if (err1)
console.log(err1);
else
for (var j = 0; j < data1.AttachedPolicies.length; j ++) {
// Builds a JSON string by hand and parses it back into an object.
var infopolicies = '{"PolicyName":"'+data1.AttachedPolicies[j].PolicyName+'"}';
var infopoliciesjson = JSON.parse(infopolicies);
userpolicies.push(infopoliciesjson);
}
});
}
// NOTE: this statement sits after the loop and outside the listAttachedUserPolicies
// callbacks, so it does not wait for them to push their results.
callback(null, userpolicies);
});
}
], callback);
};
exports.handler = stackoverflow;

Using npm async to run in parallel async methods and return a unified response

I have 2 async methods that can run independently one from each other. I would like to call a callback once both are finished. I have tried using async.parallel() (npm) but this seems to be for non async methods. How can I implement this?
Here is my async.parallel call(); note that asyncTasks is my function array, where the functions are async.
// Runs the functions in asyncTasks and wraps what the final callback receives in a
// RequestResponse for the outer `callback`.
// NOTE(review): the only parameter is named resultFinal, but async's final callback is
// conventionally invoked as (err, results) - confirm which value actually arrives here.
async.parallel(asyncTasks, function(resultFinal){
console.log("--------->>>>> message: "+JSON.stringify(resultFinal));
console.log("");
callback(new RequestResponse(true, resultFinal));
});
In short, what I really want is a way to execute multiple async methods in parallel and consider that method finished when the callback provided for that function is triggered.
UPDATE
for a better understanding, I've included the two functions I am using
getGroups
// Task for async: loads the groups the user is a member of, the members of those groups
// and the events of those groups through four nested queries, pushes each result set
// onto the outer `responseData` and reports through callback_async_1.
// `userId`, `responseData`, `datebasePath`, `pg` and `squel` come from the enclosing
// scope, which is not part of this listing.
// NOTE(review): every callback_async_1 call passes responseData as the first argument;
// async-style callbacks conventionally expect an error there - confirm.
var getGroups = function (callback_async_1) { //get groups + members
// NOTE: the `err` argument of the pg.connect callback is never inspected.
pg.connect(datebasePath, function (err, client, done) {
var s = squel.select();
s.from("groups_members");
s.where("user_id = ?", userId);
console.log("query: " + s.toString());
client.query(s.toString(), function (err, result) { //get groups ids in which i am a member
if (err) {
console.error("error...1 " + err);
callback_async_1(responseData);
} else {
// console.log("init -- get from group_members " + JSON.stringify(result.rows));
var groupIds = [];
if (result.rows.length > 0) {
for (var i = 0; i < result.rows.length; i++) {
groupIds.push(result.rows[i].group_id); // create group ids list
}
// console.log("group ids : " + groupIds);
}
// NOTE: there is no return after this call, so the queries below still run with an
// empty id list and callback_async_1 can be called a second time.
if (groupIds.length === 0) {
callback_async_1(responseData);
}
var s = squel.select();
s.from("groups");
s.where("id IN ?", groupIds);
client.query(s.toString(), function (err, result2) { // retrieve all the groups in which i take part
if (err) {
console.error("error...2 " + err);
callback_async_1(responseData);
return;
} else {
// var groupIds2 = [];
// console.log("init -- get from groups " + JSON.stringify(result2.rows));
var groups = [];
// var groups_members = [];
for (var i = 0; i < result2.rows.length; i++) {
groups.push(result2.rows[i]); // adding group info to list
// var groupId = result2.rows[i].id;
// groupIds2.push(groupId);
}
// console.log("");
//console.log(" ------->>>> " + JSON.stringify(groups));
// console.log("");
// responseData.groups = groups;
responseData.push({ //pushing groups into response
"groups": groups
});
var s = squel.select();
s.from("groups_members");
s.where("group_id IN ?", groupIds);
// NOTE: unlike the other queries, the `err` of this one is not checked before
// result3.rows is read.
client.query(s.toString(), function (err, result3) { // get all the members in my groups
//console.log("get from group_members --- " + JSON.stringify(result3.rows));
var groupMembers = [];
for (var i = 0; i < result3.rows.length; i++) {
groupMembers.push({
groupMember: result3.rows[i] // pushing all the group members
});
}
//console.log("");
// console.log(" ------->>>> " + JSON.stringify(groupMembers));
// console.log("");
responseData.push({
"groupsMembers": groupMembers
});
// console.log("resulting json till now; Groups : " + JSON.stringify(responseData));
//fetching now events
var s = squel.select();
s.from("events");
s.where("group_id IN ?", groupIds);
client.query(s.toString(), function (err, result4) { //selecting all events that have my groups
if (err) {
console.error("error...3 " + err);
callback_async_1(responseData);
return;
} else {
var events = [];
for (var i = 0; i < result4.rows.length; i++) {
events.push(result4.rows[i]);
}
// responseData.events = events;
responseData.push({
"events": events
});
//responseData.push (events);
// Success path: all three result sets have been pushed onto responseData.
callback_async_1(responseData);
// asyncTasks[1](callback);
}
});
});
}
});
}
});
// NOTE: done() is invoked here, directly after the first client.query(...) call has
// been made, not from inside any of the query callbacks.
done();
});
};
getRegisteredContacts
// Task for async: selects the users whose phone_number is in `arrayOfContacts`, pushes
// them onto the outer `responseData` as "registeredContacts" (only when at least one row
// was found) and reports through callback_async_2.
// `arrayOfContacts`, `responseData` and `datebasePath` come from the enclosing scope,
// which is not part of this listing.
var getRegisteredContacts = function (callback_async_2) { // get registered contacts
// NOTE: the `err` argument of the pg.connect callback is never inspected.
pg.connect(datebasePath, function (err, client, done) {
//get contacts that are registered
var s = squel.select();
s.from("users");
s.where("phone_number IN ?", arrayOfContacts);
client.query(s.toString(), function (err, result5) { // retriving registered contacts -- should be run with async parallel, it does not depend on the other calls
if (err) {
console.error(err);
// NOTE(review): responseData is passed as the first argument, where async-style
// callbacks conventionally expect an error - confirm.
callback_async_2(responseData);
} else {
if (result5.rows.length > 0) {
var contacts = [];
for (var i = 0; i < result5.rows.length; i++) {
contacts.push(result5.rows[i]);
}
responseData.push({
"registeredContacts": contacts
});
}
//console.log("");
//console.log(" ------->>>> " + JSON.stringify(events));
// console.log("");
// console.log("final ---> " + JSON.stringify(responseData));
callback_async_2(responseData);
}
});
// NOTE: done() is invoked here, directly after client.query(...) has been called,
// not from inside the query callback.
done();
});
};
You need your task function to take a parameter which you then call when the task is done
// Smallest possible task: do the (synchronous) work, then signal completion by calling
// the callback that the task runner passed in, with null meaning "no error".
var task = (callback) => {
  console.log('Task');
  callback(null);
};
When you are then doing something async within the task then your task would look like
// Asynchronous task: the callback is only called from inside the request's own
// callback, i.e. once the asynchronous work has actually finished.
// A failed request is reported to the task runner through the callback's first argument.
var task = function(callback){
  console.log('Task');
  request.get('http://www.google.com', function (error, response, body){
    if (error) {
      // Without this check the statusCode access below would throw, because no
      // response object is supplied for a failed request.
      callback(error);
      return;
    }
    console.log('Task - ' + response.statusCode);
    callback(null);
  });
};
Example
var async = require('async');
var request = require('request');

// Synchronous task: reports completion right away.
var task1 = function(callback){
  console.log('Task 1');
  callback(null);
};

// Asynchronous task: reports completion (or the failure) from inside the request callback.
var task2 = function(callback){
  console.log('Task 2');
  request.get('http://www.google.com', function (error, response, body){
    if (error) {
      // No response object is supplied for a failed request; pass the error on instead.
      callback(error);
      return;
    }
    console.log('Task 2 - ' + response.statusCode);
    callback(null);
  });
};

var asyncTasks = [task1, task2];

// The final callback runs once every task has called its callback, or as soon as one of
// them has reported an error.
async.parallel(asyncTasks, function(err, result){
  if (err) {
    console.error(err);
    return;
  }
  console.log('--DONE--');
});
Outputs
Task 1
Task 2
Task 2 - 200
--DONE--
Based on your new code listing the most obvious thing is done() is called too early for both of your tasks. It needs to be like
// Sketch of the corrected task: done() is called from inside the query callback, and
// errors are handed to the task callback as its first argument.
// The two empty branches are where the original result handling goes.
var getRegisteredContacts = function (callback_async_2) {
  pg.connect(datebasePath, function (err, client, done) {
    if (err) {
      // Connecting failed, so there is no query to run; report the error.
      callback_async_2(err);
      return;
    }
    var s = squel.select();
    s.from("users");
    s.where("phone_number IN ?", arrayOfContacts);
    client.query(s.toString(), function (err, result5) {
      done(); // <---- done() to be here
      if (err) {
        //
        callback_async_2(err);
        return;
      } else {
        //
      }
      callback_async_2();
    });
  });
};
You should also lint your code. If you had, you would have noticed that you never check the err argument of the pg.connect callback (linting also keeps the code easier to read).

NodeJS: Functions proceeding before previous function returns

I'm getting started with NodeJS with a script (not a webapp), and this is baffling to me. I have the following simplified functions:
var request = require('request');
// Requests the directory page for `name` and returns the `answers` array.
// NOTE: the push happens inside the request callback, whereas `return answers` runs as
// soon as request() has been called, so the caller receives the array before anything
// has been pushed into it.
function first_func(name) {
console.log('first_func(' + name + ')');
var url = 'http://...' + name;
var answers = new Array();
request(url, function(error, resp, html) {
...
answers.push(x);
});
return answers;
}
// Starts one request per key of `answers` and returns the `results` array.
// NOTE: as in first_func, `results` is returned right after the requests have been
// started; the pushes happen later, inside the request callbacks.
function second_func(answers) {
var results = new Array();
// `a` is the key (index) of each entry, not the entry itself.
for (var a in answers) {
var url = 'http://...' + a;
request(url, function(error, resp, html) {
...
results.push(x);
});
}
return results;
}
first_func() scrapes a directory page, and second_func() scrapes the particular page for each entry. The script runs like this at the end of the script:
var names = ['a', 'b', ...];
// Entry point: for every name, calls first_func and then second_func for each returned
// answer, using the return values directly.
function main() {
// `n` is the index into `names`.
for (var n in names) {
var ans = first_func(names[n]);
console.log('answers: ' + ans);
// Iterates over whatever first_func has returned at this point.
for (var a in ans) {
second_func(ans[a]);
}
}
}
main();
For some reason NodeJS is doing first_func() and second_func() in parallel. Why isn't it waiting for first_func() to complete before running second_func()? The output is:
answers: undefined
first_func(a)
first_func(b)
...
Why is NodeJS jumping into that inner for loop in main() before waiting for first_func() to return?
You can use the async module to work around this. It would look something like the following:
var request = require('request');
var async = require('async');
/**
 * Scrapes the directory page for `name` and hands the collected answers to `done`
 * once the request has finished.
 *
 * @param {string} name - directory entry to look up
 * @param {function(Error|null, Array=)} done - Node-style callback: the error (or
 *   null) first, then the list of answers
 */
function first_func(name, done) {
  console.log('first_func(' + name + ')');
  var url = 'http://...' + name;
  request(url, function(error, resp, html) {
    if (error) {
      return done(error);
    }
    var answers = [];
    // ... scrape `html` here and push every entry onto `answers` ...
    done(null, answers);
  });
}
/**
 * Scrapes the page of every entry in `answers` and hands the collected results to
 * `done` once all requests have finished.
 *
 * A page that cannot be fetched is logged and skipped, so the remaining pages are
 * still processed.
 *
 * @param {Array} answers - entries returned by first_func
 * @param {function(Array)} done - called once with the list of results
 */
function second_func(answers, done) {
  var results = [];
  async.each(answers, function(answer, next) {
    // Build the URL from the current entry.
    var url = 'http://...' + answer;
    request(url, function(error, resp, html) {
      if (error) {
        console.error('request failed for ' + url + ': ' + error);
        return next();
      }
      // ... scrape `html` here; `x` stands for the value extracted from the page ...
      var x = html;
      results.push(x);
      next();
    });
  }, function(err) {
    done(results);
  });
}
var names = ['a', 'b' /* , ... */];

/**
 * Entry point: for every name, runs first_func and then feeds its answers to
 * second_func. async.each is told an item is finished only after second_func has
 * delivered its results, or immediately when first_func reports an error.
 */
function main() {
  async.each(names, function(name, next) {
    first_func(name, function(err, answers) {
      if (err) {
        return next(err);
      }
      second_func(answers, function(results) {
        // do something with results
        next();
      });
    });
  }, function(err) {
    if (err) {
      console.error(err);
      return;
    }
    // done with everything
  });
}
main();
You basically call a function when you're done getting the values you need instead of returning them. The async module will let you iterate over arrays in an async friendly way.

Resources