Exit Node Process After Successful fs.appendFile - node.js

I'm having trouble creating processes in parallel with Node while exiting them when they're done with a simple HTTP GET request. I've noticed that if I fire a process.exit() inside of a callback for appendFile, some files will not be created or appended in a Node cluster setup. Ideally, the way below is how I would like to fire events, since the process is exited as soon as the job is done:
var rp = require("request-promise"),
    config = require("./config"),
    cluster = require("cluster"),
    os = require("os"),
    fs = require("fs");
var keywordArray = [
'keyword1',
'keyword2',
...
];
if (cluster.isMaster) {
var numCPUs = os.cpus().length;
var clusterDivision = Math.ceil(keywordArray.length/numCPUs);
// Reset the json if previously set
keywordArray.forEach(function(arrayItem) {
fs.unlink(config.dataDirectory + arrayItem + '.json', function(err) {
if (err) console.error(err);
console.log('successfully unlinked ' + arrayItem + '.json from ' + config.dataDirectory);
});
});
// Create a worker for each CPU
// Separate the array out evenly for each worker
for (var j=1;j<=numCPUs;j++) {
var tempArray = [];
var removed = keywordArray.splice(0, clusterDivision);
if (removed.length > 0) {
// The array contains something so let's do something with the keyword
console.log('creating a worker');
cluster.fork().send(removed);
} else {
// We don't need a cluster here
}
}
process.on('exit', function() {
console.log('exited');
});
} else if (cluster.isWorker) {
// Code to run if we're in a worker process
// Send the object we created above from variables so they're available to the workers
process.on('message', function(seperatedArrayItem) {
seperatedArrayItem.forEach(function(arrayItem) {
function radarRequest(err, response, body) {
var responseBody = JSON.parse(body);
console.log(arrayItem);
fs.appendFileSync(config.dataDirectory + arrayItem + '.json', JSON.stringify(responseBody.results, null, '\t'), function (err) {
if (err) console.error(err);
console.log('success writing file');
});
}
rp({
url: config.radarSearchURI +
'?key='+ config.apiKey +
'&location=' + config.latitude + ',' + config.longitude +
'&radius=' + config.searchRadius +
'&keyword=' + arrayItem, headers: config.headers
}, radarRequest);
});
setTimeout(function() {
process.exit(0);
}, 5000);
});
}
The only way I can make sure all files are properly appended is by using a Timeout, which is exactly what I don't want to - and shouldn't - do. Is there another way I can ensure an appendFile has happened successfully and then kill the node process? Here's a way that works (assuming the process doesn't take longer than 5 seconds):
process.on('message', function(seperatedArrayItem) {
seperatedArrayItem.forEach(function(arrayItem) {
function radarRequest(err, response, body) {
var responseBody = JSON.parse(body);
console.log(arrayItem);
fs.appendFile(config.dataDirectory + arrayItem + '.json', JSON.stringify(responseBody.results, null, '\t'), function (err) {
if (err) console.error(err);
console.log('success writing file');
});
}
rp({
url: config.radarSearchURI +
'?key='+ config.apiKey +
'&location=' + config.latitude + ',' + config.longitude +
'&radius=' + config.searchRadius +
'&keyword=' + arrayItem, headers: config.headers
}, radarRequest);
});
setTimeout(function() {
process.exit(0);
}, 5000);
});

You can use an async flow control module like async to kill the process after all files are written. I'd also recommend cluster.worker.disconnect() so that the node process will simply exit gracefully, but that isn't a requirement.
async.forEach(seperatedArrayItem, function(item, done){
// append file and call 'done' when it is written.
}, function(){
// Will be called when all item 'done' functions have been called.
cluster.worker.disconnect();
});
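For example, a minimal sketch of the worker's message handler along these lines, assuming the same rp, config, fs, and cluster setup as in the question (and that async has been required):

// assumes: var async = require('async');
process.on('message', function(seperatedArrayItem) {
    async.forEach(seperatedArrayItem, function(arrayItem, done) {
        rp({
            url: config.radarSearchURI +
                '?key=' + config.apiKey +
                '&location=' + config.latitude + ',' + config.longitude +
                '&radius=' + config.searchRadius +
                '&keyword=' + arrayItem, headers: config.headers
        }, function(err, response, body) {
            if (err) return done(err);
            // append the file and only then report this item as finished
            fs.appendFile(config.dataDirectory + arrayItem + '.json',
                JSON.stringify(JSON.parse(body).results, null, '\t'), done);
        });
    }, function(err) {
        if (err) console.error(err);
        // every append has completed (or failed), so the worker can exit gracefully
        cluster.worker.disconnect();
    });
});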

Node fs.appendFile( ... ) is an asynchronous function, so it expects us to pass a callback so we know when it has finished its main operation and whether an error occurred.
This means we need to call Node process.exit( ... ) in the scope of the provided callback. I've written this code to test:
'use strict';
var fs = require('fs');
function jsonValue(obj) {
return JSON.stringify(obj, null, '\t');
}
fs.appendFile('file.json', jsonValue(['t', 'e', 's', 't']), function(error) {
if (error) {
throw error;
}
console.log('success writing file'); // no error, so log...
process.exit(); // and exit right now
console.log('exited?'); // this will not be printed
});
Well, it worked as defined.
Another way that works is to use the synchronous version of fs.appendFile( ... ) and call process.exit() sequentially:
fs.appendFileSync('file.json', jsonValue(['t', 'e', 's', 't']));
console.log('success writing file'); // no error (I hope so =), so log...
process.exit(); // and exit right now
console.log('exited?'); // this will not be printed
This is clean code and works, but you lose the robustness and convenience gained with the callback...
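If robustness is the concern, a try/catch around the synchronous call recovers the error handling. A minimal sketch, reusing the jsonValue helper from above:

'use strict';
var fs = require('fs');

try {
    // appendFileSync throws on failure instead of passing an error to a callback
    fs.appendFileSync('file.json', jsonValue(['t', 'e', 's', 't']));
    console.log('success writing file');
    process.exit(0); // exit cleanly once the write has completed
} catch (error) {
    console.error(error);
    process.exit(1); // signal failure with a non-zero exit code
}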

Related

Node.js function not running in order. Error: Unhandled stream error in pipe

I updated the function to create the CSV file but now I'm getting an error:
In upload function
internal/streams/legacy.js:57
throw er; // Unhandled stream error in pipe.
^
Error: ENOENT: no such file or directory, open 'C:\Users\shiv\WebstormProjects\slackAPIProject\billingData\CSV\1548963844106output.csv'
var csvFilePath = '';
var JSONFilePath = '';
function sendBillingData(){
var message = '';
axios.get(url, {
params: {
token: myToken
}
}).then(function (response) {
message = response.data;
fields = billingDataFields;
// saveFiles(message, fields, 'billingData/');
saveFilesNew(message, fields, 'billingData/');
var file = fs.createReadStream(__dirname + '/' + csvFilePath); // <--make sure this path is correct
console.log(__dirname + '/' + csvFilePath);
uploadFile(file);
})
.catch(function (error) {
console.log(error);
});
}
The saveFilesNew function is:
function saveFilesNew(message, options, folder){
try {
const passedData = message;
var relevantData='';
if (folder == 'accessLogs/'){
const loginsJSON = message.logins;
relevantData = loginsJSON;
console.log(loginsJSON);
}
if(folder == 'billingData/'){
relevantData = passedData.members;
const profile = passedData.members[0].profile;
}
//Save JSON to the output folder
var date = Date.now();
var directoryPath = folder + 'JSON/' + date + "output";
JSONFilePath = directoryPath + '.json';
fs.writeFileSync(JSONFilePath, JSON.stringify(message, null, 4), function(err) {
if (err) {
console.log(err);
}
});
//parse JSON onto the CSV
const json2csvParser = new Json2csvParser({ fields });
const csv = json2csvParser.parse(relevantData);
// console.log(csv);
//function to process the CSV onto the file
var directoryPath = folder + 'CSV/' + date + "output";
csvFilePath = directoryPath + '.csv';
let data = [];
let columns = {
real_name: 'real_name',
display_name: 'display_name',
email: 'email',
account_type: 'account_type'
};
var id = passedData.members[0].real_name;
console.log(id);
console.log("messageLength is" +Object.keys(message.members).length);
for (var i = 0; i < Object.keys(message.members).length; i++) {
console.log("value of i is" + i);
var display_name = passedData.members[i].profile.display_name;
var real_name = passedData.members[i].profile.real_name_normalized;
var email = passedData.members[i].profile.email;
var account_type = 'undefined';
console.log("name: " + real_name);
if(passedData.members[i].is_owner){
account_type = 'Org Owner';
}
else if(passedData.members[i].is_admin){
account_type = 'Org Admin';
}
else if(passedData.members[i].is_bot){
account_type = 'Bot'
}
else account_type = 'User';
data.push([real_name, display_name, email, account_type]);
}
console.log(data);
stringify(data, { header: true, columns: columns }, (err, output) => {
if (err) throw err;
fs.writeFileSync(csvFilePath, output, function(err) {
console.log(output);
if (err) {
console.log(err);
}
console.log('my.csv saved.');
});
});
} catch (err) {
console.error(err);
}
}
The upload file function is:
function uploadFile(file){
console.log("In upload function");
const form = new FormData();
form.append('token', botToken);
form.append('channels', 'testing');
form.append('file', file);
axios.post('https://slack.com/api/files.upload', form, {
headers: form.getHeaders()
}).then(function (response) {
var serverMessage = response.data;
console.log(serverMessage);
});
}
So I think the error is being caused by Node trying to upload the file before it has been created. I feel like this has something to do with the asynchronous nature of Node.js, but I fail to comprehend how to rectify the code. Please let me know how to correct this, and mention any improvements to the code structure/design too.
Thanks!
You don't wait for the callback provided to stringify to be executed, and that's where you create the file. (Assuming this stringify function really does accept a callback.)
Using callbacks (you can make this cleaner with promises and these neat async/await controls, but let's just stick to callbacks here), it should be more like:
function sendBillingData() {
...
// this callback we'll use to know when the file writing is done, and to get the file path
saveFilesNew(message, fields, 'billingData/', function(err, csvFilePathArgument) {
// this we will execute when saveFilesNew calls it, not when saveFilesNew returns, see below
uploadFile(fs.createReadStream(__dirname + '/' + csvFilePathArgument))
});
}
// let's name this callback... "callback".
function saveFilesNew(message, options, folder, callback) {
...
var csvFilePath = ...; // local variable only instead of your global
...
stringify(data, { header: true, columns: columns }, (err, output) => {
if (err) throw err; // or return callback(err);
fs.writeFile(csvFilePath, output, function(err) { // NOT writeFileSync, or no callback needed
console.log(output);
if (err) {
console.log(err);
// callback(err); may be a useful approach for error-handling at a higher level
}
console.log('my.csv saved.'); // yes, NOW the CSV is saved, not before this executes! Hence:
callback(null, csvFilePath); // no error, clean process, pass the file path
});
});
console.log("This line is executed before stringify's callback is called!");
return; // implicitly, yes, yet still synchronous and that's why your version crashes
}
Using callbacks that are called only when the expected events happen (a file is done writing, a buffer/string is done transforming...) allows JS to keep executing other code in the meantime. And it does keep executing code, so when you need the result of an async operation, you have to tell JS to wait for it to be done before executing the piece of code that depends on it.
Also, since you can pass data when calling back (it's just a function), here I could avoid relying on a global csvFilePath. Relying on higher-scope variables makes things monolithic; for example, you could not move saveFilesNew into a dedicated file where you keep your toolkit of file-related functions.
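For instance, with the callback in place, saveFilesNew could live in a hypothetical toolkit module (the file and module names here are made up for illustration):

// fileToolkit.js (hypothetical module name)
module.exports.saveFilesNew = saveFilesNew;

// app.js
var saveFilesNew = require('./fileToolkit').saveFilesNew;
saveFilesNew(message, fields, 'billingData/', function (err, csvFilePath) {
    // the caller receives the path through the callback, with no shared global state
});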
Finally, if your global process is like:
function aDayAtTheOffice() {
sendBillingData();
getCoffee();
}
then you don't need to wait for the billing data to be processed before starting making coffee. However, if your boss told you that you could NOT get a coffee until the billing data was settled, then your process would look like:
function aDayAtTheOffice() {
sendBillingData(function (err) {
// if (err) let's do nothing here: you wanted a coffee anyway, right?
getCoffee();
});
}
(Note that callbacks having potential error as first arg and data as second arg is a convention, nothing mandatory.)
IMHO you should read about scope (the argument callback could be accessed at a time when the call to saveFilesNew was already done and forgotten!), and about the asynchronous nature of No... JavaScript. ;) (Sorry, probably not the best links, but they contain the meaningful keywords, and then Google is your buddy, your friend, your Big Brother.)
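For completeness, here's a rough sketch of the same flow with promises and async/await; the promisified stringify and the fs.promises API (Node 10+) are assumptions on my part, not something from the original code:

const fs = require('fs');
const { promisify } = require('util');
const stringifyAsync = promisify(stringify); // assumes stringify follows the (err, output) convention

async function sendBillingData() {
    const response = await axios.get(url, { params: { token: myToken } });
    // saveFilesNew resolves only once the CSV exists on disk
    const csvFilePath = await saveFilesNew(response.data, billingDataFields, 'billingData/');
    uploadFile(fs.createReadStream(__dirname + '/' + csvFilePath));
}

async function saveFilesNew(message, options, folder) {
    // ... build data, columns and csvFilePath as in the original function ...
    const output = await stringifyAsync(data, { header: true, columns: columns });
    await fs.promises.writeFile(csvFilePath, output);
    console.log('my.csv saved.');
    return csvFilePath;
}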

Node child process exec async only called once

I have an array with some IDs, like a = [abc, cde, efg]. I pass this array, and a second one containing the names that fit the IDs, to a child process in Node.
I want to wait until the child process has finished, and process the next array member after that.
The code to achieve it is (with the suggested Edit):
function downloadSentinel(promObj) {
return new Promise((resolve,reject) => {
function makeRequest(url, i, callback) {
url = promObj.ID;
name = promObj.Name;
var sys = require('util'),
exec = require('child_process').exec,
child;
var directory = __dirname.substring(0, __dirname.indexOf("\\app_api"));
console.log(directory);
child = exec(directory + '\\downloadProducts.sh' + promObj.ID[i] + ' ' + promObj.Name[i], function (error, stdout, stderr) {
child.on("error", function (error) {
console.log(error);
})
child.stdout.on('data', function (data) {
console.log(data.toString());
});
child.on('exit', function(exit) {
console.log(exit);
callback();
})
})
}
async.eachOfLimit(promObj.requestURLS, 2, makeRequest, function (err) {
if (err) reject(promObj)
else {
resolve(promObj);
}
});
});
}
I am using npm async to control the concurrency flow because I want to limit the curl requests I make inside the shell script. The shell script works without an error. Now the script is only called twice because of the async.eachOfLimit limit, and then the remaining array IDs are not processed anymore.
EDIT
This is the code I tried last, but then all possible URLs are invoked instead of only 2. I found this approach in another Stack Overflow question. I also tried async.timesLimit.
function downloadSentinel(promObj,req,res) {
async.eachOfLimit(promObj.requestURLS, 2,function (value,i,callback) {
console.log('I am here ' + i + 'times');
url = promObj.requestURLS;
name = promObj.Name;
console.log(promObj.requestURLS);
var sys = require('util'),
exec = require('child_process').exec,
child;
var directory = __dirname.substring(0, __dirname.indexOf("\\app_api"));
console.log(directory);
console.log("executing:", directory + '\\downloadProducts.sh ' + promObj.requestURLS[i] + ' ' + promObj.Name[i]);
child = exec(directory + '\\downloadProducts.sh' + ' ' + promObj.requestURLS[i] + ' ' + promObj.Name[i], function (error, stdout, stderr) {
if (error != null){
console.log(error);
}
// this task is resolved
return callback(error, stdout);
});
}, function (err) {
if (err) console.log('error')
else {
console.log('Done');
}
});
}
What have I missed? Thanks in advance.
exec is asynchronous; it does not wait for the launched process to finish.
Move the call to callback() into the child.on('exit', ...) handler, or use execSync.
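Applied to the code in the question, a minimal sketch of makeRequest with the callback moved into the exit handler (still defined inside downloadSentinel so it can close over promObj):

function makeRequest(url, i, callback) {
    var exec = require('child_process').exec;
    var directory = __dirname.substring(0, __dirname.indexOf("\\app_api"));
    var child = exec(directory + '\\downloadProducts.sh ' +
        promObj.ID[i] + ' ' + promObj.Name[i]);
    child.stdout.on('data', function (data) {
        console.log(data.toString());
    });
    child.on('error', callback); // spawn failures go straight to async
    child.on('exit', function (code) {
        // only now has the download really finished, so the next task may start
        callback(code === 0 ? null : new Error('exit code ' + code));
    });
}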

Nodejs, How to copy several files in nodejs without crashing

I am trying to copy several files with Node.js.
Here is an example of what I'm trying to do:
var request = require('request');
var fs = require('fs');
var photos = [
    { 'source': 'http://xxxx.com/im1', 'name': 'name1' },
    { 'source': 'http://xxxx.com/im12', 'name': 'name2' },
    // ...
];
for (var i = 0; i < photos.length; i++) {
    request(photos[i].source).pipe(fs.createWriteStream(photos[i].name));
}
After maybe 1,000 calls I get a socket hang up error.
Following @Timothy Strimple's advice, I decided to use the async module.
My code is now something like this:
async.whilst(function () { return !stop; },
function (callback) {
console.log("get next 20 image");
JM.api('/' + album.id + '/photos', { after: next }, function (resf) {
if (!resf || resf.error) {
console.log(!resf ? 'error occurred' : resf.error);
}
console.log("albums" + album.id + " " + resf.data.length + " dir" + dir);
async.eachSeries(resf.data, function (photo, done) {
request(photo.source).pipe(fs.createWriteStream(dir + "/" +photo.name));
console.log("copy of image " + img_basename);
}, function (err) {
if (err) {
console.log('An images failed to copy');
} else {
console.log('All 20 image have been copied successfully');
}
if (resf.paging && resf.paging.cursors) {
console.log("rest of the album to come");
next = resf.paging.cursors.after;
setTimeout(function () { callback(); }, 5000);
}
else {
console.log("end of the album");
stop = true;
setTimeout(function () { callback(); }, 5000);
}
});
});
},
function (err) {
if (err) {
console.log('An images failed to process');
albumcallback();
} else {
console.log('All images in this group have been processed successfully');
albumcallback();
}
}
);// end while
I'm still having a crash after maybe 100 files copied. I'm sure async.whilst and async.eachSeries are working, because my logs show that each call runs in series. But I still get a crash. I temporarily solved the problem by adding a wait after each copy, like this:
request(photo.source).pipe(fs.createWriteStream(dir + "/" + img_basename));
console.log("copy of image " + img_basename);
setTimeout(function () { done(); }, 5000);
Is this a limit of the request module? How can I change these few lines to make sure each connection is closed before the program continues?
You probably need to move to an asynchronous loop. Something like eachLimit from the async module would probably be ideal.
async.eachLimit(photos, 10, function(photo, done) {
    var r = request(photo.source).pipe(fs.createWriteStream(photo.name));
    r.on('finish', done);
}, function(err) {
    // All images done or there was an error
});
Now it will process all the items in your photos list, but it will only process 10 of them concurrently. This will prevent it from spinning up hundreds or thousands of concurrent outgoing connections.
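One caveat: 'finish' only fires when the write stream ends successfully, and request errors don't propagate through pipe, so a failed download could stall the queue. A slightly more defensive sketch (the once-guard is my addition, not part of the original answer):

async.eachLimit(photos, 10, function (photo, done) {
    var called = false;
    function finish(err) {
        if (called) return; // guard so async's done is never called twice
        called = true;
        done(err);
    }
    var req = request(photo.source);
    req.on('error', finish); // network errors don't propagate through pipe
    var ws = req.pipe(fs.createWriteStream(photo.name));
    ws.on('error', finish); // filesystem errors
    ws.on('finish', function () { finish(); });
}, function (err) {
    if (err) console.log('An image failed to copy');
});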
The request call and the pipe call are asynchronous, so I have to rewrite this line: request(photos[i].source).pipe(fs.createWriteStream(photos[i].name));
See here:
Downloading N number of remote files using Node.js synchronously

Node - how to wait on async operations?

Sorry, just starting with node. This might be a very novice question.
Let's say I have some code which reads some files from a directory in the file system:
var fs = require('fs');
fs.readdir(__dirname + '/myfiles', function (err, files) {
if (err) throw err;
files.forEach(function (fileName) {
fs.readFile(__dirname + '/myfiles/' + fileName, function (err, data) {
if (err) throw err;
console.log('finished reading file ' + fileName + ': ' + data);
module.exports.files.push(data);
});
});
});
Note that all of this occurs asynchronously. Let's also say I have a Mocha test which executes this code:
describe('fileProvider', function () {
describe('#files', function () {
it.only('files array not empty', function () {
assert(fileProvider.files.length > 0, 'files.length is zero');
});
});
});
The mocha test runs before the files are finished being read. I know this because I see the console.log statement after I see the little dot that indicates a mocha test being run (at least I think that is what is being indicated). Also, if I surround the assert with a setTimeout, the assert passes.
How should I structure my code so that I can ensure the async file operations are completed? Note that this is not just a problem with testing - I need the files to be loaded fully before I can do real work in my app as well.
I don't think the right answer is to read files synchronously, because that will block the Node request / response loop, right?
Bonus question:
Even if I put the assert in a setTimeout with a 0 timeout value, the test still passes. Is this because just putting it in a setTimeout kicks it to the end of the processing chain or something so the filesystem work finishes first?
You can implement a completion callback that fires after all files have been read.
exports.files = [];
exports.initialize = initialize;
function initialize(callback) {
var fs = require('fs');
fs.readdir(__dirname + '/myfiles', function (err, files) {
if (err) throw err;
files.forEach(function (fileName) {
fs.readFile(__dirname + '/myfiles/' + fileName, function (err, data) {
if (err) throw err;
console.log('finished reading file ' + fileName + ': ' + data);
exports.files.push(data);
if (exports.files.length == files.length) {
callback();
}
});
});
});
}
You can call the file operation method by doing something like:
var f = require('./files.js');
if (f.files.length < 1) {
console.log('initializing');
f.initialize(function () {
console.log('After: ' + f.files.length);
var another = require('./files.js');
console.log('Another module: ' + another.files.length);
});
}
EDIT: Since you want to only have to call this once, you could initialize it once when the application loads. According to Node.js documentation, modules are cached after the first time they are loaded. The two above examples have been edited as well.
To avoid getting caught up in nested callbacks, you might want to use async's each, which will let you do the tasks asynchronously in a non-blocking manner:
https://github.com/caolan/async#each
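A rough sketch of that approach applied to the file-reading code from the question, with error handling kept minimal:

var async = require('async');
var fs = require('fs');

exports.files = [];
exports.initialize = function (callback) {
    fs.readdir(__dirname + '/myfiles', function (err, files) {
        if (err) return callback(err);
        async.each(files, function (fileName, done) {
            fs.readFile(__dirname + '/myfiles/' + fileName, function (err, data) {
                if (err) return done(err);
                exports.files.push(data);
                done();
            });
        }, callback); // fires once every file has been read, or on the first error
    });
};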
I think that's a good test; the same thing would happen in any app that used your module, i.e. its code could run before files is set. What you need to do is create a callback like @making3 suggests, or use promises. I haven't used mocha, but there's a section on asynchronous calls in its documentation. You could export the promise itself:
module.exports.getFiles = new Promise((resolve, reject) => {
var datas = [];
fs.readdir(__dirname + '/myfiles', function (err, files) {
if (err) {
reject(err);
return;
}
files.forEach(function (fileName) {
fs.readFile(__dirname + '/myfiles/' + fileName, function (err, data) {
if (err) {
reject(err);
return;
}
console.log('finished reading file ' + fileName + ': ' + data);
datas.push(data);
if (datas.length == files.length) {
resolve(datas);
}
});
});
});
});
chai-as-promised lets you work directly with promises using eventually, or you can use the callback passed to your test, I think:
describe('fileProvider', function () {
describe('#files', function () {
it.only('files array not empty', function (done) {
fileProvider.getFiles.then(function(value) {
assert(value.length > 0, 'files.length is zero');
done();
}, function(err) {
done(err);
})
});
});
});
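With chai-as-promised (assuming it is installed and registered with chai), the same test can return the promise directly. A minimal sketch:

var chai = require('chai');
chai.use(require('chai-as-promised'));
var expect = chai.expect;

describe('fileProvider', function () {
    describe('#files', function () {
        it('files array not empty', function () {
            // mocha waits for the returned promise to settle
            return expect(fileProvider.getFiles).to.eventually.have.length.above(0);
        });
    });
});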

node.js multiple SerialPorts

I am using the SerialPorts module for nodejs and need to be able to open, write and read from a variable number of serial ports.
So what I am doing is first creating an array for the serialPort instances, and then processing them in a loop:
var serialport = require("serialport");
var SerialPort = serialport.SerialPort; // localize object constructor
var devs = ["/dev/tty.SerialPort","/dev/tty.HHW-SPP-1800-2-DevB"];
var ports = [];
for (var i = 0; i < devs.length; i++) {
console.log(devs[i]);
var port = new SerialPort(devs[i],{ baudrate:9600, parser: serialport.parsers.readline("\n") });
ports.push(port);
}
Then I have another function that I call periodically to read / write from the ports:
function minute(){
for (var i = 0; i < ports.length; i++) {
console.log(i);
ports[i].on("open", function (path) {
console.log('opened');
ports[i].write("Helo World\n", function(err,res) {
if(err) console.log('err ' + err);
console.log('results ' + res);
});
ports[i].on("data", function (data) {
console.log("here: "+data);
});
});
}
}
The problem is that the minute() function executes, but it never opens, reads from, or writes to the ports.
What am I doing wrong? And is there a better way of doing this?
There are a couple misconceptions at play here.
Firstly, you don't need to periodically poll your ports. Node.js uses an event loop (more or less) to handle IO, and will do the polling for you. So all you need to do is set up the callbacks for the open event, one time for each port. In your code, it looks like you are re-adding the callbacks each time minute() is called. That is not necessary.
Secondly, JavaScript variables declared with var don't have block scoping. Instead you are inadvertently creating a closure, and your code is in error. In the following block:
for (var i = 0; i < ports.length; i++) {
ports[i].on("open", function (path) {
ports[i].write("Helo World\n", function(err,res) {
if(err) console.log('err ' + err);
console.log('results ' + res);
});
ports[i].on("data", function (data) {
console.log("here: "+data);
});
});
}
When your callback for ports.on is invoked, the value of i in ports[i].write and ports[i].on("data") isn't the value of i when the callback was set up, as you are expecting. Instead, because you have created a closure, the value of i isn't read until the callback is executed. In this example, every one of your callbacks will see i set to ports.length, which was the last evaluated value for i.
I've created a plunkr that illustrates the problem with your for loop.
One way to fix this problem is to use an anonymous function that executes immediately and binds the value of i to a new local variable. In the code below, (function(index){ ... })(i); runs during each loop iteration, so index holds the appropriate value of i by the time the callbacks fire.
for (var i = 0; i < ports.length; i++) {
    (function(index) {
        ports[index].on("open", function (path) {
            ports[index].write("Helo World\n", function(err, res) {
                if (err) console.log('err ' + err);
                console.log('results ' + res);
            });
            ports[index].on("data", function (data) {
                console.log("here: " + data);
            });
        });
    })(i);
}
You could instead pull that logic out into a separate function. setupHandlers() is called immediately for each port, so every handler closes over the proper port.
for (var i = 0; i < ports.length; i++) {
    setupHandlers(ports[i]);
}

function setupHandlers(port) {
    port.on("open", function (path) {
        port.write("Helo World\n", function(err, res) {
            if (err) console.log('err ' + err);
            console.log('results ' + res);
        });
        port.on("data", function (data) {
            console.log("here: " + data);
        });
    });
}
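As an aside: in ES6 and later, declaring the loop variable with let sidesteps the closure issue entirely, because let creates a fresh binding of i for each iteration. A minimal sketch:

for (let i = 0; i < ports.length; i++) {
    ports[i].on("open", function (path) {
        // each callback sees its own per-iteration 'i'
        ports[i].write("Helo World\n", function (err, res) {
            if (err) console.log('err ' + err);
            console.log('results ' + res);
        });
        ports[i].on("data", function (data) {
            console.log("here: " + data);
        });
    });
}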
