I use the module tough-cookie-filestore which saves cookies to a local file. When setting request to use this cookie jar, it requires that the file already exists on disk. As I use this cookie jar in multiple modules, I want to avoid a big block of code at the top of my modules which checks if the cookie file exists and if not creates it, and so I made a module initcookie.js that does this.
My question is, is this a safe and good way to do this? initcookie.init() makes sure that the file exists, but can I be sure that it is run before new FileCookieStore(config.cookiePath) is executed?
var initcookie = require('../initcookie.js').init()
var config = require('../config.js')
var FileCookieStore = require('tough-cookie-filestore')
var request = require('request')
var j = request.jar(new FileCookieStore(config.cookiePath))
request = request.defaults({ jar: j })
Where initcookie.js is:
var config = require('./config.js')
var fs = require('fs')
// Initialize cookie file which stores the login info
exports.init = function () {
try {
fs.openSync(config.cookiePath, 'r')
} catch (e) {
if (e.code === 'ENOENT') {
// File not found, so make one
fs.writeFileSync(config.cookiePath, '', { flag: 'wx' })
} else {
throw (e)
}
}
}
This way will work, but it isn't the best approach. fs.openSync and fs.writeFileSync make sure your code executes synchronously, but it would be better to do this asynchronously so you aren't blocking the event loop. You could write:
var config = require('./config.js')
var fs = require('fs')
// Initialize cookie file which stores the login info
exports.init = function () {
return new Promise(function (resolve, reject) {
try {
fs.openSync(config.cookiePath, 'r')
resolve()
} catch (e) {
if (e.code === 'ENOENT') {
// File not found, so make one
fs.writeFile(config.cookiePath, '', { flag: 'wx' }, function (err) {
if (err) { reject(err) }
else { resolve() }
})
} else {
reject(e)
}
}
})
}
This way you can use promises in your other files and be sure the cookie file exists without blocking the event loop.
The only issue I could see you running into is calling this function several times before the first call completes, which would create a race condition. To avoid that, call it once at the beginning of the program and put everything else in the success handler of that promise, as sketched below.
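For example, a minimal sketch of that pattern, reusing the request setup from the snippet above (startApp is a hypothetical entry point standing in for the rest of your program):
var initcookie = require('../initcookie.js')
var config = require('../config.js')
var FileCookieStore = require('tough-cookie-filestore')
var request = require('request')

initcookie.init().then(function () {
    // only now is the cookie file guaranteed to exist
    var j = request.jar(new FileCookieStore(config.cookiePath))
    request = request.defaults({ jar: j })
    startApp(request) // hypothetical entry point for the rest of the program
}).catch(function (err) {
    console.error('Could not initialize cookie file', err)
    process.exit(1)
})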
Related
I am making a Node.js application, and part of my code requests data from 193 different URLs to download the JSON data from each one. Here is one of those URLs: https://www.gemeentegeschiedenis.nl/gemeentenaam/json/Apeldoorn For some of them the downloaded JSON data is fine and complete. Towards the end, however, some of the files become corrupted: part of the data becomes nullified, and some contain database errors. I think it has to do with requesting data from so many URLs in a short amount of time (which is why I tried setTimeout, but that doesn't really work).
function writeToFile(url) {
// get name to make each new file unique
var name = url.split("json/")[1];
var fileStream = fs.createWriteStream(`jsonFiles/${name}.json`);
var options = {
url: `${url}`,
method: 'GET',
headers: {
'Accept': 'application/json',
'Accept-Charset': 'utf-8',
json: true
}
}
//request the data from the site and download to the file.
request.get(options).pipe(fileStream);
}
function getMunicipalityGeoJsonData(req, res) {
//Get all the urls pointing to the JSON data for the province, Gelderland
getGelderlandJsonUrls((err, jsonUrls) => {
//for all those urls, write the data to files.
for (url of jsonUrls) {
console.log(url);
writeToFile(url);
}
})
}
function getGelderlandJsonUrls(callback) {
getMunicipalityJsonUrls("Gelderland", (err, data) => {
jsonUrls = data;
callback(null, jsonUrls);
});
}
function getMunicipalityJsonUrls(provinceName, callback) {
request({ uri: `https://www.gemeentegeschiedenis.nl/provincie/json/${provinceName}` }, (error, response, body) => {
body = JSON.parse(body);
// extracting each json URL from all the municipalities in Gelderland
var jsonUrls = [];
var numberMun = body.length;
for (var i = 0; i < numberMun; i++) {
var url = body[i].uri.naam;
var urlSplit = url.split("gemeentenaam");
var jsonUrl = urlSplit[0] + "gemeentenaam/json" + urlSplit[1];
jsonUrl = jsonUrl.replace("http://", "https://");
jsonUrls.push(jsonUrl);
}
callback(null, jsonUrls);
});
}
The last JSON file downloaded as an HTML page with a database error, from the URL https://www.gemeentegeschiedenis.nl/gemeentenaam/json/Zutphen which took just under 6 seconds to load according to the Network tab in Chrome.
The 1812 entry has null for its properties when it should have a bunch of coordinates: https://www.gemeentegeschiedenis.nl/gemeentenaam/json/Winssen (took just over a second to load in Chrome).
I am a noob at Node, but please help me fix this issue, maybe with some sort of check for whether the data is corrupted. Thanks for the help in advance :)
EDIT: I am trying to do up to 200 urls at a time in the for loop.
First off, add proper error handling to getMunicipalityJsonUrls() and to getGelderlandJsonUrls(). This means:
Check err parameter everywhere it's present and propagate the error back to the caller.
Capture possible errors from JSON.parse()
Check http statusCode.
Here's that fixed up code:
function getMunicipalityJsonUrls(provinceName, callback) {
request({ uri: `https://www.gemeentegeschiedenis.nl/provincie/json/${provinceName}` }, (error, response, body) => {
if (error) {
callback(error);
return;
}
if (response.statusCode !== 200) {
callback(new Error(`http status code ${response.statusCode}`));
return;
}
try {
// each municipality object carries its URL in item.uri.naam, as in the original code
const jsonUrls = JSON.parse(body).map(item => {
let url = item.uri.naam;
let urlSplit = url.split("gemeentenaam");
let jsonUrl = urlSplit[0] + "gemeentenaam/json" + urlSplit[1];
return jsonUrl.replace("http://", "https://");
});
callback(null, jsonUrls);
} catch(e) {
callback(e);
}
});
}
function getGelderlandJsonUrls(callback) {
getMunicipalityJsonUrls("Gelderland", (err, data) => {
if (err) {
callback(err);
} else {
callback(null, data);
}
});
}
Then, in writeToFile(), add error handling and completion monitoring. I chose to wrap it in a promise rather than a plain callback because I want to use it with some utilities that work with promises:
function writeToFile(url) {
return new Promise((resolve, reject) => {
// get name to make each new file unique
var name = url.split("json/")[1];
var fileStream = fs.createWriteStream(`jsonFiles/${name}.json`);
fileStream.on('error', (e) => {
reject(e);
});
var options = {
url: `${url}`,
method: 'GET',
headers: {
'Accept': 'application/json',
'Accept-Charset': 'utf-8',
json: true
}
}
//request the data from the site and download to the file.
// handle errors on the request stream itself; the fileStream 'error'
// handler above covers write-side errors
request.get(options).on('error', (e) => {
reject(e);
}).pipe(fileStream).on('finish', () => {
resolve(url);
});
});
}
Now, we need to decide how to loop through all the URLs. If any of the URLs could ever write to the same file (even as a remote possibility), then you have to serialize the requests so that you never have more than one asynchronous operation writing to the same file at the same time, because that would corrupt the file. If that were the case, you could serialize the writing like this:
// option 1 - serialize writing to files
async function getMunicipalityGeoJsonData(req, res) {
//Get all the urls pointing to the JSON data for the province, Gelderland
getGelderlandJsonUrls(async (err, jsonUrls) => {
if (err) {
console.log(err);
res.sendStatus(500);
} else {
try {
//for all those urls, write the data to files.
for (const url of jsonUrls) {
console.log(url);
await writeToFile(url);
}
res.send("All done");
} catch(e) {
console.log(e);
res.sendStatus(500);
}
}
});
}
If you are absolutely sure that none of these URLs will ever cause writing to the same file, then you can run N of them at a time, where you determine the lowest value of N that gives you decent performance. Higher values of N consume more peak resources (memory and file handles); lower values of N run fewer things in parallel. If the target hostnames are all the same server, then you usually don't want N to be more than about 5. If the target hosts you are retrieving data from are all different, you can experiment with values of N up to maybe 20.
// option 2 - run N at a time in parallel
function getMunicipalityGeoJsonData(req, res) {
//Get all the urls pointing to the JSON data for the province, Gelderland
getGelderlandJsonUrls((err, jsonUrls) => {
if (err) {
console.log(err);
res.sendStatus(500);
} else {
//for all those urls, write the data to files.
const numConcurrent = 5;
mapConcurrent(jsonUrls, numConcurrent, writeToFile).then(() => {
res.send("All done");
}).catch(err => {
console.log(err);
res.sendStatus(500);
});
}
})
}
The mapConcurrent() function comes from this answer to "Promise.all consumes all my RAM" and is as follows. It expects you to pass it an array of items to iterate over, the maximum number you want in flight at the same time, and a function that will be passed an array item and will return a promise tied to when it's done or has an error:
function mapConcurrent(items, maxConcurrent, fn) {
let index = 0;
let inFlightCntr = 0;
let doneCntr = 0;
let results = new Array(items.length);
let stop = false;
return new Promise(function(resolve, reject) {
function runNext() {
let i = index;
++inFlightCntr;
fn(items[index], index++).then(function(val) {
++doneCntr;
--inFlightCntr;
results[i] = val;
run();
}, function(err) {
// set flag so we don't launch any more requests
stop = true;
reject(err);
});
}
function run() {
// launch as many as we're allowed to
while (!stop && inFlightCntr < maxConcurrent && index < items.length) {
runNext();
}
// if all are done, then resolve parent promise with results
if (doneCntr === items.length) {
resolve(results);
}
}
run();
});
}
There are comparable functions in Bluebird's Promise.map() and in the Async library.
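For instance, with Bluebird the body of option 2 collapses to something like this (a sketch, assuming the bluebird package is installed):
const Promise = require('bluebird');

Promise.map(jsonUrls, writeToFile, { concurrency: 5 })
    .then(() => res.send("All done"))
    .catch(err => {
        console.log(err);
        res.sendStatus(500);
    });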
So, using this code you now have the ability to control how many of your request/writeToFile() operations are in flight at the same time, and you are capturing and logging all possible errors. So you can tune how many run at the same time for best performance and lowest resource use and, if there are any errors, you will be logging them so you can debug.
This code is currently set to stop processing any further URLs once it gets an error. You can change that by tweaking mapConcurrent() if you want to continue on to the other URLs after an error, as sketched below. But I would still make sure you log any errors so you know when they happen and can investigate why.
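A possible tweak (a rough sketch, not tested): inside runNext(), have the rejection handler record the failure and keep going instead of setting stop and rejecting:
fn(items[index], index++).then(function(val) {
    ++doneCntr;
    --inFlightCntr;
    results[i] = val;
    run();
}, function(err) {
    // record the failure for this item and keep launching the rest
    ++doneCntr;
    --inFlightCntr;
    results[i] = err;   // or collect errors into a separate array
    run();
});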
One other note. If this was my code, I would convert everything to promises (no plain callbacks) and I'd use the got() library instead of the now deprecated request() library. I don't write any new code using the request() library.
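For example, a rough sketch of what writeToFile() might look like with got and stream.pipeline (assuming the got package, v9 or later, is installed; this is an illustration, not a drop-in replacement for the code above):
const got = require('got');
const fs = require('fs');
const { pipeline } = require('stream');
const { promisify } = require('util');
const pipelineAsync = promisify(pipeline);

async function writeToFile(url) {
    // get name to make each new file unique
    const name = url.split("json/")[1];
    // stream the response straight into the file; pipeline propagates
    // errors from either stream and cleans up on failure
    await pipelineAsync(
        got.stream(url, { headers: { 'Accept': 'application/json' } }),
        fs.createWriteStream(`jsonFiles/${name}.json`)
    );
    return url;
}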
Trying to access an S3 bucket from a Node.js Express application. I'm trying to return the JSON file from the bucket in the form of a JSON object. Here's the code:
let gets3 = function(file) {
devs3();
return new Promise(function(resolve, reject) {
var params = {
Bucket: S3_BUCKET,
Key: file
}
var s3 = new aws.S3();
try {
s3.getObject(params, function(err, data) {
var myfile = JSON.parse(data.Body.toString());
resolve(myfile);
});
} catch (err) {
reject(err);
}
});
}
module.exports = {
fetchEvents : function(cata) {
var myJson;
switch (cata) {
case "Pr":
myJson = 'primaryEvents.json';
case "Sc":
myJson = 'secondaryEvents.json';
case "Po":
myJson = 'postEvents.json';
}
let gets3init = gets3(myJson);
gets3init.then(function(file) {
console.log('From Fetch Events: ' + JSON.stringify(file));
return file;
});
}
}
devs3() just sets the S3 login information and runs aws.config.setPromisesDependency. In my main app.js, I run fetchEvents("Pr") and then try to display it. In my console, it successfully displays the contents of the JSON file; however, the application exits with a TypeError: cannot read property 'n' of undefined, where n is the name of the first element in the JSON file. One thing to note is that this error appears in the console before the JSON file is printed, showing that the function is not waiting for gets3 to finish and return the file. Is there any way to force the application to wait (this happens on a page load) for the promise to resolve?
Thanks to @Bergi for pointing me in the right direction and helping me get this answer. Essentially, I was using promises wrong from the start. I needed to return the promise from the fetchEvents function, not the data, and then use it in the calling code so that I only proceed once the promise has resolved.
fetchEvents
module.exports = {
fetchEvents : function(cata) {
var myJson;
switch (cata) {
case "Pr":
myJson = 'primaryEvents.json';
break;
case "Sc":
myJson = 'secondaryEvents.json';
break;
case "Po":
myJson = 'postEvents.json';
break;
}
return gets3(myJson);
}
}
Calling Code
app.get('/PrEvents', function(req, res) {
var myCata = req.path.replace('Events', '').replace('/', '');
fetchEvents(myCata).then(function(events) {
console.log(events);
// pass the fetched data through to the template
res.render('pages/eventsShowTemp', { events: events });
});
});
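One more point worth noting: gets3 as written ignores the err argument from s3.getObject, and the surrounding try/catch cannot catch an error thrown inside that asynchronous callback. A minimal sketch of that callback with the rejection wired up, using the same params object as above:
s3.getObject(params, function(err, data) {
    if (err) {
        return reject(err); // surface S3 errors through the promise
    }
    try {
        resolve(JSON.parse(data.Body.toString()));
    } catch (parseErr) {
        reject(parseErr); // malformed JSON rejects instead of crashing
    }
});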
I updated the function to create the CSV file but now I'm getting an error:
In upload function
internal/streams/legacy.js:57
throw er; // Unhandled stream error in pipe.
^
Error: ENOENT: no such file or directory, open 'C:\Users\shiv\WebstormProjects\slackAPIProject\billingData\CSV\1548963844106output.csv'
var csvFilePath = '';
var JSONFilePath = '';
function sendBillingData(){
var message = '';
axios.get(url, {
params: {
token: myToken
}
}).then(function (response) {
message = response.data;
fields = billingDataFields;
// saveFiles(message, fields, 'billingData/');
saveFilesNew(message, fields, 'billingData/');
var file = fs.createReadStream(__dirname + '/' + csvFilePath); // <--make sure this path is correct
console.log(__dirname + '/' + csvFilePath);
uploadFile(file);
})
.catch(function (error) {
console.log(error);
});
}
The saveFilesNew function is:
function saveFilesNew(message, options, folder){
try {
const passedData = message;
var relevantData='';
if (folder == 'accessLogs/'){
const loginsJSON = message.logins;
relevantData = loginsJSON;
console.log(loginsJSON);
}
if(folder == 'billingData/'){
relevantData = passedData.members;
const profile = passedData.members[0].profile;
}
//Save JSON to the output folder
var date = Date.now();
var directoryPath = folder + 'JSON/' + date + "output";
JSONFilePath = directoryPath + '.json';
fs.writeFileSync(JSONFilePath, JSON.stringify(message, null, 4), function(err) {
if (err) {
console.log(err);
}
});
//parse JSON onto the CSV
const json2csvParser = new Json2csvParser({ fields });
const csv = json2csvParser.parse(relevantData);
// console.log(csv);
//function to process the CSV onto the file
var directoryPath = folder + 'CSV/' + date + "output";
csvFilePath = directoryPath + '.csv';
let data = [];
let columns = {
real_name: 'real_name',
display_name: 'display_name',
email: 'email',
account_type: 'account_type'
};
var id = passedData.members[0].real_name;
console.log(id);
console.log("messageLength is" +Object.keys(message.members).length);
for (var i = 0; i < Object.keys(message.members).length; i++) {
console.log("value of i is" + i);
var display_name = passedData.members[i].profile.display_name;
var real_name = passedData.members[i].profile.real_name_normalized;
var email = passedData.members[i].profile.email;
var account_type = 'undefined';
console.log("name: " + real_name);
if(passedData.members[i].is_owner){
account_type = 'Org Owner';
}
else if(passedData.members[i].is_admin){
account_type = 'Org Admin';
}
else if(passedData.members[i].is_bot){
account_type = 'Bot'
}
else account_type = 'User';
data.push([real_name, display_name, email, account_type]);
}
console.log(data);
stringify(data, { header: true, columns: columns }, (err, output) => {
if (err) throw err;
fs.writeFileSync(csvFilePath, output, function(err) {
console.log(output);
if (err) {
console.log(err);
}
console.log('my.csv saved.');
});
});
} catch (err) {
console.error(err);
}
}
The upload file function is:
function uploadFile(file){
console.log("In upload function");
const form = new FormData();
form.append('token', botToken);
form.append('channels', 'testing');
form.append('file', file);
axios.post('https://slack.com/api/files.upload', form, {
headers: form.getHeaders()
}).then(function (response) {
var serverMessage = response.data;
console.log(serverMessage);
});
}
So I think the error is caused because Node is trying to upload the file before it has been created. I feel like this has something to do with the asynchronous nature of Node.js, but I fail to comprehend how to rectify the code. Please let me know how to correct this, and mention any improvements to the code structure/design too.
Thanks!
You don't wait for the callback provided to stringify to be executed, and that's where you create the file. (Assuming this stringify function really does accept a callback.)
Using callbacks (you could make this cleaner with promises and the neat async/await syntax, but let's stick to callbacks here), it should look more like:
function sendBillingData() {
...
// this callback we'll use to know when the file writing is done, and to get the file path
saveFilesNew(message, fields, 'billingData/', function(err, csvFilePathArgument) {
// this we will execute when saveFilesNew calls it, not when saveFilesNew returns, see below
uploadFile(fs.createReadStream(__dirname + '/' + csvFilePathArgument))
});
}
// let's name this callback... "callback".
function saveFilesNew(message, options, folder, callback) {
...
var csvFilePath = ...; // local variable only instead of your global
...
stringify(data, { header: true, columns: columns }, (err, output) => {
if (err) throw err; // or return callback(err);
fs.writeFile(csvFilePath, output, function(err) { // use the async writeFile; writeFileSync would ignore the callback
console.log(output);
if (err) {
console.log(err);
// callback(err); may be a useful approach for error-handling at a higher level
}
console.log('my.csv saved.'); // yes, NOW the CSV is saved, not before this executes! Hence:
callback(null, csvFilePath); // no error, clean process, pass the file path
});
});
console.log("This line is executed before stringify's callback is called!");
return; // implicitly, yes, yet still synchronous and that's why your version crashes
}
Using callbacks that are called only when the expected events happen (a file is done writing, a buffer/string is done transforming...) allows JS to keep executing code in the meantime. And it does keep executing code, so when you need data from an async code, you need to tell JS you need it done before executing your piece.
Also, since you can pass data when calling back (it's just a function), here I could avoid relying on a global csvFilePath. Using higher-level variables makes things monolithic; for instance, you could not move saveFilesNew to a dedicated file where you keep your toolkit of file-related functions.
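As an aside, here is roughly what the same flow looks like with promises and async/await, as mentioned above. This is only a sketch, under the assumption that stringify follows the Node callback convention so it can be promisified; the elided parts are exactly what you already have:
const util = require('util');
const fs = require('fs');
const stringifyAsync = util.promisify(stringify);
const writeFileAsync = util.promisify(fs.writeFile);

async function saveFilesNew(message, options, folder) {
    // ... build `data`, `columns` and `csvFilePath` exactly as before ...
    const output = await stringifyAsync(data, { header: true, columns: columns });
    await writeFileAsync(csvFilePath, output);
    return csvFilePath; // resolves only once the CSV is on disk
}

async function sendBillingData() {
    // ... fetch `message` and `fields` as before ...
    const csvFilePath = await saveFilesNew(message, fields, 'billingData/');
    uploadFile(fs.createReadStream(__dirname + '/' + csvFilePath));
}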
Finally, if your global process is like:
function aDayAtTheOffice() {
sendBillingData();
getCoffee();
}
then you don't need to wait for the billing data to be processed before starting making coffee. However, if your boss told you that you could NOT get a coffee until the billing data was settled, then your process would look like:
function aDayAtTheOffice() {
sendBillingData(function (err) {
// if (err) let's do nothing here: you wanted a coffee anyway, right?
getCoffee();
});
}
(Note that callbacks having potential error as first arg and data as second arg is a convention, nothing mandatory.)
IMHO you should read about scope (the argument callback could be accessed at a time when the call to saveFilesNew was already done and forgotten!), and about the asynchronous nature of No... JavaScript. ;) (Sorry, probably not the best links, but they contain the meaningful keywords, and then Google is your buddy, your friend, your Big Brother.)
I want to use gridfs-stream in a nodejs application.
A simple example is given in the documentation:
var mongoose = require('mongoose');
var Grid = require('gridfs-stream');
Grid.mongo = mongoose.mongo;
mongoose.connect('mongodb://localhost:27017/test');
// make sure the db instance is open before passing into `Grid`
mongoose.connection.once('open', function () {
var gfs = Grid(mongoose.connection);
// all set!
})
My problem is described by the comment:
make sure the db instance is open before passing into Grid
I try to use gfs in a post request. Now when the code gets initialized, the gfs variable is not defined yet.
api.post('/upload', function(req, res) {
req.pipe(gfs.createWriteStream({
filename: 'test'
}).on('close', function(savedFile){
console.log('file saved', savedFile);
return res.json({file: savedFile});
}));
})
Initializing my route from a callback seems kind of odd.
I read in this post (Asynchronous initialization of Node.js module) that require('') is performed synchronously, and since I rely on the connection being established, I'm kind of forced to wait.
Basically I'm not sure if I should use a async pattern on startup now, or if I just miss a more elegant way to solve this.
I have a very similar problem with my server. In my case I am reading HTTPS certs asynchronously and the software version from git asynchronously, and I want to make sure I have it all together by the time a user comes to log in, so I can pass the software version back as a reply to the login.
The solution is to use promises. Create the promises at startup for each activity. Then, in the code where you want to be sure it's all ready, just call .then() on either the promise itself or on Promise.all(array of promises).
Here is an example of what I am doing to read the ssl certs to start the server
class Web {
constructor(manager,logger) {
var self = this;
this.server = false;
this.logger = logger;
var key = new Promise((resolve,reject) => {
fs.readFile(path.resolve(__dirname, 'key.pem'),(err,data) => {
if (err) {
reject(err);
} else {
resolve(data);
}
});
});
var cert = new Promise((resolve,reject) => {
fs.readFile(path.resolve(__dirname, 'certificate.pem'), (err,data) => {
if (err) {
reject(err);
} else {
resolve(data);
}
});
});
Promise.all([key,cert]).then(values => {
var certs = {
key: values[0],
cert: values[1],
};
return certs;
}).then(certs => {
self.server = require('http2').createServer(certs,(req,res) => {
// NOW Started and can do the rest of the stuff
});
self.server.listen(...);
});
} // end constructor
} // end class Web
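The same idea applies directly to the gridfs-stream question above: create one promise at startup that resolves when the connection is open, then have the route wait on it. A rough sketch, reusing the names from the question (including its own way of constructing Grid from the connection):
var mongoose = require('mongoose');
var Grid = require('gridfs-stream');
Grid.mongo = mongoose.mongo;

mongoose.connect('mongodb://localhost:27017/test');

// created once at startup; resolves with the gfs instance when the db is open
var gfsReady = new Promise(function (resolve, reject) {
    mongoose.connection.once('open', function () {
        resolve(Grid(mongoose.connection));
    });
    mongoose.connection.on('error', reject);
});

api.post('/upload', function (req, res) {
    gfsReady.then(function (gfs) {
        req.pipe(gfs.createWriteStream({ filename: 'test' })
            .on('close', function (savedFile) {
                console.log('file saved', savedFile);
                res.json({ file: savedFile });
            }));
    });
});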
My code looks similar to this:
var mongo_client = require('mongodb').MongoClient, dataStorage;
lib = {
[...]
find: function(res, param, callback) {
var parentPath = param.path;
while (parentPath !== '/') {
collection.findOne({'paths' : parentPath}, {...}, function(err, data) {
if (data) {
dataStorage = data;
callback(data, res);
}
}
if (dataStorage) {
return;
}
parentPath = lib.removeLastBlockOfPath(parentPath);
}
if (!dataStorage) {
callback(someDefaultData, res);
}
}
[...]
}
What I want to do is find a path stored in Mongo, or, if there is no match, try to find the first matching parent path.
I can't set the dataStorage value from the findOne callback; is there any way to do that? Even if I find the path, it always runs through all the path blocks.
Node is asynchronous, so your code must be written accordingly. An option is to use the async module, that has lots of tools to manage asynchronous flows.
For example, you could use the whilst function to manage your while loop:
find: function(res, param, callback) {
var parentPath = param.path,
dataStorage = null;
async.whilst(
function () { return parentPath !== '/'; },
function (done) {
collection.findOne({'paths' : parentPath}, {...}, function(err, data) {
if (data) {
dataStorage = data;
return callback(data, res);
}
parentPath = lib.removeLastBlockOfPath(parentPath);
done();
});
},
function (error) {
if (!dataStorage) return callback(someDefaultData, res);
}
);
}
Don't forget to install and require the async module:
var async = require('async');
Your code is written as if it were "traditional synchronous" code, which it's not. You cannot check dataStorage for validity until the results from findOne() come back, so your checks need to move all the way into the inner "if (data)" statement. This is not a MongoDB issue; this is purely how Node.js works, given that everything is asynchronous and callback-based.
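A rough sketch of that shape, keeping the names from the question (collection, lib.removeLastBlockOfPath and someDefaultData are assumed to exist as in the original): walk up the path one level per findOne callback and stop as soon as something is found.
find: function (res, param, callback) {
    function findPath(parentPath) {
        if (parentPath === '/') {
            return callback(someDefaultData, res);
        }
        collection.findOne({ 'paths': parentPath }, function (err, data) {
            if (data) {
                return callback(data, res); // stop at the first match
            }
            // nothing found at this level, try the parent path
            findPath(lib.removeLastBlockOfPath(parentPath));
        });
    }
    findPath(param.path);
}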