Different results from asynchronous and synchronous reading - node.js

I have a fairly simple script that attempts to read and then parse a JSON file. The JSON is very simple and I am pretty sure it is valid.
{
"foo": "bar"
}
Now, I have been trying to read it with fs.readFile. When read no errors occur and the returned data is a string. The only problem is that the string is empty.
I repeated my code but used fs.readFileSync, this returned the file perfectly using the same path. Both had a utf-8 encoding specified.
It is very simple code, as you can see.
// Read the same file, with the same encoding, both asynchronously and synchronously.
const jsonPath = './some/path/file.json';

// Asynchronous read: the contents are only printed when no error was reported.
fs.readFile(jsonPath, 'utf8', (err, data) => {
  if (err) {
    return;
  }
  console.log(data); // Empty string...
});

// Synchronous read of the very same path.
console.log(fs.readFileSync(jsonPath, 'utf8')); // Displays JSON file
Could it be permissions or ownership? I have tried a permission set of 755 and 777 to no avail.
I am running node v0.4.10. Any suggestions to point me in the right direction will be much appreciated. Thanks.
Edit: Here is a block of my actual code. Hopefully this will give you a better idea.
// Make sure the file is okay, then read, parse and merge it.
// `file`, `self`, `callback` and `parsedConfig` come from the enclosing scope.
// The callback is invoked exactly once: with null on success, or with an Error.
fs.stat(file, function(err, stats) {
  if (err || !stats.isFile()) {
    callback.call(self, new Error(file + ': The config file does not exist.'));
    return;
  }

  // It is okay. Now load the file
  fs.readFile(file, 'utf-8', function(readErr, data) {
    // An empty string is treated the same as a failed read.
    if (readErr || !data) {
      callback.call(self, new Error(file + ': The config file could not be read.'));
      return;
    }

    // Only parsing and merging are guarded. The callback and the event are
    // fired outside the try block so that an exception thrown by a listener
    // is not misreported as invalid JSON (and the callback is not run twice).
    try {
      // NOTE(review): `parsedConfig` is not declared in this snippet; it is
      // assumed to be declared in the enclosing scope.
      parsedConfig = JSON.parse(data);
      self.mergeConfig(parsedConfig);
    }
    catch(e) {
      callback.call(self, new Error(file + ': The config file is not valid JSON.'));
      return;
    }

    // The config was loaded and merged; pass the error as null.
    callback.call(self, null);

    // Share the news about the new config
    self.emit('configLoaded', file, parsedConfig, data);
  });
});

This is pretty weird.
The code looks fine to me. Here is what I ran:
const fs = require('fs');

// Read ./jsonfile as UTF-8 text, then print the raw text, the parsed object
// and its `foo` property. Any read error is printed instead.
fs.readFile('./jsonfile', 'utf8', function(err, data) {
  if (err) {
    console.error(err);
    return;
  }
  console.log(data);
  // Declared locally so the script does not leak an implicit global.
  const parsedConfig = JSON.parse(data);
  console.log(parsedConfig);
  console.log(parsedConfig.foo);
});
Json file:
{
"foo": "bar"
}
output :
$ node test_node3.js
{
"foo": "bar"
}
{ foo: 'bar' }
bar
This is on node 0.4.10, but I'm pretty sure it should work on all node versions.
So why is your data empty? You should check err in this case (as in my version) and post the output, if any. If there is no error, you may want to file a bug on GitHub.

Related

Requesting many connections to another site with Cross Origin Resource Sharing activated close in time makes the data corrupted

I am making a node.js application, and part of my code requests data from 193 different urls to download the json data from each url. Here is one of those urls: https://www.gemeentegeschiedenis.nl/gemeentenaam/json/Apeldoorn For some of them the downloaded json data is fine and complete. However, towards the end, some of the files get corrupted: part of the data becomes null, and some files contain database errors instead. I think it has to do with requesting data from so many urls in a short amount of time (which is why I tried the "setTimeout" function, but that doesn't really work).
/**
 * Starts a GET request for `url` and pipes the response into
 * jsonFiles/<name>.json, where <name> is the part of the url after "json/".
 * Nothing is returned and completion is not reported.
 */
function writeToFile(url) {
  // The url segment after "json/" keeps each output file unique.
  const [, name] = url.split("json/");
  const target = fs.createWriteStream(`jsonFiles/${name}.json`);

  const headers = {
    'Accept': 'application/json',
    'Accept-Charset': 'utf-8',
    json: true
  };
  const options = { url: `${url}`, method: 'GET', headers };

  // Request the data from the site and stream it into the file.
  request.get(options).pipe(target);
}
/**
 * Fetches all JSON urls for the province Gelderland and starts a download
 * for each of them via writeToFile(). All downloads are started back to back;
 * nothing is sent on `res`.
 */
function getMunicipalityGeoJsonData(req, res) {
  // Get all the urls pointing to the JSON data for the province, Gelderland
  getGelderlandJsonUrls((err, jsonUrls) => {
    if (err) {
      // Without this guard, iterating an undefined list would throw.
      console.error(err);
      return;
    }
    // For all those urls, write the data to files.
    // `const` keeps the loop variable local instead of leaking a global.
    for (const url of jsonUrls) {
      console.log(url);
      writeToFile(url);
    }
  });
}
/**
 * Looks up the JSON urls for the province "Gelderland".
 * @param {function(Error|null, string[]=)} callback - receives the error, or
 *   null and the list produced by getMunicipalityJsonUrls().
 */
function getGelderlandJsonUrls(callback) {
  getMunicipalityJsonUrls("Gelderland", (err, data) => {
    if (err) {
      // Propagate the failure instead of reporting success with no data.
      callback(err);
      return;
    }
    // Hand the list straight on; no intermediate (global) variable is needed.
    callback(null, data);
  });
}
/**
 * Requests the municipality list of a province and converts every entry's
 * `uri.naam` into the matching https ".../gemeentenaam/json/..." url.
 * The callback is always called with null as its first argument.
 */
function getMunicipalityJsonUrls(provinceName, callback) {
  // Insert "/json" after "gemeentenaam" and switch the scheme to https.
  const toJsonUrl = (naamUrl) => {
    const [prefix, suffix] = naamUrl.split("gemeentenaam");
    return `${prefix}gemeentenaam/json${suffix}`.replace("http://", "https://");
  };

  const requestOptions = { uri: `https://www.gemeentegeschiedenis.nl/provincie/json/${provinceName}` };
  request(requestOptions, (error, response, body) => {
    const municipalities = JSON.parse(body);
    // Extract each json URL from all the municipalities in the province.
    const jsonUrls = Array.from(
      { length: municipalities.length },
      (_, position) => toJsonUrl(municipalities[position].uri.naam)
    );
    callback(null, jsonUrls);
  });
}
The last json data downloaded into the file as an html page with a database error from the url: https://www.gemeentegeschiedenis.nl/gemeentenaam/json/Zutphen which actually just took just under 6 seconds to load up looking at the network tab on Chrome
In another file, the 1812 entry has null for its properties when it should have a bunch of coordinates: https://www.gemeentegeschiedenis.nl/gemeentenaam/json/Winssen (this took just over a second to load in Chrome).
I am a noob at node, but please help me fix this issue, maybe with some sort of check for whether the data is corrupted. Thanks for the help in advance :)
EDIT: I am trying to do up to 200 urls at a time in the for loop.
First off, add proper error handling to getMunicipalityJsonUrls() and to getGelderlandJsonUrls(). This means:
Check err parameter everywhere it's present and propagate the error back to the caller.
Capture possible errors from JSON.parse()
Check http statusCode.
Here's that fixed up code:
/**
 * Requests the municipality list of a province and converts every entry's
 * `uri.naam` into the matching https ".../gemeentenaam/json/..." url.
 *
 * @param {string} provinceName - province to look up, e.g. "Gelderland".
 * @param {function(Error|null, string[]=)} callback - called exactly once,
 *   with an Error (request failure, non-200 status, unparsable or unexpected
 *   body) or with null and the list of json urls.
 */
function getMunicipalityJsonUrls(provinceName, callback) {
  request({ uri: `https://www.gemeentegeschiedenis.nl/provincie/json/${provinceName}` }, (error, response, body) => {
    // The parameter is named `error`; it must be checked under that name.
    if (error) {
      callback(error);
      return;
    }
    if (response.statusCode !== 200) {
      callback(new Error(`http status code ${response.statusCode}`));
      return;
    }

    let jsonUrls;
    try {
      // Each entry is an object; the page url lives in `uri.naam`.
      jsonUrls = JSON.parse(body).map((municipality) => {
        const [prefix, suffix] = municipality.uri.naam.split("gemeentenaam");
        const jsonUrl = prefix + "gemeentenaam/json" + suffix;
        return jsonUrl.replace("http://", "https://");
      });
    } catch (e) {
      callback(e);
      return;
    }

    // Called outside the try block so an exception thrown by the caller's
    // callback does not trigger a second callback invocation.
    callback(null, jsonUrls);
  });
}
/**
 * Looks up the JSON urls for the province "Gelderland" and forwards the
 * outcome: `callback(err)` on failure, `callback(null, data)` on success.
 */
function getGelderlandJsonUrls(callback) {
  const forwardResult = (err, data) => {
    if (!err) {
      callback(null, data);
      return;
    }
    callback(err);
  };
  getMunicipalityJsonUrls("Gelderland", forwardResult);
}
Then, in writeToFile(), add error handling and completion monitoring and I chose to wrap it in a promise rather than a plain callback because I want to use it with some utilities that work with promises.
/**
 * Downloads `url` into jsonFiles/<name>.json, where <name> is the part of
 * the url after "json/".
 *
 * @param {string} url - address of the JSON document to download.
 * @returns {Promise<string>} resolves with `url` once the file has been
 *   completely written; rejects on a request error, a non-200 response or a
 *   file stream error.
 */
function writeToFile(url) {
  return new Promise((resolve, reject) => {
    // get name to make each new file unique
    const name = url.split("json/")[1];
    const fileStream = fs.createWriteStream(`jsonFiles/${name}.json`);

    const options = {
      url,
      method: 'GET',
      // `json` is a request option, not an HTTP header, so it is no longer
      // sent as one; it has no effect on a piped response anyway.
      headers: {
        'Accept': 'application/json',
        'Accept-Charset': 'utf-8'
      }
    };

    // Keep a handle on the request itself: pipe() returns the destination
    // stream, so handlers chained after pipe() never see request errors.
    const download = request.get(options);

    download.on('error', (e) => {
      fileStream.destroy();
      reject(e);
    });

    download.on('response', (response) => {
      // An error page must not be reported as a successful download.
      if (response.statusCode !== 200) {
        download.abort();
        fileStream.destroy();
        reject(new Error(`http status code ${response.statusCode} for ${url}`));
      }
    });

    fileStream.on('error', (e) => {
      reject(e);
    });
    fileStream.on('finish', () => {
      resolve(url);
    });

    // request the data from the site and download to the file.
    download.pipe(fileStream);
  });
}
Now, we need to decide how to loop through all the URLs. If any of the urls could ever be attempting to write to the same file (if that's even a remote possibility), then you have to serialize the URLs to prevent them from ever having more than one asynchronous operation trying to write to the same file at the same time because that will just mess up that file. So, if that was the case, you could serialize the writing to the file like this:
// option 1 - serialize writing to files
/**
 * Option 1: downloads every Gelderland JSON url strictly one after another.
 *
 * Sends "All done" on `res` when every file has been written, or status 500
 * as soon as the url lookup or any download fails.
 *
 * @returns {Promise<void>} settles after the response has been sent.
 */
async function getMunicipalityGeoJsonData(req, res) {
  try {
    // Adapt the callback API to a promise so that `await` is used inside an
    // async function; `await` in a plain arrow callback is a syntax error.
    const jsonUrls = await new Promise((resolve, reject) => {
      getGelderlandJsonUrls((err, urls) => {
        if (err) {
          reject(err);
        } else {
          resolve(urls);
        }
      });
    });

    // For all those urls, write the data to files, one at a time.
    for (const url of jsonUrls) {
      console.log(url);
      await writeToFile(url);
    }
    res.send("All done");
  } catch (e) {
    console.log(e);
    res.sendStatus(500);
  }
}
If you are absolutely sure that none of these URLs will ever cause writing to the same file, then you can run N of them at a time where you determine what the lowest value of N is that gets you decent performance. Higher values of N consume more peak resources (memory and file handles). Lower values of N run less things in parallel. If the target hostnames are all the same server, then usually you don't want N to be more than about 5. If the target hosts you are retrieving data from are all different, you can experiment with values of N up to maybe 20.
// option 2 - run N at a time in parallel
/**
 * Option 2: downloads the Gelderland JSON urls with at most five
 * writeToFile() operations in flight at once (via mapConcurrent()).
 * Sends "All done" on success; logs the error and sends status 500 otherwise.
 */
function getMunicipalityGeoJsonData(req, res) {
  const numConcurrent = 5;

  // Shared failure path for the url lookup and for the downloads.
  const reportFailure = (failure) => {
    console.log(failure);
    res.sendStatus(500);
  };

  // Get all the urls pointing to the JSON data for the province, Gelderland
  getGelderlandJsonUrls((err, jsonUrls) => {
    if (err) {
      reportFailure(err);
      return;
    }
    // For all those urls, write the data to files.
    mapConcurrent(jsonUrls, numConcurrent, writeToFile)
      .then(() => {
        res.send("All done");
      })
      .catch(reportFailure);
  });
}
The mapConcurrent() function comes from this answer Promise.all consumes all my RAM and is as follows. It expects you to pass it an array of items to be iterated over, the max you want in flight at the same time and a function that will be passed an array item and will return a promise connected to when it's done or has an error:
/**
 * Runs `fn` over `items` with at most `maxConcurrent` calls in flight.
 *
 * @param {Array} items - values to process.
 * @param {number} maxConcurrent - upper bound on simultaneous calls.
 * @param {function(*, number): (Promise|*)} fn - called with (item, index);
 *   its result may be a promise or a plain value.
 * @returns {Promise<Array>} resolves with the results in input order once
 *   every item is done (immediately for an empty array); rejects with the
 *   first error, after which no further items are started.
 */
function mapConcurrent(items, maxConcurrent, fn) {
  let index = 0;
  let inFlightCntr = 0;
  let doneCntr = 0;
  const results = new Array(items.length);
  let stop = false;

  return new Promise(function(resolve, reject) {
    function fail(err) {
      // set flag so we don't launch any more requests
      stop = true;
      reject(err);
    }

    function runNext() {
      const i = index++;
      ++inFlightCntr;
      let pending;
      try {
        // Promise.resolve() also accepts a non-promise return value.
        pending = Promise.resolve(fn(items[i], i));
      } catch (err) {
        // A synchronous throw from fn must reject the outer promise too;
        // otherwise it would be lost and the promise would never settle.
        fail(err);
        return;
      }
      pending.then(function(val) {
        ++doneCntr;
        --inFlightCntr;
        results[i] = val;
        run();
      }, fail);
    }

    function run() {
      // launch as many as we're allowed to
      while (!stop && inFlightCntr < maxConcurrent && index < items.length) {
        runNext();
      }
      // if all are done, then resolve parent promise with results
      if (doneCntr === items.length) {
        resolve(results);
      }
    }

    run();
  });
}
There are comparable functions in Bluebird's Promise.map() and in the Async library.
So, using this code you now have the ability to control how many of your requests/writeToFile() operations are in-process at the same time and you are capturing and logging all possible errors. So, you can tune how many can be in flight at the same time for best performance and lowest resource use and, if there are any errors, you should be logging those errors so you can debug.
This code is currently set to stop processing any further URLs if it gets an error. You can change that if you want to continue on to the other URLs if you get an error by tweaking mapConcurrent(). But, I would still make sure you log any errors so you know when there are errors and can investigate why you are seeing errors.
One other note. If this was my code, I would convert everything to promises (no plain callbacks) and I'd use the got() library instead of the now deprecated request() library. I don't write any new code using the request() library.

Node.js function not running in order. Error: Unhandled stream error in pipe

I updated the function to create the CSV file but now I'm getting an error:
In upload function
internal/streams/legacy.js:57
throw er; // Unhandled stream error in pipe.
^
Error: ENOENT: no such file or directory, open 'C:\Users\shiv\WebstormProjects\slackAPIProject\billingData\CSV\1548963844106output.csv'
// Module-level paths. Both are assigned by saveFilesNew(); csvFilePath is
// read by sendBillingData() below.
var csvFilePath = '';
var JSONFilePath = '';
// Fetches the data from `url` with axios, passes it to saveFilesNew() and
// then opens a read stream on the CSV path and hands it to uploadFile().
// Any error raised in the promise chain is logged.
function sendBillingData(){
var message = '';
axios.get(url, {
params: {
token: myToken
}
}).then(function (response) {
message = response.data;
// `fields` is not declared here; saveFilesNew() reads it as `{ fields }`.
fields = billingDataFields;
// saveFiles(message, fields, 'billingData/');
saveFilesNew(message, fields, 'billingData/');
// NOTE(review): the stream is created as soon as saveFilesNew() returns,
// while the CSV is written inside stringify's callback there. If that
// callback runs asynchronously, the file may not exist yet at this point.
var file = fs.createReadStream(__dirname + '/' + csvFilePath); // <--make sure this path is correct
console.log(__dirname + '/' + csvFilePath);
uploadFile(file);
})
.catch(function (error) {
console.log(error);
});
}
The saveFilesNew function is:
// Saves `message` as pretty-printed JSON under <folder>JSON/ and writes a
// CSV built from message.members under <folder>CSV/. Both file names start
// with the current timestamp. The resulting paths are stored in the outer
// variables JSONFilePath and csvFilePath. Exceptions thrown synchronously
// inside the body are caught and logged.
//   message - object with a `members` array (and `logins` for access logs)
//   options - not referenced in the body; `fields` is read from outer scope
//   folder  - 'accessLogs/' or 'billingData/'; prefix for the output paths
function saveFilesNew(message, options, folder){
try {
const passedData = message;
var relevantData='';
// Pick the part of the message that is fed to the json2csv parser below.
if (folder == 'accessLogs/'){
const loginsJSON = message.logins;
relevantData = loginsJSON;
console.log(loginsJSON);
}
if(folder == 'billingData/'){
relevantData = passedData.members;
// `profile` is not used afterwards.
const profile = passedData.members[0].profile;
}
//Save JSON to the output folder
var date = Date.now();
var directoryPath = folder + 'JSON/' + date + "output";
JSONFilePath = directoryPath + '.json';
// NOTE: writeFileSync is synchronous; the function passed as third argument
// is not used as a completion callback.
fs.writeFileSync(JSONFilePath, JSON.stringify(message, null, 4), function(err) {
if (err) {
console.log(err);
}
});
//parse JSON onto the CSV
const json2csvParser = new Json2csvParser({ fields });
// `csv` is only referenced by the commented-out log below.
const csv = json2csvParser.parse(relevantData);
// console.log(csv);
//function to process the CSV onto the file
var directoryPath = folder + 'CSV/' + date + "output";
csvFilePath = directoryPath + '.csv';
// Rows collected for the CSV, one array per member.
let data = [];
// Column definition handed to stringify().
let columns = {
real_name: 'real_name',
display_name: 'display_name',
email: 'email',
account_type: 'account_type'
};
var id = passedData.members[0].real_name;
console.log(id);
console.log("messageLength is" +Object.keys(message.members).length);
// Build one row per member: names and email from the profile, plus an
// account type derived from the is_owner / is_admin / is_bot flags.
for (var i = 0; i < Object.keys(message.members).length; i++) {
console.log("value of i is" + i);
var display_name = passedData.members[i].profile.display_name;
var real_name = passedData.members[i].profile.real_name_normalized;
var email = passedData.members[i].profile.email;
var account_type = 'undefined';
console.log("name: " + real_name);
if(passedData.members[i].is_owner){
account_type = 'Org Owner';
}
else if(passedData.members[i].is_admin){
account_type = 'Org Admin';
}
else if(passedData.members[i].is_bot){
account_type = 'Bot'
}
else account_type = 'User';
data.push([real_name, display_name, email, account_type]);
}
console.log(data);
// The CSV file is written inside this callback, not before it runs.
// NOTE(review): if stringify calls back asynchronously, the `throw` below
// is outside the reach of the surrounding try/catch — confirm.
stringify(data, { header: true, columns: columns }, (err, output) => {
if (err) throw err;
// Same as above: the third argument is not a completion callback.
fs.writeFileSync(csvFilePath, output, function(err) {
console.log(output);
if (err) {
console.log(err);
}
console.log('my.csv saved.');
});
});
} catch (err) {
console.error(err);
}
}
The upload file function is:
/**
 * Uploads `file` to the Slack "testing" channel via files.upload and logs
 * the server's reply.
 *
 * @param {*} file - value appended to the form as the `file` field (the
 *   caller passes a read stream).
 * @returns {Promise<void>} settles when the upload has finished; a failed
 *   request is logged instead of being left as an unhandled rejection.
 */
function uploadFile(file) {
  console.log("In upload function");
  const form = new FormData();
  form.append('token', botToken);
  form.append('channels', 'testing');
  form.append('file', file);

  return axios.post('https://slack.com/api/files.upload', form, {
    headers: form.getHeaders()
  }).then(function (response) {
    const serverMessage = response.data;
    console.log(serverMessage);
  }).catch(function (error) {
    // Same handling as sendBillingData(): report the failure and carry on.
    console.log(error);
  });
}
So I think the error is getting caused because node is trying to upload the file before its being created. I feel like this has something to do with the asynchronous nature of Node.js but I fail to comprehend how to rectify the code. Please let me know how to correct this and mention any improvements to the code structure/design too.
Thanks!
You don't wait for the callback provided to stringify to be executed, and that is where you create the file. (Assuming this stringify function really does accept a callback.)
Using callbacks (you can make this cleaner with promises and these neat async/await controls, but let's just stick to callbacks here), it should be more like:
// Sketch of the caller: the upload is started from the callback passed to
// saveFilesNew(), using the CSV path that the callback receives.
// NOTE: the `err` argument is not inspected in this sketch.
function sendBillingData() {
...
// this callback we'll use to know when the file writing is done, and to get the file path
saveFilesNew(message, fields, 'billingData/', function(err, csvFilePathArgument) {
// this we will execute when saveFilesNew calls it, not when saveFilesNew returns, see below
uploadFile(fs.createReadStream(__dirname + '/' + csvFilePathArgument))
});
}
// let's name this callback... "callback".
// Sketch of the callee: `callback` is invoked from inside fs.writeFile's
// completion handler and receives the CSV path as its second argument.
// NOTE: as written, the "saved" log and callback(null, ...) also run when
// fs.writeFile reported an error; see the commented-out callback(err).
function saveFilesNew(message, options, folder, callback) {
...
var csvFilePath = ...; // local variable only instead of your global
...
stringify(data, { header: true, columns: columns }, (err, output) => {
if (err) throw err; // or return callback(err);
fs.writeFile(csvFilePath , output, function(err) { // NOT writeFileSync, or no callback needed
console.log(output);
if (err) {
console.log(err);
// callback(err); may be a useful approach for error-handling at a higher level
}
console.log('my.csv saved.'); // yes, NOW the CSV is saved, not before this executes! Hence:
callback(null, csvFilePath); // no error, clean process, pass the file path
});
});
console.log("This line is executed before stringify's callback is called!");
return; // implicitly, yes, yet still synchronous and that's why your version crashes
}
Using callbacks that are called only when the expected events happen (a file is done writing, a buffer/string is done transforming...) allows JS to keep executing code in the meantime. And it does keep executing code, so when you need data from an async code, you need to tell JS you need it done before executing your piece.
Also, since you can pass data when calling back (it's just a function), here I could avoid relying on a global csvFilePath. Using higher level variables makes things monolithic, like you could not transfer saveFilesNew to a dedicated file where you keep your toolkit of file-related functions.
Finally, if your global process is like:
// Starts the billing job and then immediately gets coffee; getCoffee() is
// called as soon as sendBillingData() returns.
function aDayAtTheOffice() {
sendBillingData();
getCoffee();
}
then you don't need to wait for the billing data to be processed before starting making coffee. However, if your boss told you that you could NOT get a coffee until the billing data was settled, then your process would look like:
// Variant where getCoffee() is only called from the callback handed to
// sendBillingData(), whether or not that callback receives an error.
function aDayAtTheOffice() {
sendBillingData(function (err) {
// if (err) let's do nothing here: you wanted a coffee anyway, right?
getCoffee();
});
}
(Note that callbacks having potential error as first arg and data as second arg is a convention, nothing mandatory.)
IMHO you should read about scope (the argument callback could be accessed at a time where the call to saveFilesNew was already done and forgotten!), and about the asynchronous nature of No... JavaScript. ;) (Sorry, probably not the best links but they contain the meaningful keywords, and then Google is your buddy, your friend, your Big Brother.)

Issue with filestream, reading line by line. ENOENT thrown, despite file existing

I'm having issues with a function that reads a text file line by line. It says the file I'm trying to read does not exist, although it does in the file path I am running node on. What could be the issue??
/**
 * Reads emails.txt (one address per line, relative to the current working
 * directory) and inserts every address into the group as a MEMBER.
 * The outcome of each insert is written with write_log(); API errors are
 * printed with console.error.
 *
 * @param {*} auth - not used inside this function.
 * @throws rethrows the read error from inside the readFile callback.
 */
function insertUsers(auth) {
  fs.readFile('emails.txt', function (err, data) {
    if (err) throw err;

    // Accept both \n and \r\n line endings and skip blank lines, so no
    // empty address is sent for a trailing newline.
    const emails = data
      .toString()
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter((line) => line !== '');

    // `const email` gives every callback its own address; `emails.length`
    // is a property, not a method.
    for (const email of emails) {
      service.members.insert({
        groupKey: 'testgroup#x.com',
        resource: {
          email,
          role: 'MEMBER',
        }
      }, (err, res) => {
        if (err) { return console.error('The API returned an error:', err.message); }
        // NOTE(review): assumes the inserted member is exposed as
        // res.data.member with a length — confirm against the API response.
        const member = res.data.member;
        if (member && member.length) {
          write_log('Inserted' + email + ' into student group.');
        } else {
          write_log('Failed to insert ' + email);
        }
      });
    }
  });
}
https://i.stack.imgur.com/5UTK6.png and https://i.stack.imgur.com/iVvnA.png
Verify that you're starting your node application from the same location where your file (emails.txt) is. According to your method logic it should be
C:\Users\[]\source\repos\StudentGroups\StudentGroups > node main.js
you can check the current working directory from the code
console.log(process.cwd())
it should be
C:\Users\[]\source\repos\StudentGroups\StudentGroups
Otherwise, modify your code to correctly point to emails.txt, or start your application from the correct directory.
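This problem was due to how I created emails.txt. I had typed the name as "emails.txt" while the file already had a .txt extension, so the file on disk was effectively "emails.txt.txt". I changed the file name to "emails" (keeping the .txt extension), and it worked.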

find matches in files and parse out the line number

I am trying to find matches in files and parse out the line number and what was the match along with the file name. So far I am able to read the files from the directory / sub directories and then use indexOf() which in this case is not very efficient. The goal would be go through all the files and find matches for the following
.http(
.httpContinue(
$httpUrl(
httpURL
getHttpImageURL(
getHttpURL(
The code I have so far looks like this
var fs = require('fs');
var path = [my directory];
/**
 * Recursively walks `dirname` and prints the name of every file whose
 * content contains "http(".
 *
 * Directories are only descended into and never read as files. Directory
 * listing and file read errors are reported on stderr and skipped.
 *
 * @param {string} dirname - directory to scan.
 */
function readFiles(dirname) {
  fs.readdir(dirname, function(err, filenames) {
    if (err) {
      console.error(err);
      return;
    }
    filenames.forEach(function(filename) {
      const fullPath = dirname + '/' + filename;

      if (fs.lstatSync(fullPath).isDirectory()) {
        readFiles(fullPath);
        // A directory cannot be read as a file; stop here for this entry.
        return;
      }

      fs.readFile(fullPath, { encoding: 'utf8' }, function(err, content) {
        if (err) {
          console.error(err);
          return;
        }
        //This is not very effective and I need to check each line for all these possible matches
        if (content.includes('http(')) {
          console.log(filename);
        }
      });
    });
  });
}
readFiles(path);
The challenge I am facing is to read lines and parse line numbers where I found a match and what was the match. Cant figure out how to accomplish that.
You could try this for your if statement
// This should really go somewhere near the top of the file
// This should really go somewhere near the top of the file
const wantedStrings = [
  '.http(',
  '.httpContinue(',
  '$httpUrl(',
  'httpURL',
  'getHttpImageURL(',
  'getHttpURL(',
];

// Cheap case-insensitive pre-check first; the exact, case-sensitive search
// for the wanted strings only runs when "http" occurs at all.
const mentionsHttp = content.toLowerCase().includes('http');
if (mentionsHttp && wantedStrings.some((wanted) => content.includes(wanted))) {
  // Don't need another err check here
  console.log(filename);
}

How to guarantee non-existance of a file before creating?

fs.exists is now deprecated for a decent reason that I should try to open a file and catch error to be sure nothing is possible to delete the file in between checking and opening. But if I need to create a new file instead of opening an existing file, how do I guarantee that there is no file before I try to create it?
You can't. You can however, create a new file or open an existing one if it exists:
// Opens "/path" with the "a+" flag and passes any error to handleError().
fs.open("/path", "a+", function(err, data){ // open for reading and appending
if(err) return handleError(err);
// work with file here, if file does not exist it will be created
});
Alternatively, open it with "ax+" which will error if it already exists, letting you handle the error.
module.exports = fs.existsSync || function existsSync(filePath){
try{
fs.statSync(filePath);
}catch(err){
if(err.code == 'ENOENT') return false;
}
return true;
};
https://gist.github.com/FGRibreau/3323836
https://stackoverflow.com/a/31545073/2435443
// Stats `path` and branches on the outcome: ENOENT, any other error, or
// success. The branches are placeholders that contain only comments.
fs = require('fs') ;
var path = 'sth' ;
fs.stat(path, function(err, stat) {
if (err) {
if ('ENOENT' == err.code) {
//file didn't exist so for example send 404 to client
} else {
//it is a server error so for example send 500 to client
}
} else {
//everything was ok so for example you can read it and send it to client
}
} );

Resources