I only get all data one time - node.js

I'm creating a REST API with Node.js without the Express framework, and I use fs as the database. I'm having a problem with the GET method; all other methods work fine. When I try to get all JSON files from a directory, it works only the first time. On the second and every later request nothing happens, not even an error: it just hangs for a few minutes, and then I get net::ERR_EMPTY_RESPONSE in Chrome and TypeError: "NetworkError when attempting to fetch resource." in Firefox. Getting a single JSON file works fine. I think the problem is in the back end, because I have the same problem with Postman.
I'm using Node 10.14.1. As far as I understand, when I try to get all files from the directory a second time, my server.js never sends a response, and the app stops before chosenHandler on line 64 of server.js.
Get handler:
// Resources - get
// Required data: none
// Optional data: ID
handlers._resources.get = (data, callback) => {
  // Check that the ID is valid
  checkId(data.queryStringObject.id)
  if (resourceDBId) {
    // Lookup the resource
    _data.read('resources', resourceDBId, (err, data) =>
      !err && data ? callback(ok, data) : callback(notFound, {Error: 'User doesn\'t exist'}))
  } else {
    _data.readAll('resources', (err, data) => {
      if (!err && data) {
        if (data.last) {
          resourcesData.push(data.data)
          callback(ok, resourcesData)
        }
        if (!data.last) resourcesData.push(data.data)
      } else callback(internalServerError, {Error: 'Can\'t get all resources'})
    })
  }
}
All the code is here: https://github.com/FreeDevStan/sale

The readAll method in the lib/data.js file needs to declare and initialize i.
Because i is never declared, it is treated as a global variable; it keeps its value from the first request, so on later requests the while loop is never entered, the callback never fires, and no response can be sent.
I recommend you change it as below.
lib.readAll = (dir, callback) => {
  fs.readdir(lib.baseDir + dir, (err, data) => {
    if (!err && data) {
      let i = 0;
      while (i < data.length) {
        if (i < data.length - 1) {
          fs.readFile(lib.baseDir + dir + '/' + data[i], 'utf-8', (err, content) => {
            let parsedContent = helpers.parseJsonToObject(content)
            err ? callback(err, content) : callback(false, {last: false, data: parsedContent})
          })
        }
        if (i === data.length - 1) {
          fs.readFile(lib.baseDir + dir + '/' + data[i], 'utf-8', (err, content) => {
            let parsedContent = helpers.parseJsonToObject(content)
            err ? callback(err, content) : callback(false, {last: true, data: parsedContent})
          })
        }
        i++
      }
    } else callback(err, data)
  })
}
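If you later want readAll to report everything in a single callback (so the handler doesn't need the last flag or the shared resourcesData accumulator), here is a rough alternative sketch using fs.promises. readAllAtOnce is a hypothetical name, and it assumes the same lib.baseDir and helpers.parseJsonToObject as in the repository; fs.promises is experimental on Node 10 and prints a warning there.

// Alternative sketch (not the original fix): read every file, then invoke the
// callback exactly once with the full array of parsed objects.
const fsp = require('fs').promises;

lib.readAllAtOnce = (dir, callback) => {
  const dirPath = lib.baseDir + dir;
  fsp.readdir(dirPath)
    .then(fileNames => Promise.all(
      fileNames.map(fileName =>
        fsp.readFile(dirPath + '/' + fileName, 'utf-8')
          .then(content => helpers.parseJsonToObject(content))
      )
    ))
    .then(parsedFiles => callback(false, parsedFiles))
    .catch(err => callback(err));
};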

Related

Requesting many connections to another site with Cross Origin Resource Sharing activated close in time makes the data corrupted

I am making a Node.js application, and part of my code requests data from 193 different URLs to download the JSON data from each one. Here is one of those URLs: https://www.gemeentegeschiedenis.nl/gemeentenaam/json/Apeldoorn. For some of them the downloaded JSON data is fine and complete. However, towards the end, some of the files get corrupted: part of the data becomes null, and some contain database errors. I think it has to do with requesting data from so many URLs in a short amount of time (which is why I tried setTimeout, but that doesn't really work).
function writeToFile(url) {
    // get name to make each new file unique
    var name = url.split("json/")[1];
    var fileStream = fs.createWriteStream(`jsonFiles/${name}.json`);
    var options = {
        url: `${url}`,
        method: 'GET',
        headers: {
            'Accept': 'application/json',
            'Accept-Charset': 'utf-8',
            json: true
        }
    }
    //request the data from the site and download to the file.
    request.get(options).pipe(fileStream);
}

function getMunicipalityGeoJsonData(req, res) {
    //Get all the urls pointing to the JSON data for the province, Gelderland
    getGelderlandJsonUrls((err, jsonUrls) => {
        //for all those urls, write the data to files.
        for (url of jsonUrls) {
            console.log(url);
            writeToFile(url);
        }
    })
}

function getGelderlandJsonUrls(callback) {
    getMunicipalityJsonUrls("Gelderland", (err, data) => {
        jsonUrls = data;
        callback(null, jsonUrls);
    });
}

function getMunicipalityJsonUrls(provinceName, callback) {
    request({ uri: `https://www.gemeentegeschiedenis.nl/provincie/json/${provinceName}` }, (error, response, body) => {
        body = JSON.parse(body);
        // extracting each json URL from all the municipalities in Gelderland
        var jsonUrls = [];
        var numberMun = body.length;
        for (var i = 0; i < numberMun; i++) {
            var url = body[i].uri.naam;
            var urlSplit = url.split("gemeentenaam");
            var jsonUrl = urlSplit[0] + "gemeentenaam/json" + urlSplit[1];
            jsonUrl = jsonUrl.replace("http://", "https://");
            jsonUrls.push(jsonUrl);
        }
        callback(null, jsonUrls);
    });
}
The last JSON file was downloaded as an HTML page containing a database error, from the URL https://www.gemeentegeschiedenis.nl/gemeentenaam/json/Zutphen, which took just under 6 seconds to load according to the Network tab in Chrome.
The 1812 entry has null for its properties when it should have a bunch of coordinates: https://www.gemeentegeschiedenis.nl/gemeentenaam/json/Winssen (which took just over a second to load in Chrome).
I am a noob at Node, but please help me fix this issue, maybe with some sort of check for whether the data is corrupted. Thanks for the help in advance. :)
EDIT: I am trying to do up to 200 URLs at a time in the for loop.
First off, add proper error handling to getMunicipalityJsonUrls() and to getGelderlandJsonUrls(). This means:
Check the err parameter everywhere it's present and propagate the error back to the caller.
Capture possible errors from JSON.parse().
Check the HTTP statusCode.
Here's that fixed-up code:
function getMunicipalityJsonUrls(provinceName, callback) {
    request({ uri: `https://www.gemeentegeschiedenis.nl/provincie/json/${provinceName}` }, (err, response, body) => {
        if (err) {
            callback(err);
            return;
        }
        if (response.statusCode !== 200) {
            callback(new Error(`http status code ${response.statusCode}`));
            return;
        }
        try {
            // each municipality object carries its URL in uri.naam
            const jsonUrls = JSON.parse(body).map(municipality => {
                let urlSplit = municipality.uri.naam.split("gemeentenaam");
                let jsonUrl = urlSplit[0] + "gemeentenaam/json" + urlSplit[1];
                return jsonUrl.replace("http://", "https://");
            });
            callback(null, jsonUrls);
        } catch (e) {
            callback(e);
        }
    });
}
function getGelderlandJsonUrls(callback) {
    getMunicipalityJsonUrls("Gelderland", (err, data) => {
        if (err) {
            callback(err);
        } else {
            callback(null, data);
        }
    });
}
Then, in writeToFile(), add error handling and completion monitoring. I chose to wrap it in a promise rather than a plain callback because I want to use it with some utilities that work with promises.
function writeToFile(url) {
    return new Promise((resolve, reject) => {
        // get name to make each new file unique
        var name = url.split("json/")[1];
        var fileStream = fs.createWriteStream(`jsonFiles/${name}.json`);
        fileStream.on('error', (e) => {
            reject(e);
        });
        var options = {
            url: `${url}`,
            method: 'GET',
            headers: {
                'Accept': 'application/json',
                'Accept-Charset': 'utf-8',
                json: true
            }
        }
        //request the data from the site and download to the file.
        request.get(options).pipe(fileStream).on('error', (e) => {
            reject(e);
        }).on('finish', () => {
            resolve(url);
        });
    });
}
Now, we need to decide how to loop through all the URLs. If any of the URLs could ever write to the same file (if that's even a remote possibility), then you have to serialize them so that no two asynchronous operations are ever writing to the same file at the same time, because that will corrupt the file. If that were the case, you could serialize the writing like this:
// option 1 - serialize writing to files
function getMunicipalityGeoJsonData(req, res) {
    //Get all the urls pointing to the JSON data for the province, Gelderland
    getGelderlandJsonUrls(async (err, jsonUrls) => {
        if (err) {
            console.log(err);
            res.sendStatus(500);
        } else {
            try {
                //for all those urls, write the data to files, one at a time.
                for (const url of jsonUrls) {
                    console.log(url);
                    await writeToFile(url);
                }
                res.send("All done");
            } catch (e) {
                console.log(e);
                res.sendStatus(500);
            }
        }
    });
}
If you are absolutely sure that none of these URLs will ever cause writing to the same file, then you can run N of them at a time, where you determine the lowest value of N that gets you decent performance. Higher values of N consume more peak resources (memory and file handles); lower values of N run fewer things in parallel. If the target hostnames are all the same server, then usually you don't want N to be more than about 5. If the target hosts you are retrieving data from are all different, you can experiment with values of N up to maybe 20.
// option 2 - run N at a time in parallel
function getMunicipalityGeoJsonData(req, res) {
    //Get all the urls pointing to the JSON data for the province, Gelderland
    getGelderlandJsonUrls((err, jsonUrls) => {
        if (err) {
            console.log(err);
            res.sendStatus(500);
        } else {
            //for all those urls, write the data to files.
            const numConcurrent = 5;
            mapConcurrent(jsonUrls, numConcurrent, writeToFile).then(() => {
                res.send("All done");
            }).catch(err => {
                console.log(err);
                res.sendStatus(500);
            });
        }
    })
}
The mapConcurrent() function comes from this answer, Promise.all consumes all my RAM, and is as follows. It expects you to pass it an array of items to iterate over, the maximum number you want in flight at the same time, and a function that will be passed an array item (and its index) and will return a promise that settles when it's done or has an error:
function mapConcurrent(items, maxConcurrent, fn) {
    let index = 0;
    let inFlightCntr = 0;
    let doneCntr = 0;
    let results = new Array(items.length);
    let stop = false;

    return new Promise(function(resolve, reject) {
        function runNext() {
            let i = index;
            ++inFlightCntr;
            fn(items[index], index++).then(function(val) {
                ++doneCntr;
                --inFlightCntr;
                results[i] = val;
                run();
            }, function(err) {
                // set flag so we don't launch any more requests
                stop = true;
                reject(err);
            });
        }

        function run() {
            // launch as many as we're allowed to
            while (!stop && inFlightCntr < maxConcurrent && index < items.length) {
                runNext();
            }
            // if all are done, then resolve parent promise with results
            if (doneCntr === items.length) {
                resolve(results);
            }
        }

        run();
    });
}
There are comparable functions in Bluebird's Promise.map() and in the Async library.
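For reference, here is a minimal sketch of the Bluebird equivalent (assuming the bluebird package is installed, writeToFile() is the promise-returning version above, and this runs inside the route handler where jsonUrls and res exist):

const Promise = require('bluebird');

// Same idea as mapConcurrent(): at most 5 writeToFile() operations in flight at once.
Promise.map(jsonUrls, writeToFile, { concurrency: 5 })
    .then(() => res.send("All done"))
    .catch(err => {
        console.log(err);
        res.sendStatus(500);
    });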
So, using this code you now have the ability to control how many of your requests/writeToFile() operations are in flight at the same time, and you are capturing and logging all possible errors. So, you can tune how many can be in flight at once for best performance and lowest resource use, and, if there are any errors, you will be logging them so you can debug.
This code is currently set to stop processing any further URLs if it gets an error. You can change that, if you want to continue with the other URLs when one fails, by tweaking mapConcurrent(). But I would still make sure you log any errors so you know when they happen and can investigate why.
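For example, one way to keep going without modifying mapConcurrent() itself is to catch per-item errors inside the function you pass it, so a failed URL resolves to an error record instead of rejecting the whole run (a sketch, not part of the original answer):

mapConcurrent(jsonUrls, numConcurrent, (url) => {
    return writeToFile(url).catch((err) => {
        console.log(`failed: ${url}`, err);
        return { url, error: err };     // resolve so the remaining URLs still run
    });
}).then((results) => {
    const failures = results.filter(r => r && r.error);
    console.log(`${failures.length} of ${results.length} urls failed`);
});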
One other note: if this were my code, I would convert everything to promises (no plain callbacks), and I'd use the got() library instead of the now-deprecated request() library. I don't write any new code using the request() library.
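For illustration, a hedged sketch of writeToFile() using got streams. It assumes a CommonJS build of got (e.g. v11) and Node's promise-based stream.pipeline (available since Node 15); it is not the original answer's code:

const fs = require('fs');
const got = require('got');                        // v11.x (CommonJS)
const { pipeline } = require('stream/promises');   // Node 15+

// Streams the JSON response straight into the file and resolves when the write finishes.
async function writeToFile(url) {
    const name = url.split("json/")[1];
    await pipeline(
        got.stream(url, { headers: { 'Accept': 'application/json' } }),
        fs.createWriteStream(`jsonFiles/${name}.json`)
    );
    return url;
}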

Can't get past HTTP request in Node

So, here is the problem. I found out how to make an HTTP request in Node.js so that I could download and parse a remote JSON file. It all works fine, but nothing happens after that. I have a function, and in it there is an if condition that never gets executed. It simply cannot get past the HTTP request. Is there something I am missing?
var remoteStamp;

if (typeof force == "undefined") {
    var timeurl = "http://" + parsedconfig.weburl + "/timestamp.json";
    request(timeurl, { json: true }, (err, res, body) => {
        if (err) { return console.log(err); }
        console.log(body.timestamp);
        remoteStamp = body.timestamp;
    });
}

if (remoteStamp < parsedconfig.timestamp || force == "local") {
    //something should happen here, all the values check out - still nothing happens
}
You are using a callback, so all the code you want to execute after the request completes should be inside the callback:
var remoteStamp;

if (typeof force == "undefined") {
    var timeurl = "http://" + parsedconfig.weburl + "/timestamp.json";
    request(timeurl, { json: true }, (err, res, body) => {
        if (err) { return console.log(err); }
        console.log(body.timestamp);
        remoteStamp = body.timestamp;
        if (remoteStamp < parsedconfig.timestamp || force == "local") {
            //something should happen here, all the values check out - still nothing happens
        }
    });
}
Or you can use the request-promise library to do this with promises: https://github.com/request/request-promise
On how to use promises: https://developers.google.com/web/fundamentals/primers/promises
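For example, a rough sketch with request-promise (checkTimestamp is a hypothetical wrapper name; parsedconfig and force come from the question's code):

const rp = require('request-promise');

function checkTimestamp() {
    if (typeof force == "undefined") {
        var timeurl = "http://" + parsedconfig.weburl + "/timestamp.json";
        // rp() returns a promise that resolves with the parsed body (json: true)
        return rp({ uri: timeurl, json: true }).then(function (body) {
            console.log(body.timestamp);
            return body.timestamp;
        });
    }
    return Promise.resolve();   // no remote check needed
}

checkTimestamp().then(function (remoteStamp) {
    if (remoteStamp < parsedconfig.timestamp || force == "local") {
        // something should happen here
    }
}).catch(function (err) {
    console.log(err);
});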

Promises with NodeJS

I'd like to use promises with NodeJS, but it doesn't seem to work as expected.
I'd like to see LOG 1, LOG 2 and LOG 3, in that order, for every file, with LOG 4 at the end. But LOG 4 never shows up, and instead I get this:
LOG 1
LOG 3
LOG 1
LOG 3
LOG 1
LOG 3
SUCCESS LOG 2
SUCCESS LOG 2
SUCCESS LOG 2
SUCCESS LOG 2
My code:
var filePromise = _.map(files, function(file) {
    var filePath = './sql/' + file;
    fs.lstat(filePath, function(err, stats) {
        if (stats.isFile() && file !== '.gitignore' && file !== 'index.js') {
            fs.readFile(filePath, 'utf-8', function(err, data) {
                console.log('LOG 1');
                db.query(data).then(function() {
                    console.log(colors.green('SUCCESS LOG 2'));
                }).catch(function() {
                    console.log(colors.red('ERROR LOG 2'));
                });
                console.log('LOG 3');
            });
        }
    });
});

Promise.all(filePromise).then(function() {
    console.log(colors.green('LOG 4'));
});
In order for filePromise to contain a collection of promises, you need to return a promise for each iteration of your map call. Only then will the call to Promise.all be evaluated correctly.
var filePromise = _.map(files, function(file) {
    return new Promise(function(resolve, reject) {
        var filePath = './sql/' + file;
        fs.lstat(filePath, function(err, stats) {
            if (stats.isFile() && file !== '.gitignore' && file !== 'index.js') {
                fs.readFile(filePath, 'utf-8', function(err, data) {
                    console.log('LOG 1');
                    db.query(data).then(function() {
                        console.log(colors.green('SUCCESS LOG 2'));
                        resolve();
                    }).catch(function() {
                        console.log(colors.red('ERROR LOG 2'));
                        reject();
                    });
                    console.log('LOG 3');
                });
            } else {
                reject(new Error("failed condition"))
            }
        });
    })
});

Promise.all(filePromise).then(function() {
    console.log(colors.green('LOG 4'));
});
This does not fix the ordering of your console logs; that is a separate issue. Your logs are not actually out of order: they are logging as expected. The call to db.query is asynchronous, so it does not block waiting for a response; the next line (LOG 3) executes immediately after the call returns.
Side note: make sure to handle the err arguments passed to the inline callbacks of fs.lstat and fs.readFile. Failing to do so will result in the promise never settling in the event of an error.
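If the goal really is to see LOG 1, LOG 2 and LOG 3 in order for each file, the query has to be awaited before moving on. Here is a rough async/await sketch (assuming the same files, db and colors objects; it processes the files one after another rather than in parallel):

const fsp = require('fs').promises;

async function runAll() {
    for (const file of files) {
        const filePath = './sql/' + file;
        const stats = await fsp.lstat(filePath);
        if (stats.isFile() && file !== '.gitignore' && file !== 'index.js') {
            const data = await fsp.readFile(filePath, 'utf-8');
            console.log('LOG 1');
            try {
                await db.query(data);   // wait for the query before logging 2 and 3
                console.log(colors.green('SUCCESS LOG 2'));
            } catch (e) {
                console.log(colors.red('ERROR LOG 2'));
            }
            console.log('LOG 3');
        }
    }
}

runAll().then(function () {
    console.log(colors.green('LOG 4'));
});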

Getting Error Like RequestsLimitError: You just made too many request to instagram API in node js?

I am working with the Instagram API in Node.js. I have an array that stores over 20k Instagram IDs. I loop over that array and fetch the bio for each ID one by one, but then I get this error: RequestsLimitError: You just made too many request to instagram API. I tried adding a timeout after every 5 calls, but I still get the same error.
Here is my code:
var InstaId = ["12345687", /* 20k more IDs stored here in the array */];
var changesessionFlage = 0;

async.each(InstaId, function (id, callback) {
    async.parallel([
        function (cb) {
            if (id) {
                setTimeout(function () {
                    Client.Account.getById(sess, id).then(function (bio) {
                        console.log("changesessionFlage" + changesessionFlage);
                        changesessionFlage++
                        //console.log("bio : ", bio._params); // here i am getting bio one by one user
                        if (changesessionFlage == 6) {
                            changesessionFlage = 0;
                        }
                        cb(null, bio._params);
                    })
                    .catch(function (err) {
                        console.log("get boi: ", err)
                        cb(null, bio._params);
                    })
                }, (changesessionFlage == 5) ? 10000 : 0)
            }
        }
    ], function (err, results) {
        if (err) {
            console.log(err);
            return;
        }
        Result = results
        callback();
    });
}, function (err) {
    if (err) {
        console.log(err);
        return;
    }
    else {
        console.log("Result=>", Result)
        if (Result) {
            console.log("Result[0]=>", Result[0])
            var ws = XLSX.utils.json_to_sheet(Result[0]);
            var wb = XLSX.utils.book_new();
            XLSX.utils.book_append_sheet(wb, ws, "People");
            var wbout = XLSX.write(wb, { bookType: 'xlsx', type: 'binary' });
            res.end(wbout, 'binary');
        }
    }
});
Does anyone know how to fix this issue? Please help me.
Your setTimeout is used incorrectly: all the API calls are still made at roughly the same time, just after a 10000 ms delay.
Since this is a one-time job, just split the 20k IDs into 4k batches and execute one batch every hour. That way you stay under the 5k/hour API limit.
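A minimal sketch of that batching idea (BATCH_SIZE and the hour delay are illustrative; Client, sess and InstaId are the objects from the question):

var BATCH_SIZE = 4000;
var ONE_HOUR_MS = 60 * 60 * 1000;

// Split the 20k IDs into batches of 4k.
var batches = [];
for (var i = 0; i < InstaId.length; i += BATCH_SIZE) {
    batches.push(InstaId.slice(i, i + BATCH_SIZE));
}

// Kick off one batch per hour to stay under the 5k/hour limit.
batches.forEach(function (batch, index) {
    setTimeout(function () {
        batch.forEach(function (id) {
            Client.Account.getById(sess, id)
                .then(function (bio) { console.log(bio._params); })
                .catch(function (err) { console.log("get bio: ", err); });
        });
    }, index * ONE_HOUR_MS);
});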

Searching then adding record in Redis using Node.js

I may be overtired, but for the life of me I cannot understand why the following is not working. I am trying to check whether a string exists and, if it does not, add it to a Redis database.
options = options || {};

var counter = 1,
    client = redis.getClient();

options.name = options.name || '';

if (_.isEmpty(options.name)) {
    return callback('Cannot add name. No name supplied');
} else {
    options.name = options.name.trim();
}

client.get('mySavedKeys' + options.name, function (err, data) {
    if (err) { return callback(err); }
    if (!_.isNull(data)) {
        console.log('Name found', options.name);
        return callback(null, data);
    } else {
        counter += 1;
        console.log('Name not found', options.name);
        console.log('ID', counter)
        client2.set('mySavedKeys' + options.name, counter, function (err) {
            if (err) { return callback(err); }
            console.log('Added', options.name);
            return callback(null, counter);
        });
    }
});
If I run an array of names through this using async.each, it seems to run all the 'get' calls first and then run the 'set' calls, so I am getting duplicate insertions.
I'm sure the answer is obvious, but I cannot see the problem.
If you use async.eachSeries, you ensure that each get/set pair completes before the next name is processed, rather than all the gets running in parallel.
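For example, a minimal sketch with async.eachSeries (addName is a hypothetical wrapper around the get/set code above):

const async = require('async');

// Each name is fully looked up and, if missing, inserted before the next one
// starts, so the duplicate inserts caused by parallel gets can't happen.
async.eachSeries(names, function (name, done) {
    addName({ name: name }, done);   // hypothetical wrapper around the code above
}, function (err) {
    if (err) { return console.error(err); }
    console.log('All names processed');
});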
