Node.js blocking all requests until it calls back

I've developed a Node.js API (using Express) which allows users to log in and get a list of files they have stored on a remote server. As you would expect, the code must be non-blocking so the web server can still respond to login requests, even while some users are fetching their file lists.
Every time a user makes a request to get their file list, the listOfFiles function is called.
This is the code:
exports.listOfFiles = function (req, res) {
    db.Account.find({ where: { id: 1 } }).then(function (newAcc) {
        console.log("found the account");
        getFiles('/', newAcc.accessToken, '0', newAcc, function (error) {
            if (error) {
                log.error('Error getting files');
            } else {
                console.log("callback!");
            }
        });
    });
};
The getFiles function: this function is responsible for fetching the file list from the remote server and storing it in a Postgres database.
function getFiles(path, accessToken, parentID, newAcc, callback) {
    var client = new ExternalAPI.Client({
        key: config.get("default:clientId"),
        secret: config.get("default:clientSecret")
    });
    client._oauth._token = accessToken;

    var options = {
        removed: false,
        deleted: false,
        readDir: true
    };

    // This is the instruction that fetches an array of items
    // (metadata only) from a remote server.
    client.stat(path, options, function (error, entries) {
        if (error) {
            if (error.status == 429) {
                console.log(accessToken + ' timeout');
                setTimeout(function () {
                    getFiles(path, accessToken, parentID, newAcc, callback);
                }, 60000);
            } else {
                log.error(error);
                callback(error, null);
            }
        } else {
            // When the array of items arrives:
            console.log("RECEIVED FILES");
            var inserted = 0;
            var items = entries._json.contents;
            for (var file in items) {
                var skyItemID = uuid.v1();
                var name = items[file].path.split('/').pop();
                var itemType;
                if (items[file].is_dir) {
                    itemType = 'folder';
                } else {
                    itemType = 'file';
                }
                newAcc.createItem({
                    name: name,
                    lastModified: items[file].modified,
                    skyItemID: skyItemID,
                    parentID: parentID,
                    itemSize: items[file].bytes,
                    itemType: itemType,
                    readOnly: items[file].read_only,
                    mimeType: items[file].mime_type
                }).then(function (item) {
                    console.log(item.name);
                    if (++inserted == items.length) {
                        console.log(inserted);
                        console.log(items.length);
                        console.log("callsback");
                        callback();
                    }
                }).catch(function (error) {
                    log.error('[DROPBOX] - Filename with special characters');
                    callback(new Error());
                    return;
                });
            }
        }
    });
}
The problem is that the moment the web server prints console.log("RECEIVED FILES") to the console, it stops responding to all other requests, such as login or file-list requests from other users.
It starts responding again when it prints console.log("callback!"). So I'm assuming that Node.js is somehow blocking itself until the getFiles function finishes and calls back.
I don't think this is normal behaviour. Shouldn't Node.js keep responding to other requests even if some operations are running in the background? Shouldn't the getFiles function run in the background without affecting or blocking all other requests? What am I doing wrong?
Thanks!

I am facing the same kind of problem: a long-running outgoing HTTP request blocks the server from responding to other clients. This is my question: what is the correct behavior for Node.js while it is making a long-running HTTP request to another server? Currently I have no answer for that. If you find the answer, please reply. Thanks.

Related

Trying to send proper header with Express, Tedious, while accessing Azure SQL

My backend gets requests to fetch records from an Azure SQL db. To manage these requests I'm using Express in Node.js and Tedious (to connect to the DB). When a request to the appropriate route comes in, Tedious opens the connection to the db, queries it, and should send the response back to the frontend.
However, the code responds before I have an answer from the db, so when I go to send the real (actually desired) response, Express tells me it already sent headers back (the dreaded: 'Cannot set headers after they are sent to the client').
After debugging quite a bit (using several console.log(JSON.stringify(resp.headersSent));) to see when the response was actually sent, I noticed that it is sent the moment I connect with Azure (see below).
I'm not sure if I'm missing something (though I already checked the documentation for all of those libraries quite a bit), but how can I control when the response is sent? Or is there another way of doing this?
I omitted several of the other routes for brevity. The other routes work fine, so I know the code connects to the Azure db correctly and the frontend queries the backend correctly. Help is appreciated. Thank you.
const express = require('express');
const cors = require('cors');
const Connection = require('tedious').Connection;
const Request = require('tedious').Request;

const config = {
    authentication: {
        options: {
            userName: "xxxx",
            password: "xxxx"
        },
        type: 'default'
    },
    server: "xxxx",
    options: {
        database: "xxxx",
        encrypt: true
    }
};

const app = express();
app.use(express.json({ type: '*/*' }));
app.use(cors({ origin: '*' }));

app.get("/allproj/", function (req, resp) {
    const q = `select Title, Report_Date, Project_Number, Phase_Code, Items_No, PId from projec order by PId desc`;
    let ansf = [];
    const connection = new Connection(config);
    connection.on('connect', (err, connection) => {
        if (err) {
            console.log(err);
        } else { // this is the moment the headers are sent,
                 // seemingly with positive response from connection
            queryItems(q);
        }
    });
    queryItems = (q) => {
        request = new Request(q, function (err, rowCount) {
            if (err) {
                console.log(err);
            } else {
                console.log(rowCount + ' rows pulled');
                connection.close();
            }
        });
        request.on('row', function (columns) {
            let ans = [];
            columns.forEach(function (column) {
                ans.push(column.value);
                if (ans.length === 6) { // I know each row is 6 cols long
                    ansf.push(ans);
                    ans = [];
                }
            });
            console.log('ansf length: ' + ansf.length);
            resp.send({ ansf }); // This is the response I would like to return
        });
        request.on('done', function (rowCount) {
            console.log(rowCount + ' rows returned');
            connection.close();
        });
        connection.execSql(request);
    };
    resp.redirect("/");
});

app.listen(3000, process.env.IP, function () {
    console.log("Started OK...");
});
Remove resp.redirect("/");
It is already transferring your request to "/", so by the time control reaches resp.send({ ansf }) the headers have already been sent, which gives you that error.
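For illustration, here is a minimal sketch of the route with the redirect removed. As an extra assumption beyond what the answer states, resp.send is moved into the Request completion callback so it runs exactly once, after all rows have been collected (the names config, Connection, Request and ansf come from the question):
app.get("/allproj/", function (req, resp) {
    const q = `select Title, Report_Date, Project_Number, Phase_Code, Items_No, PId from projec order by PId desc`;
    let ansf = [];
    const connection = new Connection(config);
    connection.on('connect', (err) => {
        if (err) {
            console.log(err);
            return resp.status(500).send({ error: 'DB connection failed' });
        }
        const request = new Request(q, function (err, rowCount) {
            connection.close();
            if (err) {
                console.log(err);
                return resp.status(500).send({ error: 'Query failed' });
            }
            console.log(rowCount + ' rows pulled');
            resp.send({ ansf }); // sent once, after the whole query has finished
        });
        request.on('row', function (columns) {
            let ans = [];
            columns.forEach(function (column) {
                ans.push(column.value);
                if (ans.length === 6) { // each row is 6 cols long
                    ansf.push(ans);
                    ans = [];
                }
            });
        });
        connection.execSql(request);
    });
    // no resp.redirect("/") here
});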

API Gateway websockets - Function is sending two messages

I'm using the serverless API Gateway websockets and I can successfully get messages sent back and forth between the Lambda function and the client.
However, I can't figure out how to get my function to send only a single message. It's currently sending two messages because of the callback at the end of the function. This is more of a Node.js issue, but I've been trying for the past couple of hours to figure it out and can't seem to.
var params2 = {
    TableName: "UserConnections",
    FilterExpression: "cameraId = :val",
    ExpressionAttributeValues: { ":val": { "S": JSON.parse(event.body).data.camera_id } }
};

DDB.scan(params2, function (err, data) {
    if (err) throw err;
    console.log("DATA: " + JSON.stringify(data));
    for (var i = 0; i < data['Items'].length; i++) {
        var id = data['Items'][i]['connectionId'].S;
        console.log("List of connection ids: " + id);
        var params3 = {
            ConnectionId: id,
            Data: JSON.stringify(message)
        };
        apigatewaymanagementapi.postToConnection(params3, function (err, data) {
            if (err) {
                throw err; // an error occurred
            } else {
                console.log("Success sending message to clients: " + JSON.stringify(data));
            }
        });
    }
});

callback(null, {
    statusCode: 200,
    body: "Message Processed in Lambda!"
});
The postToConnection call sends a message back to multiple users, and then the callback sends the body to the same users again. How can I just send params3 back to the users and not use the callback to end the function?
Edit 1:
Adding
callback(null, {});
still sends two messages, except the second one is now empty. How can I get it to send strictly one message?
Return an empty object; the return value is ignored when this function is invoked from the WebSocket gateway.
return {}; // callback(null, {});
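Put together, a minimal sketch of the handler as an async function. This assumes the AWS SDK v2 .promise() helpers; the handler name is illustrative, and DDB, apigatewaymanagementapi and message are the objects from the question's snippet:
exports.handler = async (event) => {
    var params2 = {
        TableName: "UserConnections",
        FilterExpression: "cameraId = :val",
        ExpressionAttributeValues: { ":val": { "S": JSON.parse(event.body).data.camera_id } }
    };

    // Find the matching connections, then push the message to each one.
    const data = await DDB.scan(params2).promise();
    for (const item of data.Items) {
        await apigatewaymanagementapi.postToConnection({
            ConnectionId: item.connectionId.S,
            Data: JSON.stringify(message)
        }).promise();
    }

    // The return value is ignored by the WebSocket gateway, so nothing
    // extra is sent to the clients.
    return {};
};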

Requesting HTTP HEAD of thousands of URLs via NodeJS

I need to check the availability of about 300,000 URLs on a local server via HTTP. The files are not in a local file system but in a key-value store, and the goal is to sanity-check whether every system needing access to those files can reach them via HTTP.
To do so, I would use HTTP HEAD requests that return HTTP 200 for every file found and 404 for every file not found.
The problem is, if I make too many requests at once, I get rate-limited by nginx or a local proxy, so there is no info on whether a file is really accessible.
My function for checking the availability of a file looks as follows:
...
const request = require('request'); // Using the request lib.
...

const checkEntity = entity => {
    logger.debug("HTTP HEAD ", entity);
    return request({ method: "HEAD", uri: entity.url })
        .then(result => {
            logger.debug("Successfully retrieved file: " + entity.url);
            entity.valid = result != undefined;
        })
        .catch(err => {
            logger.debug("Failed to retrieve file.", err);
            entity.valid = false;
        });
}
If I call this function a few times, things work as expected. When I try to run it within recursive promises, I quickly exceed the maximum call stack size. Setting up one promise for each call uses too much memory.
How could this be solved?
This problem can be solved in these steps:
1. Define a queue and store all your entities (all the URLs that need to be checked).
2. Define how many HTTP requests you want to send in parallel. This number should not be too small or too large: if it's too small, the program is not efficient; if it's too large, you run into the same rate-limit problem. Let's call it N; pick a reasonable number according to your server's capacity.
3. Send N HTTP requests in parallel at the beginning.
4. When a request finishes, fetch a new entity from the queue and send a new request. To get notified when a request is done, you can add a callback parameter to your checkEntity function.
In this way, the number of HTTP requests in flight will never exceed N.
Here is a pseudo code example based on your code snippet:
let allEntities = [...]; // 300000 URLs
let finishedEntities = [];

const request = require('request'); // Using the request lib.
...

const checkEntity = function (entity, callback) {
    logger.debug("HTTP HEAD ", entity);
    return request({ method: "HEAD", uri: entity.url })
        .then(result => {
            logger.debug("Successfully retrieved file: " + entity.url);
            entity.valid = result != undefined;
            callback(entity);
        })
        .catch(err => {
            logger.debug("Failed to retrieve file.", err);
            entity.valid = false;
            callback(entity);
        });
}

function checkEntityCallback(entity) {
    finishedEntities.push(entity);
    let newEntity = allEntities.shift();
    if (newEntity) {
        checkEntity(newEntity, checkEntityCallback); // reuse the entity just taken off the queue
    }
}

// Start N (here: 10) requests in parallel; each completed request pulls the next URL.
for (let i = 0; i < 10; i++) {
    checkEntity(allEntities.shift(), checkEntityCallback);
}
To make things easier to understand, you can change the usage of request and remove all Promise stuff:
const checkEntity = function (entity, callback) {
    logger.debug("HTTP HEAD ", entity);
    request({ method: "HEAD", uri: entity.url }, function (error, response, body) {
        if (error) {
            logger.debug("Failed to retrieve file.", error);
            entity.valid = false;
            callback(entity);
            return;
        }
        logger.debug("Successfully retrieved file: " + entity.url);
        entity.valid = body != undefined;
        callback(entity);
    });
}

Avoiding multiple verification for Twitter API (node.js)

I'm trying to create a basic app in Node.js that a) tracks a keyword on Twitter and temporarily stores messages relating to that keyword, and b) after enough messages have accumulated, returns them to the user. I'm using the ntwitter library.
I have a basic long-polling system implemented on my client and server side, but I'm having some trouble with verification. The way it's set up currently, it verifies the user each time /api/streamfeed is called, so potentially every 30 seconds (since I have a 30 s timeout schedule) before checking the stream. I think this will get me into trouble, since I believe verification is rate-limited. Is there a way to check whether I'm verified without having to ping Twitter's API (perhaps store a boolean after the first attempt)?
Client side:
// upon receiving a response, poll again
function getStreamFeed() {
    console.log('calling getStreamFeed');
    $http.get('/api/streamfeed').success(function (data) {
        console.log(data);
        getStreamFeed();
    });
};

setTimeout(getStreamFeed, 1000);
Server side:
app.get('/api/streamfeed', function (req, res) {
    /*
    ...
    polling code
    ...
    */
    twit.verifyCredentials(function (err, data) {
        if (err) res.send(404);
        twit.stream('statuses/filter', {
            track: 'justin bieber'
        }, function (stream) {
            stream.on('data', function (data) {
                console.log(data.text);
                messages.push(data.text);
            });
        });
    });
});
I'd send the credentials back and then resend them on the next poll... this could be a bool, or the actual credentials to use. These aren't your private keys or anything, only the user's.
They could also be sent in headers or cookies and properly hashed, etc.
This just shows a pattern that should work.
Client side:
function getStreamFeed(credentials) {
    // upon receiving a response, poll again
    console.log('calling getStreamFeed');
    var url = '/api/streamfeed';
    if (credentials) {
        url += '?credentials=' + credentials; // first (and only) query parameter
    }
    $http
        .get(url)
        .success(function (data) {
            console.log(data);
            getStreamFeed(true);
        });
};

setTimeout(getStreamFeed, 1000);
Server side:
app.get('/api/streamfeed', function (req, res) {
    function twitStream() {
        twit.stream('statuses/filter', { track: 'justin bieber' }, function (stream) {
            stream.on('data', function (data) {
                console.log(data.text);
                messages.push(data.text);
            });
        });
    }

    var credentials = req.query.credentials;
    if (credentials) {
        // The client already proved it is verified; skip the Twitter API call.
        return twitStream();
    }

    twit.verifyCredentials(function (err, data) {
        if (err) return res.send(404);
        twitStream();
    });
});
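An alternative sketch of the same idea kept entirely server-side, following the asker's own suggestion of storing a boolean after the first successful verification (the credentialsVerified variable is illustrative, not from the original):
// Hypothetical server-side cache: verify once, remember the result.
var credentialsVerified = false;

app.get('/api/streamfeed', function (req, res) {
    function twitStream() {
        twit.stream('statuses/filter', { track: 'justin bieber' }, function (stream) {
            stream.on('data', function (data) {
                console.log(data.text);
                messages.push(data.text);
            });
        });
    }

    if (credentialsVerified) {
        // Already verified on an earlier poll; skip the Twitter API call.
        return twitStream();
    }

    twit.verifyCredentials(function (err, data) {
        if (err) return res.send(404);
        credentialsVerified = true;
        twitStream();
    });
});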

Express keeping connection open?

I'm using Node.js, Express and PostgreSQL as the backend.
This is the approach I used to build a REST API:
exports.schema = function (inputs, res) {
    var query = knex('schema')
        .orderBy('sch_title', 'asc')
        .select();
    query.exec(function (err, schemas) {
        if (err) {
            var response = {
                message: 'Something went wrong when trying to fetch schemas',
                thrownErr: err
            };
            console.error(response);
            res.send(500, response);
            return;
        }
        if (schemas.length === 0) {
            var message = 'No schemas was found';
            console.error(message);
            res.send(400, message);
            return;
        }
        res.send(200, schemas);
    });
};
It works, but after a while Postgres logs an error and it's no longer working:
sorry, too many clients already
Do I need to close each request somehow? I could not find anything about this in the Express docs. What can be wrong?
This error only occurs on the production server, not on the development machine.
Update
The app only breaks in one 'module'. The rest of the app works fine. So it's only some queries that give the error.
Just keep one connection open for your whole app. The docs show an example of how to do this.
This code goes in your app.js...
var Knex = require('knex');
Knex.knex = Knex.initialize({
    client: 'pg',
    connection: {
        // your connection config
    }
});
And when you want to query in your controllers/middlewares...
var knex = require('knex').knex;

exports.schema = function (req, res) {
    var query = knex('schema')
        .orderBy('sch_title', 'asc')
        .select();
    // more code...
};
If you place Knex.initialize inside an app.use or app.VERB, it gets called repeatedly for each request thus you'll end up connecting to PG multiple times.
For most cases, you don't need to do an open+query+close for every HTTP request.
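If a single shared instance still exhausts Postgres connections under load, the pool size can be capped at initialization time. A minimal sketch, assuming Knex's pool option with min/max fields (check the docs for the exact keys in your Knex version):
var Knex = require('knex');
Knex.knex = Knex.initialize({
    client: 'pg',
    connection: {
        // your connection config
    },
    // Assumed pool settings: limit concurrent PG connections so the app
    // stays below Postgres' max_connections.
    pool: {
        min: 2,
        max: 10
    }
});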
