How can I prevent someone from simply doing
while(true){client.emit('i am spammer', true)};
This sure proves to be a problem when someone has the urge to crash my node server!
Like tsrurzl said you need to implement a rate limiter (throttling sockets).
Following code example only works reliably if your socket returns a Buffer (instead of a string). The code example assumes that you will first call addRatingEntry(), and then call evalRating() immediately afterwards. Otherwise you risk a memory leak in the case where evalRating() doesn't get called at all or too late.
var rating, limit, interval;
rating = []; // rating: [*{'timestamp', 'size'}]
limit = 1048576; // limit: maximum number of bytes/characters.
interval = 1000; // interval: interval in milliseconds.
// Describes a rate limit of 1mb/s

// Records one data event of `size` bytes (or characters, for strings)
// at the current time. Returns the entry object that was appended.
function addRatingEntry (size) {
    // BUG FIX: the original line was missing the closing `]`,
    // which made this a syntax error.
    return rating[(rating.push({
        'timestamp': Date.now(),
        'size': size
    }) - 1)];
}

// Removes outdated entries, computes combined size, and compares with the limit variable.
// Returns true if your connection is NOT flooding, returns false if you need to disconnect.
function evalRating () {
    var i, newRating, totalSize;
    // totalSize in bytes in case of underlying Buffer value, in number of
    // characters for strings. Actual byte size in case of strings might be
    // variable => not reliable.
    newRating = [];
    // Keep only entries younger than `interval` milliseconds.
    for (i = rating.length - 1; i >= 0; i -= 1) {
        if ((Date.now() - rating[i].timestamp) < interval) {
            newRating.push(rating[i]);
        }
    }
    rating = newRating;
    totalSize = 0;
    for (i = newRating.length - 1; i >= 0; i -= 1) {
        // BUG FIX: the original summed `.timestamp` instead of `.size`,
        // so the "total size" was actually a sum of epoch timestamps and
        // a single entry always exceeded the limit.
        totalSize += newRating[i].size;
    }
    return (totalSize > limit ? false : true);
}
// Assume connection variable already exists and has a readable stream interface
// NOTE(review): chunk.length counts bytes for Buffers but UTF-16 code units
// for strings, so the rate limit is only reliable for Buffer streams.
connection.on('data', function (chunk) {
addRatingEntry(chunk.length);
if (evalRating()) {
// Continue processing chunk.
} else {
// Disconnect due to flooding.
}
});
You can add extra checks, like checking whether or not the size parameter really is a number etc.
Addendum: Make sure the rating, limit and interval variables are enclosed (in a closure) per connection, and that they don't define a global rate (where each connection manipulates the same rating).
I implemented a little flood function. It is not perfect (see improvements below), but it will disconnect a user when he makes too many requests.
// Not more then 100 request in 10 seconds
let FLOOD_TIME = 10000;
let FLOOD_MAX = 100;
let flood = {
floods: {},
lastFloodClear: new Date(),
protect: (io, socket) => {
// Reset flood protection
if( Math.abs( new Date() - flood.lastFloodClear) > FLOOD_TIME ){
flood.floods = {};
flood.lastFloodClear = new Date();
}
flood.floods[socket.id] == undefined ? flood.floods[socket.id] = {} : flood.floods[socket.id];
flood.floods[socket.id].count == undefined ? flood.floods[socket.id].count = 0 : flood.floods[socket.id].count;
flood.floods[socket.id].count++;
//Disconnect the socket if he went over FLOOD_MAX in FLOOD_TIME
if( flood.floods[socket.id].count > FLOOD_MAX){
console.log('FLOODPROTECTION ', socket.id)
io.sockets.connected[socket.id].disconnect();
return false;
}
return true;
}
}
// Expose the flood helper as the module's export (CommonJS).
exports = module.exports = flood;
And then use it like this:
// Import the flood-protection module defined above.
let flood = require('../modules/flood')
// ... init socket io...
socket.on('message', function () {
// Only handle the event while the socket is under the rate limit;
// flood.protect() disconnects the socket once it exceeds the limit.
if(flood.protect(io, socket)){
//do stuff
}
});
Improvements would be to track, alongside the count, how often a user got disconnected, and then build a ban list so he cannot connect anymore. Also, when a user refreshes the page he gets a new socket.id, so consider using a unique cookie value here instead of the socket.id.
Here is simple rate-limiter-flexible package example.
const app = require('http').createServer();
const io = require('socket.io')(app);
const { RateLimiterMemory } = require('rate-limiter-flexible');

app.listen(3000);

// Allow each client (keyed by handshake IP) 5 events per second.
const rateLimiter = new RateLimiterMemory({
  points: 5, // 5 points
  duration: 1, // per second
});

io.on('connection', (socket) => {
  socket.on('bcast', async (data) => {
    try {
      // Consume 1 point per event from this IP; throws when exhausted.
      await rateLimiter.consume(socket.handshake.address);
      socket.emit('news', { 'data': data });
      socket.broadcast.emit('news', { 'data': data });
    } catch (limiterRes) {
      // No points left to consume — tell the client when it may retry.
      socket.emit('blocked', { 'retry-ms': limiterRes.msBeforeNext });
    }
  });
});
Read more in official docs
Related
Hello on entering my backend I make a connection to my socket and send the player data + socket id
good, and then on receiving I add him in line
and perform a function that will fetch 2 players who have approximate points to form a match
and so I wanted to find a match send to the socket of the selected players
But I don't know how best to map session
I saw about express.socket-io
or save one of these socket's that connect in some way
for when I find a match
send my match to my selected socket's
io.on('connection', function (socket) {
//ADD PLAYER TO QUEUE
socket.on('addPlayer-Queue', (result) => {
const player = {
id: result.id,
name: result.name,
mmr: result.mmr
}
const player = new Player(player,socketid )
socket.emit('match', matches)
});
/**
 * Pairs a player's profile (id, name, mmr) with the socket id that
 * represents their connection.
 */
class Player {
  constructor(player, socketId) {
    // Copy only the profile fields we care about.
    const { id, name, mmr } = player
    this.id = id
    this.socketId = socketId
    this.name = name
    this.mmr = mmr
  }
}
Here I get my player and create it, but I don't know how to get this player's socket.id and how to map in a session
If I understood you correctly, here's a way to do it.
Everytime a players get added, I push them on an array and then call the function matchPlayersQueue that tries to match players based on their MMR (I haven't completed the code, but a way to do it, is either check the variance of MMRs or check their difference). If you follow this path, keep in mind that everytime a players disconnect that was on the queue array, you should remove the element.
Another way to do this is, set a timer that periodically calls the function matchPlayersQueue.
// Players waiting to be matched.
let playersOnQueue = [];

io.on("connection", function(socket) {
  //ADD PLAYER TO QUEUE
  socket.on("addPlayer-Queue", result => {
    // Raw profile data sent by the client.
    const profile = {
      id: result.id,
      name: result.name,
      mmr: result.mmr
    };
    // BUG FIX: the original redeclared `const player` (a SyntaxError) and
    // used an undefined `socketid`; use the connection's socket.id.
    const player = new Player(profile, socket.id);
    playersOnQueue.push(player);
    const matchedPlayers = matchPlayersQueue(playersOnQueue); // matchedPlayers will be an array of their sockets ids.
    // Do something with matchedPlayers, empty playersOnQueue if matchedPlayers.length doesn't equal to 0.
  });
});
/**
 * Sorts the queue by MMR (in place, as the original does) and, when
 * exactly three players are waiting, returns their socket ids.
 * Returns [] when fewer than three players are queued. With more than
 * three players the selection strategy is intentionally left open and
 * the function returns undefined (unchanged from the original).
 */
function matchPlayersQueue(arr) {
  // Order the queue from lowest to highest MMR.
  arr.sort((firstPlayer, secondPlayer) => firstPlayer.getMMR() - secondPlayer.getMMR());

  // If there are fewer than 3 people, no match can be formed yet.
  if (arr.length < 3) {
    return [];
  }

  // Trivial way to match 3 people, not checking for MMR.
  if (arr.length === 3) {
    return arr.map((player) => player.getSocketID());
  }

  /*
  Here you can implement your own way of selecting players, maybe having a maximum MMR difference between players or comparing the overall variance of MMR.
  */
}
/**
 * Queue entry: a player's profile plus the socket id of their connection.
 * Exposes the accessors matchPlayersQueue() relies on.
 */
class Player {
  /*
  Beware of this constructor, while this works, The way i'd would do it is each variable to their own attribute.
  */
  constructor(player, socketId) {
    const { id, name, mmr } = player;
    this.id = id;
    this.socketId = socketId;
    this.name = name;
    this.mmr = mmr;
  }

  // Rating used for matchmaking comparisons.
  getMMR() {
    return this.mmr;
  }

  // Socket id used to notify this player of a match.
  getSocketID() {
    return this.socketId;
  }
}
Please check if my understanding about the following for loop is correct.
// BUG FIX: the original header was `for(let i=0; i<1000; i)` — without
// `i++` the counter never advances and the loop spins forever.
for (let i = 0; i < 1000; i++) {
  sample_function(i, function (result) {});
}
The moment the for loop is invoked, 1000 events of sample_function will be queued in the event loop. After about 5 seconds a user sends an HTTP request, which is queued after those "1000 events".
Usually this would not be a problem because the loop is asynchronous.
But lets say that this sample_function is a CPU intensive function. Therefore the "1000 events" are completed consecutively and each take about 1 second.
As a result, the for loop will block for about 1000 seconds.
Would there be a way to solve such a problem? For example, would it be possible to let the thread take a "break" every 10 loops and allow other newly queued events to run in between? If so, how would I do it?
Try this:
// Defer each call through the event loop; the extra setTimeout arguments
// (i and the callback) are forwarded to sample_function.
for(let i=0; i<1000; i++)
{
setTimeout(sample_function, 0, i, function(result){});
}
or
// Placeholder for the CPU-intensive work applied to each element.
function sample_function(elem, index) { /* CPU-intensive work here */ }
// BUG FIX: the original used `Array(1000)`, which creates a sparse array
// with no own elements — forEach skips holes, so sample_function was
// never called. Array.from produces a dense [0..999] array instead.
var arr = Array.from({ length: 1000 }, (_, i) => i);
arr.forEach(sample_function);
There is a technique called partitioning which you can read about in the NodeJs's document, But as the document states:
If you need to do something more complex, partitioning is not a good option. This is because partitioning uses only the Event Loop, and you won't benefit from multiple cores almost certainly available on your machine.
So you can also use another technique called offloading, e.g. using worker threads or child processes which also have certain downsides like having to serialize and deserialize any objects that you wish to share between the event loop (current thread) and a worker thread or a child process
Following is an example of partitioning that I came up with which is in the context of an express application.
const express = require('express');
const crypto = require('crypto');
const randomstring = require('randomstring');

const app = express();
const port = 80;

// Cheap endpoint used to measure how responsive the event loop stays
// while /block is busy.
app.get('/', async (req, res) => {
  res.send('ok');
});

// CPU-heavy endpoint: runs the partitioned block() helper ten times.
app.get('/block', async (req, res) => {
  const result = [];
  for (let i = 0; i < 10; ++i) {
    result.push(await block());
  }
  res.send({ result });
});

app.listen(port, () => {
  console.log(`Listening on port ${port}`);
  console.log(`http://localhost:${port}`);
});
/* takes around 5 seconds to run(varies depending on your processor) */
const block = () => {
  // Promisifying just to hand the result back to the caller in an async
  // way; this is not part of the partitioning technique itself.
  return new Promise((resolve, reject) => {
    /**
     * https://nodejs.org/en/docs/guides/dont-block-the-event-loop/#partitioning
     * setImmediate defers the heavy loop to the next event-loop iteration,
     * so there is a breathing period between consecutive block() calls
     * instead of one long uninterrupted blockage.
     */
    setImmediate(() => {
      const hash = crypto.createHash("sha256");
      const numberOfHashUpdates = 10e5;
      for (let round = 0; round < numberOfHashUpdates; round++) {
        hash.update(randomstring.generate());
      }
      // NOTE(review): this resolves with the Hash object itself, not a
      // digest; callers probably want hash.digest('hex') — confirm.
      resolve(hash);
    });
  });
};
There are two endpoints, / and /block. If you hit /block and then hit the / endpoint, the / endpoint will take around 5 seconds to respond (it answers during the breathing space — the thing you called a "break").
If setImmediate was not used, then the / endpoint would respond to a request after approximately 10 * 5 seconds(10 being the number of times block function is called in the for-loop)
Also you can do partitioning using a recursive approach like this:
/**
 * Processes `items` synchronously in slices of `chunk`, yielding to the
 * event loop (via setImmediate) between slices so other queued work can run.
 *
 * @param items array we need to process
 * @param chunk number of items to be processed on each iteration of the
 *              event loop before the breathing space
 */
function processItems(items, chunk) {
  let i = 0;
  // Renamed from `process` to avoid shadowing Node's global `process`;
  // the original's unused `done` parameter was dropped.
  const runSlice = () => {
    let remaining = chunk;
    while (remaining > 0 && i < items?.length) {
      --remaining;
      syncBlock();
      ++i;
    }
    if (i < items?.length) {
      // The key is to SCHEDULE the next recursive call (by passing the
      // function to setImmediate) instead of invoking it directly, so the
      // event loop gets a breathing space between slices.
      setImmediate(runSlice);
    }
  };
  runSlice();
}
And if you need to get back the data processed you can promisify it like this:
/**
 * Same chunked processing as above, but collects each syncBlock() return
 * value and resolves with the full result array once every item is done.
 */
function processItems(items, chunk) {
  const results = [];
  let index = 0;

  const runSlice = (done) => {
    // Process up to `chunk` items synchronously in this slice.
    for (let budget = chunk; budget > 0 && index < items?.length; budget--) {
      results.push(syncBlock());
      index++;
    }
    if (index < items?.length) {
      // Yield to the event loop before the next slice.
      setImmediate(() => runSlice(done));
    } else {
      // All items processed: hand the results to the resolver (if any).
      done && done(results);
    }
  };

  return new Promise((resolve) => runSlice(resolve));
}
And you can test it by adding this route handler to the other route handlers provided above:
// Exercises processItems(): builds [0..9] and processes it one item per
// event-loop iteration, then returns the collected results.
app.get('/block2', async (req, res) => {
  const arr = [];
  for (let i = 0; i < 10; ++i) {
    arr.push(i);
  }
  const result = await processItems(arr, 1);
  res.send({ result });
});
I have a log file with about 14.000 aircraft position datapoints captured from a system called Flarm, it looks like this:
{"addr":"A","time":1531919658.578100,"dist":902.98,"alt":385,"vs":-8}
{"addr":"A","time":1531919658.987861,"dist":914.47,"alt":384,"vs":-7}
{"addr":"A","time":1531919660.217471,"dist":925.26,"alt":383,"vs":-7}
{"addr":"A","time":1531919660.623466,"dist":925.26,"alt":383,"vs":-7}
What I need to do is find a way to 'play' this file back in real-time (as if it were occurring right now, even though it's pre-recorded), and emit an event whenever a log entry 'occurs'. The file is not being added to; it's pre-recorded and the playback would occur at a later stage.
The reason for doing this is that I don't have access to the receiving equipment when I'm developing.
The only way I can think to do it is to set a timeout for every log entry, but that doesn't seem like the right way to do it. Also, this process would have to scale to longer recordings (this one was only an hour long).
Are there other ways of doing this?
If you want to "play them back" with the actual time difference, a setTimeout is pretty much what you have to do.
/**
 * Emits `entry`, then schedules the following entry after the recorded
 * time difference, replaying the log with its original pacing.
 *
 * NOTE(review): when getEntry() returns null (end of log) this returns
 * BEFORE emitting `entry`, so the final entry is never emitted — that
 * quirk is preserved exactly as in the original.
 */
const processEntry = (entry, index) => {
  const nextIndex = index + 1;
  const nextEntry = getEntry(nextIndex);
  if (nextEntry == null) return;
  // Delay between this entry and the next one.
  const timeDiff = nextEntry.time - entry.time;
  emitEntryEvent(entry);
  setTimeout(processEntry, timeDiff, nextEntry, nextIndex);
};
// Kick off playback with the first log entry.
processEntry(getEntry(0), 0);
This emits the current entry and then sets a timeout based on the difference until the next entry.
getEntry could either fetch lines from a prefilled array or fetch lines individually based on the index. In the latter case only two lines of data would be in memory at the same time.
Got it working in the end! setTimeout turned out to be the answer, and combined with the input of Lucas S. this is what I ended up with:
// Replays a pre-recorded JSON-lines log file through an EventEmitter,
// pacing the 'data' events by the recorded inter-entry time differences.
const EventEmitter = require('events');
const fs = require('fs');
const readable = fs.createReadStream("./data/2018-07-18_1509log.json", {
encoding: 'utf8',
fd: null
});
// Pulls one character at a time from the stream until a newline, then
// parses the accumulated line as JSON. Returns false at end of data.
// NOTE(review): readable.read(1) also returns null when the internal
// buffer is merely empty (not only at EOF); any partially accumulated
// line is discarded in that case — confirm this cannot drop entries.
function read_next_line() {
var chunk;
var line = '';
// While this is a thing we can do, assign chunk
while ((chunk = readable.read(1)) !== null) {
// If chunk is a newline character, return the line
if (chunk === '\n'){
return JSON.parse(line);
} else {
line += chunk;
}
}
return false;
}
// Sliding window of the current and next log entries.
var lines = [];
var nextline;
// Emits the current entry and schedules itself for the next one, using
// the recorded time delta (entry.time is in seconds, delay in ms).
const processEntry = () => {
// If lines is empty, read a line
if (lines.length === 0) lines.push(read_next_line());
// Quit here if we've reached the last line
// NOTE(review): `== false` is a loose comparison; a parsed entry of 0
// would also match, though real entries here are always objects.
if ((nextline = read_next_line()) == false) return true;
// Else push the just read line into our array
lines.push(nextline);
// Get the time difference in milliseconds
var delay = Number(lines[1].time - lines[0].time) * 1000;
// Remove the first line
lines.shift();
module.exports.emit('data', lines[0]);
// Repeat after the calculated delay
setTimeout(processEntry, delay);
}
var ready_to_start = false;
// When the stream becomes readable, allow starting
readable.on('readable', function() {
ready_to_start = true;
});
// The emitter is assigned before start() can run, so processEntry's use
// of module.exports above resolves to this EventEmitter at call time.
module.exports = new EventEmitter;
// Begins playback; returns false if the stream is not yet readable.
module.exports.start = function() {
if (ready_to_start) processEntry();
if (!ready_to_start) return false;
}
Assuming you want to visualize the flight logs, you can use fs watch as below, to watch the log file for changes:
// Watch the log file and report every change event. On some platforms
// the watcher does not supply a filename, so handle both cases.
fs.watch('somefile', function (event, filename) {
  console.log('event is: ' + event);
  if (!filename) {
    console.log('filename not provided');
  } else {
    console.log('filename provided: ' + filename);
  }
});
Code excerpt is from here. For more information on fs.watch() check out here
Then, for seamless update on frontend, you can setup a Websocket to your server where you watch the log file and send newly added row via that socket to frontend.
After you get the data in frontend you can visualize it there. While I haven't done any flight visualization project before, I've used D3js to visualize other stuff (sound, numerical data, metric analysis and etc.) couple of times and it did the job every time.
We do read an XML file (using xml-stream) with about 500k elements and do insert them into MongoDB like this:
// Invoke writeDataToDb (with type pre-bound to "product") for every closed <product> element the XML stream parses.
xml.on(`endElement: product`, writeDataToDb.bind(this, "product"));
Insert in writeDataToDb(type, obj) looks like this:
// Fire-and-forget insert with a 15s write timeout; the empty catch swallows every failure.
collection.insertOne(obj, {w: 1, wtimeout: 15000}).catch((e) => { });
Now when the Mongo connection gets disconnected, the xml stream still reads and the console gets flooded with error messages (can't insert, disconnected, EPIPE broken, ...).
In the docs it says:
When you shut down the mongod process, the driver stops processing operations and keeps buffering them due to bufferMaxEntries being -1 by default meaning buffer all operations.
What does this buffer actually do?
We notice that when we insert data and close the mongo server, things get buffered; then we bring the mongo server back up, the native driver successfully reconnects and node resumes inserting data, but the documents buffered while mongo was offline do not get inserted again.
So I question this buffer and its use.
Goal:
We are looking for the best way to keep inserts in buffer until mongo comes back (in 15000milliseconds according to wtimeout) and let then insert the buffered documents or make use of xml.pause(); and xml.resume() which we tried without success.
Basically we need a little help in how to handle disconnects without data loss or interrupts.
Inserting 500K elements with insertOne() is a very bad idea. You should instead use bulk operations that allow you to insert many documents in a single request.
(here for example 10000, so it can be done in 50 single requests)
To avoid buffering issue, you can manually handle it:
Disable buffering with bufferMaxEntries: 0
Set reconnect properties: reconnectTries: 30, reconnectInterval: 1000
Create a bulkOperation and feed it with 10000 items
Pause the xml reader. Try to insert the 10000 items. If it fails, retry every 3000ms until it succeed
You may face some duplicate ID issues if the bulk operation is interrupted during execution, so ignore them (error code: 11000)
here is a sample script :
var fs = require('fs')
var Xml = require('xml-stream')
var MongoClient = require('mongodb').MongoClient
var url = 'mongodb://localhost:27017/test'

// Streams products out of data.xml and saves them in bulk batches of
// 10000, pausing the XML reader while each batch is in flight and
// retrying failed batches every 3 seconds until they succeed.
MongoClient.connect(url, {
  // retry the connection for ~30s; fail operations immediately instead
  // of buffering them while disconnected
  reconnectTries: 30,
  reconnectInterval: 1000,
  bufferMaxEntries: 0
}, function (err, db) {
  if (err != null) {
    console.log('connect error: ' + err)
  } else {
    var collection = db.collection('product')
    var bulk = collection.initializeUnorderedBulkOp()
    var totalSize = 500001
    var size = 0
    var fileStream = fs.createReadStream('data.xml')
    var xml = new Xml(fileStream)
    xml.on('endElement: product', function (product) {
      bulk.insert(product)
      size++
      // if we have enough product, save them using bulk insert
      if (size % 10000 === 0) {
        xml.pause()
        bulk.execute(function (err, result) {
          if (err == null) {
            bulk = collection.initializeUnorderedBulkOp()
            console.log('doc ' + (size - 10000) + ' : ' + size + ' saved on first try')
            xml.resume()
          } else {
            console.log('bulk insert failed: ' + err)
            // BUG FIX: `counter` was an implicit global in the original
            // (no var), leaking state across overlapping retries.
            var counter = 0
            var retryInsert = setInterval(function () {
              counter++
              bulk.execute(function (err, result) {
                // Duplicate-id errors (code 11000) mean part of the
                // previous attempt was already committed, so treat them
                // the same as success (branches merged from the original).
                if (err == null || err.code === 11000) {
                  clearInterval(retryInsert)
                  bulk = collection.initializeUnorderedBulkOp()
                  console.log('doc ' + (size - 10000) + ' : ' + size + ' saved after ' + counter + ' tries')
                  xml.resume()
                } else {
                  console.log('failed after first try: ' + counter, 'error: ' + err)
                }
              })
            }, 3000) // retry every 3000ms until success
          }
        })
      } else if (size === totalSize) {
        // final (partial) batch: flush it and close the connection
        bulk.execute(function (err, result) {
          if (err == null) {
            db.close()
          } else {
            console.log('bulk insert failed: ' + err)
          }
        })
      }
    })
  }
})
sample log output:
doc 0 : 10000 saved on first try
doc 10000 : 20000 saved on first try
doc 20000 : 30000 saved on first try
[...]
bulk insert failed: MongoError: interrupted at shutdown // mongodb server shutdown
failed after first try: 1 error: MongoError: no connection available for operation and number of stored operation > 0
failed after first try: 2 error: MongoError: no connection available for operation and number of stored operation > 0
failed after first try: 3 error: MongoError: no connection available for operation and number of stored operation > 0
doc 130000 : 140000 saved after 4 tries
doc 140000 : 150000 saved on first try
[...]
I don't know specifically about Mongodb driver and this buffer of entries. Maybe it only keeps data in specific scenarios.
So I will answer to this question with a more general approach that can work with any database.
To summarize, you have two problems:
You are not recovering from failed attempts
XML stream send data too fast
To handle the first issue, you need to implement a retry algorithm that will ensure that many attempts are made before giving up.
To handle the second issue, you need to implement back pressure on the xml stream. You can do that using the pause method, the resume method and an input buffer.
// bluebird supplies Promise.delay/.finally used below.
var Promise = require('bluebird');
var fs = require('fs');
var Xml = require('xml-stream');
// Stream the XML file and parse it incrementally.
var fileStream = fs.createReadStream('myFile.xml');
var xml = new Xml(fileStream);
// simple exponential retry algorithm based on promises
/**
 * Runs `task` and, on rejection, retries after an exponentially growing
 * delay: starting at initialDelay, doubling each time, capped at maxDelay.
 * Gives up and rethrows the last error after maxRetry failed attempts.
 */
function exponentialRetry(task, initialDelay, maxDelay, maxRetry) {
  let currentDelay = initialDelay;
  let failures = 0;

  const attempt = () =>
    task().catch(function (error) {
      failures += 1;
      if (failures > maxRetry) {
        throw error;
      }
      // Schedule the next attempt with the current delay, then grow the
      // delay for the attempt after that.
      const next = Promise.delay(currentDelay).then(attempt);
      currentDelay = Math.min(currentDelay * 2, maxDelay);
      return next;
    });

  return attempt();
}
// Shared back-pressure state: at most maxPressure writes are in flight;
// excess work queues in `buffer` while the xml stream is paused.
// NOTE(review): this state is module-global, so it assumes exactly one
// xml stream per module instance.
var maxPressure = 100;
var currentPressure = 0;
var suspended = false;
var stopped = false;
var buffer = [];
// handle back pressure by storing incoming tasks in the buffer
// pause the xml stream as soon as we have enough tasks to work on
// resume it when the buffer is empty
function writeXmlDataWithBackPressure(product) {
// closure used to try to start a task
var tryStartTask = function() {
// if we have enough tasks running, pause the xml stream
if (!stopped && !suspended && currentPressure >= maxPressure) {
xml.pause();
suspended = true;
console.log("stream paused");
}
// if we have room to run tasks
if (currentPressure < maxPressure) {
// if we have a buffered task, start it
// if not, resume the xml stream
if (buffer.length > 0) {
buffer.shift()();
} else if (!stopped) {
try {
xml.resume();
suspended = false;
console.log("stream resumed");
} catch (e) {
// the only way to know if you've reached the end of the stream
// xml.on('end') can be triggered BEFORE all handlers are called
// probably a bug of xml-stream
stopped = true;
console.log("stream end");
}
}
}
};
// push the task to the buffer
buffer.push(function() {
currentPressure++;
// use exponential retry to ensure we will try this operation 100 times before giving up
// NOTE(review): .finally here is bluebird's promise method (Promise is
// required from bluebird above).
exponentialRetry(function() {
return writeDataToDb(product)
}, 100, 2000, 100).finally(function() {
currentPressure--;
// a task has just finished, let's try to run a new one
tryStartTask();
});
});
// we've just buffered a task, let's try to run it
tryStartTask();
}
// write the product to database here :)
function writeDataToDb(product) {
  // Test stand-in: wait a random 0-100ms, then fail roughly half the time
  // (just for exercising the retry/back-pressure machinery).
  const timeToWrite = Math.random() * 100;
  const shouldFail = Math.random() > 0.5;
  return Promise.delay(timeToWrite).then(function () {
    if (shouldFail) {
      throw new Error();
    }
    return null;
  });
}
// Route every parsed <product> through the back-pressure wrapper above.
xml.on('endElement: product', writeXmlDataWithBackPressure);
Play with it, put some console.log to understand how it behaves.
I hope this will help you to solve your issue :)
I have a list of 125,000 + Id numbers.
I am making a request to an api to get more information for each one.
But my problem is that the api will stop giving me a response if I request more than 6 per second.
I need a way to control the speed of the requests.
Just use a function called by setInterval to do the actual API querying ?
Simple example:
var ids = [ /* big array here */ ];

/**
 * Queries the API for every id at a fixed pace of 6 requests per second.
 * Results are collected (in request order) and passed to `callback` once
 * every id has been answered.
 *
 * Fixes over the original:
 *  - the clearInterval check sat OUTSIDE queryApi, so it ran exactly once
 *    (before the interval even existed) and the timer never stopped;
 *  - bare `nextTick` was undefined; use process.nextTick.
 */
function queryAllIds(ids, callback) {
  var processedIds = [];
  var lastProcessedId = 0;
  var processedIdCount = 0;
  var intervalId;

  function queryApi() {
    // Nothing left to dispatch (also covers an empty ids array).
    if (lastProcessedId >= ids.length) {
      clearInterval(intervalId);
      if (ids.length === 0) process.nextTick(callback, processedIds);
      return;
    }
    var idToProcess = lastProcessedId++;
    // Stop the timer once the final request has been dispatched.
    if (lastProcessedId === ids.length) clearInterval(intervalId);
    doActualQuery(ids[idToProcess], function (result) {
      processedIds[idToProcess] = result;
      processedIdCount++;
      if (processedIdCount === ids.length) {
        process.nextTick(callback, processedIds);
      }
    });
  }

  // 6 requests per second => one tick every ~166ms.
  intervalId = setInterval(queryApi, 1000 / 6);
}
// Kick off the rate-limited run; processedIds arrives once all queries finish.
queryAllIds(ids, function (processedIds) {
// Do stuff here
});
We ended up using the limiter package, which provided the rate limiting we needed right out of the box. https://www.npmjs.com/package/limiter