Environment: nodejs 17.2, expressjs 4.17
Task: Data arrives at the url of the type "/user-actions" from different servers at a rate of about 2 requests per second. It is necessary to aggregate them and send them to another server once a second.
For example:
Request #1: {userId: 1, action: "hitOne"}
Request #2: {userId: 2, action: "hitFive"}
Request #3: {userId:1, action: "hitFive"}
It is necessary to get 2 objects
const data = [{userId: 1, action: "hitOne"}, {userId: 2, action: "hitFive"}]
and
const data = [{userId: 1, action: "hitFive"}]
Each of these objects is sent to another server 1 time per second, something like this
http.post('http://newserver.url/user-actions', {data});
I was thinking of making a variable in which to record everything that comes in the request and send this variable to a new server once a second on a timer.
But something tells me: or there will be problems with the variable (for example, due to concurrent request) and there will not always be the data I was waiting for, or some nonsense will come out with the timer.
How to implement such a scenario correctly?
So you're creating some sort of a proxy service. You have two potential issues:
data persistence and
retries and pending requests.
I think your best bet would be to do something like this:
in this particular service (with the API route), you just receive requests, and store them somewhere like Redis or RabbitMQ or Amazon SQS.
in another service, you deal with retries, posting etc.
Even if you don't split up into two services, you still want to put things in specialised storage service in things like this. E.g. your process crashes, and you lose whatever data you have holding in memory. It also simplifies all the management details. Things like storing, sorting what came first, what requests are pending - those are super easy to deal with with RabbitMQ-type service.
But let's simplify things and hold them in memory. Now you have to deal with all these things yourself.
So here's a naive proxy service.
const axios = require('axios');
const axiosRetry = require('axios-retry');
const REQUEST_INTERVAL = 1000; // every second
const MAX_PARALLEL_REQUESTS = 3;
axiosRetry(axios, { retries: 3});
const bucket = [];
let exportingInterval;
let currentRequestsCount = 0;
const logRequest = (payload) => bucket.push(payload);
const makeRequest = (payload) => axios.post('http://remote-service/user-actions', payload);
const sendData = () => {
// first, make sure you don't make more then X parallel requests
if (currentRequestsCount > MAX_PARALLEL_REQUESTS) {
return
}
// clear the bucket
const data = bucket.splice(0, bucket.length);
if (!data.length) {
return;
}
// send the data, make sure you handle the failure.
currentRequestsCount = currentRequestsCount + 1;
makeRequest()
.then(() => currentRequestsCount = currentRequestsCount - 1)
.catch(() => {
// what do do now? We failed three times.
// Let's put everything back in the bucket, try in the next request.
bucket.splice(bucket.length, 0, ...data);
currentRequestsCount = currentRequestsCount - 1;
});
}
const startExporting = () => exportingInterval = setInterval(() => sendData(), REQUEST_INTERVAL);
const stopExporting = () => clearInterval(exportingInterval)
module.exports = {
logRequest,
startExporting,
stopExporting,
}
Now, you would use this like this:
const proxyService = require('./proxy-service');

const app = express();

proxyService.startExporting();

// ...

// BUG FIX: route is '/user-actions' to match the endpoint the upstream
// servers post to per the task description (it previously said '/user-data').
app.post('/user-actions', (req, res) => {
  // Buffer the payload; the proxy service ships batches once a second.
  proxyService.logRequest(req.body);
  res.end();
});
Now, this is just a simple example:
You do need to make sure that retry policy is ok. You have to make sure you don't DoS wherever you're sending the data.
You want to make sure you limit how many objects you send per call.
maybe that 1-second interval is not a good thing - what if sending off the data lasts longer?
What if you start piling up requests? My simple counter only counts to 3; maybe it's more complicated than that.
Also, calling that startExporting and stopExporting should go in some common place, where you boot the app, and where you cleanup in case of a graceful shutdown.
But it gives you an idea of how it can be done.
It is a trade-off: time, data
If you want to ensure you have enough data, you can use the Promise.all() function. When both requests have responded, you call the API to send the combined result. This ensures you have enough data, but does not ensure that the data is sent to the other server once a second.
// Wait for both upstream requests, then forward the combined results.
const pr1 = request1();
const pr2 = request2();
// BUG FIX: the original read `await data = promise.all([pr1,pr2]);`, which
// is a syntax error (invalid assignment target) and misspells `Promise`.
const data = await Promise.all([pr1, pr2]);
requestToAnotherServer(data);
If you want to ensure that the server sends data to the other server once a second, you can set a timer: when it fires, you send whatever data the server has received so far. But this won't ensure that you have enough data.
let sendData = []; // BUG FIX: was an implicit global (no declaration)
setInterval(() => {
  // Flush whatever accumulated since the last tick *before* starting new
  // upstream requests. The original sent the buffer while this tick's
  // .then() callbacks could still push into the already-forwarded array.
  requestToAnotherServer(sendData);
  sendData = [];
  // These results land in the fresh buffer and go out on the next tick.
  request1().then((data) => { sendData.push(data); });
  request2().then((data) => { sendData.push(data); });
}, 1000);
Related
Okay so I have a Nodejs/Express app that has an endpoint which allows users to receive notifications by opening up a connection to said endpoint:
// List of all the streams opened by practitioner users to the backend.
// BUG FIX: the trailing comment was line-wrapped, leaving a stray bare
// identifier `backend` on its own line.
var practitionerStreams = []
// SSE endpoint: opens a long-lived event stream for one practitioner,
// replays their stored notifications, then leaves the response open so
// sendApptNotification can push future events to it.
async function notificationEventsHandler(req, res){
  const headers ={
    'Content-Type': 'text/event-stream',
    'Connection': 'keep-alive',
    'Cache-Control': 'no-cache'
  }
  const practEmail = req.headers.practemail
  console.log("PRACT EMAIL", practEmail)
  // Load any notifications already stored for this practitioner.
  const data = await ApptNotificationData.findAll({
    where: {
      practEmail: practEmail
    }
  })
  res.writeHead(200, headers)
  res.write(`data:${JSON.stringify(data)}\n\n`) // res.write is callback-based; awaiting it is a no-op
  // Register the open response so later notifications can be written to it.
  const newPractStream = {
    practEmail: practEmail,
    res
  }
  practitionerStreams.push(newPractStream)
  req.on('close', () => {
    console.log(`${practEmail} Connection closed`);
    // BUG FIX: the original predicate was `pract.practEmail !== pract.practEmail`
    // (an element compared to itself — always false), which removed EVERY
    // stream whenever any one client disconnected. Keep only the streams
    // belonging to other practitioners.
    practitionerStreams = practitionerStreams.filter((stream) => stream.practEmail !== practEmail);
  });
  return res
}
// Write a notification event to every open stream registered for the given
// practitioner email.
async function sendApptNotification(newNotification, practEmail){
  // The original used .map() purely for side effects and then reassigned
  // practitionerStreams to an identical array — a plain loop does the same
  // work without the pointless copy.
  for (const stream of practitionerStreams) {
    if (stream.practEmail === practEmail) {
      stream.res.write(`data:${JSON.stringify(newNotification)}\n\n`)
    }
  }
}
Basically when the user connects it takes the response object (that will stay open), will put that in an Object along with a unique email, and write to it in the future in sendApptNotification
But obviously this is slow for a full app, how exactly do I replace this with Redis? Would I still have a Response object that I write to? Or would that be replaced with a redis stream that I can subscribe to on the frontend? I also assume I would store all my streams on redis as well
edit: from what examples I've seen people are writing events from redis to the response object
Thank you in advance
If you want to use Redis Stream as notification system, you can follow this official guide:
https://redis.com/blog/how-to-create-notification-services-with-redis-websockets-and-vue-js/ .
To get this data in real time you need to create a websocket connection. I'm pointing you to the official guide, rather than writing it out for you, because of the quality of that guide. It's a great way for anyone to understand how to build this, but you will of course need to adapt it to your own situation.
However, as I said in the comments, I believe it's simpler to poll an API endpoint like /api/v1/notifications from your frontend code with setInterval, making a request every 5 seconds for example. If you'd rather have a real-time notification system, I think you should first understand why you need it — you can always change your system later if it turns out you do. Basically, it's a trade-off you have to make!
For my example imagine two tables in a relational database, one as Users and the second as Notifications.
The tables of this example:
UsersTable
id name
1 andrew
2 mark
NotificationTable
id message userId isRead
1 message1 1 true
2 message2 1 false
3 message3 2 false
The endpoint of this example will return all cached notifications that isn't read by the user. If the cache doesn't exists, it will return the data from the database, put it on the cache and return to the user. In the next call from API, you'll get the result from cache. There some points to complete in this example, for example the query on the database to get the notifications, the configuration of time expiration from cache and the another important thing is: if you want to update all the time the notifications in the cache, you need to create a middleware and trigger it in the parts of your code that needs to notify the notifications user. In this case you'll only update the database and cache. But I think you can complete these points.
const redis = require('redis');
const redisClient = redis.createClient();

// Return the user's unread notifications, serving from cache when possible.
app.get('/notifications', async (request, response) => {
  const userId = request.user.id;
  const cacheResult = await redisClient.get(`user:${userId}:notifications`)
  if (cacheResult) return response.send(cacheResult);
  // BUG FIX: getUserNotificationsFromDatabase is asynchronous — without
  // `await`, a pending Promise (not the data) was cached and sent back.
  const notifications = await getUserNotificationsFromDatabase(userId);
  // NOTE(review): redis stores strings; if `notifications` is an object or
  // array it should be JSON.stringify'd before caching — confirm the shape
  // returned by the database helper.
  redisClient.set(`user:${userId}:notifications`, notifications);
  response.send(notifications);
})
Besides that, there's another option: you can simply use only Redis, or only the database, to manage these notifications. A relational database with the correct indexes will return results as fast as you expect. You'll only need to think about how many notifications you'll be storing.
i have an API with express one route make a few time to get all data required (search through long JSON object)
router.get(
  "/:server/:maxCraftPrice/:minBenef/:from/:to",
  checkJwt,
  async (req, res) => {
    // BUG FIX: the original wrapped this logic in `new Promise(async ...)`.
    // Besides being the explicit-promise-construction anti-pattern, a throw
    // inside an async executor rejects a promise nobody observes, so the
    // wrapping promise never settles and the request hangs. Plain sequential
    // code inside the async handler does the same job safely.
    const { EQUIPMENTS_DIR, RESOURCES_DIR } = paths[req.params.server];
    const { from, to, maxCraftPrice, minBenef } = req.params;
    const astuces = [];
    const filteredEquipments = getItemByLevel(from, to);
    for (const equipment in filteredEquipments) {
      // parsing and push to astuces array
    }
    return res.json(astuces);
  }
);
Now in my website when someone go to the page associated with this route, while the data is loading EVERY other request is just locked like in a queue
I tried to add Promise to handle this but no change
Is there a way to handle requests simultaneously, or maybe should I refactor that route to make it faster?
If your request takes a long time to process, it will block all other requests until it is done. If you can make the request take less processing time, that's a good place to start, but you're probably going to need to take further steps to make multiple requests faster.
There are various methods for getting around this situation. This article describes a few approaches.
I have a sample app, user can access some dynamic data via different URL.
The workflow is like this:
when user request get_data?id=1234567
first it checks the DB if there is data for it
if not, generate a random value
then if other users request the same url within a short time (say 10 min), it will return the value that already generated
if one of the users send a clear request, the value will be cleared from DB.
The bug is: if 2 users request the same url at the same time, since it needs time to query the DB, it would do 1 and 2 at the same time, then create different values for each user.
How to make sure that in a short period, it always generate same value for all users?
Although NodeJS is single threaded and does not have the problem of synchronization between multiple threads, its asynchronous event model still can require you to implement some kind of locking mechanism to synchronize the concurrent async operations in certain situations (like in your case).
There are a number of libraries that provide this functionality, e.g. async-mutex. Here's a very basic example of what your code could look like:
const express = require('express');
const app = express();
const Mutex = require('async-mutex').Mutex;
const locks = new Map();
app.get('/get_data', async (req, res) => {
const queryId = req.query.id;
if (!queryId) {
// handle empty queryid ...
}
if (!locks.has(queryId)) {
locks.set(queryId, new Mutex());
}
const lockRelease = await locks
.get(queryId)
.acquire();
try {
// do the rest of your logic here
} catch (error) {
// handle error
} finally {
// always release the lock
lockRelease();
}
});
app.listen(4000, function () {
console.log("Server is running at port 4000");
});
I'm new to Node/Express. I have a long-running series of processes, for example: post to Express endpoint -> save data (can return now) -> handle data -> handle data -> handle data -> another process -> etc.
A typical POST:
app.post("/foo", (req, res) => {
  // save data and return
  return res.send("200");
  // NOTE(review): everything below this `return` is unreachable — that is
  // the crux of the question. Also note send("200") sends the *body string*
  // "200", not HTTP status 200; res.sendStatus(200) sets the status code.
  // but now I want to do a lot more stuff...
});
If I omit the return then more processing will occur, but even though I'm a newbie to this stack, I can tell that's a bad idea.
All I want is to receive some data, save it and return. Then I want to start processing it, and call into other processes, which call into other processes, etc. I don't want the original POST to wait for all this to complete.
I need to do this in-process, so I can't save to a queue and process it separately afterwards.
Basically I want to DECOUPLE the receipt and processing of the data, in process.
What options are available using Node/Express?
I'd try something like this:
const express = require("express");
const port = 3000;
const app = express();
const uuid = require('uuid');

// Accept the payload, acknowledge immediately with 202, then continue
// processing in the background; the client polls /fooStatus for progress.
app.post("/foo", (req, res) => {
  const requestId = uuid.v4();
  // Send result. Set status to 202: The request has been accepted for processing, but the processing has not been completed. See https://tools.ietf.org/html/rfc7231#section-6.3.3.
  res.status(202).json({ status: "Processing data..", requestId: requestId });
  // Process request.
  // BUG FIX: the original passed `request`, which is not defined in this
  // scope — the handler's parameter is `req`.
  processRequest(requestId, req);
});

app.get("/fooStatus", (req, res) => {
  // Check the status of the request.
  // NOTE(review): GET requests normally carry no body; the requestId would
  // usually come from req.query or a route param — confirm with the client.
  let requestId = req.body.requestId;
});

function processRequest(requestId, request) {
  /* Process request here, then perhaps save result to db. */
}

app.listen(port);
console.log(`Serving at http://localhost:${port}`);
Calling this with curl (for example):
curl -v -X POST http://localhost:3000/foo
Would give a response like:
{"status":"Processing data..","requestId":"abbf6a8e-675f-44c1-8cdd-82c500cbbb5e"}
There is absolutely nothing wrong with your approach of removing return here and ending the request.....so long as you don't have any other code that tries to send any data back later on.
I'd recommend returning status code 202 Accepted for these long running scenarios though, this indicates to the consumer that the server has accepted the request but it's not finished.
I've got a node.js script that pulls data from an external web API for local storage. The first request is a query that returns a list of IDs that I need to get further information on. For each ID returned, I spawn a new http request from node.js and reach out to the server for the data (POST request). Once the job is complete, I sleep for 3 minutes, and repeat. Sometimes the number of IDs is in the hundreds. Each individual http request for those returns maybe 1kb of data, usually less, so the round trip is very short.
I got an email this morning from the API provider begging me to shut off my process because I'm "occupying all of the API servers with hundreds of connections" (which I am actually pretty proud of, but that is not the point). To be nice, I increased the sleep from 3 minutes to 30 minutes, and that has so far helped them.
On to the question... now I've not set maxSockets or anything, so I believe the default is 5. Shouldn't that mean I can only create 5 live http request connections at a time? How does the admin have hundreds? Is their server not hanging up the connection once the data is delivered? Am I not doing so? I don't have an explicit disconnect at the end of my http request, so perhaps I am at fault here. So what does maxSockets actually set?
Sorry for some reason I didn't read your question correctly
maxSockets is the max number of connections the http module will make for that current process. You can check to see what yours is currently set at by accessing it from http.globalAgent.maxSockets.
You can see some information on the current number of connections you have to a given host with the following:
// Inspect the global agent's currently open sockets and queued (not yet
// dispatched) requests for a given "host:port" key.
console.log("Active socket connections: %d", http.globalAgent.sockets['localhost:8080'].length )
console.log("Total queued requests: %d", http.globalAgent.requests['localhost:8080'].length)
Substituting the localhost:8080 for what ever host and port you are making the request too.
You can see how node handles these connections at the following two points:
Adding a new connection and storing to the request queue
https://github.com/joyent/node/blob/master/lib/_http_agent.js#L83
Creating connections from queued requests
https://github.com/joyent/node/blob/master/lib/_http_agent.js#L148
I wrote this up really quick to give you an idea how you could stagger those requests out a bit. This particular code doesn't check to see how many requests are "pending" you could easily modify it to allow you to only have a set number of requests going out at any given time (which honestly would be the better way to do it).
// Stagger: invoke `fn(iteration, item, store)` for each element of `data`,
// spacing calls `stagger` milliseconds apart, then invoke `cb(store)` once
// all elements are processed. `store` is a shared scratch object the
// callbacks can accumulate results into.
var Stagger = function (data, stagger, fn, cb) {
  var self = this;
  this.timerID = 0;
  this.data = [].concat(data); // defensive copy; also lifts a scalar into an array
  this.fn = fn;
  this.cb = cb;
  this.stagger = stagger;
  this.iteration = 0;
  this.store = {};

  this.start = function () {
    // BUG FIX: with an empty data array, the original called fn with an
    // undefined item and then rescheduled itself forever (iteration,
    // incremented to 1, never equalled data.length of 0). Finish at once.
    if (self.data.length === 0) {
      self.cb(self.store);
      return;
    }
    (function __stagger() {
      self.fn(self.iteration, self.data[self.iteration], self.store);
      self.iteration++;
      if (self.iteration != self.data.length)
        self.timerID = setTimeout(__stagger, self.stagger);
      else
        self.cb(self.store);
    })();
  };

  this.stop = function () {
    clearTimeout(self.timerID);
  };
};
// Demo: process six items one second apart, storing 2^index for each,
// then log the accumulated store when finished.
var demo = new Stagger(
  [1, 2, 3, 4, 5, 6],
  1000,
  function (index, value, store) {
    console.log(index, value);
    if (!store.out) store.out = [];
    store.out[index] = Math.pow(2, index);
  },
  function (store) {
    console.log('Done!', store);
  }
);
demo.start();
This code can definitely could be improved but it should give you an idea of maybe where to start.
Live Demo: http://jsbin.com/ewoyik/1/edit (note: requires console)