NodeJS Running multiple fetch requests inside multiple Node-Schedule jobs

NodeJS Running multiple fetch requests inside multiple Node-Schedule jobs - node.js

My goal is to be able to run different functions in each of the multiple node schedule jobs, which will run at specific times of the day, and will call a specific function depending on the time of the day.
I have the following code, for a minimum example I have made it simple.
var f1 = function () {
return fetch('url1.php', {
method: 'post',
headers: {
'Content-Type': 'application/json'
}
})
}
var f2 = function () {
return fetch('url2.php', {
method: 'post',
headers: {
'Content-Type': 'application/json'
}
})
}
var tasks = [f1, f2];
const promesas = async(tasks) => {
return await Promise.all(tasks.map(function (e, j) {
rules[j] = new schedule.RecurrenceRule();
rules[j].dayOfWeek = [new schedule.Range(1, 5)];
rules[j].hour = hours[j];
rules[j].minute = mins[j];
return schedule.scheduleJob(rules[j], async function () {
try {
await tasks[j]()
} catch (error) {
console.log(error);
}
});
})).then(response => {
return Promise.resolve(response);
});
}
promesas(tasks);
I can run these functions just fine outside the jobs, or all within a single job. But not in the setup that I want which I describe at the beggining.
The first iteration works fine, the second just fails. The difference between each job is of one minute for testing purposes.

I am not sure what you're doing here, especially with your odd use of Promises. (Why await the Promise.all()? Why return Promise.resolve() of the already resolved response? Why await each individual task rather than just let it execute (or fail) asynchronously in what amounts to a cronjob script with no output?)
However, if this is working for you for the first iteration, that's fine.
Can I assume you're using node-schedule? If so, just note that you're actually using times of day 00:00, 01:01, etc. rather than "one minute between each for testing purposes" as you claim.
Here's how I would do what you seem to be trying to do:
const tasks = [f1, f2];
let i = 0;
for (let task of tasks) {
let rule = new schedule.RecurrenceRule();
rule.dayOfWeek = [new schedule.Range(1, 5)];
rule.minute = (new Date().getMinutes() + (++i)) % 60; // adjust to suit your needs
schedule.scheduleJob(rule, task);
}
I have confirmed this works, e.g. issues one task per minute starting from the minute after the script is started. No await, then, or Promise.all is necessary for the simple example you have given, but may be required depending on what you plan to do with the results of each job function.

Related

await for Lock() on stateless action

Problem:
front-end page make x parallel requests (let's call it first group),
the next group (x request) will be after 5 seconds, the first request (of the first group) set the cache from DB.
the other x-1 requests got empty array insted of wait to first request to done his job.
the second group and the all next requests got proper data from cache.
What is the best practics to lock other threads until the first done (or fail) in stateless mechanism?
EDIT:
The cache module allow use trigger of set chache but it's not work since it stateless mechanism.
const GetDataFromDB= async (req, res, next) => {
var cachedTableName = undefined;
// "lockFlag" uses to prevent parallel request to get into critical section (because its take time to set cache from db)
// to prevent that we uses "lockFlag" that is short-initiation to cache.
//
if ( !myCache.has( "lockFlag" ) && !myCache.has( "dbtable" ) ){
// here arrive first req from first group only
// the other x-1 of first group went to the nest condition
// here i would build mechanism to wait 'till first req come back from DB (init cache)
myCache.set( "lockFlag", "1" )
const connection1 = await odbc.connect(connectionConfig);
const cachedTableName = await connection1.query(`select * from ${tableName}`);
if(cachedTableName.length){
const success = myCache.set([
{key: "dbtable", val: cachedTableName, ttl: 180},
])
if(success)
{
cachedTableName = myCache.get( "dbtable" );
}
}
myCache.take("lockFlag");
connection1.close();
return res.status(200).json(cachedTableName ); // uses for first response.
}
// here comes x-1 of first group went to the nest condition and got nothing, bacause the cache not set yet
//
if ( myCache.has( "dbtable" ) ){
cachedTableName = myCache.get( "dbtable" );
}
return res.status(200).json(cachedTableName );
}

You can try the approach given here, with minor modifications to apply it for your case.
For brevity, I removed comments and shortened variables names.
Code, then explanation:
const EventEmitter = require('events');
const bus = new EventEmitter();
const getDataFromDB = async (req, res, next) => {
var table = undefined;
if (myCache.has("lockFlag")) {
await new Promise(resolve => bus.once("unlocked", resolve));
}
if (myCache.has("dbtable")) {
table = myCache.get("dbtable");
}
else {
myCache.set("lockFlag", "1");
const connection = await odbc.connect(connectionConfig);
table = await connection.query(`select * from ${tableName}`);
connection.close();
if (table.length) {
const success = myCache.set([
{ key: "dbtable", val: table, ttl: 180 },
]);
}
myCache.take("lockFlag");
bus.emit("unlocked");
}
return res.status(200).json(table);
}
This is how it should work:
At first, lockFlag is not present.
Then, some code calls getDataFromDB. That code evaluates the first if block to false, so it continues: it sets lockFlag to true ("1"), then goes on to retrieve the table data from db. In the meantime:
Some other code calls getDataFromDB. That code, however, evaluates the first if block to true, so it awaits on the promise, until an unlocked event will be emitted.
Back to the first calling code: It finishes its logic, caches the table data, sets lockFlag back to false, emits an unlocked event, and returns.
The other code can now continue its execution: it evaluates the second if to true, so it takes the table from the cache, and returns.

As workaround i add "finally" scope to remove lock-key from cache after first initiation, and this:
while(myCache.has( "lockFlag" )){
await wait(1500);
}
And the "wait" function:
function wait(milleseconds) {
return new Promise(resolve => setTimeout(resolve, milleseconds))
}
(source)
This is working, but still could be time (<1500 ms) that there is cache and the thread not aware.
I'ld happy for batter solution.

Nodejs - Fire multiple API calls while limiting the rate and wait until they are all done

My issues
Launch 1000+ online API that limits the number of API calls to 10 calls/sec.
Wait for all the API calls to give back a result (or retry), it can take 5 sec before the API sends it data
Use the combined data in the rest of my app
What I have tried while looking at a lot of different questions and answers here on the site
Use promise to wait for one API request
const https = require("https");
function myRequest(param) {
const options = {
host: "api.xxx.io",
port: 443,
path: "/custom/path/"+param,
method: "GET"
}
return new Promise(function(resolve, reject) {
https.request(options, function(result) {
let str = "";
result.on('data', function(chunk) {str += chunk;});
result.on('end', function() {resolve(JSON.parse(str));});
result.on('error', function(err) {console.log("Error: ", err);});
}).end();
});
};
Use Promise.all to do all the requests and wait for them to finish
const params = [{item: "param0"}, ... , {item: "param1000+"}]; // imagine 1000+ items
const promises = [];
base.map(function(params){
promises.push(myRequest(params.item));
});
result = Promise.all(promises).then(function(data) {
// doing some funky stuff with dat
});
So far so good, sort of
It works when I limit the number of API requests to a maximum of 10 because then the rate limiter kicks in. When I console.log(promises), it gives back an array of 'request'.
I have tried to add setTimeout in different places, like:
...
base.map(function(params){
promises.push(setTimeout(function() {
myRequest(params.item);
}, 100));
});
...
But that does not seem to work. When I console.log(promises), it gives back an array of 'function'
My questions
Now I am stuck ... any ideas?
How do I build in retries when the API gives an error
Thank you for reading up to hear, you are already a hero in my book!

When you have a complicated control-flow using async/await helps a lot to clarify the logic of the flow.
Let's start with the following simple algorithm to limit everything to 10 requests per second:
make 10 requests
wait 1 second
repeat until no more requests
For this the following simple implementation will work:
async function rateLimitedRequests (params) {
let results = [];
while (params.length > 0) {
let batch = [];
for (i=0; i<10; i++) {
let thisParam = params.pop();
if (thisParam) { // use shift instead
batch.push(myRequest(thisParam.item)); // of pop if you want
} // to process in the
// original order.
}
results = results.concat(await Promise.all(batch));
await delayOneSecond();
}
return results;
}
Now we just need to implement the one second delay. We can simply promisify setTimeout for this:
function delayOneSecond() {
return new Promise(ok => setTimeout(ok, 1000));
}
This will definitely give you a rate limiter of just 10 requests each second. In fact it performs somewhat slower than that because each batch will execute in request time + one second. This is perfectly fine and already meet your original intent but we can improve this to squeeze a few more requests to get as close as possible to exactly 10 requests per second.
We can try the following algorithm:
remember the start time
make 10 requests
compare end time with start time
delay one second minus request time
repeat until no more requests
Again, we can use almost exactly the same logic as the simple code above but just tweak it to do time calculations:
const ONE_SECOND = 1000;
async function rateLimitedRequests (params) {
let results = [];
while (params.length > 0) {
let batch = [];
let startTime = Date.now();
for (i=0; i<10; i++) {
let thisParam = params.pop();
if (thisParam) {
batch.push(myRequest(thisParam.item));
}
}
results = results.concat(await Promise.all(batch));
let endTime = Date.now();
let requestTime = endTime - startTime;
let delayTime = ONE_SECOND - requestTime;
if (delayTime > 0) {
await delay(delayTime);
}
}
return results;
}
Now instead of hardcoding the one second delay function we can write one that accept a delay period:
function delay(milliseconds) {
return new Promise(ok => setTimeout(ok, milliseconds));
}
We have here a simple, easy to understand function that will rate limit as close as possible to 10 requests per second. It is rather bursty in that it makes 10 parallel requests at the beginning of each one second period but it works. We can of course keep implementing more complicated algorithms to smooth out the request pattern etc. but I leave that to your creativity and as homework for the reader.

Grouping redis.get for 2ms and then executing by mget

My application makes about 50 redis.get call to serve a single http request, it serves millions of request daily and application runs on about 30 pods.
When monitoring on newrelic i am getting 200MS average redis.get time, To Optimize this i wrote a simple pipeline system in nodejs which is simply a wrapper over redis.get and it pushes all the request in queue, and then execute the queue using redis.mget (getting all the keys in bulk).
Following is the code snippet:
class RedisBulk {
constructor() {
this.queue = [];
this.processingQueue = {};
this.intervalId = setInterval(() => {
this._processQueue();
}, 5);
}
clear() {
clearInterval(this.intervalId);
}
get(key, cb) {
this.queue.push({cb, key});
}
_processQueue() {
if (this.queue.length > 0) {
let queueLength = this.queue.length;
logger.debug('Processing Queue of length', queueLength);
let time = (new Date).getTime();
this.processingQueue[time] = this.queue;
this.queue = []; //empty the queue
let keys = [];
this.processingQueue[time].forEach((item)=> {
keys.push(item.key);
});
global.redisClient.mget(keys, (err, replies)=> {
if (err) {
captureException(err);
console.error(err);
} else {
this.processingQueue[time].forEach((item, index)=> {
item.cb(err, replies[index]);
});
}
delete this.processingQueue[time];
});
}
}
}
let redis_bulk = new RedisBulk();
redis_bulk.get('a');
redis_bulk.get('b');
redis_bulk.get('c');
redis_bulk.get('d');
My Question is: is this a good approach? will it help in optimizing redis get time? is there any other solution for above problem?
Thanks

I'm not a redis expert but judging by the documentation ;
MGET has the time complexity of
O(N) where N is the number of keys to retrieve.
And GET has the time complexity of
O(1)
Which brings both scenarios to the same end result in terms of time complexity in your scenario. Having a bulk request with MGET can bring you some improvements for the IO but apart from that looks like you have the same bottleneck.
I'd ideally split my data into chunks, responding via multiple http requests in async fashion if that's an option.
Alternatively, you can try calling GET with promise.all() to run GET requests in parallel, for all the GET calls you need.
Something like;
const asyncRedis = require("async-redis");
const client = asyncRedis.createClient();
function bulk() {
const keys = [];
return Promise.all(keys.map(client.get))
}

How to run asynchronous tasks synchronous?

I'm developing an app with the following node.js stack: Express/Socket.IO + React. In React I have DataTables, wherein you can search and with every keystroke the data gets dynamically updated! :)
I use Socket.IO for data-fetching, so on every keystroke the client socket emits some parameters and the server calls then the callback to return data. This works like a charm, but it is not garanteed that the returned data comes back in the same order as the client sent it.
To simulate: So when I type in 'a', the server responds with this same 'a' and so for every character.
I found the async module for node.js and tried to use the queue to return tasks in the same order it received it. For simplicity I delayed the second incoming task with setTimeout to simulate a slow performing database-query:
Declaration:
const async = require('async');
var queue = async.queue(function(task, callback) {
if(task.count == 1) {
setTimeout(function() {
callback();
}, 3000);
} else {
callback();
}
}, 10);
Usage:
socket.on('result', function(data, fn) {
var filter = data.filter;
if(filter.length === 1) { // TEST SYNCHRONOUSLY
queue.push({name: filter, count: 1}, function(err) {
fn(filter);
// console.log('finished processing slow');
});
} else {
// add some items to the queue
queue.push({name: filter, count: filter.length}, function(err) {
fn(data.filter);
// console.log('finished processing fast');
});
}
});
But the way I receive it in the client console, when I search for abc is as follows:
ab -> abc -> a(after 3 sec)
I want it to return it like this: a(after 3sec) -> ab -> abc
My thought is that the queue runs the setTimeout and then goes further and eventually the setTimeout gets fired somewhere on the event loop later on. This resulting in returning later search filters earlier then the slow performing one.
How can i solve this problem?

First a few comments, which might help clear up your understanding of async calls:
Using "timeout" to try and align async calls is a bad idea, that is not the idea about async calls. You will never know how long an async call will take, so you can never set the appropriate timeout.
I believe you are misunderstanding the usage of queue from async library you described. The documentation for the queue can be found here.
Copy pasting the documentation in here, in-case things are changed or down:
Creates a queue object with the specified concurrency. Tasks added to the queue are processed in parallel (up to the concurrency limit). If all workers are in progress, the task is queued until one becomes available. Once a worker completes a task, that task's callback is called.
The above means that the queue can simply be used to priorities the async task a given worker can perform. The different async tasks can still be finished at different times.
Potential solutions
There are a few solutions to your problem, depending on your requirements.
You can only send one async call at a time and wait for the first one to finish before sending the next one
You store the results and only display the results to the user when all calls have finished
You disregard all calls except for the latest async call
In your case I would pick solution 3 as your are searching for something. Why would you use care about the results for "a" if they are already searching for "abc" before they get the response for "a"?
This can be done by giving each request a timestamp and then sort based on the timestamp taking the latest.

SOLUTION:
Server:
exports = module.exports = function(io){
io.sockets.on('connection', function (socket) {
socket.on('result', function(data, fn) {
var filter = data.filter;
var counter = data.counter;
if(filter.length === 1 || filter.length === 5) { // TEST SYNCHRONOUSLY
setTimeout(function() {
fn({ filter: filter, counter: counter}); // return to client
}, 3000);
} else {
fn({ filter: filter, counter: counter}); // return to client
}
});
});
}
Client:
export class FilterableDataTable extends Component {
constructor(props) {
super();
this.state = {
endpoint: "http://localhost:3001",
filters: {},
counter: 0
};
this.onLazyLoad = this.onLazyLoad.bind(this);
}
onLazyLoad(event) {
var offset = event.first;
if(offset === null) {
offset = 0;
}
var filter = ''; // filter is the search character
if(event.filters.result2 != undefined) {
filter = event.filters.result2.value;
}
var returnedData = null;
this.state.counter++;
this.socket.emit('result', {
offset: offset,
limit: 20,
filter: filter,
counter: this.state.counter
}, function(data) {
returnedData = data;
console.log(returnedData);
if(returnedData.counter === this.state.counter) {
console.log('DATA: ' + JSON.stringify(returnedData));
}
}
This however does send unneeded data to the client, which in return ignores it. Somebody any idea's for further optimizing this kind of communication? For example a method to keep old data at the server and only send the latest?

How to wait in node.js for a variable available on the cloud to have a specific value

I'm sorry if this is a basic question, but I am trying to implement a program in node.js that should wait for the value of a variable available trough a request to a cloud api (photon.variable()) to be 1. This variable should not be requested more than once per second. My first attempt is included in the sample code below. Despite knowing it does not work at all, I think it could be useful to show the functionality I would like to implement.
var photondata = 0;
while (photondata < 1)
{
setTimeout(function () {
photon.variable("witok", function(err, data) {
if (!err) {
console.log("data: ", data.result);
photondata = data.result;
}
else console.log(err);
})}, 1000);
}

Since you couldn't do async stuff in loops before, the traditional approach would be to create a function that adds itself to setTimeout for as long as needed, then calls some other function when it's done. You still need to do this in the browser if not using Babel.
These days, you can stop execution and wait for things to happen when using a generator function (which latest versions of Node now support). There are many libraries that will let you do this and I will advertise ours :)
CL.run(function* () {
var photondata = 0;
while (true) {
yield CL.try(function* () {
var data = yield photon.variable("witok", CL.cb());
console.log("data: ", data.result);
photondata = data.result;
}, function* (err) {
console.log(err.message);
});
if (photondata >= 1) break;
yield CL.sleep(1000);
}
// do whatever you need here
});

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string