Inserting data into mongodb using time comparison - node.js

Suppose I have this code running on 3 different servers and every server is using a single database.
setInterval(function(){
if(userArray) {
var report = mongoose.connection.db.collection('report');
report.insert({datenow:new Date(),userlist:userArray},function(err,doc) {
if(err) throw err;
});
}
},600000);
So, this piece of code is running every 10 minutes on every server but I want only one of them to insert the data into the database. Since the data is same it is getting inserted 3 times.
How do I check if the data is already inserted into the database by any one of the servers.
I tried making an incrementing count variable and insert it into the database and use it as a unique ID to check if it exists in the database. If it exists then I won't insert the data. But what if I have to restart the server for some reason, then the count will be reset to its initial value and this doesn't seem a viable solution.
So, how do I approach this problem? I am guessing I have to compare time somehow?

IMO, you should use a Cron expression instead of interval and use the execution time as primary key of your report when you perform the insertion in the database.
Explanation
Cron expression can garantee that the execution of your script will occur at an accurate time. If you use this Cron expression : 00 */10 * * * * (every 10 minutes), your script will execute at exactly 11:00:00, 11:10:00, 11:20:00, so on.. for every server you have.
So you can use this execution time as key for your reports and it will prevent multiple insertion of the same report.
Libs
You can use this lib to use Cron with Node.js : node-cron
Example
var CronJob = require('cron').CronJob;
new CronJob('* * * * * *', function() {
console.log('You will see this message every second');
}, null, true, 'America/Los_Angeles');
I hope this will help you.

So I had to end up comparing timestamps and checking it anything is inserted in the last 10 minutes. This is the solution I came up with.
setInterval(function(){
var currDate = new Date().getTime();
if(keyPairNameArray) {
var overallreport = mongoose.connection.db.collection('overallreport');
overallreport.find({}).sort({_id:-1}).limit(1).toArray(function(err, res){
if(res.length > 0){
var dbDate = new Date(res[0].datenow).getTime();
var diffDate = currDate - dbDate;
if(diffDate < 600000){
} else {
overallreport.insert({datenow:new Date(),userlist:keyPairNameArray},function(err,doc) {
if(err) throw err;
});
}
} else {
overallreport.insert({datenow:new Date(),userlist:keyPairNameArray},function(err,doc) {
if(err) throw err;
});
}
});
}
},610000);

Related

Node-Cron: how to ensure last job is complete

I am using node-cron to send operational emails (currently searching for new emails that need to be sent every minute).
My function (operationalEmails) looks for mongodb records that have a sent flag = false, sends the email, then changes the sent flag = true.
If an iteration of the cron has a large payload of records to send, it could take more than a minute.
How do I ensure the last iteration of the cron is complete before starting a new one?
//Run every Min
cron.schedule("* * * * *", () => {
operationalEmails();
});
you would need to create a simple lock
const running = false;
function operationalEmails() {
if (running) {
return
}
running = true;
// do stuff
running = false;
}
//Run every Min
cron.schedule("* * * * *", () => {
operationalEmails();
});

Scheduled Recurring Script - Setting Script to Run Continuously Until Saved Search Results are Null

I have built a script that looks at fields within a saved search to process, when the script processes each line of the saved search . The script has to process a lot of lines of data and I'm running into issue where I get an SSS Usage Limit exceeded message just due to the nature of the amount of information that I'm processing. Therefore I'm wondering if on each run of the script if I can limit the amount of orders processed and then consecutively run the script until there aren't any more records that need to be processed
Previously I have just restricted the number of records that the script processes at a time and then just manually trigger the script until there aren't anymore lines left to process. I see that you can trigger the script to run every 15 minutes or so but would like it to run each day and then run in a 15 minute cadence after that until the saved search has been exhausted. Then the script would be triggered to run again the following day etc for every 15 min until all records are processed. From research don't know if this type of scheduling is possible.
code itself is working the scheduling of it is what I need guidance on
The other alternative (and one that I've used successfully) is to exit the scheduled script whent the usage reaches a certain point, but before you exit, trigger the scheduled script again. I've used this successfully to process thousands of records (such as mass deletions).
/**
* #NApiVersion 2.x
* #NScriptType ScheduledScript
* #NModuleScope SameAccount
*/
define(['N/record', 'N/runtime', 'N/search', 'N/task'],
/**
* #param {record} record
* #param {search} search
*/
function(record, runtime, search, task) {
const governanceCap = 9950;
function getAllResults(s) {
var results = s.run();
var searchResults = [];
var searchid = 0;
do {
var resultslice = results.getRange({start:searchid,end:searchid+1000});
resultslice.forEach(function(slice) {
searchResults.push(slice);
searchid++;
}
);
} while (resultslice.length >=1000);
return searchResults;
}
/**
* Definition of the Scheduled script trigger point.
*
* #param {Object} scriptContext
* #param {string} scriptContext.type - The context in which the script is executed. It is one of the values from the scriptContext.InvocationType enum.
* #Since 2015.2
*/
function execute(scriptContext) {
function rescheduleCurrentScript() {
var scheduledScriptTask = task.create({
taskType: task.TaskType.SCHEDULED_SCRIPT
});
scheduledScriptTask.scriptId = runtime.getCurrentScript().id;
scheduledScriptTask.deploymentId = runtime.getCurrentScript().deploymentId;
return scheduledScriptTask.submit();
}
try {
var script = runtime.getCurrentScript();
// GET YOUR SEARCH HERE
var mySearch = getAllResults(
search.create({
type: "transaction",
filters:
[
["mainline","is","T"],
],
columns:
[
"name",
"tranid",
"type",
search.createColumn({
name:"datecreated",
sort: search.Sort.DESC
}),
]
})
);
var recCount = mySearch.length;
for (each in mySearch) {
try {
record.delete({
type: transSearch[each].getValue({name:'type'}),
id: transSearch[each].id
});
} catch (err) {log.error(err.name,err.message) }
var govPointsUsed = 10000-script.getRemainingUsage();
script.percentComplete = (govPointsUsed/governanceCap*100).toFixed(1);
if (govPointsUsed >= governanceCap) {
var taskId = rescheduleCurrentScript();
log.audit('Rescheduling status: ','Task ID:' + taskId);
return;
}
}
} catch (err) { log.error(err.name,err.message + '; Stack: '+err.stack ) };
}
return {
execute: execute
};
});
Worked like a charm!!
I would encourage you to use a Map/Reduce script. That is the correct script type to use when dealing with a lot of data and can handle governance issues much better.
I had this problem some time ago. The map/reduce script can be the solution if you run 2.0 scripts. In my case, I had 'legacy' scripts in versions 1.X and 2.X. And all of them throw from time to time 'SSS_TIME_LIMIT_EXCEEDED'.
My solution :)
I created a script, kind of 'listener'. I removed link to the script because the rep. is not active anymore
// *add the script listener
// [it can be used for both script versions 1.X and 2.X]
/** you have to calculate the 'process usage' by 1 loop cycle.
use script.getRemainingUsage() */
var recallValue = 200;
var stopValue = 200;
var recallIsRequired = true;// use false to stop the script without recall
var script = setScriptListener(recallValue, stopValue, recallIsRequired);
for (var data in data_contaniner){
if(script.canContinue()){
//run you processing
// !important : mark the data as 'processed'.
//otherwise you will have a lot of duplicates after script recall
}else{
//stop the loop
break;
}
}

Unable to do a cron job using cron package with specified time

I am trying to do a cron job but it only works if the time specified is every second or every minute. However, If I try to specify a time (day, month , hour and minute) I am unable to get a response.
CronService.js file :
var CronJob = require('cron').CronJob;
module.exports = {
startJob : function(time) {
new CronJob(time, function() {
console.log('test');
}, null, true,'Asia/Singapore');
}
}
some controller.js file :
startCronService : function(req, res, next) {
var time = '12 15 29 8 *';
CronService.startJob(time);
},
I tried changing the timezones to 'America/New_York' and adjust my time as well. but the cron job will never respond.
Am I doing something wrong with the time syntax?
After trying multiple things, I found out that you must provide the name of the month rather than the number (Jan,Feb) etc. Although, the npm documentation mentioned that we are able to use both, but that is false unfortunately.

Don't get tweet with stream API in Node

I'm working with Twit to get a nice wrapper around the Twitter API. I have a cron to get all the tweet on a particular hashtag. It increment the counter everytime there's a new tweet, and at the end of the period, save it in to the database (MongoDB). Only problem is, it always return me 0.
Here is the code
new cronJob('00 */5 * * * *', function(){ // start parsing 5mn after call, and every 5mn then
var stream = T.stream('statuses/filter', { track: 'hashtag' })
var counter = 0;
var date = new Date();
var collection = client.collection("TweetsNumber");
stream.on('tweet', function (tweet) {
console.log(tweet);
counter += 1;
})
collection.insert({Date: date, CrawledTweets: counter, Channel: "someChannel"});
console.log(counter + " tweets saved in DB");
}, null, true, "Europe/Paris");
According to the doc, the "stream.on" method is called everytime there's a new tweets. I use some trending topic to be sure to have data, but it's like it is never called, and I really don't know why.
Hope you can help. Have a great day !
EDIT: T is already create in another part of the program, and with other functionnalities, it is working. Same for client, which is my db.
EDIT: Thanks to Shodan, it works now, see the github issue. Thanks a lot !
Are the tweets logged to your console?
If yes, then it is not a twit problem, since it exactly does what you told it to.
As I read your code correctly, you create a cronjob, which fires once every 5 minutes.
It connects a new local stream, which should output to the console and increase the counter for the next 5 minutes.
It insert into the global client.collection("TweetNumbers"), with the local variable counter having a value of 0
it console.log(counter + " tweets saved in DB");, with the local variable counter having a value of 0
The function then exits, and is fresh started in 5 minutes.
stream.on should continue to fire when ever a tweet comes along for the next 5 minutes and increase the counter, BUT the counter is never used again by collection.insert and the second console.log.
This is because you restart the function creating new local variables for all the stuff and logging the initial values again.
You set var counter = 0 and then immediately console.log() it, which means the 'tweet' event never gets a chance to fire and increase counter. You might want to do this:
new cronJob('00 */5 * * * *', function(){ // start parsing 5mn after call, and every 5mn then
var stream = T.stream('statuses/filter', { track: 'hashtag' })
var counter = 0;
var date = new Date();
var collection = client.collection("TweetsNumber");
stream.on('tweet', function (tweet) {
console.log(tweet);
collection.insert({Date: date, CrawledTweets: counter, Channel: "someChannel"});
counter += 1;
console.log(counter + " tweets saved in DB");
})
}, null, true, "Europe/Paris");

Node.js Cron Job Messing with Date Object

I'm trying to schedule several cron jobs to generate serial numbers for different entities within my web app. However I am running into this problem, when I'm looping each table, it says it has something to do with date.js. I'm not doing anything with a date object ? Not at this stage anyway. A couple of guesses is that the cron object is doing a date thing in its code and is referencing date.js. I'm using date.js to get access to things like ISO date.
for (t in config.generatorTables) {
console.log("t = " + config.generatorTables[t] + "\n\n");
var ts3 = azure.createTableService();
var jobSerialNumbers = new cronJob({
//cronTime: '*/' + rndNumber + ' * * * * *',
cronTime: '*/1 * * * * *',
onTick: function () {
//console.log(new Date() + " calling topUpSerialNumbers \n\n");
manageSerialNumbers.topUpSerialNumbers(config.generatorTables[t], function () { });
},
start: false,
timeZone: "America/Los_Angeles"
});
ts3.createTableIfNotExists(config.generatorTables[t], function (error) {
if (error === null) {
var query = azure.TableQuery
.select()
.from(config.generatorTables[t])
.where('PartitionKey eq ?', '0')
ts3.queryEntities(query, function (error, serialNumberEntities) {
if (error === null && serialNumberEntities.length == 0) {
manageSerialNumbers.generateNewNumbers(config.maxNumber, config.serialNumberSize, config.generatorTables[t], function () {
jobSerialNumbers.start();
});
}
else jobSerialNumbers.start();
});
}
});
}
And this is the error message I'm getting when I examine the server.js.logs\0.txt file:
C:\node\w\WebRole1\public\javascripts\date.js:56
onsole.log('isDST'); return this.toString().match(/(E|C|M|P)(S|D)T/)[2] == "D"
^
TypeError: Cannot read property '2' of null
at Date.isDST (C:\node\w\WebRole1\public\javascripts\date.js:56:110)
at Date.getTimezone (C:\node\w\WebRole1\public\javascripts\date.js:56:228)
at Object._getNextDateFrom (C:\node\w\WebRole1\node_modules\cron\lib\cron.js:88:30)
at Object.sendAt (C:\node\w\WebRole1\node_modules\cron\lib\cron.js:51:17)
at Object.getTimeout (C:\node\w\WebRole1\node_modules\cron\lib\cron.js:58:30)
at Object.start (C:\node\w\WebRole1\node_modules\cron\lib\cron.js:279:33)
at C:\node\w\WebRole1\server.js:169:46
at Object.generateNewNumbers (C:\node\w\WebRole1\utils\manageSerialNumbers.js:106:5)
at C:\node\w\WebRole1\server.js:168:45
at C:\node\w\WebRole1\node_modules\azure\lib\services\table\tableservice.js:485:7
I am using this line in my database.js file:
require('../public/javascripts/date');
is that correct that I only have to do this once, because date.js is global? I.e. it has a bunch of prototypes (extensions) for the inbuilt date object.
Within manageSerialNumbers.js I am just doing a callback, their is no code executing as I've commented it all out, but still receiving this error.
Any help or advice would be appreciated.
Ok I've commented out the date.js module and now I'm getting this error:
You specified a Timezone but have not included the time module. Timezone functionality is disabled. Please install the time module to use Timezones in your application.
When I examine the cron.js module it has this statement at the top:
var CronDate = Date;
try {
CronDate = require("time").Date;
} catch(e) {
//no time module...leave CronDate alone. :)
}
So this would conclude then that it does have something to do with the date.js module ?
Anyone see whats wrong.
cheers
I was using date.js for a while, and then realized that the Date object in Node.js/V8 already had ISO support, input and output, and Date.js only added a very specific edge case. What exactly do you need date.js for?
If you do need date.js...
is that correct that I only have to do this once, because date.js is global?
That's true as long as the code is running in the same process. I'm not familiar Node.js on Azure, or the library providing the cronJob method you're using, but perhaps it spawns a new process, which has its own V8 instance, and then you lose the Date object changes? (So require it again in that code.)

Resources