I have been working on this data gathering module, that is supposed to get data from different bitcoin markets and standardise all the received data so it can be inserted into a mongodb database for later use. (The module is written in node 4.3.x)
The problem I have is that I need consistency in how the data is represented in the database. So whenever a request times out, instead of catching the failed get request and logging an error, I want to resolve a '0'.
Additionally, the received data contains trades that need to be trimmed into timestamp order, so that no trade is written to the database twice.
For that I have implemented two queues:
1: TimestampQueue - holds timestamps. The timestamp in [0] is the next expected response
2: objectQueue - holds received responses
=> as soon as the object in objectQueue[0] equals the timestamp in timestampQueue[0] => do data manipulation and insert into database.
The problem lies that the axios.get request that should catch a timeout doesn't do that consistently.
It happens after random timeframes, but on average the queue gets stuck after 2hrs.
To make things clearer here some important code snippets:
httpclient making the axios request:
get(url) {
return this.instance.get(url) //instance just defined a timeout. Nothing special
.then(response => {
return response.data;
})
.catch(error => {
throw error; //THIS SEEMINGLY DOESN'T GET EXECUTED IN THE DESCRIBED CASE
});
}
Now the marketHandler that resolves the request:
getMethodFromMarket(method, market, timestamp){
if(this.markets[market]){
if(this.markets[market].methods[method]) {
var url = this.markets[market].methods[method];
let result = {};
result[method] = {};
result[method][market] = {};
return this.client.get(url)
.then(data => {
result[method][market] = data;
log.debug("Successfully received " + method + " for " + market + " : " + timestamp);
return result;
})
.catch(err => {
result[method][market] = 0;
log.error(new Error("Failed to get " + method + " for " + market + ": " + timestamp));
log.error(err);
return result;
});
} else{
return Promise.reject(new Error(method + " not available for " + market));
}
} else {
return Promise.reject(new Error("Market not specified in config"));
}
}
The code that makes the requests for all defined markets (for one method) and joins them in one object:
//returns promise to get *method* from all markets specified in
//config.json
getAllMarkets(method, timestamp){
let getMarketsPromises = [];
let result = {};
result[method] = {};
Object.keys(this.markets).forEach(market => {
result[method][market] = {};
getMarketsPromises.push(this.getMethodFromMarket(method, market, timestamp));
});
return Promise.all(getMarketsPromises)
.then(results => {
for(let i = 0; i < results.length; i++){
let market = Object.keys(results[i][method])[0];
result[method][market] = results[i][method][market];
}
log.debug("Got all markets for " + method + " for " + timestamp);
return result;
})
}
The code that makes the requests for all methods and markets and joins them in the final object that gets manipulated from a different module and inserted into the database:
//returns promise to get trades and depths from markets specified in
//config.json
getEverything(timestamp){
let getMethodPromises = [];
let result = {timestamp};
this.methods.forEach(method => {
result[method] = {};
getMethodPromises.push(this.getAllMarkets(method, timestamp))
});
return Promise.all(getMethodPromises)
.then(results =>{
for(let i = 0; i < results.length; i++){
let method = Object.keys(results[i])[0];
result[method] = results[i][method];
}
log.debug("Got everything for " + timestamp);
return result;
})
}
I have tested the whole process without any data manipulation. Only those functions and inserting it into the database.
The implementation of the 2 queues:
//handles the incoming responses from markets and sorts
//them according to their timestamp
queueResponse(marketInfo){
this.marketInfoQueue.push(marketInfo);
this.marketInfoQueue.sort(function(a, b){
return a.timestamp - b.timestamp;
})
}
//returns queued Responses in order of timestamps.
getQueuedResponses(){
var i = 0;
var results = [];
log.debug("TimestampQueue: "+ this.timestampQueue[0] + " | objectQueue: " + this.marketInfoQueue[0].timestamp);
while(this.marketInfoQueue[i] && this.timestampQueue[i] == this.marketInfoQueue[i].timestamp){
results.push(this.marketInfoQueue.shift());
this.timestampQueue.shift();
i++;
}
return results;
}
//pushes new timestamp into timestampQueue to keep
//incoming responses in order
// Called when a request cycle starts (before its response can arrive);
// timestamps are pushed in request order, so timestampQueue[0] is always
// the next expected response's timestamp.
queueTimestamp(timestamp){
this.timestampQueue.push(timestamp);
}
I have been trying to fix this problem for more than 3 weeks now, and I am absolutely clueless.
TLDR: Axios get request does not resolve or reject. Even though a timeout of 5000ms is defined in the instance used in the httpClient module.
Related
I have to call multiple APIs using Promise.all in a for loop, but I am getting the responses out of order. For example, I have 3 rows and I need the responses in sequence (row 1, row 2, row 3), but instead I first get the response for row 1, then row 3, and then row 2.
// The API hands back `result` as a JSON *string*; it must be parsed before
// use. The original code iterated the string itself (`result.length` counts
// characters), so every `result[i].phoneN` access yielded undefined.
// NOTE: the stray \" after "Moorestown, NJ" has been escaped as \\" so the
// embedded JSON is well-formed and parseable.
var result = '{"error_code":0,"err_desc":null,"data":[{"":"","name":"OTT, KATHRYN M","address":"3110 Horseshoe Trl, Glenmoore, PA","email":"","phone1":"(410) 599-2212","phone2":"(610) 827-9107","phone3":"(610) 308-4566","phone4":"(610) 506-1121","phone5":"(610) 469-0737","phone6":"(610) 942-4347","phone7":"323-7898","phone8":"(814) 371-6133","phone9":""},{"":"","name":"BELTRANTE, SUSAN E","address":"3 Rhoads Ave, Moorestown, NJ\\"","email":"SUSAN.BELTRANTE#AOL.COM, JOE.BARGER#YAHOO.COM,","phone1":"(856) 266-0381","phone2":"(856) 273-0869","phone3":"(609) 266-0381","phone4":"(856) 235-3933","phone5":"","phone6":"","phone7":"","phone8":"","phone9":""},{"":"","name":"Manish","address":"4895 E American Beauty Dr, Tucson, AZ 85756","email":"abc#gmail.com","phone1":"(857) 266-0381","phone2":"(857) 273-0869","phone3":"(610) 266-0381","phone4":"(857) 235-3933","phone5":"","phone6":"","phone7":"","phone8":"","phone9":""}]}';
// Parse once and iterate the actual records. forEach gives each iteration
// its own `row`/`i` bindings, so the async logs below report the correct
// index — with the original `var i`, every callback saw the final value.
JSON.parse(result).data.forEach(function (row, i) {
    var phone = row.phone9;
    var name = row.name;
    var address = row.address;
    var email = row.email;
    var phone1 = row.phone1;
    var phone2 = row.phone2;
    var phone3 = row.phone3;
    var phone4 = row.phone4;
    var phone5 = row.phone5;
    var phone6 = row.phone6;
    var phone7 = row.phone7;
    var phone8 = row.phone8;
    var addressinfo = address.split(',');
    var street = addressinfo[0];
    var city = addressinfo[1];
    var state = addressinfo[2];
    var zip = addressinfo[3];
    // The five lookups run in parallel; Promise.all preserves input order,
    // so `data` below is always [eppraisal, zillow, pennymac, chase, realtor].
    Promise.all([
        fetch('https://backend.mioym.properties/api/247/eppraisal?street='+street+'&zip='+zip),
        fetch('https://backend.mioym.properties/api/247/zillow?street='+street+'&zip='+zip),
        fetch('https://backend.mioym.properties/api/247/pennymac?address='+address),
        fetch('https://backend.mioym.properties/api/247/chase?address='+address),
        fetch('https://backend.mioym.properties/api/247/realtor?address='+address)
    ]).then(function (responses) {
        // response.json() itself returns a promise, so a second
        // Promise.all is required to resolve all the parsed bodies.
        return Promise.all(responses.map(function (response) {
            console.log("here" + i);
            console.log(response.url);
            return response.json();
        }));
    }).then(function (data) {
        console.log("success" + i);
        // Log the data to the console
        // You would do something with both sets of data here
        console.log(data);
    }).catch(function (error) {
        console.log("error" + i);
        // if there's an error, log it
        console.log(error);
    });
});
Could anyone please suggest a solution?
Note that each element of responses is a fetch Response object, and response.json() itself returns a promise — so a second Promise.all is still needed to resolve the parsed bodies. Promise.all processes the requests in parallel, but the resolved responses will be in the order of the input array. So you can do:
Promise.all([
    fetch('https://backend.mioym.properties/api/247/eppraisal?street=' + street + '&zip=' + zip),
    fetch('https://backend.mioym.properties/api/247/zillow?street=' + street + '&zip=' + zip),
    fetch('https://backend.mioym.properties/api/247/pennymac?address=' + address),
    fetch('https://backend.mioym.properties/api/247/chase?address=' + address),
    fetch('https://backend.mioym.properties/api/247/realtor?address=' + address)
]).then(function (responses) {
    // Promise.all resolves `responses` in the same order as the fetches,
    // but each element is a Response whose .json() ALSO returns a promise.
    // Mapping without wrapping would hand the next consumer an array of
    // pending promises, so the inner Promise.all IS necessary.
    return Promise.all(responses.map(function (response) {
        console.log(response.url);
        return response.json();
    }));
});
I have a big collection in MongoDB. Want to migrate all data by running some business logic nodejs scripts on that data to cassandra. What is the best way to do this ?
I have made a script in which i am getting 5000 documents in a single request from mongo and processing the data and inserting the documents into cassandra. It takes a lot of time after 40-50 iterations. CPU usage shows 100%. is this because of a lot of callbacks happening ? I am new to node js so not able to conclude anything.` var cassandra = require('../models/tracking_cassandra');
var TrackingEvents = require('../models/tracking_mongo_events');
// Batch-walk state shared by the functions below: `counter` is the current
// offset into the collection, `incr` the batch size, and `final_counter`
// the total document count (set asynchronously in start_point).
var counter = 0;
var incr = 5000;
var final_counter = 0;
// Kicks off the migration: counts all TrackingEvents documents (to know
// when the walk ends), fetches the first batch's starting _id, then starts
// the batch walk. `callback` fires once the final batch has been reached.
var start_point = function (callback){
TrackingEvents.count(function(err, data){
// NOTE(review): `err` is ignored — a failed count leaves `final_counter`
// undefined and the walk below never terminates; handle it.
final_counter = data;
TrackingEvents.getEventsByCounter(counter, function(counter, obj) {
// NOTE(review): this callback parameter is named `counter`, shadowing
// the module-level counter — judging by getEventsBtwIds(err, ...) it is
// presumably an error argument; confirm against the model's API.
var prevId = obj[0].toObject()._id;
getMessagesFromMongo(prevId, callback);
});
});
};
// Walks the collection in `incr`-sized slices: fetches the _id at the new
// counter offset, pulls every event between prevId and nextId, hands them
// to insert_into_cassandra, then recurses for the next slice.
function getMessagesFromMongo(prevId, callback){
counter = counter + incr;
TrackingEvents.getEventsByCounter(counter, function(counter, obj) {
// NOTE(review): first parameter shadows the module-level `counter`;
// presumably it is an error argument — confirm.
var nextId = obj[0].toObject()._id;
var start_time = new Date();
TrackingEvents.getEventsBtwIds(prevId, nextId, function ( err, userEvents ) {
if(userEvents.length !== 0){
insert_into_cassandra( userEvents, callback );
}else{
console.log('empty data set');
}
});
// NOTE(review): the next batch is requested immediately, WITHOUT waiting
// for the previous batch's read or its Cassandra inserts to finish — all
// batches end up in flight at once, which is a likely cause of the 100%
// CPU after 40-50 iterations. Recurse from inside the insert completion
// instead to add back-pressure.
if(counter >= final_counter){
callback();
}else{
getMessagesFromMongo(nextId, callback);
}
});
};
// Queues one Cassandra INSERT per event, skipping events without a uid.
// NOTE(review): despite its signature, `callback` is never invoked and
// `inserts` is never updated, so callers cannot tell when a batch is done.
// `total_documents` and `total_nuid` are implicit globals — declare them
// at module level (this file would fail under 'use strict').
var insert_into_cassandra = function( events, callback ){
var inserts = 0;
total_documents = total_documents + events.length;
for(var i = 0 ; i< events.length ; i++){
var userEventData = events[i].toObject();
if(typeof userEventData.uid == 'undefined'){
total_nuid ++;
}else{
create_cassandra_query( userEventData );
}
}
};
// Helper: renders the INSERT statement for the given column names, using
// the driver's named-parameter placeholders (:col).
function buildUserwiseInsert(columns) {
    return "INSERT INTO userwise_events (" + columns.join(', ') +
        ") VALUES (" + columns.map(function (key) { return ':' + key; }).join(', ') + ")";
}
// Builds and executes one prepared INSERT into userwise_events from a
// single event document. Mutates `eventData` in place: bookkeeping fields
// are removed, null/undefined fields dropped, and every remaining value
// except uid/date_time/etypeId is stringified for the text columns.
var create_cassandra_query = function ( eventData ) {
    delete eventData._id;
    delete eventData[0];
    delete eventData.appid;
    delete eventData.appversion;
    // BUG FIX: the original deleted null keys inside a for-in over the same
    // object (unspecified enumeration behavior) and then re-created each
    // deleted key as the string "undefined" via String(eventData[key]).
    // Collect the keys up front and process them in separate passes.
    Object.keys(eventData).forEach(function (key) {
        if (eventData[key] == null) {
            delete eventData[key];
        }
    });
    var columns = Object.keys(eventData);
    columns.forEach(function (key) {
        if (key != 'uid' && key != 'date_time' && key != 'etypeId') {
            eventData[key] = String(eventData[key]);
        }
    });
    var query = buildUserwiseInsert(columns);
    cassandra.trackingCassandraClient.execute(query, eventData, { prepare: true }, function (err, data) {
        if (err) {
            console.log(err);
        }
    });
};
// Entry point: run the migration and report the total wall-clock time.
var start_time = new Date();
start_point(function(res, err){
var end_time = new Date();
var diff = end_time.getTime() - start_time.getTime();
var seconds_diff = diff / 1000;
var totalSec = Math.abs(seconds_diff);
console.log('Total Execution Time : ' + totalSec);
});
// Last-resort guard so one bad document/query doesn't kill the whole run.
// NOTE(review): swallowing uncaughtException and continuing leaves the
// process in an undefined state; prefer logging and exiting.
process.on('uncaughtException', function (err) {
console.log('Caught exception: ' + err);
});`
is this because of a lot of callbacks happening ?
There may be no callbacks at all for all I know - it's impossible to tell you what's the problem with your code of which you didn't include even a single line of code.
For such a vague question I can only give you a general advice: make sure you don't have long running for or while loops. And don't ever use a blocking system call anywhere else than on the first tick of the event loop. If you don't know what is the first tick of the event loop then don't use blocking calls at all. Whenever you can, use streams for data - especially if you have lots of it.
A 100% CPU utilization is a bad sign and should never happen for I/O-heavy operation like the one that you are trying to perform. You should easily be able to handle insane amounts of data, especially when you use streams. Having your process max out the CPU for an inherently I/O-bound operation like moving large amounts of data through a network is a sure sign that you're doing something wrong in your code. What exactly is that? That will remain a mystery since you didn't show us even a single line of your code.
I have run into this problem before for a few HTTP transactions (like a hundred or so posts). Today I'm trying to do 7k HTTP requests. This seems silly but it's the only way to interact with the target system. The best I've been able to do will stall out at about 96% of the requests done. It will then just stop and never complete the last few requests.
Perhaps I'm using OiBackoff incorrectly. It seems to be working fine but that last 3% of the GETs won't ever finish. I've let it set for 5 minutes with no requests coming back when the highest retry interval in the log was 40 seconds.
I wonder if I should do like 100 requests at a time with OiBackoff there to make sure they are all complete.
The goal here is to hit a url that has a number like CFD1234, CFD1236, CFD1238 at the end and push the result (small chunk of xml) into an array. Here's the code, the closest I have to working. Perhaps I need to try a different library? I've tried this with a promises queue and couldn't get it to run. It will work if I create an array of function closures and fire them off in sequence but it takes forever, far longer than it should.
// Compiled-from-CoffeeScript hoisted declarations for the whole script.
var cnum, cnums, complete, ld, logger, oibackoff, opt, processHttpGet, request, responses, total, yamljs, _fn, _i, _len;
yamljs = require('yamljs');
request = require('request');
// Exponential-backoff wrapper: up to 10 tries per request, with the delay
// growing by the configured ratio between tries.
oibackoff = require('oibackoff').backoff({
maxTries: 10,
delayRatio: 10
});
// The target-ID list (CFD1234, CFD1236, ...) lives in a YAML file.
cnums = yamljs.load('./etc/cnumbers.yaml');
responses = [];
logger = {
debug: console.log,
error: console.log
};
ld = require('lodash');
// Dedupe so the completion count matches the number of unique requests.
cnums = ld.uniq(cnums);
logger.debug("cnums len: " + cnums.length);
processHttpGet = function(url, opt, cb) {
  // Single GET with a node-style callback: cb(error, body). Any HTTP
  // status >= 400 is treated as a failure, passing the status code as
  // the error value.
  return request.get(url, opt, function(error, resp, body) {
    if (error != null) {
      return cb(error, null);
    }
    if (resp.statusCode >= 400) {
      return cb(resp.statusCode, null);
    }
    return cb(null, body);
  });
};
opt = null;
total = cnums.length;
complete = 0;
// Builds and immediately starts one backed-off GET per CFD number.
// NOTE(review): every request (~7k) is launched at once — there is no
// concurrency cap here, which is a plausible reason the last few percent
// of requests stall (socket/agent exhaustion). The accepted fix further
// down uses Bluebird's Promise.map with a `concurrency` option instead.
_fn = function(CNumber) {
var intermediate, url;
url = "http://abc:def#abc.def.com/xyz/def/abc.asmx/GetValueByID?ID=" + CNumber;
logger.debug("getting " + url);
// Progress callback invoked by oibackoff between retries; returning false
// after too many tries aborts the whole process.
intermediate = (function(_this) {
return function(err, tries, delay) {
if (err != null) {
logger.debug("GET failed for " + url + ":", err);
logger.debug("tries: %d, delay: %d", tries, delay);
}
if (tries > 10) {
logger.debug("/n/n Failed max tries.");
process.exit(0);
return false;
}
};
})(this);
// Final callback: count completions, sample-log every 100th response,
// and dump everything once 100% of the requests have finished.
return oibackoff(processHttpGet, url, opt, intermediate, function(error, response) {
if (error) {
return false;
} else {
++complete;
responses.push(response);
if (complete % 100 === 0) {
console.dir({
url: url,
response: response
});
}
logger.debug("success; responses complete: " + complete + ", total: " + total + ", percentage: " + (ld.round(complete / total, 2) * 100) + "%");
if (complete >= total) {
logger.debug(responses);
return process.exit(0);
}
}
});
};
// Fire off every request, unbounded.
for (_i = 0, _len = cnums.length; _i < _len; _i++) {
cnum = cnums[_i];
_fn(cnum);
}
The answer to this was to use Bluebird, Promise.map and concurrency with a backoff library.
# coffee
# exports is an array of buffers
# Posts every buffer with BOUNDED parallelism: Promise.map's `concurrency`
# option caps how many posts are in flight at once, and each post retries
# up to 5 times — this is what prevents the stall seen with the unbounded
# fire-everything-at-once approach above.
retry = (require 'u-promised').retry
Promise = require("bluebird")
# build array of buffers to post
Promise.map(exports, (buffer) ->
f = -> postToEndpoint(buffer)
retry(5, f) # post with up to 5 retries
, {concurrency: config.export.concurrency}) # 40 for my app
.then (result) ->
c = 0
ld.map(result, (x) -> c += x)
msg = "Complete. #{c} posts completed."
logger.info msg
.catch (reason) ->
logger.error reason
I am creating an insert script that does some business logic.
Basically, I want to check whether a value in the inserted item exists in a table. But it seems that when I find a problem, request.respond() doesn't stop execution, and I get an error.
I think there is an async issue here. I'm not 100% sure how to solve.
Is there a way to stop execution of the script?
// Validation step: for Family members, confirm the referenced primary
// family member exists before allowing the insert.
if (item.memberType === 'Family' && item.primaryFamilyMember) {
table
.where({
memberNumber: item.primaryFamilyMember,
memberType: 'Family',
primaryFamilyMember: null })
.read({
success: function(results) {
if (results.length == 0) {
// NOTE(review): this success callback runs asynchronously, AFTER
// the `if (validInsert)` block below has already executed — hence
// "Execute cannot be called after respond has been called" and the
// unwanted insert. Move the dependent logic into this callback
// instead of communicating through a flag.
request.respond(statusCodes.BAD_REQUEST,
'Invalid Primary Family Member specified.');
console.error('Invalid Primary Family Member specified:' + item.primaryFamilyMember);
validInsert = false;
} else {
// Inherit type/level/expiry from the primary family member.
item.memberType = results[0].memberType;
item.memberLevel = results[0].memberLevel;
item.dateOfExpiry = results[0].dateOfExpiry;
}
}
});
}
// Member-number generation: prefix = YYMM, suffix = zero-padded count + 1.
// NOTE(review): `validInsert` is read here synchronously, before the async
// read() callback above can have set it to false — so the flag is always
// still true at this point and invalid inserts proceed anyway.
if (validInsert) {
var today = new Date();
var prefix = today.getFullYear().toString().substr(2,2) + ('0' + (today.getMonth() + 1)).slice(-2);
// Count existing members with this month's prefix to derive the next
// sequence number; take(0) fetches only the total count, not the rows.
table.includeTotalCount().where(function(prefix){
return this.memberNumber.substring(0, 4) === prefix;
}, prefix)
.take(0).read({
success: function (results) {
if (isNaN(results.totalCount)) {
results.totalCount = 0;
}
item.memberNumber = prefix + ('00' + (results.totalCount + 1)).slice(-3);
request.execute();
}
});
}
Yes, validInsert is declared at the top of the insert function.
I assume what's happening is the if(validInsert) runs before the read callback. But if so, i'm not sure why I'm getting "Error: Execute cannot be called after respond has been called." That implies the callback is running first.
Also, the record is being inserted when it shouldn't be even though the 400 error is sent back to the client.
This is an express app right? Should I just call response.end() after the error occurs?
Yes, there are definitely asyn issues in that code. To solve get rid of your validInsert flag and simply move the if (validInsert) section into the success callback (or make it a function called from the success callback). For example:
// Success callback for the primary-family-member lookup: everything that
// depends on the validation result now lives INSIDE the callback, so it
// cannot race ahead of the asynchronous read.
success: function(results) {
if (results.length == 0) {
// Validation failed — respond 400 and do nothing else (no insert).
request.respond(statusCodes.BAD_REQUEST,
'Invalid Primary Family Member specified.');
console.error('Invalid Primary Family Member specified:' + item.primaryFamilyMember);
} else {
// Validation passed — inherit fields, then continue with the
// member-number generation that previously sat outside the flag.
item.memberType = results[0].memberType;
item.memberLevel = results[0].memberLevel;
item.dateOfExpiry = results[0].dateOfExpiry;
var today = new Date();
var prefix = today.getFullYear().toString().substr(2,2) + ('0' + (today.getMonth() + 1)).slice(-2);
...
//respond successfully
}
}
Hi I have a backbone web app using Jquery and NodeJs/mongo as the server side framework. I'm having problems with making a http get call with a foreah loop and the results of the get call being iteratively added to each row of the loop.
// Builds a CSV of registrants for an event; each row's "percentage" field
// comes from a second, per-registrant HTTP call.
var eventid = this.model.get("_id");
var inPromise = $.get("/registrants/list?eventid="+eventid,null,null,"json").then(
function (result){
var temp;
var finalVal = '';
var tempfinalVal = "";
var loop = 0
percentage = 0;
$.each(result.registrants,function(index,registrant){
temp = JSON.parse(registrant.fields);
for (var key in temp) {
if(key =="Email"){
if(temp[key] != ""){
// NOTE(review): this inner $.get's promise is discarded — nothing
// waits for it, so finalVal is appended AFTER the outer .done()
// has already fired (the "DONE before CALL" symptom).
$.get("/stats/registrant?userid="+temp[key]+"&eventid="+eventid,null,null,"json").then(function(result2){
percentage = (result2.Stats.type ===undefined || result2.Stats.type ==null) ? "0": result2.Stats.type;
finalVal +=percentage+"\n";
}).fail(function(){
percentage = "0";
});
}
}else if(key =="eventid"){
loop++;
finalVal = finalVal.slice(0, - 1);
finalVal +='\n';
}
finalVal +=temp[key] + ',';
}
});
//promises.push(inPromise);
}
).done(function(finalVal){
// NOTE(review): this parameter shadows the finalVal built above and
// receives the .then handler's return value (undefined), not the CSV.
$("#webcast-download-registrants-tn").attr("href",'data:text/csv;charset=utf-8;filename=registration.csv",'+encodeURIComponent(finalVal));
console.log("DONE");
}).fail(function(){
console.log("fail");
});
// promise.done(function () {
// console.log(" PROMISE DONE");
// });
So I loop through a collection, the last field of each document gets its content from another HTTP call, and when all of that is done it should create a CSV file. The problem is that the "DONE" text is echoed first, and only then is the "CALL" text displayed.
Rick, your problem is not the simplest due to :
the need for nested asynchronous gets
the need to build each CSV data row partly synchronously, partly asynchronously.
the need for a mechanism to handle the fulfilment of multiple promises generated in the inner loop.
From what you've tried, I guess you already know that much.
One important thing to note is that you can't rely on for (var key in temp) to deliver properties in any particular order. Only arrays have order.
You might try something like this :
// Answer code: accumulate one promise per CSV row, then join everything
// once every row promise has resolved — this is what keeps row order
// deterministic despite the per-registrant async score lookups.
var url = "/stats/registrant",
data = { 'eventid': this.model.get('_id') },
rowTerminator = "\n",
fieldNames = ['firstname','lastname','email','company','score'];
// NOTE(review): the question's data stores fields as a JSON *string* with
// capitalized keys (e.g. "Email"); confirm these lowercase names and the
// direct registrant.fields[...] access match the real data shape.
// Fetches one registrant's score; any failure resolves to 0 so a single
// bad request cannot fail the whole CSV build.
function getScore(email) {
return $.get(url, $.extend({}, data, {'userid':email}), null, "json").then(function(res) {
return res.Stats ? res.Stats.type || 0 : 0;
}, function() {
//ajax failure - assume score == 0
return $.when(0);
});
}
$.get("/registrants/list", data, null, "json").then(function(result) {
var promises = [];//An array in which to accumulate promises of CSV rows
promises.push($.when(fieldNames)); //promise of CSV header row
if(result.registrants) {
$.each(result.registrants, function(index, registrant) {
if(registrant.fields) {
// Synchronously initialize row with firstname, lastname, email and company
// (omitting score for now).
var row = fieldNames.slice(0,-1).map(function(fieldName, i) {
return registrant.fields[fieldName] || '';
});
//`row` remains available to inner functions due to closure
var promise;
if(registrant.fields.Email) {
// Fetch the registrant's score ...
promise = getScore(registrant.fields.Email).then(function(score) {
//... and asynchronously push the score onto row
row.push(score);
return row;
});
} else {
//or synchronously push zero onto row ...
row.push(0);
//... and create a resolved promise
promise = $.when(row);
}
promises.push(promise);//Accumulate promises of CSV data rows (still in array form), in the correct order.
}
});
}
return $.when.apply(null, promises).then(function() {
//Join all the pieces, in nested arrays, together into one long string.
return [].slice.apply(arguments).map(function(row) {
return row.join(); //default glue is ','
}).join(rowTerminator);
});
}).done(function(str) {
$("#webcast-download-registrants-tn").attr("href",'data:text/csv;charset=utf-8;filename=registration.csv",'+encodeURIComponent(str));
console.log("DONE");
}).fail(function() {
console.log("fail");
});
partially tested
See comments in code for explanation and please ask if there's anything you don't follow.