DynamoDB JavaScript SDK batchWriteItem doesn't complete unless I increase write capacity - node.js

I'm running a series of unit tests (node.js 4.x, aws-sdk, mocha) which load data into a table before each test and then clear the table after the test.
I have two tests that are failing because of a ConditionExpression which triggers a ConditionCheckFailedException. But if I increase the read/write capacity the tests pass.
It's my understanding that the SDK handles throttling exceptions and retries them for you, so why wouldn't my tests just run slower and pass? Instead it seems as though the tests fail to complete the scan -> batchWriteItem process, and so there are records still left in the table when a new test starts.
I'm told by team members that they've seen similar problems and that they just increased the throughput to fix them. This doesn't sit right with me. Either I'm doing something wrong and there's a race condition in my tests, or there should be a pattern I can implement to make sure my operations complete while being throttled. I should be able to use throttling metrics to tell me when I need to increase throughput, but I should still be able to keep retrying until I run out of memory.
Has anyone else run into this and what have you done to handle the problem?
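For reference, the SDK's built-in retry behaviour can also be tuned when constructing the client. A sketch against aws-sdk v2 (the values are illustrative, not what I'm running):

var AWS = require('aws-sdk');

// maxRetries and retryDelayOptions.base control the SDK's own
// exponential backoff (DynamoDB's default base delay is 50 ms)
var client = new AWS.DynamoDB({
  maxRetries: 10,
  retryDelayOptions: { base: 100 }
});

Note that these retries only cover throttled or failed requests; they do not resend items that a successful batchWriteItem call reports back as unprocessed (see below).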

After some debugging I noticed the UnprocessedItems response element. After looking up UnprocessedItems in the docs I realized I should have read more closely. The code below runs a retry loop with a delay (exponential back-off):
var _ = require('lodash');    // used to map scan results to delete requests
var async = require('async'); // drives the scan -> batchWriteItem loop

var clearEventTable = function (tableName, client, cleared) {
  var exclusiveStartKey = null;
  var retryCount = 0;

  var read = function (query, callback) {
    client.scan(query, function (err, page) {
      if (err) {
        console.log(err);
        return callback(err);
      }
      retryCount = 0;
      exclusiveStartKey = page.LastEvaluatedKey || null;
      if (page.Count === 0) {
        return callback(null, {});
      }
      if (page.Count < 25 && exclusiveStartKey) {
        console.log("read capacity limit reached: " + JSON.stringify(page, null, 2));
      }
      var keys = _.map(page.Items, function (n) {
        return { DeleteRequest: { Key: n } };
      });
      var batch = {
        RequestItems: {},
        ReturnConsumedCapacity: "INDEXES",
        ReturnItemCollectionMetrics: "SIZE"
      };
      batch.RequestItems[tableName] = keys;
      callback(null, batch);
    });
  };

  var write = function (batch, callback) {
    if (batch && batch.RequestItems) {
      client.batchWriteItem(batch, function (err, result) {
        if (err) {
          console.log(err);
          return callback(err);
        }
        if (Object.keys(result.UnprocessedItems).length !== 0) {
          console.log("Retry batchWriteItem: " + JSON.stringify(result, null, 2));
          retryCount++;
          var retry = {
            RequestItems: result.UnprocessedItems,
            ReturnConsumedCapacity: "INDEXES",
            ReturnItemCollectionMetrics: "SIZE"
          };
          // retry with exponential backoff; the function wrapper matters:
          // setTimeout(write(retry, callback), delay) would call write immediately
          var delay = retryCount > 0 ? (50 * Math.pow(2, retryCount - 1)) : 0;
          setTimeout(function () { write(retry, callback); }, delay);
          return;
        }
        callback(null, result);
      });
    } else {
      callback(null);
    }
  };

  var params = {
    TableName: tableName,
    ProjectionExpression: "aggregateId,id",
    Limit: 25, // max 25 requests per batchWriteItem
    ConsistentRead: false,
    ReturnConsumedCapacity: "TOTAL"
  };

  async.doWhilst(function (next) {
    // retrieve entities
    if (exclusiveStartKey)
      params.ExclusiveStartKey = exclusiveStartKey;
    async.compose(write, read)(params, function (err, result) {
      if (err) next(err);
      else next(null, result);
    });
  }, function () {
    // test if we need to load more
    return exclusiveStartKey !== null;
  }, function (err) {
    // return results
    if (err) {
      console.log(err);
      return cleared(err);
    }
    return cleared(null);
  });
};

Also take a look at the amount of memory provisioned for the Lambda. It might be too low, and in my experience hitting the maximum leads to unpredictable results.
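If you want to confirm what the function is actually running with, the Node.js Lambda context exposes the configured memory limit; a minimal sketch:

exports.handler = function (event, context, callback) {
  // memoryLimitInMB reports the memory provisioned for this function
  console.log('memory limit (MB):', context.memoryLimitInMB);
  // ... test/cleanup logic ...
  callback(null);
};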

Related

Getting Error Like RequestsLimitError: You just made too many request to instagram API in node js?

I am working with the Instagram API in Node.js. I have an array storing over 20k Instagram IDs. I loop over that array with forEach, take the IDs one by one, and fetch each bio, but then I get this error: RequestsLimitError: You just made too many request to instagram API. I tried adding a timeout after every 5 calls, but I still get the same error.
Here is my code:
var InstaId = ["12345687" /* ...20k more IDs stored here in the array... */];
var changesessionFlage = 0;
async.each(InstaId, function (id, callback) {
  async.parallel([
    function (cb) {
      if (id) {
        setTimeout(function () {
          Client.Account.getById(sess, id).then(function (bio) {
            console.log("changesessionFlage" + changesessionFlage);
            changesessionFlage++
            //console.log("bio : ", bio._params); // here i am getting bio one by one user
            if (changesessionFlage == 6) {
              changesessionFlage = 0;
            }
            cb(null, bio._params);
          })
          .catch(function (err) {
            console.log("get boi: ", err)
            cb(null, bio._params);
          })
        }, (changesessionFlage == 5) ? 10000 : 0)
      }
    }
  ], function (err, results) {
    if (err) {
      console.log(err);
      return;
    }
    Result = results
    callback();
  });
}, function (err) {
  if (err) {
    console.log(err);
    return;
  }
  else {
    console.log("Result=>", Result)
    if (Result) {
      console.log("Result[0]=>", Result[0])
      var ws = XLSX.utils.json_to_sheet(Result[0]);
      var wb = XLSX.utils.book_new();
      XLSX.utils.book_append_sheet(wb, ws, "People");
      var wbout = XLSX.write(wb, { bookType: 'xlsx', type: 'binary' });
      res.end(wbout, 'binary');
    }
  }
});
Does anyone know how to fix this issue? Please help.
Your setTimeout is used incorrectly; all the API calls are made at once after the 10000 ms delay.
Since this is a one-time job, just split the 20K usernames into 4K batches and execute one batch every hour. This way you will stay under the 5k/hr API limit.
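A sketch of that batching idea (not the original answer's code): it reuses the InstaId array and the Client.Account.getById call from the question, and assumes lodash's _.chunk and async's eachSeries are available:

var _ = require('lodash');
var async = require('async');

// split the ~20K ids into batches of 4000; start one batch per hour
// and keep each batch sequential to avoid bursts
var batches = _.chunk(InstaId, 4000);

batches.forEach(function (batch, batchIndex) {
  setTimeout(function () {
    async.eachSeries(batch, function (id, next) {
      Client.Account.getById(sess, id)
        .then(function (bio) {
          console.log(bio._params);
          next();
        })
        .catch(next);
    }, function (err) {
      if (err) console.log('batch ' + batchIndex + ' failed:', err);
    });
  }, batchIndex * 60 * 60 * 1000); // one hour between batch starts
});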

Nodejs crashes when database connection fails

When my database server is not available and any endpoint of my node-express REST service (like hiExpress) is called, Node.js crashes and the console reports:
sql server connection closed
I do not want this to happen; it should either go to the error function or at least be caught by the catch block. What can I do to avoid the crash when the database server is not available? I am using the following code, which works fine as long as the database server is available.
var sqlServer = require('seriate');
app.get('/hiExpress', function (req, res) {
  var sr = { error: '', message: '' };
  var sql = 'select * from table1 where id=? and name=?';
  var params = { id: 5, name: 'sami' };
  exeDB(res, sr, sql, params); //sent only 4 parameters (not 6)
});
function exeDB(res, sr, sql, params, callback, multiple) {
  try {
    var obj = {};
    for (p in params) {
      if (params.hasOwnProperty(p)) {
        obj[p] = {
          type: sqlServer.VARCHAR,
          val: params[p]
        };
      }
    }
    var exeOptions = {
      query: sql,
      params: obj
    };
    if (multiple) {
      exeOptions.multiple = true;
    }
    sqlServer.execute(sqlServerConfigObject, exeOptions).then(function (results) {
      sr.data = results;
      if (callback)
        callback(sr);
      else
        res.json(sr); //produces result when success
    }, function (err) {
      //sr.message = sql;
      console.log(11);
      sr.error = err.message;
      res.json(sr);
    });
  }
  catch (ex) {
    console.log(21);
    sr.error = ex.message;
    res.json(sr);
  }
}
Why I preferred to use seriate:
I had never been very comfortable with node-sql, especially when it came to the multiple-queries option, even without using a transaction. Seriate also makes parameterized queries easy.
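For illustration, a single parameterized query with seriate might look like the sketch below (the query and values are placeholders based on the question's code; seriate's @name parameter syntax is assumed):

var sql = require('seriate');

sql.execute(sqlServerConfigObject, {
  query: 'select * from table1 where id = @id and name = @name',
  params: {
    id: { type: sql.INT, val: 5 },
    name: { type: sql.VARCHAR, val: 'sami' }
  }
}).then(function (results) {
  console.log(results);
}, function (err) {
  console.log(err);
});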
You can use a transaction without seriate, using async, like below:
async.series([
  function (callback) { db.run('begin transaction', callback) },
  function (callback) { db.run( ..., callback) },
  function (callback) { db.run( ..., callback) },
  function (callback) { db.run( ..., callback) },
  function (callback) { db.run('commit transaction', callback) },
], function (err, results) {
  if (err) {
    db.run('rollback transaction');
    return console.log(err);
  }
  // if some queries return rows then results[query-no] contains them
})
The code is quite messy; passing the req and res params into the DB layer is not a good idea.
Try changing exeDB. I'm not sure, but you probably haven't attached an error handler to the promise:
function exeDB(res, sr, sql, params, callback, multiple) {
  // It will execute with no error, no doubt
  var obj = {};
  for (p in params) {
    if (params.hasOwnProperty(p)) {
      obj[p] = {
        type: sqlServer.VARCHAR,
        val: params[p]
      };
    }
  }
  var exeOptions = {
    query: sql,
    params: obj
  };
  if (multiple) {
    exeOptions.multiple = true;
  }
  // Potential problem is here.
  // Catch is useless because the code below is asynchronous.
  sqlServer.execute(sqlServerConfigObject, exeOptions).then(function (results) {
    sr.data = results;
    if (callback)
      callback(sr);
    else
      res.json(sr); //produces result when success
  }).catch(function (err) { // !!! You must provide an on-error handler
    console.log(err);
    sr.error = err.message;
    res.json(sr); // respond even on failure so the request doesn't hang
  });
}

Mongoose : Unable to upsert data in for loop

I want to upsert documents in the following way:
for (var i = 0; i < req.body.app_events.length; i++) {
  console.log(req.body.app_events[i].event_key);
  //delete upsertData._id;
  Appusers.update({ app_key: req.body.app_key, e_key: req.body.app_events[i].event_key }, {
    $set: {
      app_key: req.body.app_key,
      e_key: req.body.app_events[i].event_key,
      e_name: req.body.app_events[i].event_name
    }
  }, { upsert: true }, function (err, data) {
    if (err) return console.log(err);
    console.log(data);
  });
}
It is creating a single document with _id only. I want to insert the document if it doesn't exist, and otherwise update it, on the basis of e_key and app_key.
You really should not be calling asynchronous functions inside a synchronous loop. What you need is something that respects the callback on completion of each loop cycle and will alert you when the update is complete. This makes incrementing counters externally safe.
Use something like async.whilst for this:
var i = 0;
async.whilst(
  function () { return i < req.body.app_events.length; },
  function (callback) {
    console.log(req.body.app_events[i].event_key);
    //delete upsertData._id;
    Appusers.findOneAndUpdate(
      { app_key: req.body.app_key, e_key: req.body.app_events[i].event_key },
      {
        $set: {
          app_key: req.body.app_key,
          e_key: req.body.app_events[i].event_key,
          e_name: req.body.app_events[i].event_name
        }
      },
      { upsert: true },
      function (err, data) {
        if (err) return callback(err);
        console.log(data);
        i++;
        callback();
      }
    );
  },
  function (err) {
    if (err)
      console.log(err);
    // else: done
  }
);
Now the loop is wrapped with a "callback" which is invoked from within the callback to the update method. Also, if you expect a "document" back then you should be using .findOneAndUpdate(), as .update() just modifies the content and returns the number affected.
When the loop is complete, or when an error is passed to the callback, handling moves to the last function block, where you complete your call or invoke other callbacks as required.
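To make the .update() vs .findOneAndUpdate() difference concrete, a small sketch (query and update stand in for the filter and $set documents used above; the exact shape of the raw result from .update() varies by Mongoose version):

// .update() only reports what happened; no document comes back
Appusers.update(query, update, { upsert: true }, function (err, raw) {
  console.log(raw); // e.g. counts / upserted info
});

// .findOneAndUpdate() hands back the document itself
Appusers.findOneAndUpdate(query, update, { upsert: true, new: true }, function (err, doc) {
  console.log(doc); // the updated (or upserted) document
});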
Better than the above: dig into the native driver methods for Bulk operations. You need to be careful that an open connection to the database has already been established. If unsure about this, try to always wrap the application logic in:
mongoose.connection.once('open', function () {
  // app logic here
});
This makes sure the connections have been made. The mongoose methods themselves "hide" this away, but the native driver methods have no knowledge of it.
But this is the fastest possible way to update the data:
var i = 0;
var bulk = Appusers.collection.initializeOrderedBulkOp();
async.whilst(
  function () { return i < req.body.app_events.length; },
  function (callback) {
    console.log(req.body.app_events[i].event_key);
    bulk.find(
      { app_key: req.body.app_key, e_key: req.body.app_events[i].event_key }
    ).upsert().updateOne({
      $set: {
        app_key: req.body.app_key,
        e_key: req.body.app_events[i].event_key,
        e_name: req.body.app_events[i].event_name
      }
    });
    i++;
    if (i % 1000 == 0) {
      // send the batch of 1000 queued operations in one request
      bulk.execute(function (err, response) {
        if (err) return callback(err);
        console.log(response);
        bulk = Appusers.collection.initializeOrderedBulkOp();
        callback();
      });
    } else {
      callback();
    }
  },
  function (err) {
    if (err) {
      console.log(err);
    } else if (i % 1000 != 0) {
      // flush whatever is left in the final partial batch
      bulk.execute(function (err, response) {
        if (err) console.log(err);
        console.log(response);
        // done
      });
    }
    // else: done
  }
);
The Bulk methods build up "batches" of operations (in this case 1000 at a time) and send them all to the server in one request with one response (per batch). This is a lot more efficient than contacting the database once for every write.

Searching then adding record in Redis using Node.js

I may be overtired, but for the life of me I cannot understand why the following is not working. I am trying to check whether a string exists and, if it does not, add it to a Redis database:
options = options || {};
var counter = 1,
  client = redis.getClient();
options.name = options.name || '';
if (_.isEmpty(options.name)) {
  return callback('Cannot add name. No name supplied');
} else {
  options.name = options.name.trim();
}
client.get('mySavedKeys' + options.name, function (err, data) {
  if (err) { return callback(err); }
  if (!_.isNull(data)) {
    console.log('Name found', options.name);
    return callback(null, data);
  } else {
    counter += 1;
    console.log('Name not found', options.name);
    console.log('ID', counter)
    client2.set('mySavedKeys' + options.name, counter, function (err) {
      if (err) { return callback(err); }
      console.log('Added', options.name);
      return callback(null, counter);
    });
  }
});
If I run an array of names through async.each, it seems to run all the 'get' functions first and then the 'set' functions, so I am getting duplicate insertions.
I'm sure the answer is obvious but I cannot see the problem.
If you use async.eachSeries, you ensure that each get/set pair completes before the next name is processed, rather than all the gets running in parallel.
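A minimal sketch of that change, assuming the code above is wrapped in a helper called addName(options, callback) (the helper name and the names array are hypothetical):

var async = require('async');

// process names one at a time so each get/set pair finishes
// before the next name is checked
async.eachSeries(names, function (name, next) {
  addName({ name: name }, function (err, id) {
    if (err) return next(err);
    console.log('stored', name, 'as ID', id);
    next();
  });
}, function (err) {
  if (err) console.log(err);
});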

While loop to check uniqueness of custom id

I have a MongoDB database set up with some objects that have a unique code (not the primary key).
I should also note that I'm using NodeJS and this code is in my server.js to connect to the MongoDB database.
To generate a new code, I generate one randomly and I want to check if it already exists. If not then we use it no problem, but if it already exists I want to generate another code and check it again. This is the code I use to check if the id already exists:
function createPartyId(callback) {
  var min = 10000, max = 99999;
  var partyId = -1, count = -1;
  async.whilst(
    function () { return count != 0; },
    function (callback) {
      partyId = min + Math.floor(Math.random() * (max - min + 1));
      partyId = 88888;
      getPartyIdCount(partyId, function (num) {
        count = num;
      });
    },
    function (err) {
    }
  );
}
function getPartyIdCount(partyId, callback) {
  count = -1;
  db.db_name.find({ id: partyId }, function (err, records) {
    if (err) {
      console.log("There was an error executing the database query.");
      callback(count);
    }
    count = records.length;
    callback(count);
  });
}
First of all, is there any particular reason you're not using a simple incrementing number sequence? This type of code is prone to inefficiency: the more numbers you generate, the greater the chance of collisions, which means you'll spend more time generating an ID for your data than on the rest of your processing. Not a good idea.
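For reference, the usual way to get such a sequence in MongoDB is an atomic counter document. A minimal sketch (the counters collection and the nextPartyId helper are hypothetical, and the exact findAndModify signature depends on your driver; this assumes a mongojs-style API):

// atomically increment and read a counter document; the returned
// seq value is a unique, monotonically increasing party id
function nextPartyId(callback) {
  db.counters.findAndModify({
    query: { _id: 'partyId' },
    update: { $inc: { seq: 1 } },
    new: true,    // return the document after the increment
    upsert: true  // create the counter on first use
  }, function (err, doc) {
    if (err) return callback(err);
    callback(null, doc.seq);
  });
}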
But I can still tell you what's going wrong.
OK, so getPartyIdCount() will only, ever, always, without fail, return undefined (or, basically, nothing).
Your mongo call processes the return value in a callback, and that callback doesn't assign its value to anything, so return records.length just gets lost into nothingness.
You've mixed up createPartyId(), which it appears you want to run synchronously, with your mongo call, which must run asynchronously.
return always goes with the nearest containing function, so in this case it goes with function(err, records), not function getPartyIdCount(partyId).
(Expanding my comment from above)
The issue is that createPartyId is an asynchronous function, but you're trying to return the value synchronously. That won't work. Once you touch an async operation, the rest of the call stack has to be async as well.
You don't include the code that's calling this, but I assume you want it to be something like:
var partyId = createPartyId();
// do stuff...
That's not going to work. Try this:
function createPartyId(callback) {
  var min = 10000, max = 99999;
  var partyId = -1, count = -1;
  async.whilst(
    // keep looping while the last candidate id was already taken
    function () { return count !== 0; },
    function (callback) {
      partyId = min + Math.floor(Math.random() * (max - min + 1));
      getPartyIdCount(partyId, function (err, num) {
        if (!err) {
          count = num;
        }
        callback(err);
      });
    },
    function (err) {
      // this is called when the loop ends, error or not
      // Invoke outer callback to return the generated id
      callback(err, partyId);
    }
  );
}
function getPartyIdCount(partyId, callback) {
  db.db_name.find({ id: partyId }, function (err, records) {
    if (err) {
      console.log("There was an error executing the database query.");
      return callback(err);
    }
    callback(null, records.length);
  });
}
(I've also adopted the default node.js convention of always returning errors as the first argument to callback functions.)
So, to use this you would do:
createPartyId(function (err, num) {
  if (err) { return aughItFellOver(err); }
  // do stuff
});
