Google Datastore NodeJS: combine (union) multiple sets of keys-only results

I am working with NodeJS on Google App Engine with the Datastore database.
Note that this question is an extension of my original post, not a duplicate.
Because Datastore does not support the OR operator, I need to run multiple queries and combine the results.
Here is my approach (based on the selected answer from my original post):
Run keys-only queries in the first stage
Combine the keys obtained into a single list (with de-duplication)
Fetch the entities by simple key lookups
I have achieved step 1 by running two separate queries with the async module's parallel method. I need help with step 2.
Question: How to combine (union) two lists of entity keys into a single list (including de-duplication) efficiently?
The code I have below successfully performs both the queries and returns two objects getEntities.requesterEntities and getEntities.dataownerEntities.
// Requirement: get entities for Transfer Requests that match either the Requester OR the Dataowner
async.parallel({
    requesterEntities: function(callback) {
        getEntitiesByUsername('TransferRequest', 'requester_username', loggedin_username, (treqs_by_requester) => {
            // Callback: pass the response in as a parameter
            callback(null, treqs_by_requester);
        });
    },
    dataownerEntities: function(callback) {
        getEntitiesByUsername('TransferRequest', 'dataowner_username', loggedin_username, (treqs_by_dataowner) => {
            // Callback: pass the response in as a parameter
            callback(null, treqs_by_dataowner);
        });
    }
}, function(err, getEntities) {
    console.log(getEntities.requesterEntities);
    console.log(getEntities.dataownerEntities);
    //***HOW TO COMBINE (UNION) BOTH OBJECTS CONTAINING DATASTORE KEYS?***//
});
function getEntitiesByUsername(kind, property_type, loggedin_username, getEntitiesCallback) {
    // Create datastore query
    const treq_history = datastore.createQuery(kind);
    // Set query conditions
    treq_history.filter(property_type, loggedin_username);
    treq_history.select('__key__');
    // Run datastore query
    datastore.runQuery(treq_history, function(err, entities) {
        if (err) {
            console.log('Transfer Request History JSON unable to return data results for Transfer Request. Error message: ', err);
        } else {
            getEntitiesCallback(entities);
        }
    });
}

I was able to combine the two sets of entity keys by iterating over both arrays, comparing the ID value of each entity key, and building a new array containing only the unique keys.
Note: The complete solution is posted as an answer to my original post.
// Union of two arrays of entity keys
function unionEntityKeys(arr1, arr2) {
    var arr3 = [];
    for (var i in arr1) {
        var shared = false;
        for (var j in arr2) {
            if (arr2[j][datastore.KEY]['id'] == arr1[i][datastore.KEY]['id']) {
                shared = true;
                break;
            }
        }
        if (!shared) {
            arr3.push(arr1[i]);
        }
    }
    arr3 = arr3.concat(arr2);
    return arr3;
}
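The nested loops above are O(n×m) in the two result sizes. For larger result sets, a single pass over both arrays with a Map keyed on the key's ID avoids the quadratic comparisons. A minimal sketch, assuming the same datastore.KEY symbol as above (keys created with string names expose name rather than id, hence the fallback):

    // Union via a Map: entries with the same key ID are de-duplicated,
    // with entries from arr2 overwriting their arr1 counterparts
    function unionEntityKeys(arr1, arr2) {
        var byId = new Map();
        arr1.concat(arr2).forEach(function(entity) {
            var key = entity[datastore.KEY];
            byId.set(key.id || key.name, entity); // id for numeric IDs, name for named keys
        });
        return Array.from(byId.values());
    }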

Related

Stored Procedure to update or insert docs that belong to multiple partition keys

I have a list of documents that belong to a partitioned collection. Instead of querying for every document from the .NET client and then doing either an update or an insert, I thought I could use a stored procedure to accomplish this.
What I did not initially realize is that stored procedures are executed in the transaction scope of a single partition key, so I am getting "PartitionKey value must be supplied for this operation."
The thing is that the documents I am trying to upsert may belong to different partitions. How can I accomplish this in the stored procedure? In my case, the SP is useless unless it can operate on multiple partitions.
This is how I constructed my SP:
function upsertEcertAssignments(ecerts) {
    var collection = getContext().getCollection();
    var collectionLink = collection.getSelfLink();
    var response = getContext().getResponse();
    // Validate input
    if (!ecerts) throw new Error("The ecerts is null or undefined");
    if (ecerts.length == 0) throw new Error("The ecerts list size is 0");
    // Recursively call the 'process' function
    processEcerts(ecerts, 0);
    function processEcerts(ecerts, index) {
        if (index >= ecerts.length) {
            response.setBody(index);
            return;
        }
        var query = {
            query: "SELECT * FROM DigitalEcerts c WHERE c.code = @code AND c.collectionType = @type",
            parameters: [{name: "@code", value: ecerts[index].code}, {name: "@type", value: 0}]
        };
        var isQueryAccepted = collection.queryDocuments(collectionLink, query, {partitionKey: ecerts[index].code}, function(err, foundDocuments, foundOptions) {
            if (err) throw err;
            if (foundDocuments.length > 0) {
                var existingEcert = foundDocuments[0];
                ecerts[index].id = existingEcert.id;
                var isAccepted = __.replaceDocument(existingEcert._self, ecerts[index], function(err, updatedEcert, replacedOptions) {
                    if (err) throw err;
                    processEcerts(ecerts, index + 1);
                });
                if (!isAccepted) {
                    response.setBody(index);
                }
            } else {
                var isAccepted = __.createDocument(__.getSelfLink(), ecerts[index], function(err, insertedEcert, insertedOptions) {
                    if (err) throw err;
                    processEcerts(ecerts, index + 1);
                });
                if (!isAccepted) {
                    response.setBody(index);
                }
            }
        });
        if (!isQueryAccepted) {
            response.setBody(index);
        }
    }
}
From .NET, if I call it like this, I get the partitionKey value problem:
var continuationIndex = await _docDbClient.ExecuteStoredProcedureAsync<int>(UriFactory.CreateStoredProcedureUri(_docDbDatabaseName, _docDbDigitalEcertsCollectionName, "UpsertDigitalMembershipEcertAssignments"), digitalEcerts);
If I call it with a partition key, it works...but it is useless:
var continuationIndex = await _docDbClient.ExecuteStoredProcedureAsync<int>(UriFactory.CreateStoredProcedureUri(_docDbDatabaseName, _docDbDigitalEcertsCollectionName, "UpsertDigitalMembershipEcertAssignments"), new RequestOptions { PartitionKey = new PartitionKey(digitalEcerts[0].Code) }, digitalEcerts.Take(1).ToList());
I appreciate any pointers.
Thanks.
By the sound of it, your unique ID is a combination of code and type. I would recommend making your id property the combination of the two.
This guarantees that your id is unique, and it also eliminates the need to query for it.
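A minimal sketch of that idea (the field names follow the question; the separator is an arbitrary choice):

    // Build a deterministic id from the two fields that make the ecert unique,
    // so it can be written directly without a lookup query first
    function withCompositeId(ecert) {
        ecert.id = ecert.code + '_' + ecert.collectionType;
        return ecert;
    }

Inside a stored procedure the document could then be written in one call, e.g. __.upsertDocument(__.getSelfLink(), withCompositeId(ecerts[index]), callback), assuming the server-side upsertDocument API is available in your collection's runtime.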
If the collection the stored procedure is registered against is a
single-partition collection, then the transaction is scoped to all the
documents within the collection. If the collection is partitioned,
then stored procedures are executed in the transaction scope of a
single partition key. Each stored procedure execution must then
include a partition key value corresponding to the scope the
transaction must run under.
You could refer to the description above, which is mentioned here. We can query documents across partitions by setting EnableCrossPartitionQuery to true in the FeedOptions parameter. However, RequestOptions has no such property for executing a stored procedure.
So it seems you have to provide a partition key when you execute an SP. Of course, the SP can be replaced by the client-side upsert function. That makes the SP useless from the perspective of the business logic, but for bulk operations within one partition an SP can still relieve some performance pressure, because it runs on the server side.
Hope it helps you.

NodeJS: Insert record in dynamodb if not exist

I need to store users' info in DynamoDB and send a mail to each user if it doesn't already exist in the DynamoDB table. I am doing this in a for loop. The list contains only 2 records. The issue is that only the second record gets inserted in the table, and the mail is sent twice to the same user. Here is the code:
module.exports.AddUser = function(req, res, usersList, departmentId) {
    var _emailId = "";
    var _userName = "";
    var _departmentId = departmentId;
    for (var i = 0; i < usersList.length; i++) {
        _emailId = usersList[i].emailId;
        _userName = usersList[i].userName;
        var params = {
            TableName: "UsersTable",
            Key: {
                "emailId": _emailId,
                "departmentId": _departmentId
            }
        };
        docClient.get(params, function(err, data) {
            if (!err) {
                if (!data.items)
                    AddUserAndSendEmail("UsersTable", _emailId, _userName);
                // The above function is being called twice, but for the same user.
                // It has a check, so it is not inserting the same record twice, but
                // it is sending two mails to the same user.
            }
        });
    }
    res.end("success");
}
function AddUserAndSendEmail(tableName, emailId, _userName) {
    var params = {
        TableName: tableName,
        Item: {
            "emailId": emailId,
            "departmentId": 101 // Default department
        }
    };
    docClient.put(params, function(err, data) {
        if (!err) {
            // Send email code here
        } else {
            console.log("error");
        }
    });
}
What could be the reason for this strange behavior? Really frustrated, I am about to give up on this.
1) Please note that DynamoDB is eventually consistent. If you insert an item and immediately check whether it exists, it may not always be found in the database.
This means the second iteration of the loop may not always find the item inserted in the first iteration.
2) If the item already exists in the table, the Put API will update the item and return a successful response.
This means the Put will succeed for the same email id and department id in the second iteration, because it updates the record if it is already present.
GetItem – The GetItem operation returns a set of Attributes for an
item that matches the primary key. The GetItem operation provides an
eventually consistent read by default. If eventually consistent reads
are not acceptable for your application, use ConsistentRead.
PutItem – Creates a new item, or replaces an old item with a new item
(including all the attributes). If an item already exists in the
specified table with the same primary key, the new item completely
replaces the existing item. You can also use conditional operators to
replace an item only if its attribute values match certain conditions,
or to insert a new item only if that item doesn’t already exist.
Based on the above points, there is a possibility of getting two emails if you have the same email id and department id in the array.
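If the intent is strictly "insert only if not present", the conditional write mentioned in the quote above avoids the read-then-write race entirely. A minimal sketch, assuming the same docClient (an AWS DocumentClient) and table as in the question:

    // Put only if no item with this key exists yet; a
    // ConditionalCheckFailedException means the user was already registered
    var params = {
        TableName: "UsersTable",
        Item: {"emailId": emailId, "departmentId": departmentId},
        ConditionExpression: "attribute_not_exists(emailId)"
    };
    docClient.put(params, function(err, data) {
        if (err && err.code === "ConditionalCheckFailedException") {
            // Item already exists: do not send the email again
        } else if (err) {
            console.log(err);
        } else {
            // New item created: safe to send the email exactly once
        }
    });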

how to improve the view with map/reduce in couchdb and nodejs

I'm using nodejs with the cradle module to interact with the couchdb server. The question is to help me understand the reduce process so I can improve the view query...
For example, I need to get a user's data from their ID with a view like this:
map: function (doc) { emit(null, doc); }
And in node.js (with cradle):
db.view('users/getUserByID', function (err, resp) {
    var found = false;
    resp.forEach(function (key, row, id) {
        if (id == userID) {
            found = true;
            userData = row;
        }
    });
    if (found) {
        // good, works
    }
});
As you can see, this is really bad for a large number of documents (users in the database), so I need to improve this view with a reduce, but I don't know how because I don't understand how reduce works. Thank you.
First of all, you're doing views wrong. Views are indexes in the first place, and you shouldn't use them for full-scan operations; that's inefficient and wrong. Use the power of the B-tree index with the key, startkey and endkey query parameters, and emit the field you'd like to search by as the key value.
Second, your example could easily be transformed to:
db.get(userID, function (err, body) {
    if (!err) {
        // found!
    }
});
In your loop you're simply comparing each row's document ID against your userID value, so there is no need for that loop: you may request the document by its ID directly.
Third, if your userID value doesn't match the document's ID, your view should be:
function (doc) { emit(doc.userID, null); }
and your code will look like this:
db.view('users/getUserByID', {key: userID}, function (err, resp) {
    if (!err) {
        // found!
    }
});
Simple. Effective. Fast. If you need the matched doc, use the include_docs: true query parameter to fetch it.
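For example (a sketch: the raw CouchDB view response carries the joined document on each row under doc; exactly how cradle surfaces the rows varies by version):

    // Fetch the matching row together with its full document
    db.view('users/getUserByID', {key: userID, include_docs: true}, function (err, resp) {
        if (!err && resp.rows && resp.rows.length > 0) {
            var userDoc = resp.rows[0].doc; // assumption: rows are exposed on the response
        }
    });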

MongoDB Async - Find or create docs; return array of them

This is likely a simple question, but I'm relatively new to asynchronous programming and I'm looking for somebody to point me in the right direction.
My question is this: what is the best way to find or create a number of documents from an array of names (I'm using Mongoose) and then return an array of their _ids?
So to be clear, I want to:
Given an array of names, find or create a document for each name
Return an array of the existing or newly created documents' _ids
You can use the async module and, within it, its async.parallel() method:
https://github.com/caolan/async#quick-examples
async.parallel([
    function() { ... },
    function() { ... }
], callback);
Or you can use promises and then Q.all() to get the array of ids back -
https://github.com/kriskowal/q#combination
Q.all(arrayOfFindOps).then(function(rows) {
    return _.pluck(rows, '_id');
});
If you don't want to use either of the above and would rather do it with plain callbacks, then you have to keep track of a completion counter: keep adding the ids to an array, and when the counter reaches the array length, call another function with the array you built.
This code can easily be modified to meet your requirements. Call this function for each document that needs to be created if it doesn't already exist.
function(req, reply) {
    // Return the document; if not found, create it.
    docModel.findOne({'name': req.params.name}, function (err, doc) {
        if (err) {
            // handle error
        }
        if (doc === null) {
            // Find failed, time to create.
            doc = new docModel({'name': req.params.name});
            doc.save(function (err) {
                if (err) {
                    // handle error
                }
                reply(doc._id); // reply once the new document has been saved
            });
        } else {
            return reply(doc._id);
        }
    });
}
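To cover the full requirement (an array of names in, an array of _ids out), the same find-or-create step can also be run over the whole array in one go, for instance with async.map and an atomic upsert. A sketch, assuming the docModel from above has a name field:

    var async = require('async');

    // Find or create one document per name, then collect all the _ids.
    // findOneAndUpdate with upsert is atomic, so there is no find/save race.
    function findOrCreateIds(names, done) {
        async.map(names, function (name, cb) {
            docModel.findOneAndUpdate(
                {name: name},
                {$setOnInsert: {name: name}},
                {upsert: true, new: true}, // return the existing or newly created doc
                function (err, doc) {
                    cb(err, doc && doc._id);
                }
            );
        }, done); // done(err, arrayOfIds)
    }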

Creating incrementing numbers with mongoDB

We have an order system where every order has an id. For accounting purposes we need a way to generate invoices with incrementing numbers. What is the best way to do this without using an SQL database?
We are using node to implement the application.
http://www.mongodb.org/display/DOCS/How+to+Make+an+Auto+Incrementing+Field
The first approach is keeping counters in a side document:
One can keep a counter of the current _id in a side document, in a
collection dedicated to counters. Then use FindAndModify to atomically
obtain an id and increment the counter.
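A minimal sketch of that pattern with the Node.js driver (the counters collection name and seq field are illustrative; older drivers spell the option returnOriginal: false instead of returnDocument: 'after'):

    // Atomically increment the counter and return the next invoice number
    function getNextSequence(db, name, callback) {
        db.collection('counters').findOneAndUpdate(
            {_id: name},
            {$inc: {seq: 1}},
            {upsert: true, returnDocument: 'after'},
            function (err, result) {
                callback(err, result && result.value && result.value.seq);
            }
        );
    }

    // Usage: getNextSequence(db, 'invoiceNumber', function (err, seq) { ... });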
The other approach is to loop optimistically and handle the duplicate key error (code 11000) by continuing and incrementing the id, to cover the edge case of collisions. That works well unless there are highly concurrent writes to a specific collection.
One can do it with an optimistic concurrency "insert if not present"
loop.
But be aware of the warning on that page:
Generally in MongoDB, one does not use an auto-increment pattern for
_id's (or other fields), as this does not scale up well on large database clusters. Instead one typically uses Object IDs.
Other things to consider:
Timestamp - a unique long, but not incrementing (based on epoch)
Hybrid approach - apps don't necessarily have to pick one storage option.
Come up with your own id mechanism based on things like customer, date/time parts, etc. that you generate and handle collisions for. Depending on the scheme, collisions can be much less likely. The result is not necessarily incrementing, but it is unique and has a well-defined, readable pattern.
I did not find any working solution, so I implemented the "optimistic loop" in node.js to get auto-incrementing integer ID fields. It uses the async module to implement the while loop.
// Insert the document into the targetCollection, using auto-incremented integer IDs instead of UIDs.
function insertDocument(targetCollection, document, callback) {
    var keepRunning = true;
    var seq = 1;
    // $type 16/18: integer values
    var isNumericQuery = {$or: [{"_id": {$type: 16}}, {"_id": {$type: 18}}]};
    async.whilst(testFunction, mainFunction, afterFinishFunction);

    // Called before each execution of mainFunction(). Works like the stop criterion of a while loop.
    function testFunction() {
        return keepRunning;
    }

    // Called each time testFunction() passes. It is passed a function (next) which must be called after it has completed.
    function mainFunction(next) {
        findCursor(targetCollection, findCursorCallback, isNumericQuery, {_id: 1});
        function findCursorCallback(cursor) {
            cursor.sort({_id: -1}).limit(1);
            cursor.each(cursorEachCallback);
        }
        function cursorEachCallback(err, doc) {
            if (err) console.error("ERROR: " + err);
            if (doc != null) {
                seq = doc._id + 1;
                document._id = seq;
                targetCollection.insert(document, insertCallback);
            }
            if (seq === 1) {
                document._id = 1;
                targetCollection.insert(document, insertCallback);
            }
        }
        function insertCallback(err, result) {
            if (err) {
                console.dir(err);
            } else {
                keepRunning = false;
            }
            next();
        }
    }

    // Called once after testFunction() fails and the loop has ended.
    function afterFinishFunction(err) {
        callback(err, null);
    }
}

// Call find() with optional query and projection criteria and return the cursor object.
function findCursor(collection, callback, optQueryObject, optProjectionObject) {
    if (optProjectionObject === undefined) {
        optProjectionObject = {};
    }
    var cursor = collection.find(optQueryObject, optProjectionObject);
    callback(cursor);
}
Call it with:
    insertDocument(db.collection(collectionName), documentToSave, function(err) { if (err) console.error(err); });
