Design DynamoDB for Large data using array. Over 400k - node.js

I'm worked with node.js and for record event-time and event-data, I want to use AWS DynamoDB Table.
single data and one-key-data schema sample like under.
var want_to_update_data = [ // contain json more or same than 1
{ "event-time" : "2021-01-02 10:11:12", "event-data" : 19 },
...
];
var saved_data = { // DynamoDB Saved Data Sample with Key "ABCD"
"Key" : "ABCD",
"events" : [
...
{ "event-time" : "2020-12-28 09:30:17", "event-data" : 35 },
{ "event-time" : "2021-01-01 19:11:12", "event-data" : 16 },
{ "event-time" : "2021-01-02 10:11:12", "event-data" : 19 },
...
]
}
Upper data is only for sample. json contains more key-values.
For update single json, my server code likes under.
app.get('/insert', req, res) {
var update_key = req.body.update_key; // "ABCD"
var want_to_update_data = req.body.update_data; // [ {} ]
var updateExpression = 'SET events = list_append(events, :dl)'; // dl means data-list
var expressionAttributeValues = {':dl' : want_to_update_data }; // dl means data-list
AWS.config.update({
region: "region",
endpoint: "endpoint",
accessKeyId: "access_key",
secretAccessKey: "secret_key",
});
var params = {
TableName: "TableName",
Key: update_key,
UpdateExpression: updateExpression,
ExpressionAttributeValues: expressionAttributeValues,
ReturnValues: "UPDATED_NEW"
});
var dynamodbClient = new AWS.DynamoDB.DocumentClient();
dynamodbClient.update(params, function(err, data) {
if(err) { /* handle error */ }
else { /* handle after update complete */ }
});
});
Simply, It works, but AWS DynamoDB can't change over 4kb at once.
So, if array data is bigger than 4k, list_append not worked with ValidationException: Item size to update has exceeded the maximum allowed size error message.
I want to remain all events and want to sort event-time keys.
How can I change my DynamoDB schema?
Under code is just my guess.
Is it possible?
{
"Key" : "ABCD",
/*
"events" : [
...
{ "event-time" : "2020-12-28 09:30:17", "event-data" : 35 },
{ "event-time" : "2021-01-01 19:11:12", "event-data" : 16 },
{ "event-time" : "2021-01-02 10:11:12", "event-data" : 19 },
...
]
*/
"events" : { // is it possible events['some-date'] could be sort-key?
...
"2020-12-28 09:30:17" : 35,
"2021-01-01 19:11:12" : 16,
"2021-01-02 10:11:12" : 19,
...
}
}
/*
var updateExpression = 'SET events = list_append(events, :dl)'; // dl means data-list
var expressionAttributeValues = {':dl' : want_to_update_data }; // dl means data-list
*/
// under 2 line is just for sample. json key is not fixed
var event_time = '2021-01-02 10:11:12';
var single_json = want_to_update_data[event_time];
// if number-start key is not allow(event_time), prefix append could be like "D_2021~~"
var updateExpression = `SET events.${event_time} = :dl`;
var expressionAttributeValues = {':dl' : single_json }; // single json
Thank you for read my question.
I want to create new table for save my events json datas.
(not S3. using DynamoDB)

DynamoDB has an item size limit of 400kb, including the attribute names.
According to the docs
Number of Values in List, Map, or Set
There is no limit on the number of values in a List, a Map, or a Set, as long as the item containing the values fits within the 400 KB item size limit.
It sounds like your list of 4k events, plus whatever else you're saving in the item, exceeds this 400kb limit. You can check out this nifty calculator to get an idea of the size of your item.
Instead of storing events in a list item, you might want to store them in an item collection.
For example, instead of this:
You could store event data like this
If you need to fetch events by time, you might consider making the event_time your sort key.

You cannot store infinitely large amounts of data in a DynamoDB on single Key.
Just like what you wrote, periodically back up to S3.
In my case, I use redis-cache for check each array size and with node-schedule, back-up large datas to S3.

Related

DynamoDB Scan with FilterExpression in nodejs

I'm trying to retrieve all items from a DynamoDB table that match a FilterExpression, and although all of the items are scanned and half do match, the expected items aren't returned.
I have the following in an AWS Lambda function running on Node.js 6.10:
var AWS = require("aws-sdk"),
documentClient = new AWS.DynamoDB.DocumentClient();
function fetchQuotes(category) {
let params = {
"TableName": "quotient-quotes",
"FilterExpression": "category = :cat",
"ExpressionAttributeValues": {":cat": {"S": category}}
};
console.log(`params=${JSON.stringify(params)}`);
documentClient.scan(params, function(err, data) {
if (err) {
console.error(JSON.stringify(err));
} else {
console.log(JSON.stringify(data));
}
});
}
There are 10 items in the table, one of which is:
{
"category": "ChuckNorris",
"quote": "Chuck Norris does not sleep. He waits.",
"uuid": "844a0af7-71e9-41b0-9ca7-d090bb71fdb8"
}
When testing with category "ChuckNorris", the log shows:
params={"TableName":"quotient-quotes","FilterExpression":"category = :cat","ExpressionAttributeValues":{":cat":{"S":"ChuckNorris"}}}
{"Items":[],"Count":0,"ScannedCount":10}
The scan call returns all 10 items when I only specify TableName:
params={"TableName":"quotient-quotes"}
{"Items":[<snip>,{"category":"ChuckNorris","uuid":"844a0af7-71e9-41b0-9ca7-d090bb71fdb8","CamelCase":"thevalue","quote":"Chuck Norris does not sleep. He waits."},<snip>],"Count":10,"ScannedCount":10}
You do not need to specify the type ("S") in your ExpressionAttributeValues because you are using the DynamoDB DocumentClient. Per the documentation:
The document client simplifies working with items in Amazon DynamoDB by abstracting away the notion of attribute values. This abstraction annotates native JavaScript types supplied as input parameters, as well as converts annotated response data to native JavaScript types.
It's only when you're using the raw DynamoDB object via new AWS.DynamoDB() that you need to specify the attribute types (i.e., the simple objects keyed on "S", "N", and so on).
With DocumentClient, you should be able to use params like this:
const params = {
TableName: 'quotient-quotes',
FilterExpression: '#cat = :cat',
ExpressionAttributeNames: {
'#cat': 'category',
},
ExpressionAttributeValues: {
':cat': category,
},
};
Note that I also moved the field name into an ExpressionAttributeNames value just for consistency and safety. It's a good practice because certain field names may break your requests if you do not.
I was looking for a solution that combined KeyConditionExpression with FilterExpression and eventually I worked this out.
Where aws is the uuid. Id is an assigned unique number preceded with the text 'form' so I can tell I have form data, optinSite is so I can find enquiries from a particular site. Other data is stored, this is all I need to get the packet.
Maybe this can be of help to you:
let optinSite = 'https://theDomainIWantedTFilterFor.com/';
let aws = 'eu-west-4:EXAMPLE-aaa1-4bd8-9ean-1768882l1f90';
let item = {
TableName: 'Table',
KeyConditionExpression: "aws = :Aw and begins_with(Id, :form)",
FilterExpression: "optinSite = :Os",
ExpressionAttributeValues: {
":Aw" : { S: aws },
":form" : { S: 'form' },
":Os" : { S: optinSite }
}
};

How to add a number to a DynamoDB number set using Node.js

I'm just trying to add a number to a number set in DynamoDB. This expression was working with an untyped list. But to save space since it will just be storing numbers I moved everything to plain number sets. Now no matter how much I tinker with it I can't get it to go through.
var phoneID = req.body.PhoneID;
var category = req.body.ratingCategory;
var ratingToAdd = [Number(req.body.rating)]
var dbparams = {
"TableName": "Venue_Ratings",
Key: {
"PhoneID" : phoneID
},
"UpdateExpression": "SET #categoryName = list_append(#categoryName, :rating)",
"ExpressionAttributeNames" : {
"#categoryName" : category
},
"ExpressionAttributeValues": {
":rating": ratingToAdd
},
"ReturnValues": "ALL_NEW"
};
This error is being thrown An operand in the update expression has an incorrect data type
I have also tried changing the update expression to an ADD expression instead like so ADD #categoryName :rating.
I've tried changing ratingToAdd to a plain number not in an array, a string in an array, and a plain string not in an array.
I'm calling the db using the docClient.update method.
I have verified that the sets in the db are in fact number sets and that they exist.
What am I missing here? Thanks for the help.
The below code should add the number to the Number Set (i.e. DynamoDB data type 'NS').
Use this function with ADD in UpdateExpression:-
docClient.createSet([Number(5)])
Code:-
var params = {
TableName : "Movies",
Key : {
"yearkey" : 2016,
"title" : "The Big New Movie 1"
},
UpdateExpression : "ADD #category :categorySet",
ExpressionAttributeNames: {
'#category' : 'category'
},
ExpressionAttributeValues: {':categorySet' : docClient.createSet( [Number(5)])},
ReturnValues: 'UPDATED_NEW'
};

how to use in operator in dynamo db

I have a user table with a field username. I need to write something equivalent to this in dynamo db: Select * from user where username in('a','b','c');
Adding more from code prosepective i have usernames in an array say var arr=['a','b','c'];
I so far tried this which is giving me zero result
this.dynamo.client.scanAsync({
TableName: this.dynamo.table('users'),
FilterExpression: 'username IN (:list)',
ExpressionAttributeValues: {
':list': arr.toString()
}
}).then((response) => {
console.log(response);
return {
userFriends: result.Item.friends
};
});
When I pass one element in array it give me result searching passed single element in user table but its not working with more than one element in array.
The individual users should be given as comma separated String variables. JavaScript array is equivalent to List in AWS DynamoDB data type. The DynamoDB can't compare the String data type in database with List attribute (i.e. Array in JavaScript).
var params = {
TableName : "Users",
FilterExpression : "username IN (:user1, :user2)",
ExpressionAttributeValues : {
":user1" : "john",
":user2" : "mike"
}
};
Construct the object from array for FilterExpression:-
Please refer the below code for forming the object dynamically based on Array value.
var titleValues = ["The Big New Movie 2012", "The Big New Movie"];
var titleObject = {};
var index = 0;
titleValues.forEach(function(value) {
index++;
var titleKey = ":titlevalue"+index;
titleObject[titleKey.toString()] = value;
});
var params = {
TableName : "Movies",
FilterExpression : "title IN ("+Object.keys(titleObject).toString()+ ")",
ExpressionAttributeValues : titleObject
};
Note:-
I don't think IN clause with 1000s of usernames is a good idea in terms of performance.

Add a string to a list in a dynamodb table

So I have been working with dynamodb in a nodejs express app and I have a specific table that has a field which is just empty lists and I want to append a string to the list.
The table name is "dev_entrants" and here is an example of the table:
----------------------------------------
primary Key Sort Key
eventID | eventType | entrants
----------------------------------------
Qual-919-w5wm1xhnw | Qual | []
----------------------------------------
So I receive a post request and then route it through express and it comes to a function where after doing type checks and all that I try to add stuff to my table with:
import AWS from 'aws-sdk';
const docClient = new AWS.DynamoDB.DocumentClient({region: 'us-west-1'});
const db = {
docClient
};
...
const entrantsParams = {
'TableName' : 'dev_entrants',
'Key': {
'eventID' : 'Qual-919-w5wm1xhnw',
},
'UpdateExpression' : "SET #attrName = list_append(#attrName, :attrValue)",
'ExpressionAttributeNames' : {
'#attrName' : 'entrants'
},
'ExpressionAttributeValues' : {
':attrValue' : ['joe'],
}
};
const updateEntrantsPromise = db.docClient.update(entrantsParams).promise();
(For the purpose of this example I have replaced variables with the strings they represent)
I have spent 6 hours or so reading through different documentation, as well as on stack overflow trying to find the answer.
The current error i get is the provided key element does not match the schema. If I remove the brackets around the attrValue then I get wrong operand type. I know the key exists in the table as i copied and pasted it from there. Also I am succesfully adding things to the table from another function so my connection is working fine. Can anyone please help me out?
You need to include the eventType in the Key object because your table schema has a sort key. If your table has a sort/partition key then you need to include it along with the primary key. Try it with the following:
const entrantsParams = {
'TableName' : 'dev_entrants',
'Key': {
'eventID' : 'Qual-919-w5wm1xhnw',
'eventType' : 'Qual'
},
'UpdateExpression' : "SET #attrName = list_append(if_not_exists(#attrName, :empty_list), :attrValue)",
'ExpressionAttributeNames' : {
'#attrName' : 'entrants'
},
'ExpressionAttributeValues' : {
':attrValue' : ['joe'],
':empty_list': []
}
};

Insertion order of array elements in MongoDB

I am having trouble preserving insertion order of a bulk insert into mongodb.
My applications requires posting data continuously (via HTTP POST # once a second) to a server. On the server side, the HTTP POST is handled and this data is stored in a capped collection in mongodb v2.4. The size of this capped collection is large (50MB). The format of this data is JSON and it has arrays in it like this:
{"Data":[{"Timestamp":"2014-08-02 13:38:18:852","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0021321268286556005,"b":-0.0010663296561688185}],"Monkec":[{"a":17.511783599853516,"c":-0.42092469334602356,"b":-0.42092469334602356}]},{"Timestamp":"2014-08-02 13:38:18:858","Rabbit":[{"a":-0.0021321268286556005,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.892329216003418,"c":-0.2339634746313095,"b":-0.2342628538608551}]},{"Timestamp":"2014-08-02 13:38:18:863","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0021321268286556005,"b":0.0021315941121429205}],"Monkec":[{"a":9.702523231506348,"c":-0.24264541268348694,"b":-0.2148033082485199}]},{"Timestamp":"2014-08-02 13:38:18:866","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.665101051330566,"c":-0.23366409540176392,"b":-0.2197430431842804}]},{"Timestamp":"2014-08-02 13:38:18:868","Rabbit":[{"a":-0.0021321268286556005,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.693991661071777,"c":-0.2936892807483673,"b":-0.22857467830181122}]},{"Timestamp":"2014-08-02 13:38:18:872","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.684710502624512,"c":-0.2296224981546402,"b":-0.13786330819129944}]},{"Timestamp":"2014-08-02 13:38:18:873","Rabbit":[{"a":-0.0021321268286556005,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.67707633972168,"c":-0.31255003809928894,"b":-0.1902543604373932}]},{"Timestamp":"2014-08-02 13:38:18:875","Rabbit":[{"a":-0.0021321268286556005,"c":-0.0010663296561688185,"b":0}],"Monkec":[{"a":9.739496231079102,"c":-0.1899549812078476,"b":-0.18845809996128082}]},{"Timestamp":"2014-08-02 13:38:18:878","Rabbit":[{"a":-0.003197923768311739,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.721234321594238,"c":-0.19205063581466675,"b":-0.17318984866142273}]},{"Timestamp":"2014-08-02 13:38:18:881","Rabbit":[{"a":-0.003197923768311739,"c":-0.003197923768311739,"b":0.0010657970560714602}],"Monkec":[{"a":9.78545093536377,"c":-0.2501298487186432,"b":-0.1953437775373459}]},{"Timestamp":"2014-08-02 13:38:18:882","Rabbit":[{"a":0,"c":-0.0010663296561688185,"b":0.0021315941121429205}],"Monkec":[{"a":9.686058044433594,"c":-0.21630020439624786,"b":-0.18247054517269135}]},{"Timestamp":"2014-08-02 13:38:18:884","Rabbit":[{"a":-0.0010663296561688185,"c":0,"b":0.0010657970560714602}],"Monkec":[{"a":9.67198657989502,"c":-0.18546432256698608,"b":-0.23156845569610596}]},{"Timestamp":"2014-08-02 13:38:18:887","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.640103340148926,"c":-0.23276595771312714,"b":-0.25686585903167725}]},{"Timestamp":"2014-08-02 13:38:18:889","Rabbit":[{"a":-0.0010663296561688185,"c":0,"b":0}],"Monkec":[{"a":9.739346504211426,"c":-0.19130218029022217,"b":-0.22602996230125427}]},{"Timestamp":"2014-08-02 13:38:18:891","Rabbit":[{"a":-0.0021321268286556005,"c":-0.0010663296561688185,"b":0}],"Monkec":[{"a":9.716594696044922,"c":-0.22543121874332428,"b":-0.19728973507881165}]},{"Timestamp":"2014-08-02 13:38:18:898","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.682914733886719,"c":-0.28680360317230225,"b":-0.1740879863500595}]},{"Timestamp":"2014-08-02 13:38:18:904","Rabbit":[{"a":-0.0010663296561688185,"c":0,"b":0.0021315941121429205}],"Monkec":[{"a":9.693093299865723,"c":-0.20866607129573822,"b":-0.2586621046066284}]},{"Timestamp":"2014-08-02 13:38:18:907","Rabbit":[{"a":-0.0021321268286556005,"c":-0.0010663296561688185,"b":0}],"Monkec":[{"a":9.690997123718262,"c":-0.18681152164936066,"b":-0.23216719925403595}]},{"Timestamp":"2014-08-02 13:38:18:910","Rabbit":[{"a":-0.003197923768311739,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.671688079833984,"c":-0.15388000011444092,"b":-0.2588118016719818}]},{"Timestamp":"2014-08-02 13:38:19:055","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0010663296561688185,"b":0}],"Monkec":[{"a":9.689650535583496,"c":-0.23605911433696747,"b":-0.1989363133907318}]}],"Serial":"35689"}
I am inserting this in mongodb (using NodeJs MongoClient driver) using a bulk insert command:
var length = 20; // only doing 20 inserts for testing purposes
for (var i = 0; i <length;i++) {
var bulk = col.initializeUnorderedBulkOp();
bulk.insert(data["Data"][i]); // data is my JSON data of interest
bulk.execute(function(err) {
if (err) {
return cb(err);
}
if (++inserted == length) {
cb(); // callback (not seen in this code snippet)
}
}); // end function
} // end of for loop
However, when I examine the entries in the database, they are not inserted in the order in which the data resides in the originating JSON array. My source data is in ascending Timestamp order, but a few entries in the mongodb capped collection are out of order. For instance, I see this:
{ "Timestamp" : "2014-08-02 13:38:18:910", "Rabbit" : [ { "a" : -0.003197923768311739, "c" : -0.0010663296561688185, "b" : 0.0010657970560714602 } ], "Monkec" : [ { "a" : 9.671688079833984, "c" : -0.15388000011444092, "b" : -0.2588118016719818 } ], "_id" : ObjectId("548e67a683946a5d25bc6d1a") }
{ "Timestamp" : "2014-08-02 13:38:18:884", "Rabbit" : [ { "a" : -0.0010663296561688185, "c" : 0, "b" : 0.0010657970560714602 } ], "Monkec" : [ { "a" : 9.67198657989502, "c" : -0.18546432256698608, "b" : -0.23156845569610596 } ], "_id" : ObjectId("548e67a683946a5d25bc6d13") }
{ "Timestamp" : "2014-08-02 13:38:18:904", "Rabbit" : [ { "a" : -0.0010663296561688185, "c" : 0, "b" : 0.0021315941121429205 } ], "Monkec" : [ { "a" : 9.693093299865723, "c" : -0.20866607129573822, "b" : -0.2586621046066284 } ], "_id" : ObjectId("548e67a683946a5d25bc6d18") }
so Timestamp" : "2014-08-02 13:38:18:910 is stored before "Timestamp" : "2014-08-02 13:38:18:884" even though it is the other way around in the source JSON.
How to ensure mongodb inserts data in the correct order? I also tried non bulk inserts (db.col.insert or db.col.insertOne) but still get this inconsistency. Thank You
If your queries aren't asking for any specific sorting/ordering, MongoDB makes no guarantees as to in which order they'll be returned.
How you insert your data is irrelevant. What you need to do is write your find query like this:
// Sort by ascending timestamp
db.my_collection.find({ ... }).sort({"TimeStamp": 1})
See http://docs.mongodb.org/manual/reference/method/cursor.sort/#cursor.sort for more information on how sorting works.
Of course, if you want to do that, you'll greatly benefit from adding an index on Timestamp to your collection (see http://docs.mongodb.org/manual/core/indexes/).

Resources