I have a DynamoDB table that looks like this:
(there are roughly 1,500,000 entries like this one, with different timestamps)
I have 2 GSIs:
I'm trying to retrieve all the rows in the table for a given day.
This is what my code looks like (NodeJS):
var AWS = require("aws-sdk");
AWS.config.update({accessKeyId: "", secretAccessKey: ""});
AWS.config.update({region: 'us-east-1'});
var docClient = new AWS.DynamoDB.DocumentClient();
var params = {
TableName: "QfGamingTransactionsProd",
IndexName: 'Result-RedeemedAt-index',
KeyConditionExpression: "#rs = :result and begins_with (#rat, :Rat)",
ExpressionAttributeNames: {
"#rs": "Result",
"#rat": "RedeemedAt"
},
ExpressionAttributeValues: {
":result": "SUCCESS",
":Rat": "2016-10-20"
}
};
docClient.query(params, function (err, data) {
if (err) {
console.error("Unable to query. Error:", JSON.stringify(err, null, 2));
} else {
console.log("\nQuery succeeded. \n");
console.log("- Total", data.Count);
}
});
It seems to be working, but I'm getting (way) fewer results than expected. The same code works fine on a smaller table.
I get similar results with Scan.
What am I missing?
The number of records retrieved depends on the size of each record, because DynamoDB has a size limit for a single Query: it returns at most 1 MB of data per call.
But we can paginate through the results, which should solve your issue.
If the data satisfying the query has not been fully retrieved, the response contains a LastEvaluatedKey. Set that LastEvaluatedKey as the ExclusiveStartKey of the next query to retrieve the next page, and repeat until no LastEvaluatedKey is returned; at that point we have the complete data.
var async = require('async');

function queryAll(params) {
  return new Promise(function (resolve, reject) {
    var data = [];
    var scanComplete = false;

    async.until(
      function () {
        return scanComplete;
      },
      function (callback) {
        docClient.query(params, function (err, result) {
          if (err) {
            console.log(err);
          } else {
            // Collect this page of items.
            data = data.concat(result.Items);
            if (typeof result.LastEvaluatedKey === 'undefined') {
              // Fully retrieved.
              scanComplete = true;
            } else {
              // Continue the next query from where this page stopped.
              params.ExclusiveStartKey = result.LastEvaluatedKey;
            }
          }
          callback(err);
        });
      },
      // This runs when the loop is complete or returns an error.
      function (err) {
        if (err) {
          console.log('error in processing query');
          console.log(err);
          reject(err);
        } else {
          resolve(data);
        }
      });
  });
}

// Usage:
// queryAll(params).then(function (items) { console.log('- Total', items.length); });
This is because, by default, DynamoDB returns only 1 MB of data at a time, but there is a way to solve this issue.
You need to change your implementation as follows (a minimal sketch is shown after the steps):
Step 1: Query the DynamoDB table; it returns the first 1 MB of data along with a LastEvaluatedKey.
Step 2: Query the table again, but this time pass the LastEvaluatedKey as the ExclusiveStartKey to get the next set of data.
Step 3: If the response still contains a LastEvaluatedKey, repeat Step 2; otherwise you have all the data for that key.
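A minimal sketch of that loop, reusing the docClient and params from the question (the accumulation into allItems is only for illustration):
// Follows LastEvaluatedKey until the query is exhausted.
function queryAllPages(params, allItems, done) {
  docClient.query(params, function (err, data) {
    if (err) {
      return done(err);
    }
    allItems = allItems.concat(data.Items);
    if (typeof data.LastEvaluatedKey !== 'undefined') {
      // More pages remain: continue from where this page stopped.
      params.ExclusiveStartKey = data.LastEvaluatedKey;
      return queryAllPages(params, allItems, done);
    }
    // No LastEvaluatedKey means the result set is complete.
    done(null, allItems);
  });
}

queryAllPages(params, [], function (err, items) {
  if (err) {
    console.error(err);
  } else {
    console.log('- Total', items.length);
  }
});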
Here are the references:
About query limits
Blog on how to implement this code
Hope that helps
Related
I have one DynamoDB table, and I created another one just like it. I created a Lambda trigger on the first table to capture all new, updated, and deleted items, and the Lambda function should write, update, or delete those items in the second table. I can see the events in CloudWatch along with my console statements, but the "put" (or "putItem") never executes and the item never gets inserted (or updated or deleted) in the second table. So I need some help.
I've tried a couple of different versions of the following code:
var AWS = require("aws-sdk");
AWS.config.update({region: 'us-east-2'});
var ddb = new AWS.DynamoDB.DocumentClient();
exports.handler = (event, context, callback) => {
console.log(JSON.stringify(event, null, 2));
event.Records.forEach((record) => {
console.log('New MY Stream record: ', JSON.stringify(record, null, 2));
if (record.eventName == 'INSERT') {
console.log('New MY INSERTING RECORD');
ddb.put({
"TableName": 'my_jobs_holding',
"Item": {
"attributeone" : {"S":record.dynamodb.NewImage.attributeone},
"attributetwo" : {"S":record.dynamodb.NewImage.attributetwo},
"attributethree" : {"S":record.dynamodb.NewImage.attributethree},
"attributefour" : {"S":record.dynamodb.NewImage.attributefour,
"attributefive" : {"S":record.dynamodb.NewImage.attributefive},
}
}, function(result) {
console.log('MY HERE 1' + JSON.stringify(result, null, 2));
result.on('data', function(chunk) {
console.log("MY HERE" + chunk);
});
});
console.log("New MY COMPLETED INSERTION MODULE");
}
if (record.eventName == 'DELETE') {
console.log('New MY DELETING RECORD');
var params = {
TableName: 'my_jobs_holding',
Item: {
"attributeone" : {"S":record.dynamodb.NewImage.attributeone},
"attributetwo" : {"S":record.dynamodb.NewImage.attributetwo},
"attributethree" : {"S":record.dynamodb.NewImage.attributethree},
"attributefour" : {"S":record.dynamodb.NewImage.attributefour,
"attributefive" : {"S":record.dynamodb.NewImage.attributefive},
}
};
ddb.deleteItem(params, function(err, data) {
if (err) {
console.log("New MY DELETING Error", err.stack);
} else {
console.log("New MY DEL Success", JSON.stringify(data, null, 2));
}
});
}
if (record.eventName == 'MODIFY') {
console.log('New MY MODIFYING RECORD');
var params = {
TableName: 'my_jobs_holding',
Item: {
"attributeone" : {"S":record.dynamodb.NewImage.attributeone},
"attributetwo" : {"S":record.dynamodb.NewImage.attributetwo},
"attributethree" : {"S":record.dynamodb.NewImage.attributethree},
"attributefour" : {"S":record.dynamodb.NewImage.attributefour,
"attributefive" : {"S":record.dynamodb.NewImage.attributefive},
}
};
ddb.updateItem(params, function(err,data) {
if (err) {
console.log("New MY UPDATING Error", err);
} else {
console.log("New MY UPDATING Success",data);
}
});
}
});
callback(null, `New MY Successfully processed records.`);
};
I have also tried writing the insert the same way as the update and delete, defining params first and then calling putItem, with no luck.
I read somewhere that I should try to test it through the AWS Lambda console. I did, and it just spins and spins. But if I actually go to the first DynamoDB table and insert an item, I can see the results pretty quickly in CloudWatch. I have played around with this code several times, triggering errors like "putItem is not a valid function" or the timeout errors people have reported, but I have never been able to get an item to insert into the second DynamoDB table (therefore I have never tried the delete or the update). I have also tried just using new AWS.DynamoDB(), and I have commented out the callback at the bottom too. Nothing gets the item inserted into the second DynamoDB table. I've checked the role/permissions too.
What am I missing here?
Anyone have any thoughts here? I would DEFINITELY appreciate it.
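For reference, a minimal sketch of what the INSERT branch might look like with the DocumentClient, assuming the same table and handler shape as above (the DocumentClient expects plain JavaScript values, so the stream's typed NewImage has to be unmarshalled first, and the handler should wait for the writes to finish before signalling completion):
var AWS = require("aws-sdk");
AWS.config.update({region: 'us-east-2'});
var ddb = new AWS.DynamoDB.DocumentClient();

exports.handler = (event, context, callback) => {
    // One promise per INSERT record, so the handler only finishes after all writes complete.
    var writes = event.Records
        .filter((record) => record.eventName === 'INSERT')
        .map((record) => {
            // Convert the typed stream image ({"S": "..."} etc.) into plain values.
            var item = AWS.DynamoDB.Converter.unmarshall(record.dynamodb.NewImage);
            return ddb.put({
                TableName: 'my_jobs_holding',
                Item: item
            }).promise();
        });

    Promise.all(writes)
        .then(() => callback(null, 'Processed ' + writes.length + ' insert(s).'))
        .catch((err) => callback(err));
};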
Seems like a super basic task, but I just cannot get this to work (I'm not very experienced with Mongo or Node.js).
I have an array of records. I need to check the DB to see if any records with a matching name already exist, and if they do, grab that record so I can update it.
Right now I am trying this
function hit_the_db(db, record_name, site_id) {
return new Promise((resolve, reject) => {
var record = db.collection('' + site_id + '_campaigns').find({name: record_name}).toArray(function(err, result) {
if (err) {
console.log('...error => ' + err.message);
reject(err);
} else {
console.log('...promise resolved...');
resolve(result);
}
});
console.log('...second layer of select successful, returning data for ' + record.length + ' records...');
return record;
});
}
This query works in another part of the app so I tried to just copy it over, but I am not getting any records returned even though I know there should be with the data I am sending over.
site_id is just a string that would look like ksdlfnsdlfu893hdsvSFJSDgfsdk. The record_name is also just a string that could really be anything, but it is filtered beforehand so there are no spaces or special characters; most are something along these lines: this-is-the-name.
With the names coming through there should be at least one found record for each, but I am getting nothing returned. I just cannot wrap my head around using Mongo for these basic tasks, so if anyone can help it would be greatly appreciated.
I am just using nodeJS and connecting to mongoDB, there is no express or mongoose or anything like that.
The problem here is that you are mixing callbacks and promises for handling async code. When you call:
var record = db.collection('' + site_id + '_campaigns').find({name: record_name}).toArray(function(err, result) {
You are passing in a callback function, which will receive the resulting array of Mongo records in a parameter called result, but then assigning the immediately returned value to a variable called record, which is not going to contain the documents.
Here is a cleaned up version of your function.
function hit_the_db(db, site_id, record_name, callback) {
// Find all records matching 'record_name'
db.collection(site_id + 'test_campaigns').find({ name: record_name }).toArray(function(err, results) {
// matching records are now stored in 'results'
if (err) {
console.log('err:', err);
}
return callback(err, results);
});
}
Here is optional code for testing the above function.
// This is called to generate test data
function insert_test_records_callback(db, site_id, record_name, insert_count, callback) {
const testRecords = [];
for (let i = 0; i < insert_count; ++i) {
testRecords.push({name: record_name, val: i});
}
db.collection(site_id + 'test_campaigns').insertMany(testRecords, function(err, result) {
return callback(err);
});
}
// This cleans up by deleting all test records.
function delete_test_records_callback(db, site_id, record_name, callback) {
db.collection(site_id + 'test_campaigns').deleteMany({name: record_name}, function(err, result) {
return callback(err);
});
}
// Test function to insert, query, clean up test records.
function test_callback(db) {
const site_id = 'ksdlfnsdlfu893hdsvSFJSDgfsdk';
const test_record_name = 'test_record_callback';
// First call the insert function
insert_test_records_callback(db, site_id, test_record_name, 3, function(err) {
// Once execution reaches here, insertion has completed.
if (err) {
console.log(err);
return;
}
// Do the query function
hit_the_db(db, site_id, test_record_name, function(err, records) {
// The query function has now completed
console.log('hit_the_db - err:', err);
console.log('hit_the_db - records:', records);
delete_test_records_callback(db, site_id, test_record_name, function(err, records) {
console.log('cleaned up test records.');
});
});
});
}
Output:
hit_the_db - err: null
hit_the_db - records: [ { _id: 5efe09084d078f4b7952dea8,
name: 'test_record_callback',
val: 0 },
{ _id: 5efe09084d078f4b7952dea9,
name: 'test_record_callback',
val: 1 },
{ _id: 5efe09084d078f4b7952deaa,
name: 'test_record_callback',
val: 2 } ]
cleaned up test records.
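If you would rather keep the promise-based style from your original function, a minimal sketch along these lines should also work (same collection naming as the test code above):
function hit_the_db_promise(db, site_id, record_name) {
    return new Promise((resolve, reject) => {
        db.collection(site_id + 'test_campaigns')
            .find({ name: record_name })
            .toArray((err, results) => {
                if (err) {
                    // Reject so the caller's .catch() (or try/catch with await) sees the error.
                    return reject(err);
                }
                // Resolve with the matching documents once the query has actually finished.
                resolve(results);
            });
    });
}

// Usage:
// const records = await hit_the_db_promise(db, site_id, record_name);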
I built an AWS Lambda that scans and filters my DynamoDB table and returns the results to my AWS API. There are three possible search parameters (itemname, author, and type), and I don't know which of them will be used in a given query. At first, I implemented a version in which all search parameters were hard-coded; as a result, I got errors whenever not all search parameters were defined. In the end I reworked the code to build the scan params individually, depending on which search parameters were supplied.
The code works fine, but I think there are better implementations for this problem; maybe you can give me some improvement advice. Otherwise, this may help people who face the same issues with their optional search parameters.
var AWS = require('aws-sdk');
var docClient = new AWS.DynamoDB.DocumentClient();
//This is the Lambda function
exports.handler = function(event, context, callback)
{
//In case we query without query attributes
if(!event.hasOwnProperty("queryStringParameters"))
{
console.log("NO queryStringParameters FOUND");
var emptyparams =
{
TableName: "blackboard-items",
};
docClient.scan(emptyparams, onScan);
return;
}
//we want to tailor this attributes for the params for docClient.scan(params, onScan);
var queryParam = event["queryStringParameters"];
var filterexpression = "";
var expressionAttributeNames = {}; //Instantiate
var expressionAttributeValues = {};
console.log("QUERY PARAMETERS: " + JSON.stringify(queryParam));
//Do we look for an author?
if(queryParam.hasOwnProperty("author"))
{
console.log("FOUND AUTHOR");
filterexpression += "contains(#author, :author)"; //Collect scan params
expressionAttributeNames['#author'] = 'author';
expressionAttributeValues[':author'] = event["queryStringParameters"]["author"];
}
//Do we look for an itemname?
if(queryParam.hasOwnProperty("itemname"))
{
console.log("FOUND ITEMNAME");
if(filterexpression !== "")
filterexpression += " AND contains(#itemname, :itemname)";
else
filterexpression += "contains(#itemname, :itemname)";
expressionAttributeNames['#itemname'] = 'itemname';
expressionAttributeValues[':itemname'] = queryParam["itemname"];
}
//Do we look for a type?
if(queryParam.hasOwnProperty("type"))
{
console.log("FOUND TYPE");
if(filterexpression !== "")
filterexpression += " AND #type = :type";
else
filterexpression += "#type = :type";
expressionAttributeNames['#type'] = 'type';
expressionAttributeValues[':type'] = event["queryStringParameters"]["type"];
}
//Build params based on the tailored parts
var params =
{
TableName: "blackboard-items",
FilterExpression: filterexpression,
ExpressionAttributeNames: expressionAttributeNames,
ExpressionAttributeValues: expressionAttributeValues,
};
//Use tailored params for scan()
docClient.scan(params, onScan);
var count = 0;
function onScan(err, data)
{
if (err)
{
console.error("Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
}
else
{
console.log("Scan succeeded.");
data.Items.forEach(function(itemdata)
{
console.log("Item :", ++count,JSON.stringify(itemdata));
});
// continue scanning if we have more items
if (typeof data.LastEvaluatedKey != "undefined")
{
console.log("Scanning for more...");
params.ExclusiveStartKey = data.LastEvaluatedKey;
docClient.scan(params, onScan);
}
}
var response =
{
"isBase64Encoded": false,
"statusCode": "200",
"headers": { },
"body": JSON.stringify(data.Items)
};
callback(null, response);
}
};
Note:
The primary key of the DB is "itemname", but I will rework the DB design soon to have a sort key.
DynamoDB is very limited in its query capabilities and, as such, you should avoid Scan at ALL costs. Every Scan operation consumes RCUs for every item it reads; if your table has many items, it can use up your RCUs quite quickly.
If you want to query by those 3 attributes, then DynamoDB may not be the best database for your use case. If you can narrow your query down to 1 attribute at a time instead, you can use Global Secondary Indexes (GSIs), so that you can query based on the author or the type. You can see this answer on how to query GSIs in DynamoDB; a rough sketch follows below.
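As an illustration, a query against a hypothetical GSI on author (the index name author-index is an assumption; it has to match whatever you define on the table) could look like this. Note that, unlike the contains() filter above, the key condition on the index's partition key is an exact match:
var AWS = require('aws-sdk');
var docClient = new AWS.DynamoDB.DocumentClient();

var params = {
    TableName: 'blackboard-items',
    IndexName: 'author-index',          // hypothetical GSI with 'author' as its partition key
    KeyConditionExpression: '#author = :author',
    ExpressionAttributeNames: { '#author': 'author' },
    ExpressionAttributeValues: { ':author': 'some-author' }
};

docClient.query(params, function (err, data) {
    if (err) {
        console.error('Query failed:', JSON.stringify(err, null, 2));
    } else {
        console.log('Matched items:', JSON.stringify(data.Items, null, 2));
    }
});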
I have the following code that scans the DynamoDB table and returns a count of the number of people who have the key value test = true. For some reason, this code is not scanning the whole table. Does anyone know why?
var aws = require('aws-sdk');
var config = require('./config.js');
aws.config.update({accessKeyId: config.key, secretAccessKey: config.secret});
aws.config.update({region: 'us-east-1'});
function getItems() {
var db = new aws.DynamoDB.DocumentClient();
db.scan({
TableName : config.db,
}, function(err, data) {
if (err) { console.log(err); return; }
var count = 0;
for (var ii in data.Items) {
ii = data.Items[ii];
if (ii.setRemoveBrandingEmailOptin) {
console.log(ii.test);
count += 1;
}
}
console.log(count);
});
}
getItems();
Per the documentation:
If the total number of scanned items exceeds the maximum data set size limit of 1 MB, the scan stops and results are returned to the user as a LastEvaluatedKey value to continue the scan in a subsequent operation. The results also include the number of items exceeding the limit.
You shouldn't need to dump the entire table into your application for a simple count anyway. You're doing this in the most inefficient way possible. Try something like this:
db.scan({
TableName : config.db,
Select: 'COUNT',
FilterExpression: "#emailOptInField = :emailOptInValue",
ExpressionAttributeNames: {
"#emailOptInField": "setRemoveBrandingEmailOptin",
},
ExpressionAttributeValues: {
":emailOptInValue": true
}
}, function(err, data) {
if (err) { console.log(err); return; }
var count = data.Count;
console.log(count);
});
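Note that the 1 MB limit still applies to the data scanned even with Select: 'COUNT', so on a large table you may still need to follow LastEvaluatedKey and sum the per-page counts. A minimal sketch of that, reusing the same filter:
var totalCount = 0;

function countPage(params) {
    db.scan(params, function (err, data) {
        if (err) { console.log(err); return; }
        // Each page reports the count of items that matched the filter on that page.
        totalCount += data.Count;
        if (typeof data.LastEvaluatedKey !== 'undefined') {
            // Keep scanning from where the previous page stopped.
            params.ExclusiveStartKey = data.LastEvaluatedKey;
            countPage(params);
        } else {
            console.log('Total matching items:', totalCount);
        }
    });
}

countPage({
    TableName : config.db,
    Select: 'COUNT',
    FilterExpression: "#emailOptInField = :emailOptInValue",
    ExpressionAttributeNames: { "#emailOptInField": "setRemoveBrandingEmailOptin" },
    ExpressionAttributeValues: { ":emailOptInValue": true }
});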
I am creating a weather station using the Particle Electron and AWS. I have managed to get the returned data sent to a DynamoDB table "weather" which contains all of the weather data with the following schema (with included sample values):
Item{13}
deviceId: 540056000a51343334363138 (String) (Primary Partition Key)
tm: 1458754711 (Number) (Primary Sort Key)
batSoC: 89 (String)
batV: 4.01 (String)
hum: 27.9 (String)
lat: 41.2083 (String)
lon: -73.3439 (String)
pres: 968.4 (String)
temp: 19.8 (String)
uvI: 0.1 (String)
wDir: 0 (String)
wGst: 0.0 (String)
wSpd: 0.0 (String)
as well as a separate "weather_index" table which contains only the deviceId and tm attributes of the most recent data written to the main table (kind of like an atomic counter, but for a periodically updated unix timestamp value). So if the "weather" item above was the most recent entry, the corresponding item in the "weather_index" table would look like this:
Item{2}
deviceIdString: 540056000a51343334363138 (String) (Primary Partition Key)
tmNumber: 1458754711 (Number)
I am currently trying to write a very basic web frontend in Node.js (which, prior to this project, I have had no experience with, so I am still learning) and can't figure out how to:
Perform a DynamoDB getItem which contains a parameter retrieved via a previous getItem. Like:
latestTime = getItem(weather_index, deviceId) // Gets the time "tm" of the most recent weather observation and stores it in "latestTime"
// Where "weather_index" is the table name
currentWeather = getItem(deviceId, tm) // Gets the weather observation for the specified "tm" value and stores it in "currentWeather"
// Where "tm" is the unix time-stamp of the most recent observation
I then want to be able to print the individual values to the terminal/webpage/carrier pigeon/etc. (something along the lines of currentWeather.deviceId, currentWeather.tm, currentWeather.batSoC, etc.).
I have the following code that I can't really make work properly:
/*
* Module dependencies
*/
var AWS = require('aws-sdk')
// weathermon_dev credentials
AWS.config.update({accessKeyId: 'REDACTED for obvious reasons', secretAccessKey: 'This bit too'});
// Select AWS region
AWS.config.update({region: 'us-east-1'});
var db = new AWS.DynamoDB();
// db.listTables(function(err,data) {
// console.log(data.TableNames);
// });
var time = Date.now() / 1000;
time = Math.round(time);
//console.log("Time: ");
//console.log(time);
time = Math.round(time);
var deviceId = "540056000a51343334363138"
var params = {
Key: {
deviceId: {S: deviceId}
},
TableName: 'weather_index'
};
var timeJson;
db.getItem(params, function(err,data) {
if (err) console.log(err); // an error occurred
else console.log(data); // successful response
var timeJson = JSON.parse(data);
})
// var timeJson = JSON.parse(data);
// var itemTime = timeJson.item;
console.log("timeJSON: " + timeJson);
// console.log("itemTime: " + itemTime);
var params = {
Key: {
deviceId: {S: deviceId},
time: {N: 'tm'}
},
TableName: 'weather'
};
db.getItem(params, function(err, data) {
if (err) console.log(err); // an error occurred
else console.log(data); // successful response
})
Any help would be greatly appreciated.
You need to look into how NodeJS asynchronous calls work. You would need to wait until the callback from the first getItem() is called before you perform the second getItem().
I've rewritten the relevant part of your code here, to show you what I'm talking about, but I recommend you try to understand why the code needs to be written in this way instead of just copy/pasting it.
var deviceId = "540056000a51343334363138";

var params = {
  Key: {
    deviceId: {S: deviceId}
  },
  TableName: 'weather_index'
};

db.getItem(params, function (err, data) {
  if (err) console.log(err); // an error occurred
  else {
    console.log(data); // successful response
    // data is already a JavaScript object, so there is no need for JSON.parse.
    // Assuming the index item stores the latest timestamp in an attribute named 'tm'
    // (adjust if your attribute is named differently).
    var latestTime = data.Item.tm.N;
    console.log("latestTime: " + latestTime);
    // Inside this callback we have the weather_index tm value,
    // so query the weather table here.
    var weatherParams = {
      Key: {
        deviceId: {S: deviceId},
        tm: {N: latestTime}
      },
      TableName: 'weather'
    };
    db.getItem(weatherParams, function (err, data) {
      if (err) console.log(err); // an error occurred
      else {
        console.log(data); // successful response
        // TODO: Use the database response data here
      }
    });
  }
});
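For completeness, recent versions of the aws-sdk also let you flatten this nesting with .promise() and async/await; a minimal sketch, using the same tables and the same 'tm' attribute assumption as above:
async function getCurrentWeather(deviceId) {
    // First look up the timestamp of the most recent observation.
    var indexResult = await db.getItem({
        TableName: 'weather_index',
        Key: { deviceId: { S: deviceId } }
    }).promise();

    var latestTime = indexResult.Item.tm.N;

    // Then fetch the full observation for that timestamp.
    var weatherResult = await db.getItem({
        TableName: 'weather',
        Key: {
            deviceId: { S: deviceId },
            tm: { N: latestTime }
        }
    }).promise();

    return weatherResult.Item;
}

getCurrentWeather("540056000a51343334363138")
    .then((item) => console.log(item))
    .catch((err) => console.error(err));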