Retrieving rows from dynamoDB from Lambda based on primary key? - node.js

Below is the code of my Lambda function. I'm having trouble querying rows based on the timestamps. My plan is to get all the rows from 5 seconds before the current time to the current time in milliseconds. TimeMillis(Number) stores the current time in miiliseconds and it is the primary key and the range key is PhoneId(String). Please help me with the solution or is there any way to overcome the problem?
I'm not able to get the output, it is throwing error.
'use strict';
var AWS = require("aws-sdk");
AWS.config.update({
region: "us-east-1",
});
var docClient = new AWS.DynamoDB.DocumentClient();
exports.handler = function(event, context, callback) {
var timemillis = new Date().getTime();
var timemillis1 = timemillis - 5000;
var params = {
TableName: 'Readings',
KeyConditionExpression: "TimeMillis = :tm and TimeMillis BETWEEN :from AND :to",
ExpressionAttributeValues: {
":tm" : "TimMillis",
":from" : timemillis1,
":to" : timemillis
}
};
docClient.query(params, function(err, data) {
if(err){
callback(err, null);
}
else{
callback(null, data);
}
});
};
Here is my DynamoDB table image.

You cannot have multiple conditions inside a KeyConditionExpression. What you can do is use a FilterExpression with KeyConditionExpression to narrow down the result set.
Quoting from the documentation,
Use the KeyConditionExpression parameter to provide a specific value
for the partition key. The Query operation will return all of the
items from the table or index with that partition key value. You can
optionally narrow the scope of the Query operation by specifying a
sort key value and a comparison operator in KeyConditionExpression. To
further refine the Query results, you can optionally provide a
FilterExpression. A FilterExpression determines which items within the
results should be returned to you. All of the other results are
discarded.
Also for the test, only supported test for partition key is equality. Other conditions can be applied to sort key.
partitionKeyName = :partitionkeyval AND sortKeyName = :sortkeyval
Another way is to create a GSI which support further querying. By the way, traditional RDBMS thinking would not work best with DynamoDB. You can read about best practices here.

Related

How to get data from DynamoDB sorted by timestamp ? - NodeJs

I want to get data form dynamoDB, shorted by timestamp. Anyone can help? My code is given below.
const AWS = require("aws-sdk");
const dynamoDbClient = new AWS.DynamoDB.DocumentClient();
const USERS_TABLE = process.env.USERS_TABLE;
const getNews = async (req, res) => {
try {
//dynamodb params
const params = {
TableName: USERS_TABLE,
FilterExpression: "PK = :this",
ExpressionAttributeValues: { ":this": "newsTable" },
};
//get dynamodb data
const data = await dynamoDbClient.scan(params).promise();
res.status(200).send({ data: data });
} catch (e) {
return res.status(400).send({ message: e.message });
}
};
module.exports = { getNews };
Option 1: Keep Scan; Sort client-side
Works for small tables only. Single Scan call will scan only the first 1 MB of data in the table.
If you're doing scan operation as in your code example, it's impossible to get results sorted from DynamoDB. The only way to sort them is on client-side after you download all your data from database.
Replace:
res.status(200).send({ data: data });
With:
res.status(200).send({data: data.sort((a, b) => b.date - a.date)});
However, this is not recommended, since Scan operation without pagination will scan only 1st MB of data in your table. So you could get partial results. Possible solutions are:
Option 2: (recommended) Don't use Scan; Use Query; Sort by secondary key
This will work if you have your timestamp in the secondary key of the table
Don't use Scan; Use Query -- that way you can sort your data by SK (secondary key) by passing the ScanIndexForward: false to get the most recent results first.
Assuming you have such a table schema, where a timestamp is in the secondary key:
PK
SK
email
newsTable
2022-01-01
some-1#example.com
newsTable
2022-02-01
some-2#example.com
newsTable
2022-03-01
some-3#example.com
You can change your code from:
const params = {
TableName: USERS_TABLE,
FilterExpression: 'PK = :this',
ExpressionAttributeValues: {':this': 'newsTable'},
};
//get dynamodb data
const data = await dynamoDbClient.scan(params).promise();
To:
const params = {
TableName: USERS_TABLE,
KeyConditionExpression: 'PK = :this',
ExpressionAttributeValues: {':this': 'newsTable'},
ScanIndexForward: false,
};
//get dynamodb data
const data = await dynamoDbClient.query(params).promise();
And it will return results sorted from database already.
If you don't have a timestamp in your secondary key, and you cannot add it, you can add Local Secondary Index or Global Secondary Index.
Option 3: Keep Scan, but paginate; Sort client-side
Works if you cannot change DB schema and cannot switch your code to the Query operation.
Beware, it will be much more expensive, much slower. The larger the table, the slower it gets.
If you absolutely need to use Scan, you need to paginate through all the pages of the Scan operation, and then sort results in the JS code, like I described before. I've developed a handy library that makes scanning in parallel and supports pagination.

I can't numerically order the results of a GET request from Dynamo DB with Lambda

I am trying to return data from a DynamoDB table with results ordered numerically by the Primary Sort Key. I am using a Lambda scan function to return the data but it is not returning in numerical order.
The Primary Sort Key is 'time', how can i achieve this?
const AWS = require('aws-sdk');
const dynamodb = new AWS.DynamoDB({region: 'eu-west-2', apiVersion: '2012-08-10'});
exports.handler = (event, context, callback) => {
const params = {
TableName: "finalTrickstar",
};
dynamodb.scan(params, function(err, data){
if (err) {
console.log(err);
callback(err);
} else {
console.log(data);
const items = data.Items.map(
(dataField) => {
return {time: dataField.time.S, day: dataField.day.S, show: dataField.show.S, showID: dataField.showID.S};
}
);
callback(null, items);
}
});
};
I thought having a Primary Sort Key would return results ordered by the key but instead they are seemingly not ordered at all.
I don't know what you mean by "Primary Sort Key" but DynamoDB supports two types of primary keys:
partition key
partition key + sort key (composite primary key)
If your table has a primary key composed of a partition key and a sort key then the data will be kept and retrieved sorted by the sort key. If the data type of the sort key is Number, the results are returned in numeric order; otherwise, the results are returned in order of UTF-8 bytes. By default, the sort order is ascending. To reverse the order, set the ScanIndexForward parameter to false.
Make sure your table is configured correctly and that you're not expecting the data to be sorted in any other way than by the UTF-8 bytes of the sort key if it's not a number.

How can I filter entries in dynamodb which has time_stamp more than 1 day?

I have a lambda function which queries dynamoDb table userDetailTable, and I want to filter only the entries whose timestamp(recorded in ms) has exceeded 1 day (86400000 ms) when subtracted from (new Date.getTime()). Can anyone suggest me the way for doing it in the right way ?
Dynamo Table has GSIndex as user_status which has value 'active' for all the entries and epoch_timestamp(timestamp in ms) as attribute used for filter expression.
In Lambda I am checking epoch_timestamp and trying to subtract epoch_timestamp with (new Date.getTime()) in the query, which I am not sure is even possible. Below is the code which has my query.
function getUserDetails(callback){
var params = {
TableName: 'userDetailTable',
IndexName: 'user_status-index',
KeyConditionExpression: 'user_status = :user_status',
FilterExpression: `expiration_time - ${new Date().getTime()} > :time_difference`,
ExpressionAttributeValues: {
':user_status': 'active',
':time_difference': '86400000' // 1 day in ms
}
};
docClient.query(params, function(err, data) {
if(err) {
callback(err, null)
} else{
callback(null, data)
}
})
}
Here's a rewrite of your code:
function getUserDetails(callback){
var params = {
TableName: 'userDetailTable',
IndexName: 'user_status-index',
KeyConditionExpression: 'user_status = :user_status',
FilterExpression: 'epoch_timestamp > :time_threshold_ms',
ExpressionAttributeValues: {
':user_status': 'active',
':time_threshold_ms': Date.now() - 86400000
}
};
docClient.query(params, function(err, data) {
if(err) {
callback(err, null)
} else{
callback(null, data)
}
})
}
Specifically, in the FilteExpression you cannot compute any date. Instead you should compare the item's epoch_timestamp attribute with time_threshold_ms which you compute once (for all items inspected by the query) at ExpressionAttributeValues
Please note though that you are can make this more efficient if you define a GSI which uses epoch_timestamp as its sort key (user_status can remain the partition key). Then, instead of placing the condition in the FilterExpression you will need to move it into KeyConditionExpression.
Also, when you use a FilterExpression you need to check the LastEvaluatedKey of the response. If it is not empty you need to issue a followup query with LastEvaluatedKey copied into the request's ExclusiveStartKey. Why? due to filtering it is possible that you will get no results from the "chunk" (or "page") examined by DDB. DDB only examines a single "chunck" at each query invocation. Issuing a followup query with ExclusiveStartKey will tell DDB to inspect the next "chunk".
(see https://dzone.com/articles/query-dynamodb-items-withnodejs for further details on that)
Alternatively, if you do not use filtering you are advised to use pass a Limit value in the request to tell DDB to stop after the desired number of items. However, if you do use filtering do not pass a Limit value as it will reduce the size of the "chunk" and you will need to do many more followup queries until you get your data.
You cannot perform a calculation in the filter expression but you can calculate it outside and use the result with a new inequality.
I think you are looking for items expiring after one day from now.
Something like
FilterExpression: 'expiration_time > :max_time',
ExpressionAttributeValues: {
':user_status': 'active',
':max_time': new Date().getTime() + 86400000 // 1 day in ms // i.e. one day from now.
}

dynamodb: query with hash key only

I have this table:
DomainId string HashKey
EmailId string RangeKey
I was wondering if it's possible query this table with HashKey only, like this:
var AWS = require("aws-sdk");
var client = new AWS.DynamoDB.DocumentClient();
var dm = 'infodinamica.cl';
//Set params
var params = {
TableName : 'table-name',
KeyConditionExpression: "DomainId = :dm",
ExpressionAttributeValues: {
":dm": dm
},
Select: 'COUNT'
};
client.query(params, (err, data) => {
if(err)
console.log(JSON.stringify(err, null, 2));
else
console.log(JSON.stringify(data, null, 2));
}
ps: note that this table has HashKey and RangeKey.
Yes, it is possible to query the data using Hash Key only using query API.
Use the KeyConditionExpression parameter to provide a specific value
for the partition key. The Query operation will return all of the
items from the table or index with that partition key value. You can
optionally narrow the scope of the Query operation by specifying a
sort key value and a comparison operator in KeyConditionExpression.
You can use the ScanIndexForward parameter to get results in forward
or reverse order, by sort key.

AWS DynamoDB Node.js scan- certain number of results

I try to get first 10 items which satisfy condition from DynamoDB using lambda AWS. I was trying to use Limit parameter but it is (basis on that website)
https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/DynamoDB.html#scan-property
"maximum number of items to evaluate (not necessarily the number of matching items)".
How to get 10 first items which satisfy my condition?
var AWS = require('aws-sdk');
var db = new AWS.DynamoDB();
exports.handler = function(event, context) {
var params = {
TableName: "Events", //"StreamsLambdaTable",
ProjectionExpression: "ID, description, endDate, imagePath, locationLat, locationLon, #nm, startDate, #tp, userLimit", //specifies the attributes you want in the scan result.
FilterExpression: "locationLon between :lower_lon and :higher_lon and locationLat between :lower_lat and :higher_lat",
ExpressionAttributeNames: {
"#nm": "name",
"#tp": "type",
},
ExpressionAttributeValues: {
":lower_lon": {"N": event.low_lon},
":higher_lon": {"N": event.high_lon}, //event.high_lon}
":lower_lat": {"N": event.low_lat},
":higher_lat": {"N": event.high_lat}
}
};
db.scan(params, function(err, data) {
if (err) {
console.log(err); // an error occurred
}
else {
data.Items.forEach(function(record) {
console.log(
record.name.S + "");
});
context.succeed(data.Items);
}
});
};
I think you already know the reason behind this: the distinction that DynamoDB makes between ScannedCount and Count. As per this,
ScannedCount — the number of items that were queried or scanned,
before any filter expression was applied to the results.
Count — the
number of items that were returned in the response.
The fix for that is documented right above this:
For either a Query or Scan operation, DynamoDB might return a LastEvaluatedKey value if the operation did not return all matching items in the table. To get the full count of items that match, take the LastEvaluatedKey value from the previous request and use it as the ExclusiveStartKey value in the next request. Repeat this until DynamoDB no longer returns a LastEvaluatedKey value.
So, the answer to your question is: use the LastEvaluatedKey from DynamoDB response and Scan again.

Resources