Get all items from a table without Scan - node.js

At the moment I have a function to get all items from a DynamoDB table using the SCAN option. This is an expensive way to do it and I would prefer using the QUERY option. But looking at the docs there does not seem to be a simple way to retrieve all items using the QUERY option - it expects some sort of condition.
Example
var params = {
    TableName: "Movies",
    KeyConditionExpression: "#yr = :yyyy",
    ExpressionAttributeNames: {
        "#yr": "year"
    },
    ExpressionAttributeValues: {
        ":yyyy": 1985
    }
};

docClient.query(params, function(err, data) {
    if (err) {
        console.error("Unable to query. Error:", JSON.stringify(err, null, 2));
    } else {
        console.log("Query succeeded.");
        data.Items.forEach(function(item) {
            console.log(" -", item.year + ": " + item.title);
        });
    }
});
Expected
var params = {
    TableName: "Movies"
};

docClient.query(params, function(err, data) {
    if (err) {
        console.error("Unable to query. Error:", JSON.stringify(err, null, 2));
    } else {
        console.log("Query succeeded.");
        data.Items.forEach(function(item) {
            console.log(" -", item.year + ": " + item.title);
        });
    }
});
Is it possible to retrieve all data from a table using QUERY? I thought of using BEGINS_WITH or such but all the primary keys are different/random and do not start with a specific character or phrase.

Technically, a query of all items in an Amazon DynamoDB table would return the same amount of data that a scan returns, so there should be no difference in cost.
The usual reduced efficiency of a scan operation comes from the fact that it has to read the whole table and then filter out values to produce the result you want, essentially adding an extra step of removing data from the result set. If you want to read the whole table without filtering, both scan and query have to retrieve all values, and there is no additional filtering step.

The only way to do this via query would be to loop over every partition key individually (a sketch of that loop follows below).
I'd suggest you look at a secondary index built around your query, which will be more efficient: https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/SecondaryIndexes.html
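If you do know every partition key value up front (which the asker here does not), a minimal sketch of that per-key loop might look like the following. The table and attribute names are borrowed from the Movies example above, and the years array is an assumed external source of the key values:
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({ region: 'us-east-1' });

async function queryAllByPartitionKeys(years) {
    let allItems = [];
    for (const year of years) {
        let params = {
            TableName: 'Movies',
            KeyConditionExpression: '#yr = :y',
            ExpressionAttributeNames: { '#yr': 'year' }, // "year" is a reserved word
            ExpressionAttributeValues: { ':y': year }
        };
        // Each partition key still needs its own pagination loop.
        let data;
        do {
            data = await docClient.query(params).promise();
            allItems = allItems.concat(data.Items);
            params.ExclusiveStartKey = data.LastEvaluatedKey;
        } while (data.LastEvaluatedKey);
    }
    return allItems;
}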

If you want to get all the data you can scan the whole table, but I recommend fetching it with a limit and pagination, because pulling millions of items at once can exhaust memory.
This first approach gets all your data:
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({
    apiVersion: '2012-08-10',
    region: 'ap-southeast-1' // put your region
});

exports.handler = async (event, context, callback) => {
    const tableName = event.params.querystring.tablename;
    let params = {
        TableName: tableName
    };
    let scanResults = [];
    let items;
    do {
        items = await docClient.scan(params).promise();
        items.Items.forEach((item) => scanResults.push(item));
        params.ExclusiveStartKey = items.LastEvaluatedKey;
    } while (typeof items.LastEvaluatedKey != "undefined");
    callback(null, scanResults);
};
But with the approach below, after you get a page of data you need to post the LastEvaluatedKey back from the frontend, so it can be used as the ExclusiveStartKey of the next request (a usage sketch follows the code).
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({
    apiVersion: '2012-08-10',
    region: 'ap-southeast-1' // put your region
});

exports.handler = async (event, context, callback) => {
    const tableName = event.params.querystring.tablename;
    let pageSize = event.params.querystring.pagesize;
    let lastItem = event.params.querystring.lastItem;
    const params = {
        TableName: tableName,
        Limit: pageSize,
    };
    if (lastItem) {
        params.ExclusiveStartKey = { id: lastItem };
    }
    const response = await docClient.scan(params).promise();
    return {
        items: response.Items,
        lastItem: response.LastEvaluatedKey
    };
};
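As a usage sketch: the first request omits lastItem, and each subsequent request echoes back the lastItem value from the previous response. The endpoint URL is hypothetical, and the querystring names are the ones the handler above reads:
async function fetchAllPages() {
    let lastItem;
    let all = [];
    do {
        const qs = new URLSearchParams({ tablename: 'Movies', pagesize: '25' });
        if (lastItem) qs.set('lastItem', lastItem);
        // Hypothetical API Gateway endpoint in front of the Lambda above.
        const res = await fetch('https://example.com/items?' + qs);
        const page = await res.json();
        all = all.concat(page.items);
        // LastEvaluatedKey has the shape { id: ... } for this table.
        lastItem = page.lastItem && page.lastItem.id;
    } while (lastItem);
    return all;
}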

Related

Delete all items in DynamoDB using Lambda?

Using Lambda (node.js) - how do I delete all the items in a DynamoDB table?
There are 500K rows in the table.
I have tried using the scan method and then looping through each item, calling the delete method, but it only deletes up to about 3,000 rows.
Code
exports.handler = function(context, callback) {
    getRecords().then((data) => {
        data.Items.forEach(function(item) {
            deleteItem(item.Id).then((data1) => {
            });
        });
    });
};

var deleteItem = function(id) {
    var params = {
        TableName: "TableName",
        Key: {
            "Id": id
        },
    };
    return new Promise(function(resolve, reject) {
        client.delete(params, function(err, data) {
            if (err) {
                reject(err);
            } else {
                resolve();
            }
        });
    });
}

function getRecords() {
    var params = {
        TableName: 'TableName',
        IndexName: 'Type-index',
        KeyConditionExpression: 'Type = :ty',
        ExpressionAttributeValues: {
            ':ty': "1"
        },
        ProjectionExpression: "Id",
    };
    return new Promise(function(resolve, reject) {
        client.query(params, function(err, data) {
            if (err) {
                reject(err);
            } else {
                resolve(data);
            }
        });
    });
}
There is already one right answer, but here is another code snippet to delete all records from DynamoDB.
const AWS = require("aws-sdk");
AWS.config.update({
region: "us-east-1",
});
const docClient = new AWS.DynamoDB.DocumentClient();
const getAllRecords = async (table) => {
let params = {
TableName: table,
};
let items = [];
let data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
while (typeof data.LastEvaluatedKey != "undefined") {
params.ExclusiveStartKey = data.LastEvaluatedKey;
data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
}
return items;
};
const deleteItem = (table, id) => {
var params = {
TableName: table,
Key: {
id: id,
},
};
return new Promise(function (resolve, reject) {
docClient.delete(params, function (err, data) {
if (err) {
console.log("Error Deleting ", id,err);
reject(err);
} else {
console.log("Success Deleting ", id,err);
resolve();
}
});
});
};
exports.handler = async function (event, context, callback) {
try {
const tableName = "<table>";
// scan and get all items
const allRecords = await getAllRecords(tableName);
// delete one by one
for (const item of allRecords) {
await deleteItem(tableName, item.id);
}
callback(null, {
msg: "All records are deleted.",
});
} catch (e) {
callback(null, JSON.stringify(e, null, 2));
}
};
A Scan operation consumes read capacity and returns at most 1 MB of data per request. When that limit is reached, the Scan stops and returns only what it has found so far. If you need more, you need to issue another Scan request.
Thus, you'll need two loops: 1) a loop to delete all records returned by each Scan; 2) a loop to keep scanning multiple times, until you reach the end of the table.
Make sure you use consistent reads, or wait a second or two before issuing another Scan, otherwise you may get repeated items in different Scans (a sketch of this follows the code below).
exports.handler = function(context, callback) {
    clearRecords();
};

clearRecords = function() {
    getRecords().then((data) => {
        data.Items.forEach(function(item) {
            deleteItem(item.Id).then((data1) => {});
        });
        clearRecords(); // Will call the same function over and over
    });
}
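For the consistent-read suggestion above, the only change is a flag on the Scan parameters. A minimal sketch; note that ConsistentRead is only valid against the base table or a local secondary index, so it would not work if the asker's Type-index is a global secondary index:
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient();

const params = {
    TableName: 'TableName',
    ProjectionExpression: 'Id',
    ConsistentRead: true // strongly consistent read; costs twice the read capacity
};
docClient.scan(params, function(err, data) {
    if (err) console.error(err);
    else console.log('Scanned', data.Items.length, 'items consistently');
});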
Observe that Lambda has a timeout limit of 15 minutes. Since you have 500K items in your table, it's likely that your Lambda will time out and you'll need to trigger it more than once. You could also make your Lambda invoke itself after, say, 14 minutes and 50 seconds; take a look at the AWS SDK documentation for invoking Lambda functions (a sketch follows below). For this, you might also want to check the getRemainingTimeInMillis() method on the context object.
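A minimal sketch of that self-invocation pattern; deleteBatch() is a hypothetical helper that deletes one page of items and returns false once the table is empty:
const AWS = require('aws-sdk');
const lambda = new AWS.Lambda();

exports.handler = async (event, context) => {
    let moreWork = true;
    while (moreWork) {
        // Leave a safety margin before the 15-minute Lambda timeout.
        if (context.getRemainingTimeInMillis() < 10000) {
            await lambda.invoke({
                FunctionName: context.functionName,
                InvocationType: 'Event', // asynchronous, fire-and-forget
                Payload: JSON.stringify(event)
            }).promise();
            return 'Re-invoked self to continue deleting';
        }
        moreWork = await deleteBatch(); // hypothetical page-delete helper
    }
    return 'Table cleared';
};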

Stop Lambda Triggers

I have some records in DynamoDB and a trigger associated with them, configured via the serverless.yml file.
Below is the events configuration:
events:
  - stream:
      type: dynamodb
      arn:
        Fn::GetAtt:
          - myTable
          - StreamArn
      batchSize: 1
But I have a requirement that, based on some flag, I should stop the execution of the lambda for all records if one record ends in failure, and subsequently all the records for that transaction have to be deleted from DynamoDB.
I noticed that even after deleting the records from DynamoDB, the trigger still continues. Is there a way to get the existing triggers with respect to the context and stop them all?
P.S. I am using Node.js.
Code and Steps
Checker.js -- talks to an external system, adds the records to the specified DynamoDB table, and exits. The ValidateData function below is invoked via events from DynamoDB; its serverless configuration is below.
ValidateData:
  handler: ValidateData.handler
  memorySize: 1536
  timeout: 300
  events:
    - stream:
        type: dynamodb
        arn:
          Fn::GetAtt:
            - kpiTaskTable
            - StreamArn
        batchSize: 1
ValidateData --
async.waterfall([
    // go get kpis
    function getKPIs(next) {
        request({
            agent: agent,
            uri: getKPIsURL(endpoint, APIKey),
            maxAttempts: retryCount,
            retryDelay: retryDelayTime,
            retryStrategy: request.RetryStrategies.HTTPOrNetworkError
        }, function (error, response, body) {
            if (error) {
                console.log("ERROR: Error whilst fetching KPIs: " + error);
                next(error);
            } else {
                // need to add in here a check that at least one KPI was returned, otherwise error
                kpis = JSON.parse(body);
                if (kpis.constructor != Array) {
                    console.log("ERROR: KPIs are not of type Array");
                    next(error);
                } else {
                    next(null);
                }
            }
        });
    }................................
    function (err) {
        if (err) {
            console.log("ERROR: Something has gone wrong: " + err);
            var stopReportGen = process.env.STOP_REPORT_CREATION_ON_ERROR;
            if (stopReportGen === "true") {
                console.log("Deleting records from DynamoDB for report ID " + reportId);
                kpiUtil.deleteRecordsFromDynamoDB(reportId).then(function () {
                    s3Api.deleteFile(reportBucket, reportName, retryCount, retryDelayTime).then(function () {
                        console.log("INFO: The temp file is deleted from the S3 bucket");
                        callback(null, "ERROR: " + sourceId + " Report ID: " + reportId);
                    }).catch(function (err) {
                        console.log("ERROR: Error in deleting the temp file from the S3 bucket");
                        callback(null, "ERROR: " + sourceId + " Report ID: " + reportId);
                    });
                });
            }
Delete from DynamoDB -- deleting the records from the DB:
var AWS = require('aws-sdk');
var fs = require('fs');
var path = require('path');
var zlib = require('zlib');

(function (exports) {
    deleteRecordsFromDynamoDB = function (reportId) {
        return new Promise(function (resolve, reject) {
            var docClient = new AWS.DynamoDB.DocumentClient();
            var table = process.env.KPI_TASK_TABLE;
            var params = {
                TableName: table,
                FilterExpression: "#reportId = :reportId_val",
                ExpressionAttributeNames: {
                    "#reportId": "reportId",
                },
                ExpressionAttributeValues: { ":reportId_val": parseInt(reportId) }
            };
            docClient.scan(params, onScan);
            var count = 0;

            function onScan(err, data) {
                if (err) {
                    console.error("ERROR: Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
                    reject(err);
                } else {
                    console.log("Scan succeeded for reportID :: " + reportId);
                    data.Items.forEach(function (itemdata) {
                        var delParams = {
                            TableName: table,
                            Key: {
                                "reportSource": itemdata.reportSource
                            }
                        };
                        console.log("Attempting a conditional delete...");
                        docClient.delete(delParams, function (err, data) {
                            if (err) {
                                console.error("ERROR: Unable to delete item. Error JSON:", JSON.stringify(err, null, 2));
                                reject(err);
                            } else {
                                console.log("DeleteItem succeeded:", JSON.stringify(data, null, 2));
                            }
                        });
                        console.log("INFO: Item :", ++count, JSON.stringify(itemdata));
                    });
                    // continue scanning if we have more items
                    if (typeof data.LastEvaluatedKey != "undefined") {
                        console.log("Scanning for more...");
                        params.ExclusiveStartKey = data.LastEvaluatedKey;
                        docClient.scan(params, onScan);
                    } else {
                        resolve("success");
                    }
                }
            }
        });
    }
    exports.deleteRecordsFromDynamoDB = deleteRecordsFromDynamoDB;
}(typeof exports === 'undefined' ? this['deleteRecordsFromDynamoDB'] = {} : exports))
Based on the above description, my understanding is that deleting the items will create streams to Lambda as well. You can ignore the delete streams in two ways:
1) Check the eventName in each Record. If the eventName is REMOVE, you can ignore that stream record in the Lambda function (see the sketch at the end of this answer).
2) Before deleting the items in DynamoDB, disable the stream on the DynamoDB table using the UpdateTable API.
Please note that UpdateTable is an asynchronous operation, so it will take a while for the change to take effect. The items should not be deleted until the stream is disabled. Otherwise, implement both options 1 and 2 to be on the safe side.
var params = {
    TableName: "SomeTableName",
    StreamSpecification: {
        StreamEnabled: false
    }
};
dynamodb.updateTable(params, function(err, data) {
    if (err) console.log(err, err.stack); // an error occurred
    else console.log(data);               // successful response
});
You may need to enable the stream when you would like to have the Lambda trigger back in operation.
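For option 1, a minimal sketch of filtering out REMOVE records inside the stream handler; processRecord() is a hypothetical stand-in for the existing per-record logic:
exports.handler = async (event) => {
    for (const record of event.Records) {
        // Stream records carry an eventName of INSERT, MODIFY or REMOVE;
        // skip the ones produced by deletes.
        if (record.eventName === 'REMOVE') {
            console.log('Ignoring REMOVE record for key', JSON.stringify(record.dynamodb.Keys));
            continue;
        }
        await processRecord(record); // hypothetical per-record logic
    }
};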

Getting null value when trying to query value which is not present in dynamo db using node.js

I am new to DynamoDB and node.js. I have written code that queries the database (DynamoDB) for an element entered by the user. I am able to verify a match, but when the user tries a number which is not present in the database, I get a null value.
My table is named "DevReferenceNumber" and has only one column, the primary key "referencenumber".
'use strict';
var AWS = require('aws-sdk');
var docClient = new AWS.DynamoDB.DocumentClient({ region: 'us-east-1' });

function close(sessionAttributes, fulfillmentState, message) {
    return {
        sessionAttributes,
        dialogAction: {
            type: 'Close',
            fulfillmentState,
            message,
        },
    };
}

exports.handler = (event, context, callback) => {
    try {
        console.log(`event.bot.name=${event.bot.name}`);
        if (event.bot.name != 'getCustomerReferenceNumber') {
            callback('Invalid Bot Name');
        }
        dispatch(event, (response) => {
            callback(null, response)
        });
    } catch (err) {
        callback("Error occurred while querying");
    }
};

function dispatch(intentRequest, callback) {
    console.log(`dispatch UserID => ${intentRequest.userId}, intentName => ${intentRequest.currentIntent.name}`);
    const intentName = intentRequest.currentIntent.name;
    if (intentName === "checkReferenceNumber") {
        return referenceNumber(intentRequest, callback);
    }
}

function referenceNumber(intentRequest, callback) {
    const enteredReferenceNumber = intentRequest.currentIntent.slots.ReferenceNumber;
    const sessionAttributes = intentRequest.sessionAttributes;
    console.log("User has entered reference number --> " + enteredReferenceNumber);
    var params = {
        TableName: "DevReferenceNumber",
        KeyConditionExpression: "#refer = :ref",
        ProjectionExpression: "#refer",
        ExpressionAttributeNames: {
            "#refer": "referencenumber"
        },
        ExpressionAttributeValues: {
            ":ref": parseInt(enteredReferenceNumber)
        }
    };
    docClient.query(params, function(err, data) {
        if (err) {
            callback(close(sessionAttributes, 'Fulfilled', {
                contentType: 'PlainText',
                content: 'Developer reference number is not matched with data from database'
            }));
        } else {
            data.Items.forEach(function(item) {
                console.log("User matched data is ==> " + item.referencenumber);
                callback(close(sessionAttributes, 'Fulfilled', {
                    contentType: 'PlainText',
                    content: 'Developer reference number is matched with data from database'
                }));
            });
        }
    });
}
It is expected that you get an empty result when there is no matching record. If you don't want a null from the Node callback, add custom logic that checks for an empty result set and returns data the way you want (a sketch follows below).
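A minimal sketch of that check, dropped into the query callback from the question above (the message strings are illustrative):
docClient.query(params, function(err, data) {
    if (err) {
        callback(err);
    } else if (data.Items.length === 0) {
        // No matching record: respond explicitly instead of staying silent.
        callback(close(sessionAttributes, 'Fulfilled', {
            contentType: 'PlainText',
            content: 'Reference number was not found in the database'
        }));
    } else {
        callback(close(sessionAttributes, 'Fulfilled', {
            contentType: 'PlainText',
            content: 'Reference number matched data from the database'
        }));
    }
});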

DynamoDB queries return no items within Lambda function

Basically, I have a DynamoDB connection within a Lambda function. Will post code below. This DynamoDB connection seems to be behaving properly - it's able to call the listTables and describeTable functions successfully, which means it has the right configuration - but querying it returns nothing, even for queries I know are correct and have tested on the DynamoDB console.
UPDATE: Was able to successfully query with a string on a separate index, but still unable to query based on a binary...
Here's a part of the Lambda function:
const AWS = require('aws-sdk');
const SNS = new AWS.SNS({ apiVersion: '2010-03-31', region: 'sa-east-1' });
const DDB = new AWS.DynamoDB({ apiVersion: '2012-08-10', region: 'sa-east-1' });
const Lambda = new AWS.Lambda({ apiVersion: '2015-03-31' });
const async = require('async');

const CREATE_NOTIFICATIONS = 'create-notifications';
const QUERY_TOKENS = 'query-tokens';

function getUsers(functionName, message, callback) {
    var msg = JSON.parse(message);
    var users = [];
    console.log(DDB);
    async.forEachOf(msg.targetsb64, function(value, index, cb) {
        console.log("getUsers b64: ", value);
        console.log(typeof(value));
        DDB.describeTable({
            TableName: 'tsGroups'
        }, function(err, data) {
            console.log(err);
            console.log(data.Table.KeySchema);
        });
        DDB.query({
            TableName: 'tsGroups',
            KeyConditionExpression: "identifier = :v_user",
            ExpressionAttributeValues: {
                ":v_user": { "B": value }
            }
        }, function(err, data) {
            if (err) {
                cb(err);
            } else {
                console.log("data: ", data);
                console.log("items: ", data.Items);
                data.Items.forEach(function(item) {
                    users.push.apply(users, item.users.BS);
                });
                cb();
            }
        });
    }, function(err) {
        if (err) {
            callback(err);
        } else {
            console.log("getUsers users: ", users);
            const promises = users.map((user) => invokeQueryTokens(functionName, msg, user));
            Promise.all(promises).then(() => {
                const result = `Users messaged: ${users.length}`;
                console.log(result);
                callback(null, result);
            });
        }
    });
}
I've tried using KeyConditions instead of KeyConditionExpression, to no avail. value refers to a base64 identifier string that's passed along from an earlier Lambda function - I've tried hard-coding the correct value; it doesn't help. The describeTable call is only there to show that DynamoDB is connecting properly, and in the correct region.
TL;DR: The data.Items value in the above code snippet is always an empty array, even when doing a query I know should return something. What's my error here?
Thanks, and cheers!

How do I batch delete with DynamoDB?

I am getting an error that "The provided key element does not match the schema". uuid is my primary partition key. I also have a primary sort key, version. I figured I could use batchWrite (docs) to delete all items with the same uuid.
My ES6 code is as follows:
delete(uuid) {
    const promise = new Promise();
    const params = {
        RequestItems: {
            [this.TABLE]: [
                {
                    DeleteRequest: {
                        Key: { uuid: uuid }
                    }
                }
            ]
        }
    };
    // this._client references the DocumentClient
    this._client.batchWrite(params, function(err, data) {
        if (err) {
            // this gets hit with error
            console.log(err);
            return promise.reject(err);
        }
        console.log(result);
        return promise.resolve(result);
    });
    return promise;
}
Not sure why it is erroring on the key that is the primary. I have seen posts about needing other indexes for times when I am searching by something that isn't a key. But I don't believe that's the case here.
Here is a batch write delete request sample. This code has been tested and works; if you adapt it to your requirements, it should work too.
Table Definition:
Bag - table name
bag - hash (partition) key
No sort key in the 'Bag' table
Batch Write Code:
var AWS = require("aws-sdk");
AWS.config.update({
    region: "us-west-2",
    endpoint: "http://localhost:8000"
});
var documentclient = new AWS.DynamoDB.DocumentClient();

var itemsArray = [];
var item1 = {
    DeleteRequest: {
        Key: {
            'bag': 'b1'
        }
    }
};
itemsArray.push(item1);

var item2 = {
    DeleteRequest: {
        Key: {
            'bag': 'b2'
        }
    }
};
itemsArray.push(item2);

var params = {
    RequestItems: {
        'Bag': itemsArray
    }
};

documentclient.batchWrite(params, function(err, data) {
    if (err) {
        console.log('Batch delete unsuccessful ...');
        console.log(err, err.stack); // an error occurred
    } else {
        console.log('Batch delete successful ...');
        console.log(data); // successful response
    }
});
Output:
Batch delete successful ...
{ UnprocessedItems: {} }
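One thing this happy-path output glosses over: batchWrite can return throttled or unprocessed entries under UnprocessedItems, and those need to be resubmitted. A minimal retry sketch, reusing the documentclient and params shape from the sample above (a production version should add exponential backoff between attempts):
function batchWriteWithRetry(params) {
    documentclient.batchWrite(params, function(err, data) {
        if (err) {
            console.log(err, err.stack);
        } else if (data.UnprocessedItems && Object.keys(data.UnprocessedItems).length > 0) {
            console.log('Retrying unprocessed items ...');
            // UnprocessedItems already has the RequestItems shape.
            batchWriteWithRetry({ RequestItems: data.UnprocessedItems });
        } else {
            console.log('Batch delete successful ...');
        }
    });
}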
This is doable with a Node Lambda, but there are a few things you need to consider to address concurrency while processing large databases:
Handle paging while querying all of the matching elements from a secondary index
Split into chunks of 25 requests, as per the BatchWrite/Delete requirements: https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
Above 40,000 matches you might need a 1-second delay between cycles: https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html
Here is a snippet that I wrote:
const AWS = require("aws-sdk");
const dynamodb = new AWS.DynamoDB.DocumentClient();
const log = console.log;
exports.handler = async (event) => {
log(event);
let TableName = event.tableName;
let params = {
let TableName,
FilterExpression: "userId = :uid",
ExpressionAttributeValues: {
":uid": event.userId,
},
};
let getItems = async (lastKey, items) => {
if (lastKey) params.ExclusiveStartKey = lastKey;
let resp = await dynamodb.scan(params).promise();
let items = resp.Items.length
? items.concat(resp.Items.map((x) => x.id))
: items;
if (resp.LastEvaluatedKey)
return await getItems(resp.LastEvaluatedKey, items);
else return items;
};
let ids = await getItems(null, []);
let idGroups = [];
for (let i = 0; i < ids.length; i += 25) {
idGroups.push(ids.slice(i, i + 25));
}
for (const gs of idGroups) {
let delReqs = [];
for (let id of gs) {
delReqs.push({ DeleteRequest: { Key: { id } } });
}
let RequestItems = {};
RequestItems[TableName] = delReqs;
let d = await dynamodb
.batchWrite({ RequestItems })
.promise().catch((e) => log(e));
}
log(ids.length + " items processed");
return {};
};
Not sure why nobody provided a proper answer.
Here's a Lambda I wrote in Node.js. It performs a full scan on the table, then batch deletes the items, 25 per request.
Remember to change TABLE_NAME.
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({ apiVersion: '2012-08-10' });

// const { TABLE_NAME } = process.env;
const TABLE_NAME = "CHANGE ME PLEASE";

exports.handler = async (event) => {
    let params = {
        TableName: TABLE_NAME,
    };
    let items = [];
    let data = await docClient.scan(params).promise();
    items = [...items, ...data.Items];
    while (typeof data.LastEvaluatedKey != 'undefined') {
        params.ExclusiveStartKey = data.LastEvaluatedKey;
        data = await docClient.scan(params).promise();
        items = [...items, ...data.Items];
    }
    let leftItems = items.length;
    let group = [];
    let groupNumber = 0;
    console.log('Total items to be deleted', leftItems);
    for (const i of items) {
        const deleteReq = {
            DeleteRequest: {
                Key: {
                    id: i.id,
                },
            },
        };
        group.push(deleteReq);
        leftItems--;
        if (group.length === 25 || leftItems < 1) {
            groupNumber++;
            console.log(`Batch ${groupNumber} to be deleted.`);
            const params = {
                RequestItems: {
                    [TABLE_NAME]: group,
                },
            };
            await docClient.batchWrite(params).promise();
            console.log(`Batch ${groupNumber} processed. Left items: ${leftItems}`);
            // reset
            group = [];
        }
    }
    const response = {
        statusCode: 200,
        // Uncomment below to enable CORS requests
        // headers: {
        //     "Access-Control-Allow-Origin": "*"
        // },
        body: JSON.stringify('Hello from Lambda!'),
    };
    return response;
};
Be aware that you need to follow these instructions:
src: https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
DeleteRequest - Perform a DeleteItem operation on the specified item. The item to be deleted is identified by a Key subelement:
Key - A map of primary key attribute values that uniquely identify the item. Each entry in this map consists of an attribute name and an attribute value. For each primary key, you must provide all of the key attributes. For example, with a simple primary key, you only need to provide a value for the partition key. For a composite primary key, you must provide values for both the partition key and the sort key.
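Tying that back to the original question: the asker's table has a composite primary key (uuid partition key plus version sort key), so each DeleteRequest must carry both attributes. A minimal sketch; the table name and version values are hypothetical and would normally come from a prior Query on the uuid:
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient();

const params = {
    RequestItems: {
        'MyTable': [ // hypothetical table name
            // With a composite key, the Key must include both attributes.
            { DeleteRequest: { Key: { uuid: 'abc-123', version: 1 } } },
            { DeleteRequest: { Key: { uuid: 'abc-123', version: 2 } } },
        ],
    },
};
docClient.batchWrite(params, (err, data) => {
    if (err) console.error(err);
    else console.log('Deleted both versions:', data);
});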
For batch delete, we can use batchWrite with DeleteRequest. Here is an example: we provide the tableName whose data is to be deleted, and the payload is an array of the ids we need to remove.
In a single request, 25 items can be deleted.
const AWS = require('aws-sdk');
const dynamodb = new AWS.DynamoDB.DocumentClient({ apiVersion: '2012-08-10' });

const tableName = "PlayerData";
const payload = [{ id: 101 }, { id: 105 }, { id: 106 }];

const deleteBatchData = async (tableName, payload, dynamodb) => {
    try {
        await dynamodb.batchWrite({
            RequestItems: {
                [tableName]: payload.map(item => {
                    return {
                        DeleteRequest: {
                            Key: {
                                id: item.id
                            }
                        }
                    };
                })
            }
        })
        .promise()
        .then((response) => {
            return response;
        })
        .catch((err) => {
            console.log("err ::", JSON.stringify(err));
        });
    } catch (err) {
        console.log('Error in deleteBatchData ', err);
    }
};
Why not use PartiQL? This approach is much more readable. (This too has a limit of 25 items per request, just like BatchWriteItem.)
// Import required AWS SDK clients and commands for Node.js.
import { BatchExecuteStatementCommand } from "@aws-sdk/client-dynamodb";
import { ddbDocClient } from "../libs/ddbDocClient.js";

const tableName = process.argv[2];
const movieYear1 = process.argv[3];
const movieTitle1 = process.argv[4];
const movieYear2 = process.argv[5];
const movieTitle2 = process.argv[6];

export const run = async (
    tableName,
    movieYear1,
    movieTitle1,
    movieYear2,
    movieTitle2
) => {
    try {
        const params = {
            Statements: [
                {
                    Statement: "DELETE FROM " + tableName + " where year=? and title=?",
                    Parameters: [{ N: movieYear1 }, { S: movieTitle1 }],
                },
                {
                    Statement: "DELETE FROM " + tableName + " where year=? and title=?",
                    Parameters: [{ N: movieYear2 }, { S: movieTitle2 }],
                },
            ],
        };
        const data = await ddbDocClient.send(
            new BatchExecuteStatementCommand(params)
        );
        console.log("Success. Items deleted.", data);
        return "Run successfully"; // For unit tests.
    } catch (err) {
        console.error(err);
    }
};
run(tableName, movieYear1, movieTitle1, movieYear2, movieTitle2);
