Delete all items in Dynamodb using Lambda? - node.js

Using Lambda (node.js) - how to delete all the items in the Dynamodb table?
There are 500K rows in the table
I have tried using the scan method and then looping through each item and calling the delete method, but it only gets through about 3,000 rows.
Code
const AWS = require('aws-sdk');
const client = new AWS.DynamoDB.DocumentClient();

exports.handler = function(event, context, callback) {
    getRecords().then((data) => {
        data.Items.forEach(function(item) {
            deleteItem(item.Id).then((data1) => {});
        });
    });
};

var deleteItem = function(id) {
    var params = {
        TableName: "TableName",
        Key: {
            "Id": id
        },
    };
    return new Promise(function(resolve, reject) {
        client.delete(params, function(err, data) {
            if (err) {
                reject(err);
            } else {
                resolve();
            }
        });
    });
};

function getRecords() {
    var params = {
        TableName: 'TableName',
        IndexName: 'Type-index',
        KeyConditionExpression: 'Type = :ty',
        ExpressionAttributeValues: {
            ':ty': "1"
        },
        ProjectionExpression: "Id",
    };
    return new Promise(function(resolve, reject) {
        client.query(params, function(err, data) {
            if (err) {
                reject(err);
            } else {
                resolve(data);
            }
        });
    });
}

There is already one right answer, but here is another code snippet to delete all records from DynamoDB.
const AWS = require("aws-sdk");
AWS.config.update({
region: "us-east-1",
});
const docClient = new AWS.DynamoDB.DocumentClient();
const getAllRecords = async (table) => {
let params = {
TableName: table,
};
let items = [];
let data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
while (typeof data.LastEvaluatedKey != "undefined") {
params.ExclusiveStartKey = data.LastEvaluatedKey;
data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
}
return items;
};
const deleteItem = (table, id) => {
var params = {
TableName: table,
Key: {
id: id,
},
};
return new Promise(function (resolve, reject) {
docClient.delete(params, function (err, data) {
if (err) {
console.log("Error Deleting ", id,err);
reject(err);
} else {
console.log("Success Deleting ", id,err);
resolve();
}
});
});
};
exports.handler = async function (event, context, callback) {
try {
const tableName = "<table>";
// scan and get all items
const allRecords = await getAllRecords(tableName);
// delete one by one
for (const item of allRecords) {
await deleteItem(tableName, item.id);
}
callback(null, {
msg: "All records are deleted.",
});
} catch (e) {
callback(null, JSON.stringify(e, null, 2));
}
};

A Scan operation consumes read capacity and returns at most 1 MB of data per request. When that limit is reached, the Scan stops and returns only what it has found so far, together with a LastEvaluatedKey. If you need more, you have to issue another Scan request.
Thus, you'll need two loops: 1) a loop to delete all records returned by each Scan; 2) a loop to keep scanning repeatedly until you reach the end of the table.
Make sure you use consistent reads, or wait a second or two before issuing another Scan; otherwise you may get repeated items across Scans.
exports.handler = function(event, context, callback) {
    clearRecords();
};

clearRecords = function() {
    getRecords().then((data) => {
        if (data.Items.length === 0) return; // nothing left: stop recursing
        data.Items.forEach(function(item) {
            deleteItem(item.Id).then((data1) => {});
        });
        clearRecords(); // Calls the same function over and over until the table is empty
    });
}
Observe that Lambda has a timeout limit of 15 minutes. Since you have 500K items in your table, it's likely that your Lambda will time out and you'll need to trigger it more than once. You could also make your Lambda call itself after, say, 14 minutes and 50 seconds; take a look at the AWS SDK documentation for triggering Lambda functions. For this, you might also want to check the getRemainingTimeInMillis() method on the context object.
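A minimal sketch of that self-invocation pattern, using the same SDK v2 style as the rest of this thread (deleteOneBatch is a hypothetical helper that deletes one page of items and returns true when the table is empty):
const AWS = require('aws-sdk');
const lambda = new AWS.Lambda();

exports.handler = async function (event, context) {
  // Keep working while at least ~10 seconds of execution time remain.
  while (context.getRemainingTimeInMillis() > 10000) {
    const done = await deleteOneBatch(); // hypothetical: deletes one page, true when empty
    if (done) return 'table is empty';
  }
  // Out of time: asynchronously re-invoke this same function to continue.
  await lambda.invoke({
    FunctionName: context.functionName,
    InvocationType: 'Event', // fire-and-forget
    Payload: JSON.stringify(event),
  }).promise();
  return 'continuing in a new invocation';
};
The invoking role needs lambda:InvokeFunction permission on the function itself for this to work.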

Related

Dynamodb batchWrite doesn't work in Lambda with Async

batchWrite doesn't work with async in Lambda. The code is supposed to insert one record, but it can't. However, when I remove async, it works.
const AWS = require("aws-sdk");
const documentClient = new AWS.DynamoDB.DocumentClient();
AWS.config.update({ region: "us-west-2" });
const tableName = "BlrSession-56pfbzohnvdqpac6asb627z2wu-dev";
exports.handler = async (event, context, callback) => {
try {
let games = [];
games.push({
PutRequest: {
Item: {
id: Math.random().toString(36).substring(2) + Date.now().toString(36),
},
},
});
let params = {
RequestItems: {
[tableName]: games,
},
};
documentClient.batchWrite(params, function (err, data) {
if (err) {
callback(err);
} else {
callback(null, data);
}
});
} catch (err) {
return err;
}
};
The result is below. There is no error.
Ensuring latest function changes are built...
Starting execution...
Result:
null
Finished execution.
Have you guys got the same behavior?
You can't combine the callback method with the async/await method. The easiest thing to do here is to make it all async/await (and don't forget the .promise() on the call).
const AWS = require("aws-sdk");
const documentClient = new AWS.DynamoDB.DocumentClient();
AWS.config.update({ region: "us-west-2" });
const tableName = "BlrSession-56pfbzohnvdqpac6asb627z2wu-dev";
exports.handler = async (event, context, callback) => {
try {
let games = [];
games.push({
PutRequest: {
Item: {
id: Math.random().toString(36).substring(2) + Date.now().toString(36),
},
},
});
let params = {
RequestItems: {
[tableName]: games,
},
};
return await documentClient.batchWrite(params).promise();
} catch (err) {
return err;
}
};

Get all items from a table without Scan

At the moment I have a function that gets all items from a DynamoDB table using the SCAN option. This is an expensive way to do it, and I would prefer using the QUERY option. But looking at the docs, there does not seem to be a simple way to retrieve all items using QUERY - it expects some sort of condition.
Example
var params = {
    TableName : "Movies",
    KeyConditionExpression: "#yr = :yyyy",
    ExpressionAttributeNames: {
        "#yr": "year"
    },
    ExpressionAttributeValues: {
        ":yyyy": 1985
    }
};

docClient.query(params, function(err, data) {
    if (err) {
        console.error("Unable to query. Error:", JSON.stringify(err, null, 2));
    } else {
        console.log("Query succeeded.");
        data.Items.forEach(function(item) {
            console.log(" -", item.year + ": " + item.title);
        });
    }
});
Expected
var params = {
    TableName : "Movies"
};

docClient.query(params, function(err, data) {
    if (err) {
        console.error("Unable to query. Error:", JSON.stringify(err, null, 2));
    } else {
        console.log("Query succeeded.");
        data.Items.forEach(function(item) {
            console.log(" -", item.year + ": " + item.title);
        });
    }
});
Is it possible to retrieve all data from a table using QUERY? I thought of using BEGINS_WITH or such but all the primary keys are different/random and do not start with a specific character or phrase.
Technically, a query of all items in an Amazon DynamoDB table would return the same amount of data that a scan returns, so there should be no difference in cost.
The usual reduced efficiency of a scan operation is due to the fact that it has to read the whole table and then filters out values to provide the result you want, essentially adding the extra step of removing data from the result set. If you want to read the whole table without filtering, both scan and query have to retrieve all values and there is no additional filtering step.
The only way to do this via query would be to loop over every partition key individually.
I'd suggest you look at a secondary index built around your query which will be more efficient: https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/SecondaryIndexes.html
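For example, here is a sketch of what querying through such an index might look like (the 'GenreIndex' GSI and its genre attribute are assumptions for illustration, not part of the Movies table above):
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient();

// Querying a GSI reads only the items stored under one index partition key,
// rather than touching the whole table the way a Scan does.
const params = {
  TableName: 'Movies',
  IndexName: 'GenreIndex',                      // hypothetical GSI
  KeyConditionExpression: 'genre = :g',
  ExpressionAttributeValues: { ':g': 'comedy' },
};

docClient.query(params, function (err, data) {
  if (err) console.error('Unable to query.', err);
  else console.log('Query succeeded.', data.Items.length, 'items');
});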
If you want to get all the data, you can scan the whole table, but I recommend fetching it with a limit and pagination, because pulling millions of items at once can exhaust memory.
This approach gets all your data:
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({
    apiVersion: '2012-08-10',
    region: 'ap-southeast-1' // put your region
});

exports.handler = async (event, context, callback) => {
    const tableName = event.params.querystring.tablename;
    let params = {
        TableName: tableName
    };
    let scanResults = [];
    let items;
    do {
        items = await docClient.scan(params).promise();
        items.Items.forEach((item) => scanResults.push(item));
        params.ExclusiveStartKey = items.LastEvaluatedKey;
    } while (typeof items.LastEvaluatedKey != "undefined");
    callback(null, scanResults);
};
With the paginated approach below, after you get a page of data, the frontend posts the returned LastEvaluatedKey back, and it is used as the ExclusiveStartKey of the next request.
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({
    apiVersion: '2012-08-10',
    region: 'ap-southeast-1' // put your region
});

exports.handler = async (event, context, callback) => {
    const tableName = event.params.querystring.tablename;
    let pageSize = event.params.querystring.pagesize;
    let lastItem = event.params.querystring.lastItem;
    try {
        const params = {
            TableName: tableName,
            Limit: pageSize,
        };
        if (lastItem) {
            params.ExclusiveStartKey = { id: lastItem };
        }
        const response = await docClient.scan(params).promise();
        return {
            items: response.Items,
            lastItem: response.LastEvaluatedKey
        };
    } catch (error) {
        throw error;
    }
};

Cannot return array if item not present in dynamodb

I have a function that takes an array of jobs as a parameter. It checks the existence of each job in the database by its id.
If a job is not present in the database, that particular job needs to be pushed into an array called latestJobs. I'm calling this function in my main.js file, but the code breaks and stops.
Below is my main.js code:
module.exports.app = async () => {
  try {
    ...
    const jobs = await getJobsForCountries(body);
    const latestJobs = await filterPreDraftedJobs(jobs);
    console.log('latestJobs', latestJobs);
  } catch (e) {
    console.error('Error:- ', e); // Comes to here
  }
};
My checker function looks like:
module.exports = async (jobs) => {
  let latestJobs = [];
  for (const job of jobs) {
    const params = {
      TableName: process.env.DYNAMODB_TABLE,
      Key: {
        id: job.Id
      }
    };
    await dynamoDb.get(params, (err, data) => {
      if (err) {
        latestJobs.push(job);
        console.log('Job not found in DB');
      }
    }).promise();
  }
  return latestJobs;
};
How can I fix this issue? I want latestJobs to contain the jobs that are not present in the database. Is there a DynamoDB function that can do this for me?
You are mixing callback, promise, and await styles. I would do it like this:
module.exports = async (jobs) => {
  let latestJobs = [];
  for (const job of jobs) {
    const params = {
      TableName: process.env.DYNAMODB_TABLE,
      Key: {
        id: job.Id
      }
    };
    try {
      const result = await dynamoDb.get(params).promise();
      // get() resolves with an empty object when the key is absent,
      // so check result.Item rather than relying on an error
      if (!result.Item) {
        latestJobs.push(job);
      }
    } catch (err) {
      console.error('Error fetching job', job.Id, err);
    }
  }
  return latestJobs;
};
Also, make sure that table is created and the region and name you are passing is correct.
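If the goal is only to test which ids exist, DynamoDB can also check many keys in one round trip with batchGet (up to 100 keys per request). A rough sketch under the same table and job shape as the question (it ignores UnprocessedKeys for brevity):
module.exports = async (jobs) => {
  const table = process.env.DYNAMODB_TABLE;
  const resp = await dynamoDb.batchGet({
    RequestItems: {
      [table]: {
        Keys: jobs.map((job) => ({ id: job.Id })), // max 100 keys per call
        ProjectionExpression: 'id',
      },
    },
  }).promise();
  // Collect the ids that were found, then keep the jobs that were not.
  const found = new Set(resp.Responses[table].map((item) => item.id));
  return jobs.filter((job) => !found.has(job.Id));
};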
I am not very familiar with DynamoDB, but based on the conversation above, the code should be something like this. I have tried to improve performance and make sure the code is modular and readable.
async function addUpdateJobs(jobs) {
  let paramsArray = [];
  for (const job of jobs) {
    const jobParams = {
      params: {
        TableName: process.env.DYNAMODB_TABLE,
        Key: {
          id: job.Id
        }
      },
      job: job
    };
    paramsArray.push(jobParams);
  }
  return await getJobs(paramsArray);
}

async function getJobs(paramsArray) {
  let latestJobs = [];
  for (const jobParam of paramsArray) {
    try {
      const result = await dynamoDb.get(jobParam.params).promise();
      // an absent item resolves to an empty object, not an error
      if (!result.Item) {
        latestJobs.push(jobParam.job);
      }
    } catch (err) {
      console.error('Error fetching job', err);
    }
  }
  return latestJobs;
}
PS: I was also going through error handling in Amazon DynamoDB.

How to update() DynamoDB

I'm working with DynamoDB for the first time and so far, not too bad. Having some understanding of Node is making this easier, but having none of NoSQL, well, that hurts! My application relies on quickly searching a GROUPS list and returning members from that group.
When I add the data using the console, I am able to create a JSON document like this:
"GROUP_ID":"GroupA", "MEMBER": ["MemberA", "MemberB", "MemberC"], "STATUS": ["OWNER", "MEMBER", "INVITED"], "ADD_DATE":[1234567, 2345671, 3456712]
I am using this to create the GROUP_ID as the primary key:
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({region: 'us-east-1'});

exports.handler = (event, context, callback) => {
    var tableName = "USER_GROUPS";
    var checkParams = {
        TableName: tableName,
        Key: {"GROUP_NAME": event.group_id}
    };
    var createParams = {
        TableName: tableName,
        Item: {"GROUP_NAME": event.group_id, "MEMBER_NAME": [event.device_id], "MEMBER_STATUS": ["OWNER"], "MEMBER_DATE": [event.date]}
    };
    var checkGroupExists = new Promise((resolve, reject) => {
        docClient.get(checkParams, (err, data) => {
            if (err) {
                reject(err);
            }
            if (data.Item) { // exists...
                console.log("found Group ID");
                reject();
            } else {
                console.log("Group ID not found...");
                resolve();
            }
        });
    });
    checkGroupExists.then((data) => {
        console.log("adding Group: ");
        console.log(data);
        docClient.put(createParams, (err, data) => {
            if (err) {
                console.log(err);
                callback(err, null);
            } else {
                console.log("added device: " + createParams.Item.GROUP_NAME);
                callback(null, {"created": createParams.Item.GROUP_NAME});
            }
        });
    }).catch((err) => {
        if (err) {
            callback(err, null);
        } else {
            callback(err, {"used": createParams.Item.GROUP_NAME});
        }
    });
};
but when I use update() I am adding to the array and end up with a type name inside the arrays:
["S":"MemberA", "S":"MemberB", "S":"MemberC"]
kind of thing.
This is where I have left off, quite frustrated!
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({region: 'us-east-1'});

exports.handler = (event, context, callback) => {
    var tableName = "USER_GROUPS";
    var checkParams = {
        TableName: tableName,
        Key: {"GROUP_NAME": event.group_id}
    };
    var updateParams = {
        TableName: tableName,
        Key: {"GROUP_NAME": event.group_id},
        //Item: {"MEMBER_NAME": [event.device_id], "MEMBER_STATUS": ["OWNER"], "MEMBER_DATE": [event.date]},
        AttributeUpdates: {
            "MEMBER_NAME": {
                Action: 'ADD',
                Value: event.device_id
            },
            "MEMBER_STATUS": {
                Action: 'ADD',
                Value: "INVITED"
            },
            "MEMBER_DATE": {
                Action: 'ADD',
                Value: event.date
            }
        },
    };
    var checkGroupExists = new Promise((resolve, reject) => {
        docClient.get(checkParams, (err, data) => {
            console.log(data);
            if (err) {
                reject(err);
            }
            if (data.Item && data.Item.GROUP_NAME) { // exists...
                console.log("found the group...");
                resolve();
            } else {
                reject();
            }
        });
    });
    checkGroupExists.then(() => {
        docClient.update(updateParams, (err, data) => {
            if (err) {
                console.log(err);
                callback(err, null);
            } else {
                callback(null, {"updated": updateParams.Key.GROUP_NAME});
            }
        });
    }).catch((err) => {
        if (err) {
            callback(err, null);
        } else {
            callback(err, {"noGroup": updateParams.Key.GROUP_NAME});
        }
    });
};
It seems I am lacking an understanding of how to process updates that include a NEW item in the member array. Any assistance would be appreciated!
Use list_append() in an UpdateExpression to append to the list attributes, along with a ConditionExpression to prevent updates to a non-existent group:
const AWS = require('aws-sdk');
const DB = new AWS.DynamoDB.DocumentClient({region: 'us-east-1'});

const UpdateExpression = 'SET ' + [
    '#members = list_append(#members, :member)',
    '#statuses = list_append(#statuses, :status)',
    '#dates = list_append(#dates, :date)',
].join(', ');

const ConditionExpression = [
    'attribute_type(#members, :L)',
    'attribute_type(#statuses, :L)',
    'attribute_type(#dates, :L)'
].join(' AND ');

const ExpressionAttributeNames = {
    '#members': 'MEMBER_NAME',
    '#statuses': 'MEMBER_STATUS',
    '#dates': 'MEMBER_DATE'
};

exports.handler = (event, context, callback) => {
    DB.update({
        TableName: 'USER_GROUPS',
        Key: { GROUP_NAME: event.group_id },
        ReturnValues: 'ALL_NEW',
        UpdateExpression: UpdateExpression,
        ConditionExpression: ConditionExpression,
        ExpressionAttributeNames: ExpressionAttributeNames,
        ExpressionAttributeValues: {
            ':member': [event.member_name],
            ':status': ['INVITED'],
            ':date': [event.date],
            ':L': 'L'
        }
    }, callback);
};
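One caveat worth noting: list_append() fails if the attribute does not exist yet, which is why the ConditionExpression above requires each attribute to already be a list. If you would rather upsert (an extension here, not part of the answer above), a common pattern is to seed the list with if_not_exists():
// Sketch: append to MEMBER_NAME, creating it as an empty list if missing.
const params = {
  TableName: 'USER_GROUPS',
  Key: { GROUP_NAME: 'GroupA' }, // example key
  UpdateExpression:
    'SET #members = list_append(if_not_exists(#members, :empty), :member)',
  ExpressionAttributeNames: { '#members': 'MEMBER_NAME' },
  ExpressionAttributeValues: {
    ':member': ['MemberD'], // hypothetical new member
    ':empty': [],           // used only when the attribute is absent
  },
};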

How do I batch delete with DynamoDB?

I am getting an error that "The provided key element does not match the schema". uuid is my primary partition key. I also have a primary sort key for version. I figured I could use batchWrite (docs) to delete all items with the same uuid.
My ES6 code is as follows:
delete(uuid) {
  const params = {
    RequestItems: {
      [this.TABLE]: [
        {
          DeleteRequest: {
            Key: { uuid: uuid }
          }
        }
      ]
    }
  };
  return new Promise((resolve, reject) => {
    // this._client references the DocumentClient
    this._client.batchWrite(params, function(err, data) {
      if (err) {
        // this gets hit with the error
        console.log(err);
        return reject(err);
      }
      console.log(data);
      return resolve(data);
    });
  });
}
Not sure why it is erroring on the key that is the primary key. I have seen posts about needing other indexes when searching by something that isn't a key, but I don't believe that's the case here.
Here is a batch write delete request sample. This code has been tested and works fine. If you adapt it to your requirement, it should work.
Table definition:
Bag - table name
bag - hash (partition) key
No sort key in the 'Bag' table
Batch write code:
var AWS = require("aws-sdk");
AWS.config.update({
region : "us-west-2",
endpoint : "http://localhost:8000"
});
var documentclient = new AWS.DynamoDB.DocumentClient();
var itemsArray = [];
var item1 = {
DeleteRequest : {
Key : {
'bag' : 'b1'
}
}
};
itemsArray.push(item1);
var item2 = {
DeleteRequest : {
Key : {
'bag' : 'b2'
}
}
};
itemsArray.push(item2);
var params = {
RequestItems : {
'Bag' : itemsArray
}
};
documentclient.batchWrite(params, function(err, data) {
if (err) {
console.log('Batch delete unsuccessful ...');
console.log(err, err.stack); // an error occurred
} else {
console.log('Batch delete successful ...');
console.log(data); // successful response
}
});
Output:
Batch delete successful ...
{ UnprocessedItems: {} }
This is doable with a Node Lambda, but there are a few things you need to consider to address concurrency while processing large tables:
Handle paging while querying all of the matching elements from a secondary index
Split into chunks of 25 requests as per BatchWrite/Delete requirements https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
Above 40,000 matches you might need a 1 second delay between cycles https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html
Here is a snippet that I wrote:
const AWS = require("aws-sdk");
const dynamodb = new AWS.DynamoDB.DocumentClient();
const log = console.log;
exports.handler = async (event) => {
log(event);
let TableName = event.tableName;
let params = {
let TableName,
FilterExpression: "userId = :uid",
ExpressionAttributeValues: {
":uid": event.userId,
},
};
let getItems = async (lastKey, items) => {
if (lastKey) params.ExclusiveStartKey = lastKey;
let resp = await dynamodb.scan(params).promise();
let items = resp.Items.length
? items.concat(resp.Items.map((x) => x.id))
: items;
if (resp.LastEvaluatedKey)
return await getItems(resp.LastEvaluatedKey, items);
else return items;
};
let ids = await getItems(null, []);
let idGroups = [];
for (let i = 0; i < ids.length; i += 25) {
idGroups.push(ids.slice(i, i + 25));
}
for (const gs of idGroups) {
let delReqs = [];
for (let id of gs) {
delReqs.push({ DeleteRequest: { Key: { id } } });
}
let RequestItems = {};
RequestItems[TableName] = delReqs;
let d = await dynamodb
.batchWrite({ RequestItems })
.promise().catch((e) => log(e));
}
log(ids.length + " items processed");
return {};
};
Not sure why nobody provided a proper answer.
Here's a Lambda I wrote in Node.js. It performs a full scan on the table, then batch-deletes the items, 25 per request.
Remember to change TABLE_NAME.
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({ apiVersion: '2012-08-10' });
//const { TABLE_NAME } = process.env;
const TABLE_NAME = "CHANGE ME PLEASE";

exports.handler = async (event) => {
  let params = {
    TableName: TABLE_NAME,
  };
  let items = [];
  let data = await docClient.scan(params).promise();
  items = [...items, ...data.Items];
  while (typeof data.LastEvaluatedKey != 'undefined') {
    params.ExclusiveStartKey = data.LastEvaluatedKey;
    data = await docClient.scan(params).promise();
    items = [...items, ...data.Items];
  }
  let leftItems = items.length;
  let group = [];
  let groupNumber = 0;
  console.log('Total items to be deleted', leftItems);
  for (const i of items) {
    const deleteReq = {
      DeleteRequest: {
        Key: {
          id: i.id,
        },
      },
    };
    group.push(deleteReq);
    leftItems--;
    if (group.length === 25 || leftItems < 1) {
      groupNumber++;
      console.log(`Batch ${groupNumber} to be deleted.`);
      const params = {
        RequestItems: {
          [TABLE_NAME]: group,
        },
      };
      await docClient.batchWrite(params).promise();
      console.log(
        `Batch ${groupNumber} processed. Left items: ${leftItems}`
      );
      // reset
      group = [];
    }
  }
  const response = {
    statusCode: 200,
    // Uncomment below to enable CORS requests
    // headers: {
    //   "Access-Control-Allow-Origin": "*"
    // },
    body: JSON.stringify('Hello from Lambda!'),
  };
  return response;
};
Be aware that you need to follow the instructions from the BatchWriteItem API reference:
src: https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
DeleteRequest - Perform a DeleteItem operation on the specified item. The item to be deleted is identified by a Key subelement:
Key - A map of primary key attribute values that uniquely identify the item. Each entry in this map consists of an attribute name and an attribute value. For each primary key, you must provide all of the key attributes. For example, with a simple primary key, you only need to provide a value for the partition key. For a composite primary key, you must provide values for both the partition key and the sort key.
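Applied to the original question, where uuid is the partition key and version the sort key, each DeleteRequest must therefore carry both attributes. A sketch (the version values are placeholders):
// With a composite primary key, Key must contain BOTH attributes:
const params = {
  RequestItems: {
    [this.TABLE]: [
      { DeleteRequest: { Key: { uuid: uuid, version: 1 } } },
      { DeleteRequest: { Key: { uuid: uuid, version: 2 } } },
    ],
  },
};
// To delete every version of a uuid, first query the table for that
// uuid to collect all version values, then batch the resulting keys.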
For batch delete, we can use batchWrite with DeleteRequest. Here is an example: we pass the tableName whose data is to be deleted and a payload, an array of the ids we need to remove.
A single request can delete up to 25 items.
const AWS = require('aws-sdk');
const dynamodb = new AWS.DynamoDB.DocumentClient({ apiVersion: '2012-08-10' });
const tableName = "PlayerData";
const payload = [{ id: 101 }, { id: 105 }, { id: 106 }];

const deleteBatchData = async (tableName, payload, dynamodb) => {
  try {
    return await dynamodb.batchWrite({
      RequestItems: {
        [tableName]: payload.map(item => {
          return {
            DeleteRequest: {
              Key: {
                id: item.id
              }
            }
          };
        })
      }
    }).promise();
  } catch (err) {
    console.log('Error in deleteBatchData ', err);
  }
};
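Calling it would look something like this (the ids above are placeholders):
deleteBatchData(tableName, payload, dynamodb)
  .then(() => console.log('Batch delete finished'));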
Why not use PartiQL? This approach is much more readable. (It too has a limit of 25 items per request, just like BatchWriteItem.)
// Import required AWS SDK clients and commands for Node.js.
import { BatchExecuteStatementCommand } from "@aws-sdk/client-dynamodb";
import { ddbDocClient } from "../libs/ddbDocClient.js";

const tableName = process.argv[2];
const movieYear1 = process.argv[3];
const movieTitle1 = process.argv[4];
const movieYear2 = process.argv[5];
const movieTitle2 = process.argv[6];

export const run = async (
  tableName,
  movieYear1,
  movieTitle1,
  movieYear2,
  movieTitle2
) => {
  try {
    const params = {
      Statements: [
        {
          Statement: "DELETE FROM " + tableName + " where year=? and title=?",
          Parameters: [{ N: movieYear1 }, { S: movieTitle1 }],
        },
        {
          Statement: "DELETE FROM " + tableName + " where year=? and title=?",
          Parameters: [{ N: movieYear2 }, { S: movieTitle2 }],
        },
      ],
    };
    const data = await ddbDocClient.send(
      new BatchExecuteStatementCommand(params)
    );
    console.log("Success. Items deleted.", data);
    return "Run successfully"; // For unit tests.
  } catch (err) {
    console.error(err);
  }
};
run(tableName, movieYear1, movieTitle1, movieYear2, movieTitle2);
