How do I batch delete with DynamoDB? - node.js

I am getting the error "The provided key element does not match the schema". uuid is my partition key, and I also have a sort key, version. I figured I could use batchWrite (docs) to delete all items with the same uuid.
My ES6 code is as follows:
delete(uuid) {
  return new Promise((resolve, reject) => {
    const params = {
      RequestItems: {
        [this.TABLE]: [
          {
            DeleteRequest: {
              Key: { uuid: uuid }
            }
          }
        ]
      }
    };
    // this._client references the DocumentClient
    this._client.batchWrite(params, function(err, data) {
      if (err) {
        // this gets hit with the error
        console.log(err);
        return reject(err);
      }
      console.log(data);
      resolve(data);
    });
  });
}
I'm not sure why it is erroring on the partition key. I have seen posts about needing additional indexes when searching by something that isn't a key, but I don't believe that's the case here.

Here is a batch write delete request sample. This code has been tested and works fine; if you adapt it to your schema, it should work for you.
Table Definition:-
Bag - Table Name
bag - Hash Key
No sort key in the 'Bag' table
Batch Write Code:-
var AWS = require("aws-sdk");

AWS.config.update({
  region: "us-west-2",
  endpoint: "http://localhost:8000"
});

var documentClient = new AWS.DynamoDB.DocumentClient();

var itemsArray = [];
var item1 = {
  DeleteRequest: {
    Key: {
      'bag': 'b1'
    }
  }
};
itemsArray.push(item1);

var item2 = {
  DeleteRequest: {
    Key: {
      'bag': 'b2'
    }
  }
};
itemsArray.push(item2);

var params = {
  RequestItems: {
    'Bag': itemsArray
  }
};

documentClient.batchWrite(params, function(err, data) {
  if (err) {
    console.log('Batch delete unsuccessful ...');
    console.log(err, err.stack); // an error occurred
  } else {
    console.log('Batch delete successful ...');
    console.log(data); // successful response
  }
});
Output:-
Batch delete successful ...
{ UnprocessedItems: {} }

This is doable with a Node Lambda, but there are a few things you need to consider to address concurrency while processing large tables:
Handle paging while querying all of the matching elements from a secondary index
Split the deletes into chunks of 25 requests, per the BatchWriteItem limits https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
Above 40,000 matches you might need a 1 second delay between cycles https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html
Here is a snippet that I wrote:
const AWS = require("aws-sdk");
const dynamodb = new AWS.DynamoDB.DocumentClient();
const log = console.log;

exports.handler = async (event) => {
  log(event);
  let TableName = event.tableName;
  let params = {
    TableName,
    FilterExpression: "userId = :uid",
    ExpressionAttributeValues: {
      ":uid": event.userId,
    },
  };

  // recursively page through the scan results, collecting matching ids
  let getItems = async (lastKey, items) => {
    if (lastKey) params.ExclusiveStartKey = lastKey;
    let resp = await dynamodb.scan(params).promise();
    let newItems = resp.Items.length
      ? items.concat(resp.Items.map((x) => x.id))
      : items;
    if (resp.LastEvaluatedKey)
      return await getItems(resp.LastEvaluatedKey, newItems);
    else return newItems;
  };

  let ids = await getItems(null, []);
  let idGroups = [];
  for (let i = 0; i < ids.length; i += 25) {
    idGroups.push(ids.slice(i, i + 25));
  }

  for (const gs of idGroups) {
    let delReqs = [];
    for (let id of gs) {
      delReqs.push({ DeleteRequest: { Key: { id } } });
    }
    let RequestItems = {};
    RequestItems[TableName] = delReqs;
    await dynamodb
      .batchWrite({ RequestItems })
      .promise()
      .catch((e) => log(e));
  }
  log(ids.length + " items processed");
  return {};
};
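One caveat worth adding: batchWrite can succeed while still returning some entries in UnprocessedItems (for example under throttling), and those entries should be retried. A minimal retry sketch against the same DocumentClient; the retry cap and backoff delay are illustrative choices, not values prescribed by the API:
const batchWriteWithRetry = async (RequestItems, attempt = 0) => {
  const resp = await dynamodb.batchWrite({ RequestItems }).promise();
  const unprocessed = resp.UnprocessedItems || {};
  // UnprocessedItems has the same shape as RequestItems, so it can be resubmitted as-is
  if (Object.keys(unprocessed).length === 0 || attempt >= 5) return resp;
  await new Promise((r) => setTimeout(r, 200 * (attempt + 1))); // simple backoff (illustrative)
  return batchWriteWithRetry(unprocessed, attempt + 1);
};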

Not sure why nobody provided a proper answer.
Here's a Lambda I wrote in Node.js. It performs a full scan on the table, then batch deletes the items, 25 per request.
Remember to change TABLE_NAME.
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({ apiVersion: '2012-08-10' });

//const { TABLE_NAME } = process.env;
const TABLE_NAME = "CHANGE ME PLEASE";

exports.handler = async (event) => {
  let params = {
    TableName: TABLE_NAME,
  };

  // scan the whole table, following LastEvaluatedKey across pages
  let items = [];
  let data = await docClient.scan(params).promise();
  items = [...items, ...data.Items];
  while (typeof data.LastEvaluatedKey != 'undefined') {
    params.ExclusiveStartKey = data.LastEvaluatedKey;
    data = await docClient.scan(params).promise();
    items = [...items, ...data.Items];
  }

  let leftItems = items.length;
  let group = [];
  let groupNumber = 0;

  console.log('Total items to be deleted', leftItems);

  for (const i of items) {
    const deleteReq = {
      DeleteRequest: {
        Key: {
          id: i.id,
        },
      },
    };

    group.push(deleteReq);
    leftItems--;

    if (group.length === 25 || leftItems < 1) {
      groupNumber++;
      console.log(`Batch ${groupNumber} to be deleted.`);

      const params = {
        RequestItems: {
          [TABLE_NAME]: group,
        },
      };

      await docClient.batchWrite(params).promise();

      console.log(`Batch ${groupNumber} processed. Left items: ${leftItems}`);

      // reset
      group = [];
    }
  }

  const response = {
    statusCode: 200,
    // Uncomment below to enable CORS requests
    // headers: {
    //   "Access-Control-Allow-Origin": "*"
    // },
    body: JSON.stringify('Hello from Lambda!'),
  };
  return response;
};

Be aware that you need to follow the documented key requirements:
src: https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
DeleteRequest - Perform a DeleteItem operation on the specified item. The item to be deleted is identified by a Key subelement:
Key - A map of primary key attribute values that uniquely identify the item. Each entry in this map consists of an attribute name and an attribute value. For each primary key, you must provide all of the key attributes. For example, with a simple primary key, you only need to provide a value for the partition key. For a composite primary key, you must provide values for both the partition key and the sort key.
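In the OP's case the table has a composite primary key (uuid plus version), so each DeleteRequest must name both attributes. A minimal sketch of what the params should look like; the version values here are hypothetical, and in practice you would first query for all versions belonging to the uuid:
const params = {
  RequestItems: {
    [this.TABLE]: [
      {
        DeleteRequest: {
          // both the partition key AND the sort key are required
          Key: { uuid: uuid, version: 1 } // hypothetical version value
        }
      },
      {
        DeleteRequest: {
          Key: { uuid: uuid, version: 2 } // one request per version
        }
      }
    ]
  }
};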

For batch deletes, we can use batchWrite with DeleteRequest entries. Here is an example: we provide the tableName whose data is to be deleted, and the payload is an array of the ids we need to remove.
In a single request, 25 items can be deleted.
const AWS = require('aws-sdk');
const dynamodb = new AWS.DynamoDB.DocumentClient({ apiVersion: '2012-08-10' });
const tableName = "PlayerData";
const payload = [{ id: 101 }, { id: 105 }, { id: 106 }];

const deleteBatchData = async (tableName, payload, dynamodb) => {
  try {
    const response = await dynamodb.batchWrite({
      RequestItems: {
        [tableName]: payload.map(item => {
          return {
            DeleteRequest: {
              Key: {
                id: item.id
              }
            }
          };
        })
      }
    }).promise();
    return response;
  } catch (err) {
    console.log('Error in deleteBatchData ', err);
  }
};
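The snippet above defines the helper but never calls it; a usage example might look like this, reusing the sample table name and ids defined above:
// invoke the helper with the sample values from above
deleteBatchData(tableName, payload, dynamodb)
  .then((response) => console.log('Batch delete finished:', response));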

Why not use PartiQL? This approach is much more readable. (This too has a limit of 25 items per request, just like BatchWriteItem.)
// Import required AWS SDK clients and commands for Node.js.
import { BatchExecuteStatementCommand } from "@aws-sdk/client-dynamodb";
import { ddbDocClient } from "../libs/ddbDocClient.js";

const tableName = process.argv[2];
const movieYear1 = process.argv[3];
const movieTitle1 = process.argv[4];
const movieYear2 = process.argv[5];
const movieTitle2 = process.argv[6];

export const run = async (
  tableName,
  movieYear1,
  movieTitle1,
  movieYear2,
  movieTitle2
) => {
  try {
    const params = {
      Statements: [
        {
          Statement: "DELETE FROM " + tableName + " where year=? and title=?",
          Parameters: [{ N: movieYear1 }, { S: movieTitle1 }],
        },
        {
          Statement: "DELETE FROM " + tableName + " where year=? and title=?",
          Parameters: [{ N: movieYear2 }, { S: movieTitle2 }],
        },
      ],
    };
    const data = await ddbDocClient.send(
      new BatchExecuteStatementCommand(params)
    );
    console.log("Success. Items deleted.", data);
    return "Run successfully"; // For unit tests.
  } catch (err) {
    console.error(err);
  }
};
run(tableName, movieYear1, movieTitle1, movieYear2, movieTitle2);
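Note that ../libs/ddbDocClient.js is not shown here. A minimal sketch of what it might contain, following the AWS SDK v3 document-client pattern (the region is a placeholder):
// libs/ddbDocClient.js (sketch): wrap the low-level client in a document client
import { DynamoDBClient } from "@aws-sdk/client-dynamodb";
import { DynamoDBDocumentClient } from "@aws-sdk/lib-dynamodb";

const ddbClient = new DynamoDBClient({ region: "us-east-1" }); // placeholder region
export const ddbDocClient = DynamoDBDocumentClient.from(ddbClient);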

Related

The provided key element does not match the schema - Javascript

I'm generating some data and trying to insert it in the database.
const AWSXRay = require('aws-xray-sdk');
const AWS = AWSXRay.captureAWS(require('aws-sdk'));
const documentClient = new AWS.DynamoDB.DocumentClient();

module.exports = {
  batchWriteItems: async function (tableName, headers, data) {
    // AWS only allows batches of max 25 items
    while (data.length) {
      const batch = data.splice(0, 25);
      const putRequests = batch.map((elem) => {
        // Override field type (default is String)
        let items = {};
        for (let key in elem) {
          const header = headers.find((hdr) => hdr.name === key);
          items[key] = {
            [header.type]: elem[key],
          };
        }
        return {
          PutRequest: {
            Item: items,
          },
        };
      });
      const params = {
        RequestItems: {
          [tableName]: putRequests,
        },
      };
      console.log(JSON.stringify(params));
      await documentClient.batchWrite(params).promise();
    }
  },
};
The table is correct: the partition key is countryCode (S) and there is no sort key.
If I copy the items individually and create an item manually in DynamoDB, it works!
I don't see any issues here but I'm still getting this error:
The provided key element does not match the schema
console.log(JSON.stringify(params));
{
  "RequestItems": {
    "test-table": [
      {
        "PutRequest": {
          "Item": {
            "countryCode": { "S": "LX" },
            "value": { "N": "0.67" }
          }
        }
      },
      {
        "PutRequest": {
          "Item": {
            "countryCode": { "S": "AF" },
            "value": { "N": "0.68" }
          }
        }
      }
    ]
  }
}
Do you see any issue here? I'm kinda stuck
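No accepted answer is shown here, but a likely cause: the DocumentClient marshals plain JavaScript values into DynamoDB types itself, so passing pre-typed attribute maps like {"S": "LX"} stores countryCode as a map rather than a string, which no longer matches the key schema. A sketch of a PutRequest built the way the DocumentClient expects, under that assumption:
// With the DocumentClient, supply plain values; the SDK adds the {"S": ...} typing
const putRequest = {
  PutRequest: {
    Item: {
      countryCode: "LX", // plain string, not { "S": "LX" }
      value: 0.67,       // plain number, not { "N": "0.67" }
    },
  },
};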

How do I query this auto generated AWS Amplify API?

I am trying to query an autogenerated Amplify API using Postman. I'm banging my head against the wall on something that should be simple. Can someone explain why this query URL doesn't return a JSON object? The data exists in Dynamo, but Postman returns an empty array (and a 200 status):
POSTMAN (this is what I expected to work):
https://xxxxx.execute-api.us-east-1.amazonaws.com/staging/api/getShipContainer?location=fl
UPDATE: after staring at the code for longer, I see that req.params[partitionKeyName] is somehow evaluating to getShipContainer, which would explain my issue. But how do I fix this, and why did it happen?
condition[partitionKeyName]['AttributeValueList'] = [ convertUrlType(req.params[partitionKeyName], partitionKeyType) ];
This syntax works (returns the Dynamo object) but is very clearly incorrect (location is a Dynamo column, and fl is the filter param): https://xxxxxx.execute-api.us-east-1.amazonaws.com/staging/api/fl?location
Query method:
const userIdPresent = false; // TODO: update in case is required to use that definition
const partitionKeyName = "location";
const partitionKeyType = "S";
const sortKeyName = "containerId";
const sortKeyType = "S";
const hasSortKey = sortKeyName !== "";
const path = "/api";
const UNAUTH = 'UNAUTH';
const hashKeyPath = '/:' + partitionKeyName;
const sortKeyPath = hasSortKey ? '/:' + sortKeyName : '';

// declare a new express app
var app = express()
app.use(bodyParser.json())
app.use(awsServerlessExpressMiddleware.eventContext())

// Enable CORS for all methods
app.use(function(req, res, next) {
  res.header("Access-Control-Allow-Origin", "*")
  res.header("Access-Control-Allow-Headers", "*")
  next()
});

// convert url string param to expected Type
const convertUrlType = (param, type) => {
  switch(type) {
    case "N":
      return Number.parseInt(param);
    default:
      return param;
  }
}

/********************************
 * HTTP Get method for list objects *
 ********************************/

//api/:location
app.get(path + hashKeyPath, function(req, res) {
  var condition = {}
  condition[partitionKeyName] = {
    ComparisonOperator: 'EQ'
  }

  if (userIdPresent && req.apiGateway) {
    condition[partitionKeyName]['AttributeValueList'] = [req.apiGateway.event.requestContext.identity.cognitoIdentityId || UNAUTH ];
  } else {
    try {
      condition[partitionKeyName]['AttributeValueList'] = [ convertUrlType(req.params[partitionKeyName], partitionKeyType) ];
    } catch(err) {
      res.statusCode = 500;
      res.json({error: 'Wrong column type ' + err});
    }
  }

  let queryParams = {
    TableName: tableName,
    KeyConditions: condition
  }

  console.log(`req gg cond::`, JSON.stringify(condition), `params`, Object.entries(req.params).map(([i,k]) => i + ' ' + k).join(','))

  dynamodb.query(queryParams, (err, data) => {
    if (err) {
      res.statusCode = 500;
      res.json({error: 'Could not load items: ' + err});
    } else {
      res.json(data.Items);
    }
  });
});
Results of the console.log I put in to debug:
req gg cond::
{
  "location": {
    "ComparisonOperator": "EQ",
    "AttributeValueList": [
      "getShipContainer"
    ]
  }
}
params location getShipContainer
Shouldn't the expected query be using location and ignoring "getShipContainer" completely? I'm very confused because the code was auto-generated; getShipContainer is the name of the lambda function that is being called.
I also tested this in the API Gateway test console with the same result. I've also checked the Dynamo table itself; the data is there.
The issue is mentioned in this GitHub issue.
Change your handler function to this, and https://xxxxx.execute-api.us-east-1.amazonaws.com/staging/api/location will return the list of items.
app.get(path + hashKeyPath, function (req, res) {
  let scanParams = {
    TableName: tableName,
  };
  dynamodb.scan(scanParams, (err, data) => {
    if (err) {
      res.statusCode = 500;
      res.json({ error: "Could not load items: " + err });
    } else {
      res.json(data.Items);
    }
  });
});
And here's the code for filtering when the parameter is specified in the URL.
location is one of the DynamoDB reserved words, so I've used attribute name mapping.
https://xxxxx.execute-api.us-east-1.amazonaws.com/staging/api/location?location=fl
app.get(path + hashKeyPath, function (req, res) {
  var filterParams = {};
  const location = req.query[partitionKeyName] || "";
  if (location) {
    filterParams = {
      FilterExpression: "#loc = :loc",
      ExpressionAttributeNames: {
        "#loc": "location",
      },
      ExpressionAttributeValues: {
        ":loc": location,
      },
    };
  }
  let scanParams = {
    TableName: tableName,
    ...filterParams,
  };
  dynamodb.scan(scanParams, (err, data) => {
    if (err) {
      res.statusCode = 500;
      res.json({ error: "Could not load items: " + err });
    } else {
      res.json(data.Items);
    }
  });
});
Usually I use a DynamoDB Global Secondary Index to query all items, but it may depend on the use case. For example, I've used one to query the list of items by siteID.
The table would have attributes something like this:
itemID
itemName
siteID
router.get("/", function (req, res) {
const sortKeyName = "siteID";
let queryParams = {
TableName: tableName,
IndexName: "siteIDGSI",
KeyConditionExpression: "siteID = :site_id",
ExpressionAttributeValues: {
":site_id": req.params[sortKeyName],
},
};
dynamodb.query(queryParams, (err, data) => {
if (err) {
res.statusCode = 500;
res.json({ error: "Could not load items: " + err });
} else {
res.json({
statusCode: 200,
message: "List of Items in " + req.params[sortKeyName],
items: data.Items,
});
}
});
});
So you could create a GSI on containerId.
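Creating such an index on an existing table can be done with UpdateTable. A minimal sketch using the low-level DynamoDB client; the index and attribute names match the example above, and the throughput numbers are placeholders:
// Sketch: add a GSI on siteID to an existing table
const ddb = new AWS.DynamoDB();
ddb.updateTable({
  TableName: tableName,
  AttributeDefinitions: [{ AttributeName: "siteID", AttributeType: "S" }],
  GlobalSecondaryIndexUpdates: [{
    Create: {
      IndexName: "siteIDGSI",
      KeySchema: [{ AttributeName: "siteID", KeyType: "HASH" }],
      Projection: { ProjectionType: "ALL" },
      ProvisionedThroughput: { ReadCapacityUnits: 5, WriteCapacityUnits: 5 }, // placeholders
    },
  }],
}, (err, data) => {
  if (err) console.log(err);
  else console.log("GSI creation started", data);
});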

Cannot return array if item not present in dynamodb

I have a function that takes an array of jobs as a parameter. The function checks the existence of each job in the database by its id.
If a job is not present in the database, that particular job needs to be pushed into an array called latestJobs. I'm calling this function in my main.js file, but the code breaks and stops.
Below is my main.js code:
module.exports.app = async () => {
  try {
    ...
    const jobs = await getJobsForCountries(body);
    const latestJobs = await filterPreDraftedJobs(jobs);
    console.log('latestJobs', latestJobs);
  } catch (e) {
    console.error('Error:- ', e); // Comes to here
  }
};
My checker function looks like:
module.exports = async (jobs) => {
  let latestJobs = [];
  for (const job of jobs) {
    const params = {
      TableName: process.env.DYNAMODB_TABLE,
      Key: {
        id: job.Id
      }
    };
    await dynamoDb.get(params, (err, data) => {
      if (err) {
        latestJobs.push(job);
        console.log('Job not found in DB');
      }
    }).promise();
  }
  return latestJobs;
};
How can I fix this issue? I want latestJobs to contain the jobs that are not present in the database. Is there a function in DynamoDB that can do this for me?
You are mixing callback, promise and await styles. I would do it like this:
module.exports = async (jobs) => {
  let latestJobs = [];
  for (const job of jobs) {
    const params = {
      TableName: process.env.DYNAMODB_TABLE,
      Key: {
        id: job.Id
      }
    };
    try {
      const result = await dynamoDb.get(params).promise();
      // get() resolves with an empty object when the item is missing;
      // it does not reject, so check for Item explicitly
      if (!result.Item) {
        latestJobs.push(job);
      }
    } catch (err) {
      console.error('Error fetching job', job.Id, err);
    }
  }
  return latestJobs;
};
Also, make sure that the table is created and that the region and name you are passing are correct.
I am not very familiar with DynamoDB, but looking at the above conversation, the code should be something like this. I have tried to improve performance while keeping the code modular and readable.
async function addUpdateJobs(jobs) {
  let paramsArray = [];
  for (const job of jobs) {
    const jobParams = {
      params: {
        TableName: process.env.DYNAMODB_TABLE,
        Key: {
          id: job.Id
        }
      },
      job: job
    };
    paramsArray.push(jobParams);
  }
  return await getJobs(paramsArray);
}

async function getJobs(paramsArray) {
  let latestJobs = [];
  // run the lookups in parallel and wait for all of them to finish
  await Promise.all(paramsArray.map(async (jobParam) => {
    try {
      const result = await dynamoDb.get(jobParam.params).promise();
      if (!result.Item) {
        latestJobs.push(jobParam.job);
      }
    } catch (err) {
      console.error('Error fetching job', jobParam.job.Id, err);
    }
  }));
  return latestJobs;
}
PS: I was also going through error handling in Amazon DynamoDB.
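To the "is there a function for this" part of the question: BatchGetItem can fetch up to 100 keys per request, so the existence check can be done in far fewer round trips than one get per job. A sketch under the same assumptions (id is the only key attribute); chunking beyond 100 keys and retrying UnprocessedKeys are omitted for brevity:
// Sketch: find the jobs whose ids are NOT in the table with a single batchGet
const findMissingJobs = async (jobs) => {
  const table = process.env.DYNAMODB_TABLE;
  const params = {
    RequestItems: {
      [table]: {
        Keys: jobs.map((job) => ({ id: job.Id })),
        ProjectionExpression: 'id', // only the key is needed for an existence check
      },
    },
  };
  const resp = await dynamoDb.batchGet(params).promise();
  const foundIds = new Set(resp.Responses[table].map((item) => item.id));
  return jobs.filter((job) => !foundIds.has(job.Id));
};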

Delete all items in Dynamodb using Lambda?

Using Lambda (Node.js), how do I delete all the items in a DynamoDB table?
There are 500K rows in the table.
I have tried using the scan method and then looping through each item, deleting it with the delete method, but that only gets through about 3,000 rows.
Code
exports.handler = function(event, context, callback) {
  getRecords().then((data) => {
    data.Items.forEach(function(item) {
      deleteItem(item.Id).then((data1) => {
      });
    });
  });
};

var deleteItem = function(id) {
  var params = {
    TableName: "TableName",
    Key: {
      "Id": id
    },
  };
  return new Promise(function(resolve, reject) {
    client.delete(params, function(err, data) {
      if (err) {
        reject(err);
      } else {
        resolve();
      }
    });
  });
}

function getRecords() {
  var params = {
    TableName: 'TableName',
    IndexName: 'Type-index',
    KeyConditionExpression: 'Type = :ty',
    ExpressionAttributeValues: {
      ':ty': "1"
    },
    ProjectionExpression: "Id",
  };
  return new Promise(function(resolve, reject) {
    client.query(params, function(err, data) {
      if (err) {
        reject(err);
      } else {
        resolve(data);
      }
    });
  });
}
There is already one right answer, but here is another code snippet to delete all records from DynamoDB.
const AWS = require("aws-sdk");
AWS.config.update({
region: "us-east-1",
});
const docClient = new AWS.DynamoDB.DocumentClient();
const getAllRecords = async (table) => {
let params = {
TableName: table,
};
let items = [];
let data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
while (typeof data.LastEvaluatedKey != "undefined") {
params.ExclusiveStartKey = data.LastEvaluatedKey;
data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
}
return items;
};
const deleteItem = (table, id) => {
var params = {
TableName: table,
Key: {
id: id,
},
};
return new Promise(function (resolve, reject) {
docClient.delete(params, function (err, data) {
if (err) {
console.log("Error Deleting ", id,err);
reject(err);
} else {
console.log("Success Deleting ", id,err);
resolve();
}
});
});
};
exports.handler = async function (event, context, callback) {
try {
const tableName = "<table>";
// scan and get all items
const allRecords = await getAllRecords(tableName);
// delete one by one
for (const item of allRecords) {
await deleteItem(tableName, item.id);
}
callback(null, {
msg: "All records are deleted.",
});
} catch (e) {
callback(null, JSON.stringify(e, null, 2));
}
};
A Scan operation consumes read capacity, and a single Scan request returns at most 1 MB of data. When this limit is reached, the Scan returns only what it has found so far, and you need to issue another Scan request (using LastEvaluatedKey) for the rest.
Thus, you'll need two loops: 1) a loop to delete all records returned by each Scan; 2) a loop to keep scanning multiple times, until you reach the end of the table.
Make sure you use consistent reads, or wait a second or two before issuing another Scan; otherwise you may get repeated items in different Scans.
exports.handler = function(event, context, callback) {
  clearRecords();
};

clearRecords = function() {
  getRecords().then((data) => {
    data.Items.forEach(function(item) {
      deleteItem(item.Id).then((data1) => {});
    });
    clearRecords(); // Will call the same function over and over
  });
}
Observe that Lambda has a timeout limit of 15 minutes. Since you have 500K items in your table, it's likely that your Lambda will time out and you'll need to trigger it more than once. You could also make your Lambda call itself after 14:50, for example; just take a look at the AWS SDK documentation for triggering Lambda functions. For this matter, you might also want to check the getRemainingTimeInMillis() method on the context object.
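For a table of this size it is often faster and cheaper to delete the table and recreate it than to pay for a write on every one of the 500K items. A minimal sketch using the low-level client; the key schema and billing mode here are placeholders that would have to match the real table:
// Sketch: wipe a table by dropping and recreating it
const ddb = new AWS.DynamoDB();
const recreateTable = async (TableName) => {
  await ddb.deleteTable({ TableName }).promise();
  await ddb.waitFor('tableNotExists', { TableName }).promise();
  await ddb.createTable({
    TableName,
    AttributeDefinitions: [{ AttributeName: 'Id', AttributeType: 'S' }], // placeholder schema
    KeySchema: [{ AttributeName: 'Id', KeyType: 'HASH' }],
    BillingMode: 'PAY_PER_REQUEST', // placeholder billing mode
  }).promise();
  await ddb.waitFor('tableExists', { TableName }).promise();
};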

Confused with promises in a loop

I'm writing DynamoDB code that does the following:
Scan a particular column, get the values, and add them to an array
Convert the array to a Set and back to an array to get the unique values
Loop through the set values, passing each as a parameter, and get the actual items
Basically I'm trying to build a group-by in DynamoDB.
The 1st and 2nd steps I'm able to do, but in step 3 I have a loop inside which the code has to be executed, and my code is as below.
var AWS = require("aws-sdk");
var creds = new AWS.Credentials('akid', 'secret', 'session');
AWS.config.update({
"accessKeyId": "myAccessId",
"secretAccessKey": "MySecretAccessKey",
"region": "us-east-1"
});
var dynamodb = new AWS.DynamoDB.DocumentClient();
var params = {
TableName: "MyTable",
FilterExpression: "#target_state = :target_state",
ExpressionAttributeNames: {
"#target_state": "target_state"
},
ExpressionAttributeValues: {
":target_state": "5"
}
};
var array = [];
dynamodb.scan(params).promise().then(function (data) {
data.Items.forEach(function (itemData) {
array.push(itemData.ruleNo)
});
console.log(array);
return array;
}).then(() => {
console.log("Entered 2nd block " + [...new Set(array)]);
var array2 = [...new Set(array)];
for (index = 0; index < array2.length; ++index) {
console.log(array2[index]);
var params1 = {
TableName: "ChemicalData",
FilterExpression: "#target_state = :target_state and #ruleNo=:ruleNo",
ExpressionAttributeNames: {
"#target_state": "target_state",
"#ruleNo": "ruleNo"
},
ExpressionAttributeValues: {
":target_state": "5",
":ruleNo": array2[index]
}
};
return dynamodb.scan(params1).promise().then(function (data) {
var uw = JSON.stringify((data.Items));
return uw;
});
}
}).then((data) => {
console.log(data);
}).catch(err => {
console.log(err)
})
When I run this program, the result I get is only one value, the first array value. I don't understand how to loop through all the array values and then do a console.log(data). Please let me know where I'm going wrong and how I can fix this.
Thanks
Using return inside the for loop breaks the loop. You should gather the promises from the inner scans into an array and use Promise.all to resolve them together:
dynamodb.scan(params).promise().then(function (data) {
  data.Items.forEach(function (itemData) {
    array.push(itemData.ruleNo)
  });
  console.log(array);
  return array;
}).then(() => {
  console.log("Entered 2nd block " + [...new Set(array)]);
  var array2 = [...new Set(array)];
  var results = []; // results array
  for (let index = 0; index < array2.length; ++index) {
    console.log(array2[index]);
    var params1 = {
      TableName: "ChemicalData",
      FilterExpression: "#target_state = :target_state and #ruleNo=:ruleNo",
      ExpressionAttributeNames: {
        "#target_state": "target_state",
        "#ruleNo": "ruleNo"
      },
      ExpressionAttributeValues: {
        ":target_state": "5",
        ":ruleNo": array2[index]
      }
    };
    // push results to be resolved later
    results.push(dynamodb.scan(params1).promise().then(function (data) {
      var uw = JSON.stringify((data.Items));
      return uw;
    }));
  }
  // return promise that resolves when all results resolve
  return Promise.all(results);
}).then((data) => {
  console.log(data); // array with one JSON string per ruleNo
}).catch(err => {
  console.log(err)
})
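Equivalently, the same flow reads more clearly with async/await; a sketch assuming the same dynamodb client and first-scan params as above:
// Sketch: the same group-by flow with async/await and Promise.all
const run = async () => {
  const data = await dynamodb.scan(params).promise();
  const ruleNos = [...new Set(data.Items.map((item) => item.ruleNo))];
  const results = await Promise.all(ruleNos.map(async (ruleNo) => {
    const resp = await dynamodb.scan({
      TableName: "ChemicalData",
      FilterExpression: "#target_state = :target_state and #ruleNo = :ruleNo",
      ExpressionAttributeNames: {
        "#target_state": "target_state",
        "#ruleNo": "ruleNo"
      },
      ExpressionAttributeValues: {
        ":target_state": "5",
        ":ruleNo": ruleNo
      }
    }).promise();
    return JSON.stringify(resp.Items);
  }));
  console.log(results);
};
run().catch((err) => console.log(err));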