Insert to DynamoDB if item doesn't exist - node.js

I'm setting up an attendance check on streaming video with aws-rekognition: when a person is identified, a Lambda should write it to DynamoDB. The insertDynamo() code works fine on its own (when I'm not calling it as a function), but when I put it inside a function it doesn't write to the DynamoDB table. Any idea what I am doing wrong?
var AWS = require('aws-sdk');
var today = new Date();
var date = today.getFullYear()+'-'+(today.getMonth()+1)+'-'+today.getDate();
var hour = today.getHours() + ":" + today.getMinutes() + ":" + today.getSeconds();
exports.handler = async (event, context) => {
//console.log('Received event:', JSON.stringify(event, null, 2));
event.Records.forEach((record) => {
// Kinesis data is base64 encoded so decode here
const load = new Buffer(record.kinesis.data, 'base64').toString('ascii');
const payload = JSON.parse(load);
if(payload.FaceSearchResponse != null)
{
payload.FaceSearchResponse.forEach((face) => {
if(face.MatchedFaces != null &&
Object.keys(face.MatchedFaces).length > 0)
{
var id = JSON.stringify(face.MatchedFaces[0].Face.ExternalImageId, null, 4);
//this is hard code it ---needs to split string from kinesis(id)
insertDynamo(date,hour,'0001');
}
else
{
//do nothing
}
});
}
});
return `Successfully processed ${event.Records.length} records.`;
};
var insertDynamo = function(date,hour,id){
exports.handler = async (event,context) => {
const documentClient = new AWS.DynamoDB.DocumentClient();
let responseBody = "";
let statusCode = 0;
const params = {
TableName: "users",
Item:{
badgeNumber: id,
assistance:{
date:date,
hour:hour
}
},
ConditionExpression: 'attribute_not_exists(badgenumber)'
};
try {
const data = await documentClient.put(params).promise();
responseBody = JSON.stringify(data);
statusCode = 201;
} catch (err) {
responseBody = `Unable to put product: ${err}`;
statusCode = 403;
}
const response = {
statusCode: statusCode,
headers: {
"Content-Type": "application/json"
},
body:responseBody
}
return response
}
};

Your Lambda function will finish immediately after it has been called, because you use .forEach to loop through all the records. This means all your requests to insert into DynamoDB will be cancelled before they finish their jobs.
I have two solutions for your case:
Wait until the Lambda callback stack is clear.
Just prepend a "config" line to your Lambda function:
context.callbackWaitsForEmptyEventLoop = true;
Use an old-school for loop instead of forEach (recommended). forEach uses a callback for each item, so it will not work as expected with the async/await keywords.
var AWS = require('aws-sdk');
var today = new Date();
var date = today.getFullYear() + '-' + (today.getMonth() + 1) + '-' + today.getDate();
var hour = today.getHours() + ":" + today.getMinutes() + ":" + today.getSeconds();
exports.handler = async (event, context) => {
//console.log('Received event:', JSON.stringify(event, null, 2));
for (const record of event.Records) { // for of instead of forEach
const load = Buffer.from(record.kinesis.data, 'base64').toString('ascii'); // Buffer.from instead of the deprecated new Buffer
const payload = JSON.parse(load);
if (payload.FaceSearchResponse != null) {
for (const face of payload.FaceSearchResponse) { // for of instead of forEach
if (face.MatchedFaces != null &&
Object.keys(face.MatchedFaces).length > 0) {
var id = JSON.stringify(face.MatchedFaces[0].Face.ExternalImageId, null, 4);
//this is hard code it ---needs to split string from kinesis(id)
await insertDynamo(date, hour, '0001'); // wait until task finish then solve next item
}
else {
//do nothing
}
}
}
}
return `Successfully processed ${event.Records.length} records.`;
};
var insertDynamo = function (date, hour, id) {
// What is this?????????
// exports.handler = async (event, context) => {
// }
const documentClient = new AWS.DynamoDB.DocumentClient();
const params = {
TableName: "users",
Item: {
badgeNumber: id,
assistance: {
date: date,
hour: hour
}
},
ConditionExpression: 'attribute_not_exists(badgenumber)'
};
return documentClient.put(params).promise(); // enough for us
};
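One more detail worth checking, since the goal is to insert only when the item does not exist: the ConditionExpression above references badgenumber while the item attribute is written badgeNumber. DynamoDB attribute names are case-sensitive, so the condition as written checks an attribute that is never stored and the put will always succeed. A minimal sketch of the params with the names aligned (same table and attributes as above):
const params = {
  TableName: "users",
  Item: {
    badgeNumber: id,
    assistance: {
      date: date,
      hour: hour
    }
  },
  // attribute names are case-sensitive; match the stored attribute exactly
  ConditionExpression: 'attribute_not_exists(badgeNumber)'
};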

Related

getting 403 from lambda calling api gateway

I have an API POST endpoint which updates a customer's information in DynamoDB. It is set to authenticate using AWS_IAM. I am getting a 403 from my Lambda when calling this API. I have allowed execute-api:Invoke permission on the API for the role the Lambda uses. I see in this post that I need to create a canonical request. I was able to come up with the code below, but I still get a 403. I can't figure out what is missing and hope a different pair of eyes can spot the problem. Please help!
"use strict";
const https = require("https");
const crypto = require("crypto");
exports.handler = async (event, context, callback) => {
try {
var attributes = {
customerId: 1,
body: { firstName: "abc", lastName: "xyz" }
};
await updateUsingApi(attributes.customerId, attributes.body)
.then((result) => {
var jsonResult = JSON.parse(result);
if (jsonResult.statusCode === 200) {
callback(null, {
statusCode: jsonResult.statusCode,
statusMessage: "Attributes saved successfully!"
});
} else {
callback(null, jsonResult);
}
})
.catch((err) => {
console.log("error: ", err);
callback(null, err);
});
} catch (error) {
console.error("error: ", error);
callback(null, error);
}
};
function sign(key, message) {
return crypto.createHmac("sha256", key).update(message).digest();
}
function getSignatureKey(key, dateStamp, regionName, serviceName) {
var kDate = sign("AWS4" + key, dateStamp);
var kRegion = sign(kDate, regionName);
var kService = sign(kRegion, serviceName);
var kSigning = sign(kService, "aws4_request");
return kSigning;
}
function updateUsingApi(customerId, newAttributes) {
var request = {
partitionKey: `MY_CUSTOM_PREFIX_${customerId}`,
sortKey: customerId,
payLoad: newAttributes
};
var data = JSON.stringify(request);
var apiHost = new URL(process.env.REST_API_INVOKE_URL).hostname;
var apiMethod = "POST";
var path = `/stage/postEndPoint`;
var { amzdate, authorization, contentType } = getHeaders(host, method, path);
const options = {
host: host,
path: path,
method: method,
headers: {
"X-Amz-Date": amzdate,
Authorization: authorization,
"Content-Type": contentType,
"Content-Length": data.length
}
};
return new Promise((resolve, reject) => {
const req = https.request(options, (res) => {
if (res && res.statusCode !== 200) {
console.log("response from api", res);
}
var response = {
statusCode: res.statusCode,
statusMessage: res.statusMessage
};
resolve(JSON.stringify(response));
});
req.on("error", (e) => {
console.log("error", e);
reject(e.message);
});
req.write(data);
req.end();
});
}
function getHeaders(host, method, path) {
var algorithm = "AWS4-HMAC-SHA256";
var region = "us-east-1";
var serviceName = "execute-api";
var secretKey = process.env.AWS_SECRET_ACCESS_KEY;
var accessKey = process.env.AWS_ACCESS_KEY_ID;
var contentType = "application/x-amz-json-1.0";
var now = new Date();
var amzdate = now
.toJSON()
.replace(/[-:]/g, "")
.replace(/\.[0-9]*/, "");
var datestamp = now.toJSON().replace(/-/g, "").replace(/T.*/, "");
var canonicalHeaders = `content-type:${contentType}\nhost:${host}\nx-amz-date:${amzdate}\n`;
var signedHeaders = "content-type;host;x-amz-date";
var payloadHash = crypto.createHash("sha256").update("").digest("hex");
var canonicalRequest = [
method,
path,
canonicalHeaders,
signedHeaders,
payloadHash
].join("/n");
var credentialScope = [datestamp, region, serviceName, "aws4_request"].join(
"/"
);
const sha56 = crypto
.createHash("sha256")
.update(canonicalRequest)
.digest("hex");
var stringToSign = [algorithm, amzdate, credentialScope, sha56].join("\n");
var signingKey = getSignatureKey(secretKey, datestamp, region, serviceName);
var signature = crypto
.createHmac("sha256", signingKey)
.update(stringToSign)
.digest("hex");
var authorization = `${algorithm} Credential=${accessKey}/${credentialScope}, SignedHeaders=${signedHeaders}, Signature=${signature}`;
return { amzdate, authorization, contentType };
}
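For reference, the SigV4 documentation lays the canonical request out as newline-joined parts. Below is a minimal sketch using the variable names assumed in the question's getHeaders (method, path, canonicalHeaders, signedHeaders); compared with the code above it joins with "\n" rather than "/n", includes a canonical query string line even when it is empty, and hashes the actual request body rather than an empty string:
const crypto = require("crypto");

function buildCanonicalRequest(method, path, canonicalQueryString, canonicalHeaders, signedHeaders, body) {
  // hash of the request payload (the JSON body being POSTed)
  const payloadHash = crypto.createHash("sha256").update(body).digest("hex");
  return [
    method,               // e.g. "POST"
    path,                 // canonical URI
    canonicalQueryString, // "" when the URL has no query string
    canonicalHeaders,     // each "name:value" line already ends with "\n"
    signedHeaders,        // e.g. "content-type;host;x-amz-date"
    payloadHash
  ].join("\n");           // parts are separated by newlines, not "/n"
}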

Batching in nodejs

Very new to nodejs. I have a file which contains only a single column, jobid. I am iterating over the job ids one by one and sending a request to a service which gives me the status of the job in JSON format. I read the JSON response, fetch a few values from it, and write them to the database. I want to write when I reach the 100th jobid (with the help of a counter), because I am expecting more than 100 jobids in the file, and this count is dynamic.
For example, if I have 234 records, it should write 3 times: the first two writes with 100 records each and the third with 34. The jobStatusMetrics array should be cleared after every write.
const fileStatusprocess = require('../controller/readResultFile');
const config = require('../config/config');
const https = require('https');
const uuid = require('uuid-random');
async function jobProcesser() {
const iterator = (await fileStatusprocess.processResultFile("C:\Support\result.csv"))
console.log('Total jobs are',iterator[0].length);
var counter = 0 ;
for (i = 0; i < iterator[0].length; i++) {
counter++;
const formJobStatusURL = "https://localhost:8091/api/job/" + iterator[0][i] + "/status";
const option = {
method: 'GET' ,
headers: {
'X-Message-Created-Ts': `${new Date().toISOString()}`,
'X-Transaction-Created-Ts': `${new Date().toISOString()}`,
'X-User-Id': 'PerformanceExecuter',
'X-Client-Id': `${uuid()}`,
'X-Message-Id': `${uuid()}`,
'X-Transaction-Id': `${uuid()}`,
'Content-Type': 'application/json'
}
}
let content = '';
let reqGet = https.request(formJobStatusURL,option, function (response) {
response.on('data', function (data) {
content += data;
});
response.on('end', function () {
const jsonPayload = JSON.parse(content);
const jobStatusMetrics = {};
for ( var key in jsonPayload){
if(jsonPayload.hasOwnProperty(key)){
jobStatusMetrics.job_id = jsonPayload.id;
jobStatusMetrics.status = jsonPayload.status;
jobStatusMetrics.initiatedBy = jsonPayload.initiatedBy;
jobStatusMetrics.product = jsonPayload.product;
jobStatusMetrics.operation = jsonPayload.operation;
jobStatusMetrics.startTimestamp = jsonPayload.startTimestamp;
jobStatusMetrics.endTimestamp = jsonPayload.endTimestamp;
jobStatusMetrics.totalRecords = jsonPayload.file.totalRecords;
jobStatusMetrics.failedRecords = jsonPayload.file.totalFailedRecords;
jobStatusMetrics.sucessRecords = jsonPayload.file.totalSuccessRecords;
jobStatusMetrics.inprogressRecords = jsonPayload.file.totalInProgressRecords;
jobStatusMetrics.sucessStatus = jsonPayload.results.successFileAvailable;
jobStatusMetrics.failureStatus = jsonPayload.results.failureFileAvailable;
jobStatusMetrics.uploadJob = jsonPayload.actionsAvailable.dataUploadAllowed;
jobStatusMetrics.abortJob = jsonPayload.actionsAvailable.abortJob;
}
if ( counter%100 == 0) {
console.log('Writting to the database')
// logic for influxdb writter
}
else {
}
}
//console.log(jobStatusMetrics);
})
});
reqGet.end();
}
}
jobProcesser()
Rather than using the https module directly for HTTP requests, please use another library like phin-retry. Please check the code below, where I have added the logic. Hope this works.
const rp = require('phin-retry');
const fileStatusprocess = require('../controller/readResultFile');
const config = require('../config/config');
const uuid = require('uuid-random');
const influxDBWrite = require('../controller/influxdbWritter')
async function checkFileStatus() {
const iterator = (await fileStatusprocess.processResultFile("C:\\Support\\result.csv")); // escape the backslashes in the Windows path
const jobIds = iterator[0];
console.log(jobIds)
let jobDetailsList = [];
for (i = 0; i < jobIds.length; i++) {
console.log('JobId', i)
const jobId = jobIds[i];
const formJobStatusURL = "https://localhost:8091/api/job/" + jobId + "/status";
const jobDetails = await rp.get({
url: formJobStatusURL,
headers: {
'X-Message-Created-Ts': `${new Date().toISOString()}`,
'X-Transaction-Created-Ts': `${new Date().toISOString()}`,
'X-User-Id': 'PerformanceExecuter',
'X-Client-Id': `${uuid()}`,
'X-Message-Id': `${uuid()}`,
'X-Transaction-Id': `${uuid()}`,
'Content-Type': 'application/json'
}
});
jobDetailsList.push(jobDetails);
if ((i + 1) % 5 === 0) {
await saveResultsToInfux(jobDetailsList);
jobDetailsList = [];
}
}
await saveResultsToInfux(jobDetailsList);
}
async function saveResultsToInfux(jobDetailsList) {
const metrics = [];
for (i = 0; i < jobDetailsList.length; i++) {
console.log('Metrics', i)
const jobDetail = jobDetailsList[i];
const tags = {
Id: jobDetail.id,
Status: jobDetail.status.overall,
Product: jobDetail.product,
Entity: jobDetail.entity,
Operation: jobDetail.operation
}
const fields = {
startTimestamp:jobDetail.startTimestamp,
endTimestamp:jobDetail.endTimestamp,
totalRecords:jobDetail.file.totalRecords,
totalFailedRecords:jobDetail.file.totalFailedRecords,
totalSuccessRecords:jobDetail.file.totalSuccessRecords,
totalInProgressRecords:jobDetail.file.totalInProgressRecords,
successFileAvailable:jobDetail.results.successFileAvailable,
failureFileAvailable:jobDetail.results.failureFileAvailable,
dataUploadAllowed:jobDetail.actionsAvailable.dataUploadAllowed,
abortJob:jobDetail.actionsAvailable.abortJob,
}
metrics.push({
measurement: 'BulkData',
tags,
fields
});
}
influxDBWrite.dbWritter(metrics);
}
checkFileStatus()
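The code above flushes every 5 job details to keep the example small, while the question asks for batches of 100. A minimal sketch of the same flush-and-clear pattern with the batch size pulled out, assuming the saveResultsToInfux helper above and a fetchJobDetails function that wraps the HTTP status call:
const BATCH_SIZE = 100; // per the question: write every 100 job ids

async function flushInBatches(jobIds, fetchJobDetails) {
  let buffer = [];
  for (const jobId of jobIds) {
    buffer.push(await fetchJobDetails(jobId)); // one status call per job id
    if (buffer.length === BATCH_SIZE) {
      await saveResultsToInfux(buffer); // write a full batch
      buffer = [];                      // clear the array after every write
    }
  }
  if (buffer.length) {
    await saveResultsToInfux(buffer);   // remaining partial batch, e.g. 34 of 234
  }
}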

Waiting for one callback to be done (to get data) before the other

I am trying to get MedGuideURL to be used in the 2nd callback but its value is empty. It seems the second callback always happens before the first one is done. I am thinking of using a Promise/Observable, but is there an easier way?
var qrImage = require('qr-image');
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({region:'us-west-2'});
exports.handler = function(event, context, callback){
var path = event.path;
var drugId = path.replace(/\//g, '');
var MedGuideURL = "";
var params = {
TableName: 'QRCodeInfo',
Key: {
"DrugId" : drugId
}
};
docClient.get(params, function(err,data) {
if (err) {
callback(err,null);
} else {
console.log("The data is: "+ data.Item.MedGuideURL); //correct value
callback(null,data);
MedGuideURL = data.Item.MedGuideURL;
}
});
callback(null, sendRes(200, MedGuideURL)); //MedGuideURL is empty!
};
const sendRes = (status, body) => {
//console.log(body);
const svg_string = qrImage.imageSync(body, { type: 'svg', size: 10 });
var response = {
statusCode: status,
headers: {
"Content-Type": "image/svg+xml"
},
body: svg_string
};
return response;
};
You first need to understand async behavior.
You are invoking the callback before the get method completes.
You can change get to its promise form.
Example, if sendRes is a promise-returning method:
exports.handler = function (event, context, callback) {
var path = event.path;
var drugId = path.replace(/\//g, '');
var MedGuideURL = "";
var params = {
TableName: 'QRCodeInfo',
Key: {
"DrugId": drugId
}
};
docClient.get(params).promise().then((data) => {
console.log('success');
console.log("The data is: " + data.Item.MedGuideURL);
MedGuideURL = data.Item.MedGuideURL;
return sendRes(200, MedGuideURL); // if sendRes is a promise-returning method
}).then((finalResponse) => {
callback(null, finalResponse);
})
.catch((err) => {
callback(err, null);
})
};
If sendRes is not a promise-returning method:
exports.handler = function (event, context, callback) {
var path = event.path;
var drugId = path.replace(/\//g, '');
var MedGuideURL = "";
var params = {
TableName: 'QRCodeInfo',
Key: {
"DrugId": drugId
}
};
docClient.get(params).promise().then((data) => {
console.log('success');
console.log("The data is: " + data.Item.MedGuideURL);
MedGuideURL = data.Item.MedGuideURL;
let finalResponse = sendRes(200, MedGuideURL); // if sendRes is not a promise-returning method
return callback(null, finalResponse);
})
.catch((err) => {
callback(err, null);
})
};
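If the Lambda runtime supports async handlers, the same flow can also be written without the explicit callback. A minimal sketch, assuming the same docClient and sendRes shown above:
exports.handler = async (event) => {
  const drugId = event.path.replace(/\//g, '');
  const params = {
    TableName: 'QRCodeInfo',
    Key: { "DrugId": drugId }
  };
  // await the DocumentClient promise so MedGuideURL is populated before responding
  const data = await docClient.get(params).promise();
  return sendRes(200, data.Item.MedGuideURL);
};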

Request body sometimes null

The request body sometimes (less than 1% of the time) is null when pulled into Lambda. I am processing on the order of 14,000 request bodies one at a time. Any request bodies erroring out have to be handled manually. Why is the body randomly coming in null?
Sample code run from the prompt (node index):
const async = require('async');
const _ = require('lodash');
const moment = require('moment');
const Client = require('node-rest-client').Client;
const fs = require('fs');
const input = require('./TestFull.json');
module.exports = () => {
const filename = `./eventfulFails-${new moment().format("YYYY-MM-DD-HHmmss")}.json`;
console.log('Start Time: ', new moment().format("HH:mm:ss"));
let failedObjects = {
events: [],
venues: [],
performers: []
};
async.parallel([
async.apply(processVenues, input.venues, failedObjects),
async.apply(processPerformers, input.performers, failedObjects)
], (lookupErr) => {
if (lookupErr) {
return console.error('Error processing venues and performers.', lookupErr);
}
console.log('Start Events: ', new moment().format("HH:mm:ss"));
async.waterfall([
async.apply(processEvents, input.events, failedObjects)
], (eventErr) => {
if (eventErr) {
console.log('Time of Failure: ', new moment().format("HH:mm:ss"));
return console.error('Error processing events.', eventErr);
}
console.log('End Time: ', new moment().format("HH:mm:ss"));
if (failedObjects.events.length || failedObjects.venues.length || failedObjects.performers.length) {
const stream = fs.createWriteStream(filename);
stream.once('open', function(fd) {
stream.write(JSON.stringify(failedObjects));
stream.end();
});
}
});
});
};
function processVenues(venues, failedObjects, callback) {
const calls = [];
for (let i = 0; i < venues.length; i++) {
const v = venues[i];
calls.push(async.apply((venue, postCallback) => {
const client = new Client();
const args = {
data: venue,
headers: {"Content-Type": "application/json"}
};
client.post('https://hm1br4yo34.execute-api.us-west-2.amazonaws.com/dev/eventful-venue', args, (data, response) => {
if (response.statusCode !== 200 && response.statusCode !== 201) {
failedObjects.venues.push({
venue,
response
});
console.log('venue status code: ', response);
console.log('venue data: ', venue);
}
return postCallback(null);
});
}, v));
}
async.waterfall(calls, callback);
}
function processPerformers(performers, failedObjects, callback) {
const calls = [];
for (let i = 0; i < performers.length; i++) {
const v = performers[i];
calls.push(async.apply((performer, postCallback) => {
const client = new Client();
const args = {
data: performer,
headers: {"Content-Type": "application/json"}
};
client.post('https://hm1br4yo34.execute-api.us-west-2.amazonaws.com/dev/eventful-performer', args, (data, response) => {
if (response.statusCode !== 200 && response.statusCode !== 201) {
failedObjects.performers.push({
performer,
response
});
console.log('performer status code: ', response);
console.log('performer data: ', performer);
}
return postCallback(null);
});
}, v));
}
async.waterfall(calls, callback);
}
function processEvents(events, failedObjects, callback) {
const calls = [];
for (let i = 0; i < events.length; i++) {
const v = events[i];
calls.push(async.apply((event, postCallback) => {
const client = new Client();
const args = {
data: event,
headers: {"Content-Type": "application/json"}
};
client.post('https://hm1br4yo34.execute-api.us-west-2.amazonaws.com/dev/eventful', args, (data, response) => {
if (response.statusCode !== 200 && response.statusCode !== 201) {
failedObjects.events.push({
event,
response
});
console.log('event status code: ', response);
console.log('event data: ', event);
}
return postCallback(null);
});
}, v));
}
async.waterfall(calls, callback);
}
if (!module.parent) {
module.exports();
}
Code of the Lambda function (eventful-venue-load) that processVenues calls:
const _ = require('lodash');
const AWS = require('aws-sdk');
const async = require('async');
const sdk = require('@consultwithmikellc/withify-sdk');
const host = process.env.aurora_host;
const user = process.env.aurora_user;
const database = process.env.aurora_database;
let decryptedPassword;
const lambda = new AWS.Lambda({
region: 'us-west-2' //your region
});
class WithifyEventCreate extends sdk.Lambda {
constructor(event, context, keysToDecrypt) {
super(event, context, keysToDecrypt);
this.getLocation = this.getLocation.bind(this);
this.insertLocations = this.insertLocations.bind(this);
this.insertLocationImages = this.insertLocationImages.bind(this);
}
decryptedKey(key, value) {
switch (key) {
case 'aurora_password':
decryptedPassword = value;
break;
}
}
initializeComplete() {
this.connect(host, user, decryptedPassword, database, true);
}
connectComplete() {
async.waterfall(
[
this.getLocation,
this.insertLocations,
this.insertLocationImages
]
);
}
getLocation(callback) {
const {id: eventfulLocationID} = this.body;
this.connection.query('SELECT * FROM `Location` WHERE `eventfulLocationID` = ?',
[eventfulLocationID],
(err, results) => {
if (err) {
// error call block
return this.sendResponse(err, this.createResponse(500));
} else if (results.length === 1) {
console.log('Invoking withify-eventful-venue-update...');
lambda.invoke({
FunctionName: 'withify-eventful-venue-update',
Payload: JSON.stringify(this.event)
}, (error, data) => {
return this.sendResponse(null, JSON.parse(data.Payload));
});
} else if (results.length > 1) {
return this.sendResponse(`The location lookup produced multiple results. event:${JSON.stringify(this.body)}`, this.createResponse(500));
} else {
return callback(null);
}
}
);
}
insertLocations(callback) {
const {name: locationName, address: street, city, region_abbr: state, postal_code,
description, id: eventfulLocationID, latitude: lat, longitude: lng, withdrawn: locationWithdrawn} = this.body;
let addresses = street.concat(', ', city, ', ', state, ', ', postal_code);
if (!description.length){
var phones = "";
}else{
var re = /(([\(][0-9]{3}[\)][\s][0-9]{3}[-][0-9]{4})|([0-9]{3}[-][0-9]{3}[-][0-9]{4})|([0-9]{3}[\.][0-9]{3}[\.][0-9]{4}))/i;
this.body.found = description.match(re);
if (!this.body.found){
var phone = "";
}else{
if (!this.body.found.length){
var phone = "";
}else{
var phone = this.body.found[0];
}
}
}
this.connection.query('INSERT IGNORE INTO `Location` (`locationName`, `address`, ' +
'`phone`, `lat`, `lng`, `eventfulLocationID`, `locationWithdrawn`) VALUES (?, ?, ?, ?, ?, ?, ?)',
[locationName, addresses, phone, lat, lng, eventfulLocationID, locationWithdrawn],
(err, results) => {
if (err) {
return this.sendResponse(err, this.createResponse(500));
}
this.body.locationID = results.insertId;
return callback(null);
}
);
}
insertLocationImages(callback) {
var altText = "";
const images = _.flatten(this.body.images.map(im => {
return _.map(im.sizes, (ims, idx) => {
const title = `Image ${idx}`;
return [
this.body.locationID,
this.body.name,
ims.url,
null,
null,
this.body.id,
ims.width,
ims.height
];
});
}));
if(!images[0]){
return this.sendResponse(null, this.createResponse(201, this.body));
}
this.connection.query('INSERT IGNORE INTO `LocationImage` (`locationID`, `imageTitle`, `imageUrl`, ' +
'`imageName`, `altText`, `eventfulLocationID`, `width`, `height`) VALUES ?',
[images],
(err, results) => {
if (err) {
return this.sendResponse(err, this.createResponse(500));
} else if (results.affectedRows !== images.length) {
return this.sendResponse('The image inserts did not affect the right number' +
' of rows.', this.createResponse(500));
}
return this.sendResponse(null, this.createResponse(201, this.body));
}
);
}
}
exports.handler = (event, context) => {
const withifyEventCreate = new WithifyEventCreate(event, context, ['aurora_password']);
withifyEventCreate.initialize([decryptedPassword]);
};

How do I batch delete with DynamoDB?

I am getting an error that "The provided key element does not match the schema". uuid is my primary partition key. I also have a primary sort key for version. I figured I can use batchWrite (docs) to delete all items with the same uuid.
My ES6 code is as follows:
delete(uuid) {
const promise = new Promise();
const params = {
RequestItems: {
[this.TABLE]: [
{
DeleteRequest: {
Key: { uuid: uuid }
}
}
]
}
};
// this._client references the DocumentClient
this._client.batchWrite(params, function(err, data) {
if (err) {
// this gets hit with error
console.log(err);
return promise.reject(err);
}
console.log(result);
return promise.resolve(result);
});
return promise;
}
Not sure why it is erroring on the key that is the primary. I have seen posts about needing other indexes for times when I am searching by something that isn't a key. But I don't believe that's the case here.
Here is a batch write delete request sample. This code has been tested and works fine. If you adapt this code to your requirement, it should work.
Table Definition:-
Bag - Table Name
bag - Hash Key
No sort key in 'Bag' table
Batch Write Code:-
var AWS = require("aws-sdk");
AWS.config.update({
region : "us-west-2",
endpoint : "http://localhost:8000"
});
var documentclient = new AWS.DynamoDB.DocumentClient();
var itemsArray = [];
var item1 = {
DeleteRequest : {
Key : {
'bag' : 'b1'
}
}
};
itemsArray.push(item1);
var item2 = {
DeleteRequest : {
Key : {
'bag' : 'b2'
}
}
};
itemsArray.push(item2);
var params = {
RequestItems : {
'Bag' : itemsArray
}
};
documentclient.batchWrite(params, function(err, data) {
if (err) {
console.log('Batch delete unsuccessful ...');
console.log(err, err.stack); // an error occurred
} else {
console.log('Batch delete successful ...');
console.log(data); // successful response
}
});
Output:-
Batch delete successful ...
{ UnprocessedItems: {} }
This is doable with Node lambda, but there are a few things you need to consider to address concurrency while processing large databases:
Handle paging while querying all of the matching elements from a secondary index
Split into chunks of 25 requests as per BatchWrite/Delete requirements https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
Above 40,000 matches you might need a 1-second delay between cycles (a delay sketch follows the snippet below) https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html
Here is a snippet that I wrote:
const AWS = require("aws-sdk");
const dynamodb = new AWS.DynamoDB.DocumentClient();
const log = console.log;
exports.handler = async (event) => {
log(event);
let TableName = event.tableName;
let params = {
TableName,
FilterExpression: "userId = :uid",
ExpressionAttributeValues: {
":uid": event.userId,
},
};
let getItems = async (lastKey, items) => {
if (lastKey) params.ExclusiveStartKey = lastKey;
let resp = await dynamodb.scan(params).promise();
items = resp.Items.length
? items.concat(resp.Items.map((x) => x.id))
: items;
if (resp.LastEvaluatedKey)
return await getItems(resp.LastEvaluatedKey, items);
else return items;
};
let ids = await getItems(null, []);
let idGroups = [];
for (let i = 0; i < ids.length; i += 25) {
idGroups.push(ids.slice(i, i + 25));
}
for (const gs of idGroups) {
let delReqs = [];
for (let id of gs) {
delReqs.push({ DeleteRequest: { Key: { id } } });
}
let RequestItems = {};
RequestItems[TableName] = delReqs;
let d = await dynamodb
.batchWrite({ RequestItems })
.promise().catch((e) => log(e));
}
log(ids.length + " items processed");
return {};
};
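The snippet above does not include the 1-second delay mentioned for very large delete jobs. A minimal sketch of adding one between batchWrite cycles:
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

// inside the `for (const gs of idGroups)` loop, after each batchWrite:
await sleep(1000); // throttle between cycles when deleting tens of thousands of items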
Not sure why nobody provided a proper answer.
Here's a Lambda I wrote in Node.js. It performs a full scan on the table, then batch deletes 25 items per request.
Remember to change TABLE_NAME.
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({ apiVersion: '2012-08-10' });
//const { TABLE_NAME } = process.env;
TABLE_NAME = "CHANGE ME PLEASE"
exports.handler = async (event) => {
let params = {
TableName: TABLE_NAME,
};
let items = [];
let data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
while (typeof data.LastEvaluatedKey != 'undefined') {
params.ExclusiveStartKey = data.LastEvaluatedKey;
data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
}
let leftItems = items.length;
let group = [];
let groupNumber = 0;
console.log('Total items to be deleted', leftItems);
for (const i of items) {
const deleteReq = {
DeleteRequest: {
Key: {
id: i.id,
},
},
};
group.push(deleteReq);
leftItems--;
if (group.length === 25 || leftItems < 1) {
groupNumber++;
console.log(`Batch ${groupNumber} to be deleted.`);
const params = {
RequestItems: {
[TABLE_NAME]: group,
},
};
await docClient.batchWrite(params).promise();
console.log(
`Batch ${groupNumber} processed. Left items: ${leftItems}`
);
// reset
group = [];
}
}
const response = {
statusCode: 200,
// Uncomment below to enable CORS requests
// headers: {
// "Access-Control-Allow-Origin": "*"
// },
body: JSON.stringify('Hello from Lambda!'),
};
return response;
};
Be aware that you need to follow the instructions:
src: https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
DeleteRequest - Perform a DeleteItem operation on the specified item. The item to be deleted is identified by a Key subelement:
Key - A map of primary key attribute values that uniquely identify the item. Each entry in this map consists of an attribute name and an attribute value. For each primary key, you must provide all of the key attributes. For example, with a simple primary key, you only need to provide a value for the partition key. For a composite primary key, you must provide values for both the partition key and the sort key.
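Applied to the question's schema, where uuid is the partition key and version is the sort key, each DeleteRequest key must carry both attributes. A minimal sketch, assuming the version value of each item to delete is known (for example from a prior Query on the uuid):
const params = {
  RequestItems: {
    [this.TABLE]: [
      {
        DeleteRequest: {
          // composite primary key: both partition and sort key are required
          Key: { uuid: uuid, version: version }
        }
      }
    ]
  }
};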
For batch delete, we can use batchWrite with DeleteRequest. Here is an example: we provide the tableName whose data is to be deleted, and the payload is an array of ids that we need to remove.
In a single request, up to 25 items can be deleted.
const AWS = require('aws-sdk');
const dynamodb= new AWS.DynamoDB.DocumentClient({ apiVersion: '2012-08-10' });
const tableName = "PlayerData";
const payload = [{ id: 101 }, { id: 105 }, { id: 106 }];
const deleteBatchData = async (tableName, payload, dynamodb) => {
try {
await dynamodb.batchWrite({
RequestItems: {
[tableName]: payload.map(item => {
return {
DeleteRequest: {
Key: {
id: item.id
}
}
};
})
}
}).
promise().
then((response) => {
return response;
})
.catch((err) => {
console.log("err ::", JSON.stringify(err))
});
} catch (err) {
console.log('Error in deleteBatchData ', err);
}
}
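A usage sketch for the helper above, with the tableName, payload, and dynamodb client already defined as shown:
deleteBatchData(tableName, payload, dynamodb)
  .then(() => console.log('Batch delete finished'));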
Why not use PartiQL? This approach is much more readable. (This too has a limit of 25 items per request, just like BatchWriteItem.)
// Import required AWS SDK clients and commands for Node.js.
import { BatchExecuteStatementCommand } from "@aws-sdk/client-dynamodb";
import { ddbDocClient } from "../libs/ddbDocClient.js";
const tableName = process.argv[2];
const movieYear1 = process.argv[3];
const movieTitle1 = process.argv[4];
const movieYear2 = process.argv[5];
const movieTitle2 = process.argv[6];
export const run = async (
tableName,
movieYear1,
movieTitle1,
movieYear2,
movieTitle2
) => {
try {
const params = {
Statements: [
{
Statement: "DELETE FROM " + tableName + " where year=? and title=?",
Parameters: [{ N: movieYear1 }, { S: movieTitle1 }],
},
{
Statement: "DELETE FROM " + tableName + " where year=? and title=?",
Parameters: [{ N: movieYear2 }, { S: movieTitle2 }],
},
],
};
const data = await ddbDocClient.send(
new BatchExecuteStatementCommand(params)
);
console.log("Success. Items deleted.", data);
return "Run successfully"; // For unit tests.
} catch (err) {
console.error(err);
}
};
run(tableName, movieYear1, movieTitle1, movieYear2, movieTitle2);
