Confused with promises in a loop - node.js

I'm writing DynamoDB code that does the following:
1. Scan a particular column, get the values and add them to an array.
2. Convert the array to a Set and back to an array to get the unique values.
3. Loop through these unique values, using each one as a parameter, and get the actual items.
Basically I'm trying to create a GROUP BY in DynamoDB.
I'm able to do steps 1 and 2, but for step 3 I have a loop, and the code inside the loop has to be executed. My code is as below.
var AWS = require("aws-sdk");
var creds = new AWS.Credentials('akid', 'secret', 'session');
AWS.config.update({
    "accessKeyId": "myAccessId",
    "secretAccessKey": "MySecretAccessKey",
    "region": "us-east-1"
});
var dynamodb = new AWS.DynamoDB.DocumentClient();
var params = {
    TableName: "MyTable",
    FilterExpression: "#target_state = :target_state",
    ExpressionAttributeNames: {
        "#target_state": "target_state"
    },
    ExpressionAttributeValues: {
        ":target_state": "5"
    }
};
var array = [];
dynamodb.scan(params).promise().then(function (data) {
    data.Items.forEach(function (itemData) {
        array.push(itemData.ruleNo)
    });
    console.log(array);
    return array;
}).then(() => {
    console.log("Entered 2nd block " + [...new Set(array)]);
    var array2 = [...new Set(array)];
    for (index = 0; index < array2.length; ++index) {
        console.log(array2[index]);
        var params1 = {
            TableName: "ChemicalData",
            FilterExpression: "#target_state = :target_state and #ruleNo=:ruleNo",
            ExpressionAttributeNames: {
                "#target_state": "target_state",
                "#ruleNo": "ruleNo"
            },
            ExpressionAttributeValues: {
                ":target_state": "5",
                ":ruleNo": array2[index]
            }
        };
        return dynamodb.scan(params1).promise().then(function (data) {
            var uw = JSON.stringify((data.Items));
            return uw;
        });
    }
}).then((data) => {
    console.log(data);
}).catch(err => {
    console.log(err)
})
When I run this program, the result I get is only one value: the one for the first array element. I can't work out how to loop through all of the array values and then do a console.log(data). Please let me know where I'm going wrong and how I can fix this.
Thanks

Using return inside the for loop breaks out of it on the first iteration. You should gather the promises from the inner scan calls into an array and use Promise.all to resolve them together:
dynamodb.scan(params).promise().then(function (data) {
    data.Items.forEach(function (itemData) {
        array.push(itemData.ruleNo)
    });
    console.log(array);
    return array;
}).then(() => {
    console.log("Entered 2nd block " + [...new Set(array)]);
    var array2 = [...new Set(array)];
    var results = []; // results array
    for (index = 0; index < array2.length; ++index) {
        console.log(array2[index]);
        var params1 = {
            TableName: "ChemicalData",
            FilterExpression: "#target_state = :target_state and #ruleNo=:ruleNo",
            ExpressionAttributeNames: {
                "#target_state": "target_state",
                "#ruleNo": "ruleNo"
            },
            ExpressionAttributeValues: {
                ":target_state": "5",
                ":ruleNo": array2[index]
            }
        };
        // push results to be resolved later
        results.push(dynamodb.scan(params1).promise().then(function (data) {
            var uw = JSON.stringify((data.Items));
            return uw;
        }));
    }
    // return promise that resolves when all results resolve
    return Promise.all(results);
})
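For what it's worth, the same flow can also be written with async/await. Here is a rough sketch only, assuming the same DocumentClient (dynamodb), the same params object, and the same "ChemicalData" table as in the question:

// Sketch: mirrors the Promise.all approach above, expressed with async/await.
async function groupByRuleNo() {
    const data = await dynamodb.scan(params).promise();
    const ruleNos = [...new Set(data.Items.map((item) => item.ruleNo))];
    // Start one scan per unique ruleNo and wait for all of them to finish.
    const results = await Promise.all(ruleNos.map((ruleNo) =>
        dynamodb.scan({
            TableName: "ChemicalData",
            FilterExpression: "#target_state = :target_state and #ruleNo = :ruleNo",
            ExpressionAttributeNames: {
                "#target_state": "target_state",
                "#ruleNo": "ruleNo"
            },
            ExpressionAttributeValues: {
                ":target_state": "5",
                ":ruleNo": ruleNo
            }
        }).promise().then((d) => JSON.stringify(d.Items))
    ));
    console.log(results);
    return results;
}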

Related

How to implement async in for loop?

I have a collection called 'alldetails' which has the details of some other collections:
{
"name" : "Test1",
"table_name" : "collection1",
"column_name" : "column1"
},
{
"name" : "Test2",
"table_name" : "collection2",
"column_name" : "column2"
},
{
"name" : "Test3",
"table_name" : "collection3",
"column_name" : "column3"
}
I have collection1, collection2 and collection3, which have column1, column2 and column3 respectively.
I have to fetch all the names from 'alldetails' and then get the min and max value of each other collection based on its column name.
So I want output like the below:
{name: ["Test1","Test2","Test3"],
date: [{min_date: "2018-12-01", max_date: "2018-12-31", name: "Test1"},
{min_date: "2018-12-01", max_date: "2018-12-31", name: "Test2"},
{min_date: "2018-12-01", max_date: "2018-12-31", name: "Test3"}]
}
I tried the code below, but because of non-blocking behaviour it's not waiting for the response.
alldetails.find({}, { _id: 0 }).then(async function(result) {
let result_data = {};
let resolvedFinalArray = {};
let array = [];
result_data["name"]= [];
result_data["date"] = [];
resolvedFinalArray = await Promise.all(result.map(async value => {
result_data["name"].push(value.name)
getResult(value.table_name,value.column_name,function(response){
result_data["date"].push({min_date: response.minvalue, max_date: response.maxvalue, name:value.name})
});
}));
setTimeout(function()
{
console.log(resolvedFinalArray);
}, 3000);
});
Please suggest a solution.
If you want to wait for getResult, you need to return a Promise from the result.map callback.
You are not pushing anything into resolvedFinalArray, so there is no point in the console.log(resolvedFinalArray).
alldetails.find({}, {_id: 0}).then(async (result) => {
let result_data = {};
result_data["name"] = [];
result_data["date"] = [];
await Promise.all(result.map(value => {
// create Promise that resolves where getResult callback is fired
return new Promise((resolve) => {
getResult(value.table_name, value.column_name, (response) => {
result_data["name"].push(value.name);
result_data["date"].push({
min_date: response.minvalue,
max_date: response.maxvalue,
name: value.name
});
resolve();
});
});
}));
console.log(result_data);
});
Or, using a for loop:
alldetails.find({}, {_id: 0}).then(async (result) => {
let result_data = {};
result_data["name"] = [];
result_data["date"] = [];
for (let i = 0; i < result.length; i++) {
const value = result[i];
await new Promise((resolve) => {
getResult(value.table_name, value.column_name, (response) => {
result_data["name"].push(value.name);
result_data["date"].push({
min_date: response.minvalue,
max_date: response.maxvalue,
name: value.name
});
resolve();
});
});
}
console.log(result_data);
});
Use async.eachOfLimit if you want to apply an async function to every element of an array:
var async = require("async");
var array = [{_id: "...."},{...},{...}];
async.eachOfLimit(array, 1, function(element, index, cb){
myAsyncFunctionWithMyElement(element, function(err){
return cb(err);
});
}, function(err){
// final callback
});
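Applied to this question's getResult callback, that might look roughly like the following sketch (getResult is assumed to call back with the response only, as in the question's code):

var async = require("async");

function collectDates(result, done) {
    var dates = [];
    // Process one record at a time (limit = 1).
    async.eachOfLimit(result, 1, function (value, index, cb) {
        getResult(value.table_name, value.column_name, function (response) {
            dates.push({
                min_date: response.minvalue,
                max_date: response.maxvalue,
                name: value.name
            });
            cb();
        });
    }, function (err) {
        // Final callback: all records have been processed.
        done(err, dates);
    });
}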
The array forEach method won't work with an async function (unless you do deeply evil things like redefining the prototype). This question gives a nice insight into the internals.
If you don't want to rely on external libraries, an easy (and my favourite) approach is something like:
for (let i = 0; i < <your array>.length; i++ ) {
await Promise.all( <your logic> );
}
Just adapt it to your need! :)
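Adapted to this question, the pattern could look something like the sketch below. It assumes getResult is wrapped in a Promise, as the earlier answer does; getResultAsync is a hypothetical helper name:

// Sketch: a promisified wrapper around the question's getResult callback.
const getResultAsync = (table, column) =>
    new Promise((resolve) => getResult(table, column, resolve));

async function collectDates(result) {
    const dates = [];
    for (let i = 0; i < result.length; i++) {
        // Each iteration waits for the previous request to finish.
        const response = await getResultAsync(result[i].table_name, result[i].column_name);
        dates.push({
            min_date: response.minvalue,
            max_date: response.maxvalue,
            name: result[i].name
        });
    }
    return dates;
}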
You might want to use the for await...of loop. See this blog post for details.
This, IMHO, is the most modern way to do it, and it doesn't require you to load any external dependencies, since it is built into the language itself. It's basically very similar to the classical for...of loop.
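As a rough illustration only (reusing the hypothetical getResultAsync wrapper from the previous example), for await...of consumes an iterable of promises in order:

async function collectDates(result) {
    const dates = [];
    // Map each record to a pending promise, then await them one by one.
    const pending = result.map((value) =>
        getResultAsync(value.table_name, value.column_name).then((response) => ({
            min_date: response.minvalue,
            max_date: response.maxvalue,
            name: value.name
        }))
    );
    for await (const entry of pending) {
        dates.push(entry);
    }
    return dates;
}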
This should work if lexical scope is taken into consideration. async.each is also a good option; it would reduce the if/else blocks and manage the promises for you.
alldetails.find({}, { _id: 0 })
    .exec((err, result) => {
        if (!err) {
            let resolvedFinalArray = [];
            result.map((value) => {
                resolvedFinalArray.push({
                    name: value.name,
                    date: []
                });
                getResult(value.table_name, value.column_name, (err, response) => {
                    if (!err) {
                        // Find the entry for this name; indexOf(value.name) would not
                        // match, because the array holds objects rather than plain names.
                        resolvedFinalArray[resolvedFinalArray.findIndex(x => x.name === value.name)]['date'].push({
                            min_date: response.minvalue,
                            max_date: response.maxvalue,
                            name: value.name
                        });
                    } else {
                        // Send your error message.
                        // res.status(500).send(err);
                    }
                });
            });
            console.log(resolvedFinalArray);
            // res.send(resolvedFinalArray);
        } else {
            // Send your error message.
            // res.status(500).send(err);
        }
    });

Delete all items in Dynamodb using Lambda?

Using Lambda (Node.js), how do I delete all the items in a DynamoDB table?
There are 500K rows in the table.
I have tried using the scan method, looping through each item and then calling the delete method, but it only gets through about 3,000 rows.
Code
exports.handler = function(context, callback) {
getRecords().then((data) => {
data.Items.forEach(function(item) {
deleteItem(item.Id).then((data1) => {
});
});
});
};
var deleteItem = function(id) {
var params = {
TableName: "TableName",
Key: {
"Id": id
},
};
return new Promise(function(resolve, reject) {
client.delete(params, function(err, data) {
if (err) {
reject(err);
} else {
resolve();
}
});
});
}
function getRecords() {
var params = {
TableName: 'TableName',
IndexName: 'Type-index',
KeyConditionExpression: 'Type = :ty',
ExpressionAttributeValues: {
':ty': "1"
},
ProjectionExpression: "Id",
};
return new Promise(function(resolve, reject) {
client.query(params, function(err, data) {
if (err) {
reject(err);
} else {
resolve(data);
}
});
});
}
There is already one right answer, but here is another code snippet to delete all records from DynamoDB.
const AWS = require("aws-sdk");
AWS.config.update({
region: "us-east-1",
});
const docClient = new AWS.DynamoDB.DocumentClient();
const getAllRecords = async (table) => {
let params = {
TableName: table,
};
let items = [];
let data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
while (typeof data.LastEvaluatedKey != "undefined") {
params.ExclusiveStartKey = data.LastEvaluatedKey;
data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
}
return items;
};
const deleteItem = (table, id) => {
var params = {
TableName: table,
Key: {
id: id,
},
};
return new Promise(function (resolve, reject) {
docClient.delete(params, function (err, data) {
if (err) {
console.log("Error Deleting ", id,err);
reject(err);
} else {
console.log("Success Deleting ", id,err);
resolve();
}
});
});
};
exports.handler = async function (event, context, callback) {
try {
const tableName = "<table>";
// scan and get all items
const allRecords = await getAllRecords(tableName);
// delete one by one
for (const item of allRecords) {
await deleteItem(tableName, item.id);
}
callback(null, {
msg: "All records are deleted.",
});
} catch (e) {
callback(null, JSON.stringify(e, null, 2));
}
};
A Scan operation consumes read capacity, and a single Scan request returns at most 1 MB of data (one read capacity unit covers up to 4 KB). When that limit is reached, the Scan returns only what it has found so far, along with a LastEvaluatedKey; if you need more, you have to issue another Scan request.
Thus, you'll need two loops: 1) a loop to delete all records returned by each Scan; 2) a loop to keep scanning until you reach the end of the table.
Make sure you use consistent reads, or wait a second or two before issuing another Scan; otherwise you may get repeated items across Scans.
exports.handler = function(context, callback) {
    clearRecords();
};

clearRecords = function() {
    getRecords().then((data) => {
        // Stop recursing once the query comes back empty.
        if (data.Items.length === 0) return;
        data.Items.forEach(function(item) {
            deleteItem(item.Id).then((data1) => {});
        });
        clearRecords(); // Will call the same function over and over
    });
}
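For reference, a consistent read is just an extra flag on the request. Here is a sketch using the question's DocumentClient (note that strongly consistent reads are supported on tables and local secondary indexes, but not on global secondary indexes):

// Sketch only: a strongly consistent Scan of the base table.
var scanParams = {
    TableName: "TableName",
    ProjectionExpression: "Id",
    ConsistentRead: true
};
client.scan(scanParams, function (err, data) {
    if (err) console.error(err);
    else console.log(data.Items.length + " items read");
});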
Observe that Lambda has a timeout limit of 15 minutes. Since you have 500K items in your table, it's likely that your Lambda will time out and you'll need to trigger it more than once. You could also make your Lambda invoke itself after, say, 14 minutes 50 seconds; just take a look at the AWS SDK documentation for invoking Lambda functions. For this, you might also want to check the getRemainingTimeInMillis() method on the context object.
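A rough sketch of that idea, under the assumption that the deletion work is refactored into a hypothetical clearOnePage() helper that deletes one page of results and returns how many matching items are still left:

// Sketch only: stop early and re-invoke this function before the timeout hits.
const AWS = require("aws-sdk");
const lambda = new AWS.Lambda();

exports.handler = async function (event, context) {
    // Keep working while more than ~1 minute of execution time remains.
    while (context.getRemainingTimeInMillis() > 60 * 1000) {
        const remaining = await clearOnePage(); // hypothetical helper
        if (remaining === 0) return "done";
    }
    // Asynchronously invoke this same function to continue the work.
    await lambda.invoke({
        FunctionName: context.functionName,
        InvocationType: "Event",
        Payload: JSON.stringify(event)
    }).promise();
    return "continuing in a new invocation";
};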

access values after authentication in node js

I have a program that does the following:
Look into a DynamoDB table.
Get the data from the table.
Save the variables in the session.
After the process, print the values to the console.
My code is as below.
intentHandlers['GetMYBusinessInfo'] = function (request, session, response, slots) {
console.log('entered ext bloxk');
if (!session.attributes.userName) {
console.log('eneterd the user entered the block');
var userName = 'jim';
isUserRegistered(userName.toLowerCase(), function (res, err) {
if (err) {
response.fail(err);
console.log(err);
}
else if (!res) {
response.shouldEndSession = true;
}
else {
console.log(res);
var countRes = JSON.stringify(res.Count);
var unpUserRegion = JSON.stringify(res.Items[0].Region);
var unpUserCity = JSON.stringify(res.Items[0].State);
var userRegion = JSON.parse(unpUserRegion);
var userCity = JSON.parse(unpUserCity);
session.attributes.city = userCity;
session.attributes.region = userRegion;
console.log("parsed " + countRes + "\t region is " + userRegion);
session.attributes.userName = true;
}
});
}
console.log(`session values after authentication are user city is ${session.attributes.city}`);
}
The method to check if the value is in DynamoDb or not.
function isUserRegistered(userName, callback) {
var params = {
TableName: "usersTable",
FilterExpression: "#nme = :nme",
ExpressionAttributeNames: {
"#nme": "Name",
},
ExpressionAttributeValues: {
":nme": userName
}
};
var count = 0;
docClient.scan(params, function (err, data) {
if (err) {
console.error("Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
callback(false, err);
} else {
console.log("Scan succeeded." + data.Items.length);
if (data.Items.length === 0) {
callback(false);
}
else {
data.Items.forEach(function (itemData) {
console.log("Item :", ++count, JSON.stringify(itemData));
});
callback(data);
}
}
});
}
when I run this, the output that I get is:
session values after authentication are user city is undefined
Scan succeeded.1
Item : 1
{
"ID": "3",
"State": "wisconsin",
"Region": "midwest",
"Name": "jim"
}
{ Items: [ { ID: '3', State: 'wisconsin', Region: 'midwest', Name: 'jim' } ],
Count: 1,
ScannedCount: 1 }
parsed 1 region is midwest
I know that, Node.js being non-blocking, the above output is expected, but I want the city value printed as session values after authentication are user city is {HereTheCityComes} instead of session values after authentication are user city is undefined.
I'm sure that placing the console.log(`session values after authentication are user city is ${session.attributes.city}`); in the last else block (the place where the data is returned) would work.
But I need this type of functionality (getting the data as shown in my current scenario), as there are other things to be done after checking whether the user is available in the database.
Please let me know where I am going wrong and how I can fix this.
You can't synchronously expect an async result.
What you can do here is solve your problem with promises.
Here is a solution:
intentHandlers['GetMYBusinessInfo'] = function(request, session, response, slots) {
console.log('entered ext bloxk');
var userPromise = Promise.resolve();
if (!session.attributes.userName) {
console.log('eneterd the user entered the block');
var userName = 'jim';
userPromise = new Promise(function (resolve, reject) {
isUserRegistered(userName.toLowerCase(), function (res, err) {
if (err) {
response.fail(err);
reject(err);
return; // stop here so the success path below doesn't run on error
}
var countRes = JSON.stringify(res.Count);
var unpUserRegion = JSON.stringify(res.Items[0].Region);
var unpUserCity = JSON.stringify(res.Items[0].State);
var userRegion = JSON.parse(unpUserRegion);
var userCity = JSON.parse(unpUserCity);
session.attributes.city = userCity;
session.attributes.region = userRegion;
console.log("parsed " + countRes + "\t region is " + userRegion);
resolve(res);
});
});
}
userPromise.then(function () {
console.log(`session values after authentication are user city is ${session.attributes.city}`);
});
}
If you are not using ES6, then just install bluebird and use var Promise = require('bluebird')

How do I batch delete with DynamoDB?

I am getting the error "The provided key element does not match the schema". uuid is my partition key, and I also have a sort key, version. I figured I could use batchWrite (docs) to delete all items with the same uuid.
My ES6 code is as follows:
delete(uuid) {
const promise = new Promise();
const params = {
RequestItems: {
[this.TABLE]: [
{
DeleteRequest: {
Key: { uuid: uuid }
}
}
]
}
};
// this._client references the DocumentClient
this._client.batchWrite(params, function(err, data) {
if (err) {
// this gets hit with error
console.log(err);
return promise.reject(err);
}
console.log(result);
return promise.resolve(result);
});
return promise;
}
Not sure why it is erroring on the key when it is the primary key. I have seen posts about needing other indexes when searching by something that isn't a key, but I don't believe that's the case here.
Here is a batch write delete request sample. This code has been tested and is working fine. If you adapt this code to your requirement, it should work.
Table Definition:-
Bag - Table Name
bag - Hash Key
No sort key in the 'Bag' table
Batch Write Code:-
var AWS = require("aws-sdk");
AWS.config.update({
region : "us-west-2",
endpoint : "http://localhost:8000"
});
var documentclient = new AWS.DynamoDB.DocumentClient();
var itemsArray = [];
var item1 = {
DeleteRequest : {
Key : {
'bag' : 'b1'
}
}
};
itemsArray.push(item1);
var item2 = {
DeleteRequest : {
Key : {
'bag' : 'b2'
}
}
};
itemsArray.push(item2);
var params = {
RequestItems : {
'Bag' : itemsArray
}
};
documentclient.batchWrite(params, function(err, data) {
if (err) {
console.log('Batch delete unsuccessful ...');
console.log(err, err.stack); // an error occurred
} else {
console.log('Batch delete successful ...');
console.log(data); // successful response
}
});
Output:-
Batch delete successful ...
{ UnprocessedItems: {} }
This is doable with a Node.js Lambda, but there are a few things you need to consider to address concurrency while processing large tables:
Handle paging while querying all of the matching elements from a secondary index
Split into chunks of 25 requests as per BatchWrite/Delete requirements https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
Above 40,000 matches you might need a 1-second delay between cycles (a small sleep helper is sketched after this list) https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html
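For that delay, a tiny helper is enough (a sketch; it would be awaited inside the loop over the batches, e.g. await sleep(1000)):

// Sketch: pause between BatchWrite cycles.
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));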
Here is a snippet that I wrote:
const AWS = require("aws-sdk");
const dynamodb = new AWS.DynamoDB.DocumentClient();
const log = console.log;

exports.handler = async (event) => {
    log(event);
    let TableName = event.tableName;
    let params = {
        TableName,
        FilterExpression: "userId = :uid",
        ExpressionAttributeValues: {
            ":uid": event.userId,
        },
    };
    // Recursively scan, following LastEvaluatedKey to collect every matching id.
    let getItems = async (lastKey, items) => {
        if (lastKey) params.ExclusiveStartKey = lastKey;
        let resp = await dynamodb.scan(params).promise();
        items = resp.Items.length
            ? items.concat(resp.Items.map((x) => x.id))
            : items;
        if (resp.LastEvaluatedKey)
            return await getItems(resp.LastEvaluatedKey, items);
        else return items;
    };
    let ids = await getItems(null, []);
    // Split the ids into chunks of 25, the BatchWriteItem limit.
    let idGroups = [];
    for (let i = 0; i < ids.length; i += 25) {
        idGroups.push(ids.slice(i, i + 25));
    }
    for (const gs of idGroups) {
        let delReqs = [];
        for (let id of gs) {
            delReqs.push({ DeleteRequest: { Key: { id } } });
        }
        let RequestItems = {};
        RequestItems[TableName] = delReqs;
        let d = await dynamodb
            .batchWrite({ RequestItems })
            .promise()
            .catch((e) => log(e));
    }
    log(ids.length + " items processed");
    return {};
};
Not sure why nobody provided a proper answer.
Here's a Lambda I wrote in Node.js. It performs a full scan on the table, then batch deletes 25 items per request.
Remember to change TABLE_NAME.
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({ apiVersion: '2012-08-10' });
//const { TABLE_NAME } = process.env;
const TABLE_NAME = "CHANGE ME PLEASE";
exports.handler = async (event) => {
let params = {
TableName: TABLE_NAME,
};
let items = [];
let data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
while (typeof data.LastEvaluatedKey != 'undefined') {
params.ExclusiveStartKey = data.LastEvaluatedKey;
data = await docClient.scan(params).promise();
items = [...items, ...data.Items];
}
let leftItems = items.length;
let group = [];
let groupNumber = 0;
console.log('Total items to be deleted', leftItems);
for (const i of items) {
const deleteReq = {
DeleteRequest: {
Key: {
id: i.id,
},
},
};
group.push(deleteReq);
leftItems--;
if (group.length === 25 || leftItems < 1) {
groupNumber++;
console.log(`Batch ${groupNumber} to be deleted.`);
const params = {
RequestItems: {
[TABLE_NAME]: group,
},
};
await docClient.batchWrite(params).promise();
console.log(
`Batch ${groupNumber} processed. Left items: ${leftItems}`
);
// reset
group = [];
}
}
const response = {
statusCode: 200,
// Uncomment below to enable CORS requests
// headers: {
// "Access-Control-Allow-Origin": "*"
// },
body: JSON.stringify('Hello from Lambda!'),
};
return response;
};
Be aware that you need to follow these instructions:
src: https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
DeleteRequest - Perform a DeleteItem operation on the specified item. The item to be deleted is identified by a Key subelement:
Key - A map of primary key attribute values that uniquely identify the item. Each entry in this map consists of an attribute name and an attribute value. For each primary key, you must provide all of the key attributes. For example, with a simple primary key, you only need to provide a value for the partition key. For a composite primary key, you must provide values for both the partition key and the sort key.
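Applied to this question, where uuid is the partition key and version is the sort key, every DeleteRequest would therefore have to carry both. A rough sketch (the table name and the version value are placeholders):

// Sketch only: with a composite primary key, the Key must include
// both the partition key (uuid) and the sort key (version).
const params = {
    RequestItems: {
        "MyTable": [
            {
                DeleteRequest: {
                    Key: { uuid: uuid, version: 1 }
                }
            }
        ]
    }
};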
For batch delete, we can use batchWrite with a DeleteRequest. Here is an example: we provide the tableName whose data is to be deleted, and the payload is an array of the ids we need to remove.
A single request can delete up to 25 items.
const AWS = require('aws-sdk');
const dynamodb = new AWS.DynamoDB.DocumentClient({ apiVersion: '2012-08-10' });

const tableName = "PlayerData";
const payload = [{ id: 101 }, { id: 105 }, { id: 106 }];

const deleteBatchData = async (tableName, payload, dynamodb) => {
    try {
        await dynamodb.batchWrite({
            RequestItems: {
                // Map each id in the payload to a DeleteRequest for this table.
                [tableName]: payload.map(item => {
                    return {
                        DeleteRequest: {
                            Key: {
                                id: item.id
                            }
                        }
                    };
                })
            }
        }).promise()
            .then((response) => {
                return response;
            })
            .catch((err) => {
                console.log("err ::", JSON.stringify(err));
            });
    } catch (err) {
        console.log('Error in deleteBatchData ', err);
    }
};

deleteBatchData(tableName, payload, dynamodb);
Why not use PartiQL? This approach is much more readable. (This too has a limit of 25 statements per request, just like BatchWriteItem.)
// Import required AWS SDK clients and commands for Node.js.
import { BatchExecuteStatementCommand } from "@aws-sdk/client-dynamodb";
import { ddbDocClient } from "../libs/ddbDocClient.js";
const tableName = process.argv[2];
const movieYear1 = process.argv[3];
const movieTitle1 = process.argv[4];
const movieYear2 = process.argv[5];
const movieTitle2 = process.argv[6];
export const run = async (
tableName,
movieYear1,
movieTitle1,
movieYear2,
movieTitle2
) => {
try {
const params = {
Statements: [
{
Statement: "DELETE FROM " + tableName + " where year=? and title=?",
Parameters: [{ N: movieYear1 }, { S: movieTitle1 }],
},
{
Statement: "DELETE FROM " + tableName + " where year=? and title=?",
Parameters: [{ N: movieYear2 }, { S: movieTitle2 }],
},
],
};
const data = await ddbDocClient.send(
new BatchExecuteStatementCommand(params)
);
console.log("Success. Items deleted.", data);
return "Run successfully"; // For unit tests.
} catch (err) {
console.error(err);
}
};
run(tableName, movieYear1, movieTitle1, movieYear2, movieTitle2);

Sorting MapReduce results on MongoDB

I have my MapReduce working correctly to group my results by date. All works well; however, I'd like the results to be returned from most recent to oldest based on the 'created' value.
findTimelineByQuery: function (query, fields, options, callback) {
var obj = {};
obj.map = function() {
emit(Date.UTC(this.created.getFullYear(), this.created.getMonth(), this.created.getDate()), {
created:this.created,
title:this.title,
type: this.type,
id: this._id,
owner: this.owner,
value: this.value
});
};
obj.reduce = function(previous, current) {
var array = [];
var res = {items:array};
current.forEach(function (v) {
res.items.push(v);
});
return res;
};
obj.verbose = true;
obj.query = query;
_Items.mapReduce(obj, function(error, model, stats) {
callback(model);
});
}
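One possible approach (a sketch only, assuming mapReduce returns its results inline as an array of { _id, value } documents keyed by the emitted Date.UTC timestamp) is simply to sort the returned array before handing it to the callback:

_Items.mapReduce(obj, function (error, model, stats) {
    if (error) return callback(error);
    // _id is the emitted Date.UTC(...) key, so sorting it in
    // descending order puts the most recent day first.
    model.sort(function (a, b) {
        return b._id - a._id;
    });
    callback(model);
});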
