I have a use case where I need to perform a batch_write operation on DynamoDB. I referred to this article, which has a good solution for a similar use case. I implemented it with a few cleanups in my code, and it works as expected.
const _ = require('lodash');
// helper methods
async function asyncForEach(array, cb) {
await Promise.all(array.map(async (item) => {
await cb(item, array);
}));
}
/**
 * Converts a promise into one that never rejects.
 * Resolves with `[null, data]` on success and `[err]` on failure.
 */
function to(promise) {
  const onFulfilled = (data) => [null, data];
  const onRejected = (err) => [err];
  return promise.then(onFulfilled).catch(onRejected);
}
const call = function (params) {
return dynamoDb.batchWriteItem(params).promise();
};
async function batchWrite25(arrayOf25, tableName) {
// 25 is as many as you can write in one time
const itemsArray = [];
_.forEach(arrayOf25, (item) => {
itemsArray.push({
PutRequest: {
Item: item,
},
});
});
const params = {
RequestItems: {
[tableName]: itemsArray,
},
};
await to(call(params));
}
async function batchWrite(itemArray, tableName) {
let mainIndex = 0;
let subIndex = 0;
let arrayOf25 = [];
const arrayLength = itemArray.length;
await asyncForEach(itemArray, async (item) => {
arrayOf25.push(item);
subIndex += 1;
mainIndex += 1;
// 25 is as many as you can write in one time
if (subIndex % 25 === 0 || mainIndex === arrayLength) {
await to(batchWrite25(arrayOf25, tableName));
subIndex = 0; // reset
arrayOf25 = [];
}
});
}
module.exports = {
batchWrite,
};
However, the code looks a bit complicated with so many callbacks involved. Is there a cleaner way of writing the same thing without using the call, asyncForEach, or to helper methods?
Here's one simple way to batch the items:
const BATCH_MAX = 25;
const batchWrite = async (items, table_name) => {
const BATCHES = Math.floor((items.length + BATCH_MAX - 1) / BATCH_MAX);
for (let batch = 0; batch < BATCHES; batch++) {
const itemsArray = [];
for (let ii = 0; ii < BATCH_MAX; ii++) {
const index = batch * BATCH_MAX + ii;
if (index >= items.length) break;
itemsArray.push({
PutRequest: {
Item: items[index],
},
});
}
const params = {
RequestItems: {
[table_name]: itemsArray,
},
};
console.log("Batch", batch, "write", itemsArray.length, "items");
await dynamodb.batchWriteItem(params).promise();
}
};
To send the batches concurrently instead of one at a time, you can convert this function to return an array of promises and later call Promise.all(promises) on that array. For example:
// Variant that starts every batch without awaiting it and returns the
// array of pending promises, so the caller decides how to wait for them
// (for example with Promise.all).
const batchWrite = (items, table_name) => {
const promises = [];
const BATCHES = Math.floor((items.length + BATCH_MAX - 1) / BATCH_MAX);
for (let batch = 0; batch < BATCHES; batch++) {
// same code as above here ...
// NOTE(review): `params` is not defined in this sketch; it is expected to
// come from the elided code that builds the batch.
promises.push(dynamodb.batchWriteItem(params).promise());
}
return promises;
};
A much cleaner way using lodash that worked for me is listed below. Hope this helps someone.
// Splits this.dynamoPayload into chunks of 25 and issues one batchWriteItem
// request per chunk, waiting until all of them have completed.
batchWrite = async () => {
  // dynamoPayload has the entire payload in the desired format for dynamodb insertion.
  const chunks = _.chunk(this.dynamoPayload, 25);
  const writeChunk = async (chunk) => {
    const params = { RequestItems: { [this.tableName]: chunk } };
    await this.dynamoDb.batchWriteItem(params).promise();
  };
  await Promise.all(chunks.map((chunk) => writeChunk(chunk)));
};
Related
I'm learning Node.js and having some problems using async/await. I'm using the Firebase database to read and write data. Here is what I'm doing (the full function, in case you need it):
/**
 * Collects the import records of the warehouses that the hard-coded user
 * `kdat0310` works at.
 *
 * Reads `/Authentication`, `/user`, `/TypeAndColor` and `/import` (plus each
 * import's `listGoods` child) from the database.
 *
 * @param reqData request payload; only `username` and `token` are read.
 * @returns `importList` when `isValid` is truthy, otherwise the string 'Invalid'.
 */
async getImport(reqData: any): Promise<any> {
const username = 'kdat0310';
const db = admin.database();
const userRef = db.ref('/user');
const importRef = db.ref('/import');
const refAuthentication = db.ref('/Authentication');
const keyList = [];
const providerKey = [];
const khoList = [];
let index = 0;
const providerList = [];
// NOTE(review): isValid receives whatever once() resolves with, not the
// callback's true/false — presumably the snapshot, which would make the
// check below always pass. Confirm against the Firebase API.
const isValid = await refAuthentication.once('value', function (snapshot) {
for (const val of Object.values(snapshot.val())) {
if (
Object(val).username === Object(reqData).username &&
Object(val).token === Object(reqData).token
) {
return true;
}
}
return false;
});
if (isValid) {
// Collect the khoId of every workAt entry of the matching user.
await userRef.once('value', function (snapshot) {
for (const value of Object.values(snapshot.val())) {
if (value) {
if (Object(value).username == username) {
for (const val of Object(value).workAt) {
if (val) khoList.push(val.khoId);
}
}
}
}
});
const typeAndColorKey = [];
const typeAndColorValue = [];
const typeAndColorRef = db.ref('/TypeAndColor');
// Build {id, type, color} entries, pairing the n-th non-null value with
// the n-th key.
await typeAndColorRef.once('value', function (snapshot) {
let count = 0;
for (const key in snapshot.val()) {
typeAndColorKey.push(key);
}
for (const value of snapshot.val()) {
if (value !== undefined && value != null) {
typeAndColorValue.push({
id: typeAndColorKey[count],
type: value.type,
color: value.color,
});
count = count + 1;
}
}
});
// Looks up type/color by id; returns undefined when no entry matches.
const findTypeAndColor = (id: any) => {
for (const value of typeAndColorValue) {
if (id == value.id) {
return { type: value.type, color: value.color };
}
}
};
const userKey = [];
const userList = [];
// Build {id, name} entries for the users, same key/value pairing as above.
await userRef.once('value', function (snapshot) {
let count = 0;
for (const key in snapshot.val()) {
userKey.push(key);
}
for (const value of Object(snapshot.val())) {
if (value != undefined && value != null) {
userList.push({
id: userKey[count],
name: Object(value).name,
});
count++;
}
}
});
// Looks up a user's name by id; returns undefined when no entry matches.
const findUserName = (userId: any) => {
// NOTE(review): returnValue is never read.
const returnValue = '';
for (const value of userList) {
if (userId == Object(value).id) {
return Object(value).name;
}
}
};
const importList = [];
// NOTE(review): this callback is async, but nothing visible here waits
// for the promise it returns; once() may resolve before the inner awaits
// finish, leaving importList empty at the return below — TODO confirm.
await importRef.once('value', async function (snapshot) {
const importKey = [];
const cloneArr = snapshot.val().map((item: any) => {
return item;
});
for (const key in snapshot.val()) {
importKey.push(key);
}
let countTemp = 0;
for (const value of Object.values(cloneArr)) {
const goodsKeyList = [];
let count = 0;
// Only imports whose warehouse is in khoList are collected.
if (khoList.indexOf(Object(value).warehouseId) !== -1) {
const listGoodsList = [];
if (Object(value).listGoods) {
for (const key in Object(value).listGoods) {
goodsKeyList.push(key);
}
const refListGoods = db.ref(
'/import/' + importKey[countTemp] + '/listGoods',
);
await refListGoods.once('value', function (snapshot) {
let item: any;
for (item of Object.values(snapshot.val())) {
if (item) {
// Drop falsy entries before counting the goods.
const tempItem = item.filter((n: any) => n);
listGoodsList.push({
typeAndColor: findTypeAndColor(goodsKeyList[count]),
listGoods: tempItem,
number: tempItem.length,
});
}
count++;
}
});
}
console.log('test 1', listGoodsList);
// NOTE(review): `[]` creates a new array, so this comparison is always
// true; an emptiness check would need listGoodsList.length.
if (listGoodsList !== []) {
importList.push({
listGoods: listGoodsList,
driver: Object(value).driver,
userId: Object(value).importEmployee,
name: findUserName(Object(value).importEmployee),
orderId: Object(value).orderId,
warehouseId: Object(value).warehouseId,
time: Object(value).time,
});
}
}
countTemp++;
}
console.log('test 2', importList);
});
return importList;
}
return 'Invalid';
}
The problem shows up at await importRef.once. I made the callback passed to once async so that I could await refListGoods.once inside it and push the data I need into the array, but then return importList; returns nothing. I figured that await refListGoods.once causes this problem. The inner await does its job, because I can console.log importList inside the callback just fine, but I assumed await importRef.once would also finish before the return. When I delete await refListGoods.once, the return works, but I don't get the data I need. Do I need to refactor all of the code the way I did with findTypeAndColor and findUserName above, or is there a better way to solve this problem?
If you want to use await on the Promise returned by once, you should not pass a callback function to it.
So instead of:
const isValid = await refAuthentication.once('value', function (snapshot) {
for (const val of Object.values(snapshot.val())) {
if (
Object(val).username === Object(reqData).username &&
Object(val).token === Object(reqData).token
) {
return true;
}
}
return false;
});
Do:
// Await the snapshot itself (no callback), then scan its children for an
// entry whose username and token both match the request.
const snapshot = await refAuthentication.once('value');
let isValid = false;
snapshot.forEach((child) => {
  const val = child.val();
  if (
    val.username === Object(reqData).username &&
    val.token === Object(reqData).token
  ) {
    isValid = true;
    // Returning true cancels the enumeration, so the remaining children
    // are not visited once a match has been found.
    return true;
  }
  return false;
});
I want to write a script that splits the lines read from a file into packages of 25 items, but unfortunately a sample package ends up containing 40 codes. For example, if I have 60 codes, the script should create two packages of 25 codes and one package of 10 codes. Unfortunately, I can't get it to work.
const fs = require('fs');
// Reads code.txt, wraps every line in a PutRequest and tries to send the
// accumulated requests to 'TABLE_NAME' in batches.
fs.readFile('code.txt', function (err, data) {
if (err) throw err;
const array = data.toString().split("\n");
let count = 0;
let items = [];
// NOTE(review): for...in visits the array's index keys as strings.
for (let i in array) {
items.push({
PutRequest: {
Item: {
code: array[i]
}
}
});
// params is rebuilt on every iteration and references the current items array.
let params = {
RequestItems: {
'TABLE_NAME': items
}
};
// NOTE(review): the item is pushed before this check, so the first time
// count === 25 holds, items already contains 26 entries.
if (count === 25) {
dynamoDB.batchWrite(params, function (err, data) {
if (err) {
console.log(err);
} else {
// These resets run only when this callback is invoked, not at the point
// in the loop where batchWrite is called.
count = 0;
items = [];
}
});
}else{
count++;
}
}
});
code.txt content
https://0bin.net/paste/NA8-4hkq#1Ohwt5uUkQqE0YscwnxTX2gxEqlvAUVKp1JRipBCsZg
Any idea what I do wrong?
Your dynamoDB.batchWrite() is asynchronous. Thus its callback is executed only after the loop has completed. So items and count are never reset ...
The easiest fix is to switch to a promise-based approach like the following:
const BATCHSIZE = 25;
const fs = require('fs').promises;
async function batchLoad() {
const lines = (await fs.readFile("code.txt", "utf-8")).split("\n");
while (lines.length > 0) {
const items = lines.splice(0, BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
const params = { RequestItems: { TABLE_NAME: items}};
await new Promise((resolve, reject) => {
dynamoDb.batchWrite(params, (err) => {
if (err) return reject(err);
resolve();
});
});
}
}
A callback based approach could look like this
const BATCHSIZE = 25;
// Callback-based variant: reads code.txt and writes its lines to TABLE_NAME
// in batches of BATCHSIZE, starting the next batch only after the previous
// write has succeeded.
fs.readFile("code.txt", "utf-8", (err, data) => {
  // Without the file contents there is nothing to split.
  if (err) {
    console.error(err);
    return;
  }
  const lines = data.split("\n");
  function writeBatch() {
    if (!lines.length) return;
    // splice removes the batch from the front of the remaining lines.
    const items = lines.splice(0, BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
    const params = { RequestItems: { TABLE_NAME: items}};
    dynamoDb.batchWrite(params, err => {
      // Stop at the first failed batch; the remaining lines are not written.
      if (err) console.error(err);
      else writeBatch();
    });
  }
  writeBatch();
});
The function writeBatch takes a certain number of lines from your original array and writes them into the database. Only after the write into the DB was successful does it call itself again to handle the next batch. But be aware that this approach may exceed the maximum call stack size and throw an error.
You can also change either of these approaches so that it does not manipulate the lines array (which may be quite expensive) and instead just takes the current slice:
const BATCHSIZE = 25;
const fs = require('fs').promises;
async function batchLoad() {
const lines = (await fs.readFile("code.txt", "utf-8")).split("\n");
let currentIndex = 0;
while (currentIndex < lines.length) {
const items = lines.slice(currentIndex, currentIndex + BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
const params = { RequestItems: { TABLE_NAME: items}};
await new Promise((resolve, reject) => {
dynamoDb.batchWrite(params, (err) => {
if (err) return reject(err);
resolve();
});
});
currentIndex += BATCHSIZE;
}
}
and
const BATCHSIZE = 25;
// Callback-based variant that leaves the lines array untouched: each call
// writes the slice starting at currentIndex and, on success, continues
// with the next slice.
fs.readFile("code.txt", "utf-8", (err, data) => {
  // Without the file contents there is nothing to split.
  if (err) {
    console.error(err);
    return;
  }
  const lines = data.split("\n");
  function writeBatch(currentIndex) {
    if (currentIndex >= lines.length) return;
    const items = lines.slice(currentIndex, currentIndex + BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
    const params = { RequestItems: { TABLE_NAME: items}};
    dynamoDb.batchWrite(params, err => {
      // Stop at the first failed batch; later slices are not written.
      if (err) console.error(err);
      else writeBatch(currentIndex + BATCHSIZE);
    });
  }
  writeBatch(0);
});
To avoid running into a maximum call stack exception, you can also schedule the next batch on the event loop instead of calling it recursively, i.e.:
dynamoDb.batchWrite(params, err => {
  // On failure, report the error and do not schedule another batch.
  // On success, queue the next batch with a zero-delay timer instead of
  // calling writeBatch directly from this callback.
  if (err) console.error(err);
  else setTimeout(()=> { writeBatch(currentIndex + BATCHSIZE);}, 0);
});
This way you won't build up a massive callstack from recursive calls.
To keep track of how many records are already saved to the db you could simply store the current counter in a file. When you restart the process, load that file and check how many lines to skip. Don't forget to remove the file, once all records have been saved ... For example with the first approach:
const BATCHSIZE = 25;
const fs = require('fs').promises;
async function batchLoad() {
const lines = (await fs.readFile("code.txt", "utf-8")).split("\n");
const skipLines = 0;
try {
skipLines = +(await fs.readFile("skip.txt", "utf-8"));
if (isNaN(skipLines)) skipLines = 0;
lines.splice(0, skipLines);
} catch (e) {
skipLines = 0;
}
while (lines.length > 0) {
const items = lines.splice(0, BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
const params = { RequestItems: { TABLE_NAME: items}};
await new Promise((resolve, reject) => {
dynamoDb.batchWrite(params, (err) => {
if (err) return reject(err);
resolve();
});
});
skipLines += BATCHSIZE;
await fs.writeFile("skip.txt", `${skipLines}`);
}
try {
await fs.unlink("skip.txt");
} catch (e) {
}
}
I am creating a web scraper that scrapes all of the movies coming out over the next year from this site (https://www.imdb.com/movies-coming-soon/). It loops through an array of links, one per month, that together contain all the movies for the next year. It works, but the only problem is that the results are not returned in order because of Node.js's asynchronous behavior. How do I get it to loop through the array and return the data in order?
I've tried to write a callback function, but I don't know where it should go.
const request = require('request')
const cheerio = require('cheerio')
const movieArray = [ '/movies-coming-soon/2019-09/',
'/movies-coming-soon/2019-10/',
'/movies-coming-soon/2019-11/',
'/movies-coming-soon/2019-12/',
'/movies-coming-soon/2020-01/',
'/movies-coming-soon/2020-02/',
'/movies-coming-soon/2020-03/',
'/movies-coming-soon/2020-04/',
'/movies-coming-soon/2020-05/',
'/movies-coming-soon/2020-06/',
'/movies-coming-soon/2020-07/',
'/movies-coming-soon/2020-08/' ]
// Request every month page and, for each successful (HTTP 200) response,
// print the requested URL followed by the text of each <h4> on the page.
movieArray.forEach((path) => {
  const url = 'https://www.imdb.com' + path
  request.get(url, (err, res, body) => {
    // Skip failed requests and non-200 responses.
    if (err || res.statusCode != 200) return
    console.log(res.request.href)
    const $ = cheerio.load(body)
    //console.log(next)
    $('h4').each((_index, heading) => {
      const date = $(heading).text()
      console.log(date)
    })
  })
})
I'm expecting the data to be returned in the order of the array, not in whatever order the responses happen to arrive because of Node's asynchronous behavior.
It's a classic async issue in a for loop, as explained at https://lavrton.com/javascript-loops-how-to-handle-async-await-6252dd3c795/. Below is a solution:
// const request = require('request')
const request = require('request-promise');
const cheerio = require('cheerio');
const movieArray = [
'/movies-coming-soon/2019-09/',
'/movies-coming-soon/2019-10/',
'/movies-coming-soon/2019-11/',
'/movies-coming-soon/2019-12/',
'/movies-coming-soon/2020-01/',
'/movies-coming-soon/2020-02/',
'/movies-coming-soon/2020-03/',
'/movies-coming-soon/2020-04/',
'/movies-coming-soon/2020-05/',
'/movies-coming-soon/2020-06/',
'/movies-coming-soon/2020-07/',
'/movies-coming-soon/2020-08/',
];
async function processMovieArray(array) {
for (const item of array) {
await getMovie(item);
}
console.log('Done');
}
/**
 * Downloads one "coming soon" page and logs the text of every <h4> on it.
 *
 * @param item path appended to https://www.imdb.com.
 */
async function getMovie(item) {
  const options = {
    method: `GET`,
    uri: 'https://www.imdb.com' + item,
    // request-promise resolves with just the body by default; ask for the
    // full response so that `response.body` below is defined.
    resolveWithFullResponse: true,
  };
  const response = await request(options);
  const $ = cheerio.load(response.body);
  $('h4').each((i, v) => {
    const date = $(v).text();
    console.log(date);
  });
}
processMovieArray(movieArray);
The low tech way that deviates the least from your current code is to just use the index of your for loop to populate an array. Since let in the for loop will make a separate variable for i for each iteration of the for loop, we can use that index inside the async callback to reference the desired spot in a results array. Then, you also use a cntr to know when you've finished with all the results:
const request = require('request');
const cheerio = require('cheerio');
if (!Array.prototype.flat) {
Array.prototype.flat = function() {
return this.reduce((acc, val) => acc.concat(val), []);
}
}
const movieArray = [ '/movies-coming-soon/2019-09/',
'/movies-coming-soon/2019-10/',
'/movies-coming-soon/2019-11/',
'/movies-coming-soon/2019-12/',
'/movies-coming-soon/2020-01/',
'/movies-coming-soon/2020-02/',
'/movies-coming-soon/2020-03/',
'/movies-coming-soon/2020-04/',
'/movies-coming-soon/2020-05/',
'/movies-coming-soon/2020-06/',
'/movies-coming-soon/2020-07/',
'/movies-coming-soon/2020-08/' ];
// One slot per entry of movieArray, filled by index so that the final
// order matches the request order regardless of response timing.
let results = [];
// Number of responses (successful or not) received so far.
let cntr = 0;
for (let i = 0; i < movieArray.length; i++) {
  request.get('https://www.imdb.com' + movieArray[i] , (err, res, body) => {
    ++cntr;
    if (!err && res.statusCode == 200) {
      console.log(res.request.href)
      const $ = cheerio.load(body)
      //console.log(next)
      let textArray = [];
      // The inner index is named differently so it does not shadow the
      // loop's `i`, which selects the results slot below.
      $('h4').each((_index, v) => {
        const date = $(v).text();
        console.log(date)
        textArray.push(date);
      });
      results[i] = textArray;
    }
    if (cntr === movieArray.length) {
      // all results are done now
      let allResults = results.flat();
      console.log(allResults);
    }
  })
}
A bit more elegant way is to switch over to promises and let the promise infrastructure keep everything in order for you:
const rp = require('request-promise');
const cheerio = require('cheerio');
if (!Array.prototype.flat) {
Array.prototype.flat = function() {
return this.reduce((acc, val) => acc.concat(val), []);
}
}
const movieArray = [ '/movies-coming-soon/2019-09/',
'/movies-coming-soon/2019-10/',
'/movies-coming-soon/2019-11/',
'/movies-coming-soon/2019-12/',
'/movies-coming-soon/2020-01/',
'/movies-coming-soon/2020-02/',
'/movies-coming-soon/2020-03/',
'/movies-coming-soon/2020-04/',
'/movies-coming-soon/2020-05/',
'/movies-coming-soon/2020-06/',
'/movies-coming-soon/2020-07/',
'/movies-coming-soon/2020-08/' ];
//
if (!Array.prototype.flat) {
Array.prototype.flat = function() {
return this.reduce((acc, val) => acc.concat(val), []);
}
}
// Fetches one month page and resolves with the text of every <h4> on it,
// or with undefined when the request fails.
const collectHeadings = (path) => rp('https://www.imdb.com' + path)
  .then((body) => {
    const $ = cheerio.load(body);
    const headings = [];
    $('h4').each((_index, node) => {
      // console.log($(node).text());
      headings.push($(node).text());
    });
    return headings;
  })
  .catch((err) => {
    // ignore errors on urls that didn't work
    // so we can get the rest of the results without aborting
    console.log("err");
    return undefined;
  });

// Promise.all keeps the results in the same order as movieArray.
Promise.all(movieArray.map((path) => collectHeadings(path)))
  .then((pages) => {
    // flatten the two level array and remove empty items
    const allResults = pages.flat().filter(Boolean);
    console.log(allResults);
  })
  .catch((err) => {
    console.log(err);
  });
FYI, I tested the 2nd version in nodejs version 10.16.0 and it works.
I'm trying to collect all values from a mysql table with all the values of the referenced_table_name for each index of the table.
How can I avoid setting an arbitrary timeout while waiting for a promise?
To collect the expected information I currently need to set an arbitrary timeout; otherwise my object is undefined...
module.exports = {
// Responds with the rows of the requested table plus, for each indexed
// column, the `name` values found in the table that column references.
getTable: async (req, res) => {
const tablename = req.params.table,
dbName = req.params.dbName;
let jsonResult = {};
getTableValues(dbName, tablename)
.then(tableValues => {
getTableIndexedCol(dbName, tablename)
.then(indexedColumns => {
let indexedArr = {};
for (let index = 0; index < indexedColumns.length; index++) {
const element = indexedColumns[index],
column = element.column_name,
referencedTable = element.referenced_table_name;
let allValuesRefTable = new Array();
// NOTE(review): this promise is not awaited or returned, so the loop
// moves on before allValuesRefTable has been filled.
getTableValues(dbName, referencedTable)
.then(referencedTableValues => {
for (let i = 0; i < referencedTableValues.length; i++) {
const el = referencedTableValues[i];
allValuesRefTable.push(el.name);
}
})
.catch(err => console.log(err));
/*IF NO TIMEOUT DOESN'T WORK*/
// The 100 ms delay is a guess at how long the lookup above takes.
setTimeout(function(){
indexedArr[column] = allValuesRefTable;
}, 100);
}
// The response is sent after a fixed 5 s, whether or not the lookups
// above have finished.
setTimeout(function(){
jsonResult = {
name: tablename,
rows : tableValues,
rowIndexed : indexedArr
}
res.json(jsonResult);
}, 5000);
})
.catch(err => console.log(err));
})
.catch(err => console.log(err));
}
};
Is there a way to avoid using setTimeout? Or how can I wait until the promise is resolved?
Here is my function getTableIndexedCol for example:
// Runs the (elided) query in sqlRefTable and resolves with its result
// rows 500 ms after the query has settled.
// NOTE(review): the snippet is incomplete — the SQL text is a placeholder
// and the function's closing brace is missing.
async function getTableIndexedCol(dbName, tablename) {
const sqlRefTable = SELECT...;
return new Promise (async function(resolve, reject){
try{
// NOTE(review): refTable and refTableFields are assigned without a
// declaration in this snippet.
[refTable, refTableFields] = await promisePool.query(sqlRefTable)
}
catch(err){
reject(err)
}
// Runs even after reject() above; resolving an already rejected promise
// has no effect.
setTimeout(function () {
resolve(refTable);
}, 500);
})
If you are already using async/await you can use it all the way and avoid the "Promise Hell" (nested .then calls):
module.exports = {
getTable: async (req, res) => {
try {
const tablename = req.params.table,
dbName = req.params.dbName;
const tableValues = await getTableValues(dbName, tablename);
const indexedColumns = await getTableIndexedCol(dbName, tablename);
let indexedArr = {};
for (let index = 0; index < indexedColumns.length; index++) {
const element = indexedColumns[index],
column = element.column_name,
referencedTable = element.referenced_table_name;
let allValuesRefTable = new Array();
const referencedTableValues = await getTableValues(dbName, referencedTable);
for (let i = 0; i < referencedTableValues.length; i++) {
const el = referencedTableValues[i];
allValuesRefTable.push(el.name);
}
indexedArr[column] = allValuesRefTable;
}
const = jsonResult = {
name: tablename,
rows: tableValues,
rowIndexed: indexedArr
}
res.json(jsonResult);
} catch (err) {
console.log(err);
}
}
};
Node.js database result return late inside the function
const db = req.app.db;
// Looks up a plan feature by id.
// NOTE(review): the `return` sits inside the then() callback, so this
// function itself returns undefined to its caller.
function getFeaturebyID(featureids) {
db.planFeatures.findOne({"_id": featureids }).then(features => {
return features.planFeaturesTitle;
});
}
const planLists ={};
// Loads every plan, tries to resolve each plan's feature ids to titles and
// renders the plan selection page.
db.planManagement.find({}).toArray((err, planList) => {
// res.end(JSON.stringify(planList));
// NOTE(review): featurearray and features are assigned without a
// declaration in this snippet.
featurearray = [];
var j =1;
planList.forEach(function(row) {
planLists._id = row._id;
features = row.planFeatures.split(',');
for (let i = 0; i < features.length; i++) {
// Stores getFeaturebyID's direct return value, not the looked-up title.
featurearray[i] = getFeaturebyID(features[i]);
// console.log(getFeaturebyID(features[i]));
}
//row.planFeaturesName[j] = featurearray;
console.log(featurearray);
j++;
});
//console.log(planList);
// res.end(JSON.stringify(req.session));
res.render('stylist/plan_selection', {
top_results: planList,
title: 'Schedule',
config: req.app.config,
session: req.session,
message: common.clearSessionValue(req.session, 'message'),
messageType: common.clearSessionValue(req.session, 'messageType'),
helpers: req.handlebars.helpers,
showFooter: 'showFooter'
});
});
// NOTE(review): this closer has no matching opener within the snippet.
});
return features.planFeaturesTitle; delivers its value too late: the function has already returned by the time the value is available. I tried using a callback, but it did not work.
This is due to the asynchronous nature of Node.js.
First, declare your function as async, like this:
const getFeaturebyID = async (featureids) => {
const features = await db.planFeatures.findOne({"_id": featureids });
return features.planFeaturesTitle;
}
Then use it like this,
// Array.prototype.forEach does not wait for promises returned by an async
// callback, so a for...of loop is used: each lookup has finished before
// the loop ends. The enclosing function must be declared `async` for the
// `await` below to be allowed.
for (const row of planList) {
  // your code here
  for (let i = 0; i < features.length; i++) {
    featurearray[i] = await getFeaturebyID(features[i]);
  }
  // your code here
}