Node.js split file lines

I want to write a script that divides the lines read from a file into batches of 25, but my sample batch ends up with 40 codes. I would like it to split the lines into batches of 25 items: for example, if I have 60 codes, it should create two batches of 25 and one with 10 codes. Unfortunately, I can't get it to work.
const fs = require('fs');
fs.readFile('code.txt', function (err, data) {
  if (err) throw err;
  const array = data.toString().split("\n");
  let count = 0;
  let items = [];
  for (let i in array) {
    items.push({
      PutRequest: {
        Item: {
          code: array[i]
        }
      }
    });
    let params = {
      RequestItems: {
        'TABLE_NAME': items
      }
    };
    if (count === 25) {
      dynamoDB.batchWrite(params, function (err, data) {
        if (err) {
          console.log(err);
        } else {
          count = 0;
          items = [];
        }
      });
    } else {
      count++;
    }
  }
});
code.txt content
https://0bin.net/paste/NA8-4hkq#1Ohwt5uUkQqE0YscwnxTX2gxEqlvAUVKp1JRipBCsZg
Any idea what I'm doing wrong?

Your dynamoDB.batchWrite() is asynchronous, so its callback is executed only after the loop has completed; items and count are therefore never reset.
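You can see the same effect in a minimal sketch, with setTimeout standing in for any asynchronous call:
const lines = ["a", "b", "c"];
let count = 0;
for (const line of lines) {
  setTimeout(() => {
    // runs only after the whole loop has finished
    console.log(line, count);
  }, 0);
  count++;
}
// logs "a 3", "b 3", "c 3": no callback ever sees an intermediate count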
The easiest fix would be to switch to a promise-based approach like the following:
const BATCHSIZE = 25;
const fs = require('fs').promises;

async function batchLoad() {
  const lines = (await fs.readFile("code.txt", "utf-8")).split("\n");
  while (lines.length > 0) {
    const items = lines.splice(0, BATCHSIZE).map(l => ({ PutRequest: { Item: { code: l } } }));
    const params = { RequestItems: { TABLE_NAME: items } };
    await new Promise((resolve, reject) => {
      dynamoDb.batchWrite(params, (err) => {
        if (err) return reject(err);
        resolve();
      });
    });
  }
}
A callback-based approach could look like this:
const BATCHSIZE = 25;
fs.readFile("code.txt", "utf-8", (err, data) => {
  const lines = data.split("\n");
  function writeBatch() {
    if (!lines.length) return;
    const items = lines.splice(0, BATCHSIZE).map(l => ({ PutRequest: { Item: { code: l } } }));
    const params = { RequestItems: { TABLE_NAME: items } };
    dynamoDb.batchWrite(params, err => {
      if (err) ...
      else writeBatch();
    });
  }
  writeBatch();
});
The function writeBatch takes a certain number of lines from your original array and writes them into the database. Only after the write to the DB was successful does it recursively call itself to handle the next batch. But be aware that this approach may exceed the maximum call stack size and throw an error.
You can also make either of these approaches avoid mutating the lines array (splice may be quite expensive) and instead just take the current slice:
const BATCHSIZE = 25;
const fs = require('fs').promises;

async function batchLoad() {
  const lines = (await fs.readFile("code.txt", "utf-8")).split("\n");
  let currentIndex = 0;
  while (currentIndex < lines.length) {
    const items = lines.slice(currentIndex, currentIndex + BATCHSIZE).map(l => ({ PutRequest: { Item: { code: l } } }));
    const params = { RequestItems: { TABLE_NAME: items } };
    await new Promise((resolve, reject) => {
      dynamoDb.batchWrite(params, (err) => {
        if (err) return reject(err);
        resolve();
      });
    });
    currentIndex += BATCHSIZE;
  }
}
and
const BATCHSIZE = 25;
fs.readFile("code.txt", "utf-8", (err, data) => {
  const lines = data.split("\n");
  function writeBatch(currentIndex) {
    if (currentIndex >= lines.length) return;
    const items = lines.slice(currentIndex, currentIndex + BATCHSIZE).map(l => ({ PutRequest: { Item: { code: l } } }));
    const params = { RequestItems: { TABLE_NAME: items } };
    dynamoDb.batchWrite(params, err => {
      if (err) ...
      else writeBatch(currentIndex + BATCHSIZE);
    });
  }
  writeBatch(0);
});
To prevent stumbling into a maximum call stack exception, you can also schedule the next batch on the event loop instead of calling it recursively, i.e.
dynamoDb.batchWrite(params, err => {
  if (err) ...
  else setTimeout(() => { writeBatch(currentIndex + BATCHSIZE); }, 0);
});
This way you won't build up a massive call stack from recursive calls.
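As a side note, setImmediate serves the same purpose in Node.js and avoids the timer machinery of a zero-delay setTimeout; a sketch of that variant (with a placeholder error handler):
dynamoDb.batchWrite(params, err => {
  if (err) return console.error(err); // placeholder error handling
  setImmediate(() => writeBatch(currentIndex + BATCHSIZE));
});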
To keep track of how many records have already been saved to the DB, you could simply store the current counter in a file. When you restart the process, load that file and check how many lines to skip. Don't forget to remove the file once all records have been saved. For example, with the first approach:
const BATCHSIZE = 25;
const fs = require('fs').promises;

async function batchLoad() {
  const lines = (await fs.readFile("code.txt", "utf-8")).split("\n");
  let skipLines = 0; // must be let, it is reassigned below
  try {
    skipLines = +(await fs.readFile("skip.txt", "utf-8"));
    if (isNaN(skipLines)) skipLines = 0;
    lines.splice(0, skipLines);
  } catch (e) {
    skipLines = 0;
  }
  while (lines.length > 0) {
    const items = lines.splice(0, BATCHSIZE).map(l => ({ PutRequest: { Item: { code: l } } }));
    const params = { RequestItems: { TABLE_NAME: items } };
    await new Promise((resolve, reject) => {
      dynamoDb.batchWrite(params, (err) => {
        if (err) return reject(err);
        resolve();
      });
    });
    skipLines += BATCHSIZE;
    await fs.writeFile("skip.txt", `${skipLines}`);
  }
  try {
    await fs.unlink("skip.txt");
  } catch (e) {
    // ignore if the file does not exist
  }
}
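A minimal way to invoke it, assuming dynamoDb has been initialized as an AWS DocumentClient and TABLE_NAME replaced with your real table name, might be:
batchLoad()
  .then(() => console.log("all batches written"))
  .catch(err => console.error("batch load failed:", err));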

Related

How to use dynamoDB batchWriteItem with nodejs sdk?

I have a use case where I need to perform a batch_write operation on DynamoDB. I referred to this article, which has a good solution for a similar use case. I implemented it with a few cleanups in my code and it works as expected.
const _ = require('lodash');

// helper methods
async function asyncForEach(array, cb) {
  await Promise.all(array.map(async (item) => {
    await cb(item, array);
  }));
}

function to(promise) {
  return promise.then((data) => [null, data])
    .catch((err) => [err]);
}

const call = function (params) {
  return dynamoDb.batchWriteItem(params).promise();
};

async function batchWrite25(arrayOf25, tableName) {
  // 25 is as many as you can write in one time
  const itemsArray = [];
  _.forEach(arrayOf25, (item) => {
    itemsArray.push({
      PutRequest: {
        Item: item,
      },
    });
  });
  const params = {
    RequestItems: {
      [tableName]: itemsArray,
    },
  };
  await to(call(params));
}

async function batchWrite(itemArray, tableName) {
  let mainIndex = 0;
  let subIndex = 0;
  let arrayOf25 = [];
  const arrayLength = itemArray.length;
  await asyncForEach(itemArray, async (item) => {
    arrayOf25.push(item);
    subIndex += 1;
    mainIndex += 1;
    // 25 is as many as you can write in one time
    if (subIndex % 25 === 0 || mainIndex === arrayLength) {
      await to(batchWrite25(arrayOf25, tableName));
      subIndex = 0; // reset
      arrayOf25 = [];
    }
  });
}

module.exports = {
  batchWrite,
};
However, the code looks a bit complicated with so many callbacks involved. Is there a cleaner way of writing the same thing without the call, asyncForEach, or to helper methods?
Here's one simple way to batch the items:
const BATCH_MAX = 25;

const batchWrite = async (items, table_name) => {
  const BATCHES = Math.floor((items.length + BATCH_MAX - 1) / BATCH_MAX); // ceiling division
  for (let batch = 0; batch < BATCHES; batch++) {
    const itemsArray = [];
    for (let ii = 0; ii < BATCH_MAX; ii++) {
      const index = batch * BATCH_MAX + ii;
      if (index >= items.length) break;
      itemsArray.push({
        PutRequest: {
          Item: items[index],
        },
      });
    }
    const params = {
      RequestItems: {
        [table_name]: itemsArray,
      },
    };
    console.log("Batch", batch, "write", itemsArray.length, "items");
    await dynamodb.batchWriteItem(params).promise();
  }
};
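Hypothetical usage, assuming dynamodb is the low-level AWS.DynamoDB client (which expects items as AttributeValue maps) and the items and table name are made up:
const items = [{ code: { S: "abc" } }, { code: { S: "def" } }]; // hypothetical items in AttributeValue format
batchWrite(items, "my-table")
  .then(() => console.log("all batches written"))
  .catch(console.error);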
To run the batches concurrently instead of awaiting each one in turn, you can convert this function to return an array of promises and later call Promise.all(promises) on that array. For example:
const batchWrite = (items, table_name) => {
  const promises = [];
  const BATCHES = Math.floor((items.length + BATCH_MAX - 1) / BATCH_MAX);
  for (let batch = 0; batch < BATCHES; batch++) {
    // same code as above here ...
    promises.push(dynamodb.batchWriteItem(params).promise());
  }
  return promises;
};
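The caller can then wait for all the batches at once, for example (the table name here is hypothetical):
async function writeAll(items) {
  const promises = batchWrite(items, "my-table");
  await Promise.all(promises); // resolves once every batch write has completed
}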
A much cleaner way, using lodash, that worked for me is listed below. Hope this helps someone.
batchWrite = async () => {
  // this.dynamoPayload holds the entire payload in the desired format for DynamoDB insertion
  const batchSplitArr = _.chunk(this.dynamoPayload, 25);
  await Promise.all(
    batchSplitArr.map(async (item) => {
      const params = {
        RequestItems: {
          [this.tableName]: item,
        },
      };
      await this.dynamoDb.batchWriteItem(params).promise();
    })
  );
};
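For reference, _.chunk splits an array into groups of the given size, with any remainder in the final group:
const _ = require('lodash');

console.log(_.chunk(['a', 'b', 'c', 'd', 'e'], 2));
// => [ [ 'a', 'b' ], [ 'c', 'd' ], [ 'e' ] ]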

How to return 2 arrays after saving data to mongodb using node js

I need help with the code below. I get an array of items from the client, and the goal is to save them in MongoDB and return the list classified into 'saved' and 'failed' items. Examples of failed items are those that are duplicates on a unique attribute.
I know the code below will not work because of variable scope. How do I get around it? The code below returns an empty array for both savedItems and failedItems. Thanks!
router.post('/addItems', async (req, res, next) => {
  let items = req.body;
  let result = {
    savedItems: [],
    failedItems: []
  };
  function saveData() {
    for (i = 0; i < items.length; i++) {
      item = items[i];
      Model.create({ ...item }, (err, data) => {
        if (err) {
          result.failedItems.push(item);
        } else {
          result.savedItems.push(item);
        }
      });
    }
    return result;
  }
  saveData().then(result => {
    res.send({
      results: result
    });
  });
});
router.post('/addItems', async (req, res, next) => {
  // use try/catch when using async
  try {
    let items = req.body;
    let result = {
      savedItems: [],
      failedItems: []
    };
    for (let i = 0; i < items.length; i++) {
      const item = items[i];
      // use the returned promise instead of the callback for Model.create
      const data = await Model.create({ ...item });
      result.savedItems.push(item);
      // if you also need to handle failed items in the result, use another try/catch inside:
      /*
      try {
        const data = await Model.create({ ...item });
        result.savedItems.push(item);
      } catch (err) {
        result.failedItems.push(item);
      }
      */
    }
    res.send({
      results: result
    });
  } catch (err) {
    // catches all unexpected errors and rejected promises
    res.send({
      error: err
    });
  }
});
Your saveData method didn't return a promise; try this:
function saveData() {
  return new Promise(resolve => {
    let items = req.body;
    let result = {
      savedItems: [],
      failedItems: []
    };
    let promises = [];
    for (let i = 0; i < items.length; i++) {
      const item = items[i]; // const, so each callback below sees its own item
      let promise = new Promise(resolve => {
        Model.create({ ...item }, (err, data) => {
          if (err) {
            result.failedItems.push(item);
          } else {
            result.savedItems.push(item);
          }
          resolve();
        });
      });
      promises.push(promise);
    }
    Promise.all(promises).then(() => resolve(result));
  });
}
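As a side note, if your Node.js version has Promise.allSettled (12.9+) and Model.create returns a promise when called without a callback (as Mongoose models do), a shorter sketch of the same idea is possible; treat this as an illustration, not a drop-in replacement:
async function saveData(items) {
  // run all inserts in parallel; allSettled never rejects, it reports each outcome
  const outcomes = await Promise.allSettled(
    items.map(item => Model.create({ ...item }))
  );
  return {
    savedItems: items.filter((_, i) => outcomes[i].status === 'fulfilled'),
    failedItems: items.filter((_, i) => outcomes[i].status === 'rejected')
  };
}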

How to get code to execute in order in node.js

I am trying to finish my script, but for some reason I don't know, it refuses to execute in the order I put it in.
I've tried placing a 'wait' function between the join-request update call and the following code, but when run, it acts as if the function call and the wait were the other way round, defeating the point of wait().
const Roblox = require('noblox.js')
var fs = require('fs');
var joinRequests = []
...
function wait(ms) {
  var d = new Date();
  var d2 = null;
  do { d2 = new Date(); }
  while (d2 - d < ms * 1000);
};
...
...
function updateJReqs() {
  Roblox.getJoinRequests(4745601).then((array) => {
    var i;
    var final = [];
    for (i = 0; i < array.length; i++) {
      final.push(array[i].username);
    };
    if (final === '') {
      final = '-None';
    };
    joinRequests = final
    console.log('Updated join requests.')
  });
}
function check() {
  setTimeout(() => {
    fs.readFile('Request.txt', encoding = 'utf-8', function (err, data) {
      if (err) {
        check();
      } else {
        updateJReqs(); // for some reason this function is executed alongside the below, not before it
        // Tried putting wait(x) in here.
        console.log('Request received: ' + data)
        var solution = joinRequests
        console.log('Fuffiling request with ' + solution)
        fufillRequest(solution)
        fs.unlink('Request.txt', function (err) {
          if (err) throw err;
        });
        check();
      }
    });
  }, 400)
}
check();
The script is supposed to wait until a file is created (accomplished), update the list of join requests (accomplished), and then create a new file with the list of join requests in it (not accomplished).
If I understand your code correctly, you are working with async code: you need to return a promise from updateJReqs, and you should add an exit condition to check(), because you currently have infinite recursion.
function updateJReqs() {
  return new Promise(resolve => {
    Roblox.getJoinRequests(4745601).then((array) => {
      var i;
      var final = [];
      for (i = 0; i < array.length; i++) {
        final.push(array[i].username);
      };
      if (final === '') {
        final = '-None';
      };
      joinRequests = final
      console.log('Updated join requests.')
      resolve();
    });
  });
}
async function check() {
  setTimeout(() => {
    // the callback must be async so that await is allowed inside it
    fs.readFile('Request.txt', 'utf-8', async function (err, data) {
      if (err) {
        await check();
      } else {
        await updateJReqs();
        // Tried putting wait(x) in here.
        console.log('Request received: ' + data)
        var solution = joinRequests
        console.log('Fuffiling request with ' + solution)
        fufillRequest(solution)
        fs.unlink('Request.txt', function (err) {
          if (err) throw err;
        });
        // you don't have an exit from your function check();
        return 'Success';
      }
    });
  }, 400)
}
check().then(res => console.log(res));
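For comparison, here is a sketch of the same polling loop written entirely with promises, so callbacks and await are not mixed at all (fufillRequest and joinRequests are the asker's own identifiers, and updateJReqs is assumed to return a promise as shown above):
const fsp = require('fs').promises;

async function check() {
  while (true) {
    try {
      const data = await fsp.readFile('Request.txt', 'utf-8');
      await updateJReqs(); // guaranteed to finish before the next line runs
      console.log('Request received: ' + data);
      fufillRequest(joinRequests);
      await fsp.unlink('Request.txt');
      return 'Success';
    } catch (err) {
      // treat any error as "file not ready yet" (a simplification) and poll again after 400 ms
      await new Promise(resolve => setTimeout(resolve, 400));
    }
  }
}

check().then(res => console.log(res));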

Get value out of function in Node.js

I don't know the correct title for my problem. I just need to get a value out of a function, just like return, but I think it's not the same thing.
I have a code snippet from a controller in the AdonisJS framework:
var nmeadata = "";
jsondata.forEach(element => {
  var x = element.nmea
  var buff = new Buffer(x, 'base64')
  zlib.unzip(buff, (err, res) => {
    if (err) {
      //
    } else {
      nmeadata += "greed island"
      nmeadata += res.toString()
    }
  })
});
return view.render('admin.index', {
  data: datanmea.toJSON(),
  nmea: nmeadata
})
I need the unzipped string data that is appended to nmeadata inside the zlib callback, so I can send it to the view. But at this point I cannot even display a simple output like greed island in my view.
Thank you.
UPDATE
Still not working after using promises:
class NmeaController {
  async index({ view }) {
    const datanmea = await NmeaModel.all()
    const jsondata = datanmea.toJSON()
    var promises = [];
    var nmeadata = "";
    jsondata.forEach(element => {
      promises.push(
        new Promise(resolve => {
          let x = element.nmea
          let buff = new Buffer(x, 'base64')
          zlib.unzip(buff,
            (err, res) => {
              if (err) {
                //
              } else {
                nmeadata += "test add text"
                // nmeadata += res.toString()
              }
              // I also tried using resolve() and resolve("any text")
              resolve(nmeadata);
            })
        })
      )
    });
    await Promise.all(promises);
    return view.render('admin.index', {
      data: datanmea.toJSON(),
      nmea: nmeadata
    });
  }
}
UPDATE AUGUST 22 2019
I already tried the solution from maksbd19 but it is still not working:
class NmeaController {
  async index({ view }) {
    const datanmea = await NmeaModel.all()
    const jsondata = datanmea.toJSON()
    var promises = [];
    var nmeadata = "";
    jsondata.forEach(element => {
      promises.push(
        new Promise(resolve => {
          let x = element.nmea
          let buff = new Buffer(x, 'base64')
          zlib.unzip(buff,
            (err, res) => {
              if (err) {
                // since you are interested in the text only, there is no need to reject here
                return resolve("");
              }
              return resolve("greed island")
            })
        })
      )
    });
    const result = await Promise.all(promises); // this will be an array of the results of each promise respectively
    nmeadata = result.join(""); // process the array of results
    return view.render('admin.index', {
      data: datanmea.toJSON(),
      nmea: nmeadata
    });
  }
}
I'd suggest two things:
Modify the zlib.unzip callback function to resolve properly:
(err, res) => {
  if (err) {
    // since you are interested in the text only, there is no need to reject here
    return resolve("");
  }
  return resolve(res.toString())
}
Retrieve the final data from the result of Promise.all:
const result = await Promise.all(promises); // this will be an array of the results of each promise respectively
nmeadata = result.join(""); // process the array of results
With this approach every promise will resolve, and you will finally get the expected results in an array.
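A small point worth knowing here: Promise.all preserves the order of its input array, so the joined string follows the order of jsondata regardless of which unzip callback finishes first. A quick illustration:
(async () => {
  const parts = await Promise.all([
    Promise.resolve('a'),
    new Promise(resolve => setTimeout(() => resolve('b'), 50)), // resolves last
    Promise.resolve('c')
  ]);
  console.log(parts.join('')); // always "abc"
})();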

Is there a way to avoid setting a timeout

I'm trying to collect all the values from a MySQL table together with all the values of the referenced_table_name for each index of the table.
How do I avoid setting a random timeout while waiting for a promise?
To collect the expected information I need to set a random timeout, otherwise my object is undefined...
module.exports = {
  getTable: async (req, res) => {
    const tablename = req.params.table,
      dbName = req.params.dbName;
    let jsonResult = {};
    getTableValues(dbName, tablename)
      .then(tableValues => {
        getTableIndexedCol(dbName, tablename)
          .then(indexedColumns => {
            let indexedArr = {};
            for (let index = 0; index < indexedColumns.length; index++) {
              const element = indexedColumns[index],
                column = element.column_name,
                referencedTable = element.referenced_table_name;
              let allValuesRefTable = new Array();
              getTableValues(dbName, referencedTable)
                .then(referencedTableValues => {
                  for (let i = 0; i < referencedTableValues.length; i++) {
                    const el = referencedTableValues[i];
                    allValuesRefTable.push(el.name);
                  }
                })
                .catch(err => console.log(err));
              /* IF NO TIMEOUT DOESN'T WORK */
              setTimeout(function () {
                indexedArr[column] = allValuesRefTable;
              }, 100);
            }
            setTimeout(function () {
              jsonResult = {
                name: tablename,
                rows: tableValues,
                rowIndexed: indexedArr
              }
              res.json(jsonResult);
            }, 5000);
          })
          .catch(err => console.log(err));
      })
      .catch(err => console.log(err));
  }
};
Is there a way to avoid using setTimeout? Or how can I wait until the promise is resolved?
Here is my function getTableIndexedCol for example:
async function getTableIndexedCol(dbName, tablename) {
  const sqlRefTable = SELECT...;
  return new Promise(async function (resolve, reject) {
    try {
      [refTable, refTableFields] = await promisePool.query(sqlRefTable)
    } catch (err) {
      reject(err)
    }
    setTimeout(function () {
      resolve(refTable);
    }, 500);
  })
}
If you are already using async/await, you can use it all the way through and avoid the "Promise Hell" (nested .then calls):
module.exports = {
  getTable: async (req, res) => {
    try {
      const tablename = req.params.table,
        dbName = req.params.dbName;
      const tableValues = await getTableValues(dbName, tablename);
      const indexedColumns = await getTableIndexedCol(dbName, tablename);
      let indexedArr = {};
      for (let index = 0; index < indexedColumns.length; index++) {
        const element = indexedColumns[index],
          column = element.column_name,
          referencedTable = element.referenced_table_name;
        let allValuesRefTable = new Array();
        const referencedTableValues = await getTableValues(dbName, referencedTable);
        for (let i = 0; i < referencedTableValues.length; i++) {
          const el = referencedTableValues[i];
          allValuesRefTable.push(el.name);
        }
        indexedArr[column] = allValuesRefTable;
      }
      const jsonResult = {
        name: tablename,
        rows: tableValues,
        rowIndexed: indexedArr
      };
      res.json(jsonResult);
    } catch (err) {
      console.log(err);
    }
  }
};
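If the referenced tables are independent of each other, the inner awaits could also run in parallel. A sketch of that variant, using the same assumed getTableValues helper (Object.fromEntries requires Node.js 12+):
// inside the same async handler as above
const entries = await Promise.all(
  indexedColumns.map(async (element) => {
    const referencedTableValues = await getTableValues(dbName, element.referenced_table_name);
    return [element.column_name, referencedTableValues.map(el => el.name)];
  })
);
const indexedArr = Object.fromEntries(entries); // { column_name: [names...] }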
