Nodejs fast-csv and promises - node.js

I feel like things are running out of order here. I want to establish a connection to a MySQL database, then read in a file. Row by row, I want to grab the name and run a query. I would have assumed that my sqlSelectQuery function, which returns a promise, would wait for the promise to resolve before moving on to the next row. What am I missing here?
const mysql = require('mysql');
const fs = require('fs');
const path = require('path');
const csv = require('fast-csv');
const config = require('./config');

const connection = mysql.createConnection({
    user: config.user,
    password: config.password,
    database: config.database,
    host: config.host
});

connection.connect((err) => {
    if (err) {
        console.log('Error connecting to Db');
        return;
    }
    console.log('Connection established');
});

fs.createReadStream(path.resolve(__dirname, 'data.csv'))
    .pipe(csv.parse({ headers: true }))
    .on('error', error => console.error("error", error))
    .on('data', row => { // need to get this to block
        sqlSelectQuery(row).then(result => console.log("result: ", result));
    })
    .on('end', rowCount => console.log(`Parsed ${rowCount} rows`));

const sqlSelectQuery = (row) => {
    return new Promise((resolve, reject) => {
        console.log("inside promise");
        const selectQuery = 'SELECT * FROM loans WHERE business_name = ?;';
        connection.query(selectQuery, [row.BorrowerName], (err, rows) => {
            let result = {};
            if (err) return reject(err);
            if (rows.length === 1) {
                let res = rows[0];
                result = {
                    business_name: res.business_name,
                    loan_range: res.loan_range,
                    loan_amount: row.InitialApprovalAmount,
                    count: 1
                };
                resolve(result);
            } else {
                result = {
                    business_name: row.BorrowerName,
                    loan_range: "",
                    loan_amount: "",
                    unique: rows.length
                };
                resolve(result);
            }
        });
    });
};
My console output looks like this:
inside promise
inside promise //20 times (I have 20 rows)
Parsed 20 rows
Connection established
result: {....}
result: {...}....

I found this answer, which says I need to add a pause and a resume:
nodejs async await inside createReadStream
.on('data', async (row) => { // need to get this to block
    stream.pause(); // 'stream' is the piped csv parse stream from above
    await sqlSelectQuery(row).then(result => console.log("result: ", result));
    stream.resume();
})
The issue now is that my .on('end') runs before the last row.

You could add each row to a rowsToProcess array, then, once the file data is read, process each row one by one:
const mysql = require('mysql');
const fs = require('fs');
const path = require('path');
const csv = require('fast-csv');
const config = require('./config');

const connection = mysql.createConnection({
    user: config.user,
    password: config.password,
    database: config.database,
    host: config.host
});

connection.connect((err) => {
    if (err) {
        console.error('Error connecting to Db:', err);
        return;
    }
    console.log('Connection established');
    const rowsToProcess = [];
    fs.createReadStream(path.resolve(__dirname, 'data.csv'))
        .pipe(csv.parse({ headers: true }))
        .on('error', error => console.error("error", error))
        .on('data', row => {
            // Add row to process.
            rowsToProcess.push(row);
        })
        .on('end', async rowCount => {
            await processRows(rowsToProcess);
            console.log("processRows: complete.");
        });
});
async function processRows(rowsToProcess) {
    console.log(`Read ${rowsToProcess.length} row(s) from csv file...`);
    for (let i = 0; i < rowsToProcess.length; i++) {
        console.log(`processing row ${i + 1} of ${rowsToProcess.length}...`);
        let result = await sqlSelectQuery(rowsToProcess[i]);
        console.log(`row ${i + 1} result:`, result);
    }
}
const sqlSelectQuery = (row) => {
    return new Promise((resolve, reject) => {
        console.log("Processing row:", row);
        const selectQuery = 'SELECT * FROM loans WHERE business_name = ?;';
        connection.query(selectQuery, [row.BorrowerName], (err, rows) => {
            let result = {};
            if (err) return reject(err);
            if (rows.length === 1) {
                let res = rows[0];
                result = {
                    business_name: res.business_name,
                    loan_range: res.loan_range,
                    loan_amount: row.InitialApprovalAmount,
                    count: 1
                };
                resolve(result);
            } else {
                result = {
                    business_name: row.BorrowerName,
                    loan_range: "",
                    loan_amount: "",
                    unique: rows.length
                };
                resolve(result);
            }
        });
    });
};
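As a side note: on Node 10+ readable streams are async iterable, so you could also process rows one at a time without buffering them all first. A minimal sketch (same connection and sqlSelectQuery as above), not a drop-in replacement:

const stream = fs.createReadStream(path.resolve(__dirname, 'data.csv'))
    .pipe(csv.parse({ headers: true }));

(async () => {
    let rowCount = 0;
    // for await pulls one row at a time; the loop body finishes before the
    // next row is read, so there is no 'end' ordering problem.
    for await (const row of stream) {
        rowCount++;
        const result = await sqlSelectQuery(row);
        console.log('result:', result);
    }
    console.log(`Parsed ${rowCount} rows`);
})().catch(err => console.error('error', err));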

Related

how to work with response object in nodejs stream, exceljs and worker thread

I am using worker threads and streams at the same time in a Node.js project. Initially I was not able to pass the res object from the main process to the worker thread. I read many Stack Overflow questions and answers and wrote a solution that works great: I create a Readable stream in the main thread and a writable stream in the worker thread. In the worker I run a huge calculation across more than 10 tables and export the data, which takes nearly 1 minute of processing.
code:
router.get("/downloadAll", (req, res) => {
    new Promise((resolve, reject) => {
        const promise = [];
        promise.push(Dashboard.DUser());
        promise.push(Dashboard.DDUser());
        promise.push(Dashboard.DDLUser());
        promise.push(Dashboard.Din());
        promise.push(Dashboard.Str());
        promise.push(Dashboard.R());
        promise.push(Dashboard.Q());
        Promise.all(promise).catch(err => err)
            .then(results => {
                const worker = new Worker(`${process.cwd()}/src/route/modules/dashboard/worker.js`, {
                    workerData: { results }
                });
                const fileHeaders = [
                    {
                        name: "Content-Type",
                        value: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                    }
                ];
                res.setHeader("Content-Disposition", `attachment; filename="Stream.xlsx"`);
                fileHeaders.forEach(header => res.setHeader(header.name, header.value));
                const readStream = new Readable({
                    read() {}
                });
                readStream.pipe(res);
                worker.on("message", message => {
                    readStream.push(message);
                });
                worker.on("exit", code => {
                    console.log("exit", code);
                    resolve(true);
                    //if (code !== 0) reject(new Error(`stopped with ${code} exit code`));
                });
            });
    })
        .then(() => res.end())
        .catch(err => console.log(err));
});
WORKER THREAD:
const { workerData, parentPort } = require("worker_threads");
const { Writable } = require("stream");
const Excel = require("exceljs");

const writableStream = new Writable();
// writableStream.on("message", () => {});
writableStream._write = (chunk, encoding, next) => {
    parentPort.postMessage(chunk);
    next();
};

const createWorkbook = () => {
    const workbook = new Excel.stream.xlsx.WorkbookWriter({
        stream: writableStream, // stream to server response
        useStyles: true // not sure about this one, check with it turned off.
    });
    workbook.title = "Serious";
    workbook.creator = "SS";
    workbook.created = new Date();
    return workbook;
};

const createSheet = workbook => {
    workerData.results.forEach((result, index) => {
        const worksheet = workbook.addWorksheet(result.title, {
            properties: { outlineLevelCol: 1 }
        });
        worksheet.columns = Object.keys(result.data[0]).map(item => {
            return { header: item, key: item };
        });
        result.data.forEach(row => worksheet.addRow(row).commit());
    });
};

const workbook = createWorkbook();
createSheet(workbook);
workbook.commit();
The above code works fine and is fast for small calculations. With a huge computation, it shows "processing" for about 1 minute, then finishes and downloads the xlsx file. So I updated the code to:
router.get("/downloadAll", (req, res) => {
    const worker = new Worker(`${process.cwd()}/src/worker/worker.js`);
    const fileHeaders = [
        {
            name: "Content-Type",
            value: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        }
    ];
    const today = new Date();
    res.setHeader(
        "Content-Disposition",
        `attachment; filename=Q-${today.getFullYear()}${String(today.getMonth() + 1).padStart(2, "0")}${String(
            today.getDate()
        ).padStart(2, "0")}.xlsx`
    );
    fileHeaders.forEach(header => res.setHeader(header.name, header.value));
    const readStream = new Readable({
        read() {}
    });
    readStream.pipe(res);
    worker.on("message", message => {
        readStream.push(message);
    });
    worker.on("exit", code => {
        console.log("exit", code);
        res.end();
        //if (code !== 0) reject(new Error(`stopped with ${code} exit code`));
    });
});
and worker thread code:
const { workerData, parentPort } = require("worker_threads");
const { Writable } = require("stream");
const Excel = require("exceljs");
const { resolve } = require("path");
const db = require(`${process.cwd()}/src/modules/db.module`);
const Dashboard = require(`${process.cwd()}/src/route/modules/dashboard.model`);

const promise = [];
promise.push(Dashboard.DUser());
promise.push(Dashboard.DDUser());
promise.push(Dashboard.DDLUser());
promise.push(Dashboard.Din());
promise.push(Dashboard.Str());
promise.push(Dashboard.R());
promise.push(Dashboard.Q());

Promise.all(promise).catch(err => err)
    .then(results => {
        const writableStream = new Writable();
        // writableStream.on("message", () => {});
        writableStream._write = (chunk, encoding, next) => {
            console.log(chunk.toString());
            parentPort.postMessage(chunk);
            next();
        };
        const createWorkbook = () => {
            const workbook = new Excel.stream.xlsx.WorkbookWriter({
                stream: writableStream, // stream to server response
                useStyles: true // not sure about this one, check with it turned off.
            });
            workbook.creator = "ss";
            workbook.created = new Date();
            return workbook;
        };
        const createSheet = workbook => {
            results.forEach((result, index) => {
                // console.log(result);
                const worksheet = workbook.addWorksheet(result.title, {
                    properties: { outlineLevelCol: 1 }
                });
                worksheet.columns = Object.keys(result.data[0]).map(item => {
                    return { header: item, key: item };
                });
                result.data.forEach(row => worksheet.addRow(row).commit());
            });
        };
        // workbook creation and commit, as in the first worker version
        const workbook = createWorkbook();
        createSheet(workbook);
        workbook.commit();
    });
The above code does not work correctly. I can get the data from the promise callback, but when downloading, the browser shows 300 KB, then 200 bytes, then 1 byte, and ends at 0, and the file never downloads.
If I try to move the promise inside createSheet, I get this error:
Error [ERR_UNHANDLED_ERROR]: Unhandled error. ({ message: 'queue closed', code: 'QUEUECLOSED', data: undefined })
code:
const createSheet = workbook => {
    let promise = [];
    /**
     * get count of all the user list
     */
    promise.push(Dashboard.DDPro());
    Promise.all(promise)
        .then(results => {
            results.forEach((result, index) => {
                console.log(result);
                const worksheet = workbook.addWorksheet(result.title, {
                    properties: { outlineLevelCol: 1 }
                });
                worksheet.columns = Object.keys(result.data[0]).map(item => {
                    return { header: item, key: item };
                });
                result.data.forEach(row => worksheet.addRow(row).commit());
            });
        })
        .catch(err => console.log(err));
};
Can anybody help me solve this problem?
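One likely cause, for what it's worth: workbook.commit() at the top level runs before the Promise.all inside createSheet resolves, so the sheets are added after the workbook's write queue has already closed. A sketch of one way to sequence it (assuming the same createWorkbook and Dashboard.DDPro as above):

const createSheet = async workbook => {
    // Wait for the data before touching the workbook.
    const results = await Promise.all([Dashboard.DDPro()]);
    results.forEach(result => {
        const worksheet = workbook.addWorksheet(result.title, {
            properties: { outlineLevelCol: 1 }
        });
        worksheet.columns = Object.keys(result.data[0]).map(item => ({ header: item, key: item }));
        result.data.forEach(row => worksheet.addRow(row).commit());
    });
};

const workbook = createWorkbook();
createSheet(workbook)
    .then(() => workbook.commit()) // commit only after every sheet is written
    .catch(err => console.error(err));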

Error after some insertion MongoNetworkError: connection 812 to 127.0.0.1:27017 closed in Node js

I want to insert around 60 million records into MongoDB using a Node.js script, so I created a connection and reuse it like this:
connection.js
const MongoClient = require("mongodb").MongoClient,
    { mongourl, dbName } = require('../../env');

let db;
let mongoobject;

const option = {
    useUnifiedTopology: true,
    useNewUrlParser: true,
    socketTimeoutMS: 300000,
    poolSize: 1000,
    keepAlive: 300000,
    connectTimeoutMS: 300000,
};

const connectDb = (callback) => {
    if (db) return callback();
    MongoClient.connect(mongourl, option,
        (err, database) => {
            if (err) return console.log(err);
            db = database.db(dbName);
            mongoobject = database;
            callback();
        }
    );
};

const getDb = () => {
    return db;
};

const connectclient = () => {
    return mongoobject;
};

module.exports = {
    connectDb,
    getDb,
    connectclient
};
And my insertion function is:
function saveData() {
    return new Promise(function (resolve, reject) {
        try {
            fs.access(dirPath, fs.F_OK, (err) => {
                if (err) {
                    console.error(err);
                    return;
                }
                const startTime = new Date();
                let numlines = 0;
                const fileReference = {};
                fs.readdir(dirPath, function (err, filenames) {
                    if (err) {
                        console.error("Directory Not Found");
                        return;
                    }
                    filenames.forEach(function (filename) {
                        const readInterface = new readline(dirPath + filename);
                        let promises = [];
                        fileReference[filename] = 0;
                        readInterface.on('line', function (line) {
                            fileReference[filename]++;
                            let countcol = line.split('\t').length,
                                productname = line.split("\t"),
                                productsku = line.split("\t"),
                                productprice = line.split("\t");
                            let product_sku, product_name, product_price;
                            if (countcol == 3) {
                                product_sku = productname.splice(0, 2).join("-").toLowerCase();
                                product_name = productsku.splice(0, 2).join(" ");
                                product_price = productprice.splice(-1, 1);
                            } else if (countcol == 4) {
                                let product_sku_ini = productsku.splice(0, 2).join("").toLowerCase(),
                                    product_sku_end = productsku.splice(0, 1).join(" ").toLowerCase(),
                                    product_name_ini = productname.splice(0, 2).join(""),
                                    product_name_end = productname.splice(0, 1).join(" ");
                                product_price = productprice.splice(-1, 1);
                                product_sku = product_sku_ini + "-" + product_sku_end;
                                product_name = product_name_ini + " " + product_name_end;
                                delete product_sku_ini, product_sku_end, product_name_ini, product_name_end, product_sku, product_name, product_price;
                            }
                            console.info('row start processing ==>>', filename, product_sku, line);
                            delete countcol, productname, productsku, productprice;
                            if (numlines >= 80000) {
                                readInterface.pause();
                                // console.log('promises:', promises)
                                Promise.all(promises)
                                    .then(response => {
                                        numlines = 0;
                                        promises = [];
                                        localStorage.setItem(filename, fileReference[filename]);
                                        console.info(`filename Batch Resolved 1 ========>> ${filename}`, localStorage.getItem(filename));
                                        console.log("====================================================== END 1============================================");
                                        readInterface.resume();
                                        // showHeapUses()
                                        // setTimeout(() => process.exit(), 500)
                                        // console.log('resume 1 time:', (new Date().getTime()) - startTime.getTime())
                                    })
                                    .catch(error => {
                                        console.info(`Error in executing`, error);
                                        numlines = 0;
                                        readInterface.resume();
                                        // console.log('resume 2 time:', (new Date()) - startTime)
                                    });
                            }
                            console.log("num line", numlines);
                            numlines++;
                            if (product_sku && product_name && product_price) {
                                const memoryUsedMb = process.memoryUsage().heapUsed / 1024 / 1024;
                                console.info('the program used', memoryUsedMb, 'MB');
                                async.waterfall([
                                    function (callback) {
                                        const checkskuexists = async () => {
                                            let checksamepro = { sku: product_sku };
                                            let check_doc_exist = db.collection(collectionName).findOne(checksamepro);
                                            return check_doc_exist;
                                        };
                                        checkskuexists().then(function (result) {
                                            if (result === null) {
                                                callback(true, 'PRODUCT_NOT_FOUND');
                                            } else {
                                                callback(null, result.sku);
                                            }
                                        });
                                    },
                                    function (sku, callback) {
                                        db.collection(collectionName).updateOne({ sku: sku }, { $set: { price: product_price } });
                                        resolve();
                                    },
                                ], function (err, result) {
                                    if (err) {
                                        if (err && result == 'PRODUCT_NOT_FOUND') {
                                            prodetails = { name: product_name, sku: product_sku, price: product_price, status: 'active' };
                                            db.collection(collectionName).insertOne(prodetails, function (err, res) {
                                                if (err) throw err;
                                                client.close();
                                            });
                                        }
                                        resolve();
                                    }
                                });
                                delete product_sku, product_name, product_price;
                            } else {
                                console.log('product is undefined -- so skiped', line);
                                delete product_sku, product_name, product_price;
                            }
                        });
                        readInterface.on('error', function (error) {
                            delete readInterface, fileReference, promises;
                            console.error("Error in reading file: ", error);
                        });
                        readInterface.on('end', function () {
                            // printPerformance(startTime);
                            localStorage.removeItem(filename);
                            Promise.all(promises)
                                .then(response => {
                                    console.info(`filename Batch Resolved 2 ========>> ${filename} -- Completed`);
                                    console.log("====================================================== END 2============================================");
                                })
                                .catch(error => {
                                    console.info(`Error in executing`, error);
                                });
                            delete readInterface, fileReference, promises;
                        });
                    });
                });
            });
        } catch (error) {
            reject("ERROR GOES HERE ", error);
        }
    });
}
The error I am getting is:
MongoNetworkError: connection 812 to 127.0.0.1:27017 closed
at /var/www/html/reg-dealers-mongodb-script/node_modules/mongodb/lib/cmap/connection.js:68:15
at Map.forEach (<anonymous>)
at Socket.<anonymous> (/var/www/html/reg-dealers-mongodb-script/node_modules/mongodb/lib/cmap/connection.js:67:20)
at Socket.emit (events.js:314:20)
at Socket.EventEmitter.emit (domain.js:483:12)
at TCP.<anonymous> (net.js:675:12)
This comes after some insertions, like 10k or 20k, and sometimes after about 100k. Only the connection number (812) differs; the rest of the error is the same. Any idea why this is happening and how to solve it?
Your insertion function is too big to follow, but from the error it is clear that your insertion function is creating new mongo connections from the pool.
Generally, when a single connection is busy with a blocking operation, other available connections from the pool are used to handle incoming requests that need the db. Since you defined a pool size of 1000, that is why you are seeing connection 812 being closed.
It is not wise to insert 60 million records at once. Instead, divide the work into smaller parts, organize your DB architecture, and follow the recommended practices for bulk writes (collection max size, read/write ops, indexing, etc.). When you need to save multiple documents, use the following mongo function:
db.collection.insertMany(
    [ <document 1>, <document 2>, ... ],
    {
        writeConcern: <document>,
        ordered: <boolean>
    }
)
For more details check this.
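For illustration, a minimal batching sketch (hypothetical: it assumes db and collectionName from your connection code, and that the parsed lines have been collected into a docs array; the batch size is arbitrary):

const BATCH_SIZE = 1000; // tune to your document size and write load

async function insertInBatches(docs) {
    for (let i = 0; i < docs.length; i += BATCH_SIZE) {
        const batch = docs.slice(i, i + BATCH_SIZE);
        // ordered: false lets the rest of a batch proceed if one write fails
        await db.collection(collectionName).insertMany(batch, { ordered: false });
        console.log(`inserted ${Math.min(i + BATCH_SIZE, docs.length)} of ${docs.length}`);
    }
}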

Bulk update to Postgres with node js performance issue

I'm facing a performance issue while trying to do a bulk update in PostgreSQL. It takes more than 180 seconds to update around 23000 records. Please find the code below. I'm using the pg-promise library. Is there anything I can do to improve the performance?
const pgp = require('pg-promise')();

const postgresDBConfig = {
    host: Config.postgresDBHost,
    port: Config.postgresDBPort,
    database: Constants.postgresDBName,
    user: Config.postgresDBUser,
    password: 'pswd'
};

export async function getTransactionDetails(): Promise<any> {
    return new Promise<any>(async function (resolve, reject) {
        try {
            let db = pgp(postgresDBConfig);
            db.connect();
            let query = "SELECT * FROM table_name";
            db.any(query)
                .then(data => {
                    console.log("Executed successfully::");
                    resolve(data);
                })
                .catch(error => {
                    console.log('ERROR:', error);
                });
        } catch (error) {
            log.error("Error::" + error);
            throw error;
        }
    });
}
export async function updateStatus(result: any, status: string) {
    try {
        let db = pgp(postgresDBConfig);
        //db.connect();
        let updateData = [];
        _.forEach(result, function (row) {
            let updateInfo = {};
            updateInfo["sessionid"] = row.sessionid;
            updateInfo["status"] = status;
            updateData.push(updateInfo);
        });
        console.log("updateData::" + updateData.length);
        const tableName = new pgp.helpers.TableName('table_name', 'schema_name');
        let columnset = new pgp.helpers.ColumnSet(['?sessionid', 'status'], { table: tableName });
        let update = pgp.helpers.update(updateData, columnset);
        db.none(update)
            .then(() => {
                console.log("Updated successfully");
            })
            .catch(error => {
                console.log("Error updating the status" + error);
            });
    } catch (error) {
        log.error("Error in function updateStatus::" + error);
        throw error;
    }
}
The code exhibits problems all over the place:
- You should initialize the database object only once.
- You should not use db.connect() at all, which you also use incorrectly for the async code.
- You again use the async block incorrectly, skipping await, so it does not execute in order.
- You do not append any UPDATE logic clause, so the query updates every row, unconditionally, which may be the delayed mess that you're in.
Here's an improved example, though it may need some more work from your side...
const pgp = require('pg-promise')();

const postgresDBConfig = {
    host: Config.postgresDBHost,
    port: Config.postgresDBPort,
    database: Constants.postgresDBName,
    user: Config.postgresDBUser,
    password: 'pswd'
};

const db = pgp(postgresDBConfig);

const tableName = new pgp.helpers.TableName('table_name', 'schema_name');
const columnSet = new pgp.helpers.ColumnSet(['?sessionid', 'status'], { table: tableName });

export async function getTransactionDetails(): Promise<any> {
    try {
        const res = await db.any('SELECT * FROM table_name');
        console.log('Executed successfully::');
        return res;
    } catch (error) {
        console.log('ERROR:', error);
        throw error;
    }
}

export async function updateStatus(result: any, status: string) {
    try {
        let updateData = [];
        _.forEach(result, row => {
            let updateInfo = {};
            updateInfo["sessionid"] = row.sessionid;
            updateInfo["status"] = status;
            updateData.push(updateInfo);
        });
        console.log('updateData::', updateData.length);
        const update = pgp.helpers.update(updateData, columnSet) +
            ' WHERE v.sessionid = t.sessionid';
        await db.none(update);
        console.log('Updated successfully');
    } catch (error) {
        console.log('Error in function updateStatus:', error);
        throw error;
    }
}
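For completeness, a hypothetical caller (the status value is made up) would then look like:

(async () => {
    const rows = await getTransactionDetails();
    await updateStatus(rows, 'PROCESSED'); // one UPDATE statement for all rows
})();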

how to stop process nodejs within promises

I've created a Node.js script (triggered with cron jobs) that updates a Firebase Realtime Database as follows:
var db = admin.database();
var ref = db.ref('myusers');
var promises = [];

function updateUnlocked(isLocked, locked, msisdn) {
    return new Promise(function (resolve, reject) {
        if (isLocked === 1) {
            var startDate = moment(locked);
            var endDate = moment();
            var result = endDate.diff(startDate, 'minutes');
            if (result > 5) {
                var ref = db.ref('myusers/' + msisdn);
                ref.update({ isLocked: 2 });
            }
        }
        resolve('done');
    });
}

ref.once('value', function (snapshot) {
    snapshot.forEach(childSnapshot => {
        promises.push(updateUnlocked(childSnapshot.val().isLocked, childSnapshot.val().locked, childSnapshot.key));
    });
});

Promise.all(promises).then(function (data) {
    console.log(data);
}).catch(function (err) {
    console.log('error');
});
Please let me know where can I add process.exit(). Thanks.
You must wait for the "once" callback to get executed; otherwise the promises array is empty and the process could exit immediately.
var db = admin.database();
var ref = db.ref('myusers');

function updateUnlocked(isLocked, locked, msisdn) {
    ...
}

ref.once('value', function (snapshot) {
    // DataSnapshot has forEach but no map, so collect the promises manually.
    const promises = [];
    snapshot.forEach(childSnapshot => {
        promises.push(updateUnlocked(childSnapshot.val().isLocked, childSnapshot.val().locked, childSnapshot.key));
    });
    Promise.all(promises).then(() => {
        console.log('done');
        process.exit(0);
    }).catch(err => {
        console.log('error', err);
        process.exit(1);
    });
});
Demonstrating the control flow.
setTimeout(() => {
    const x = [1, 2, 3];
    const promises = x.map(i => {
        return new Promise(resolve => resolve(i));
    });
    Promise.all(promises).then(() => {
        console.log('done. process.exit(0) here');
    });
}, 200);
If you want to exit on successful completion, then refer to the code below:
Promise.all(promises).then(function (data) {
    console.log(data);
    process.exit(0);
}).catch(function (err) {
    console.log('error');
});
If you want to exit on error as well then add process.exit(1) in catch block.
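For example (same promises array as above):

Promise.all(promises).then(function (data) {
    console.log(data);
    process.exit(0); // success
}).catch(function (err) {
    console.log('error', err);
    process.exit(1); // signal failure to the cron runner
});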

Chaining Promises In Loop

I am struggling to loop through the periods array while querying the DB and inserting data. The final db.queryPromise does not get invoked; please see the comment five lines from the bottom for where the problem is.
// db.js
const mysql = require('mysql');

const connection = mysql.createConnection({
    host: 'localhost',
    user: ****,
    password: ****,
    database: ****,
});

module.exports = connection;

module.exports.queryPromise = function (args) {
    return new Promise((resolve, reject) => {
        connection.query(args, (err, rows, fields) => {
            if (err) return reject(err);
            resolve(rows);
        });
    });
};

module.exports.connectPromise = new Promise((resolve, reject) => {
    connection.connect(err => {
        if (err) reject(err);
        resolve();
    });
});
// app.js
const db = require('../config/db');

const periods = ['1h','12h','24h','1w','1m','3m','1y','all'];
const sqlCarIds = `SELECT id FROM car_models ORDER BY id DESC LIMIT 200;`;

return db.queryPromise(sqlCarIds)
    .then((rows) => {
        const car_ids = [];
        for (let i = rows.length - 1; i >= 0; i--) {
            car_ids.push(rows[i].car_id);
        }
        for (let i = periods.length - 1; i >= 0; i--) {
            const sqlSnapshot = `SELECT price FROM car_models;`;
            db.queryPromise(sqlSnapshot)
                .then(([row]) => {
                    if (!row) {
                        throw new Error('API call found nothin');
                    }
                    const highPrice = row.high;
                    const sqlInsert = `INSERT into price_cache (high) VALUES (` + highPrice` + )`;`
                    console.log(sqlInsert); // logs correctly formed query
                    db.queryPromise(sqlInsert)
                        .then(() => {
                            console.log('this should fire'); // doesn't fire
                        });
                });
        }
    });
The SQL syntax for sqlInsert is invalid. You will need to write it like the following example, using ${expression} placeholders to add the value of an expression into a template string. Your promise doesn't get resolved because there is an error, which rejects it.
const sqlInsert = `INSERT into price_cache (high) VALUES (${highPrice})`;
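As an aside, a sketch of how you could skip string building entirely: the mysql package accepts an options object with sql and values, which the queryPromise helper above already forwards unchanged, so the driver fills in the ? placeholder for you:

db.queryPromise({ sql: 'INSERT INTO price_cache (high) VALUES (?)', values: [highPrice] })
    .then(() => console.log('this should fire'));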
