Node.js Streams - Help find my memory leak

So I have a process that selects from a table. I partition my select programmatically into 20 sub-selects. I then go through each one of those selects and stream its data to an indexing client (Solr). With every select, memory jumps up and holds until I get an OOM.
I logged when each query went off, which can be seen in the following charts:
These correlate with each jump in this memory graph:
14 of 20 queries ran before I OOMed.
I see the same behavior with similar code, except it runs a delta every 15 minutes. Every delta holds on to some amount of memory until it eventually causes the server to crash with an OOM (from which it recovers).
I have tried to track down issues with the delta in the past but gave up and just created a way to restart gracefully. What am I missing here?
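For what it's worth, a minimal way to sample heap usage in-process between queries would be something like this (the interval is arbitrary):
setInterval(() => {
  // heapUsed from process.memoryUsage(), converted to MB
  const heapUsedMb = process.memoryUsage().heapUsed / 1024 / 1024
  log.info(`Heap used: ${heapUsedMb.toFixed(1)} MB`)
}, 5000)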
Here is my entire code chain that makes this work... I know it's a lot to look through, but I figured as much detail as possible would help.
Library Stack:
"node": "~11.10.1"
"knex": "^0.20.9",
"oracledb": "^4.0.0"
"camelize2": "^1.0.0"
Knex - DB connection factory
'use strict'
const objection = require('objection')
const knex = require('knex')
module.exports = function ObjectionFactory(log) {
class MyObjection extends objection.Model {
constructor() {
super()
}
static get tableName() {
return ''
}
}
MyObjection.pickJsonSchemaProperties = true
log.info('Connecting to Oracle Pluggable...', {
host: 'myHost',
username: 'myUser',
database: 'myDatabase'
})
const knexInstance = knex({
client: 'oracledb',
connection: 'connectionInfo',
pool: {
min: 0,
max: 10
},
acquireConnectionTimeout: 10000
})
process.once('SIGINT', () => {
log.info('Disconnecting from Oracle Pluggable.')
knexInstance.destroy()
.then(() => process.exit(0))
.catch(() => process.exit(1))
})
// Shut down cleanly for nodemon
process.once('SIGUSR2', () => {
log.info('Disconnecting from Oracle Pluggable')
knexInstance.destroy()
.then(() => process.kill(process.pid, 'SIGUSR2'))
.catch(() => process.kill(process.pid, 'SIGUSR2'))
})
const knexBoundClass = MyObjection.bindKnex(knexInstance)
knexBoundClass.tag = 'Oracle Connection'
return knexBoundClass
}
My Select Stream Code:
const oracledb = require('oracledb')

module.exports = function oracleStream(log, MyObjection) {
const knex = MyObjection.knex()
const fetchArraySize = 10000
const outFormat = oracledb.OBJECT
return {
selectStream
}
async function selectStream(sql, bindings = [], fetchSize = fetchArraySize) {
let connection = await knex.client.acquireConnection()
log.info(`Fetch size is set to ${fetchSize}`)
let select = connection.queryStream(sql, bindings, {
fetchArraySize: fetchSize,
outFormat: outFormat
})
select.on('error', (err) => {
log.error('Oracle Error Event', err)
knex.client.releaseConnection(connection)
})
select.on('end', () => {
log.info('Destroying the Stream')
select.destroy()
})
select.on('close', () => {
log.info('Oracle Close Event')
knex.client.releaseConnection(connection)
select = null
connection = null
})
return select
}
}
My index/stream pipeline code
async function indexJob() {
const reindexStartTime = new moment().local()
let rowCount = 0
log.info('Reindex Started at', reindexStartTime.format())
let queryNumber = 1
const partitionedQueries = ['Select * from table where 1=1', 'Select * from table where 2=2', 'Select * from table where 3=3'] //There would be 20 queries in this array
let partitionedQueriesLength = partitionedQueries.length
while (partitionedQueries.length > 0) {
let query = partitionedQueries.pop()
log.info('RUNNING Query', {
queryNumber: `${queryNumber++} of ${partitionedQueriesLength}`,
query: query
})
let databaseStream = await oracleStream.selectStream(query, [], 10000) //10k represents the oracle fetch size
databaseStream.on('data', () => {
rowCount++
})
let logEveryFiveSec = setInterval(() => {
log.info('Status: ', getReindexInfo(reindexStartTime, rowCount))
}, 5000)
try {
let pipeline = util.promisify(stream.pipeline)
await pipeline(
databaseStream,
camelizeAndStringify(),
streamReindex(core)
)
} catch (err) {
databaseStream.destroy(err)
throw new JobFailedError(err)
} finally {
databaseStream.destroy()
clearInterval(logEveryFiveSec)
}
}
}
function camelizeAndStringify() {
let first = true
const serialize = new Transform({
objectMode: true,
highWaterMark: 1000,
transform(chunk, encoding, callback) {
if (first) {
this.push('[' + JSON.stringify(camelize(chunk)))
first = false
} else {
this.push(',' + JSON.stringify(camelize(chunk)))
}
callback()
chunk = null
},
flush(callback) {
this.push(']')
callback()
}
})
return serialize
}
function streamReindex(core) {
const updateUrl = baseUrl + core + '/update'
const options = {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
'auth': `${user.username}:${user.password}`,
}
let postStream = https.request(updateUrl, options, (res) => {
let response = {
status: {
code: res.statusCode,
message: res.statusMessage
},
headers: res.headers,
}
if (res.statusCode !== 200) {
postStream.destroy(new Error(JSON.stringify(response)))
}
})
postStream.on('error', (err)=>{
throw new Error(err)
})
postStream.on('socket', (socket) => {
socket.setKeepAlive(true, 110000)
})
return postStream
}
EDIT 1:
I tried removing knex from the equation by making a single connection to my db with the oracledb library. Unfortunately I still see the same behavior.
This is how I changed my select to not use knex:
async function selectStream(sql, bindings = [], fetchSize = fetchArraySize) {
const connectionInfo = {
user: info.user,
password: info.password,
connectString: info.host +'/'+info.database
}
const connection = await oracledb.getConnection(connectionInfo)
log.info('Connection was successful!')
log.info(`Fetch size is set to ${fetchSize}`)
let select = connection.queryStream(sql, bindings, {
fetchArraySize: fetchSize,
outFormat: outFormat
})
select.on('error', async (err) => {
log.error('Oracle Error Event', err)
await connection.close()
})
select.on('end', () => {
log.info('Destroying the Stream')
select.destroy()
})
select.on('close', async () => {
log.info('Oracle Close Event')
await connection.close()
})
return select
}

Related

How to work with the response object in a Node.js stream, exceljs and worker thread

I am using a worker thread and a stream at the same time in a Node.js project. Initially I was not able to pass the res object from the main process to the worker thread. I went through many Stack Overflow questions and solutions and wrote a solution which works great: I created a Readable stream in the main thread and a Writable stream in the worker thread. While doing this, I run a huge calculation over more than 10 tables and export the data, which takes nearly 1 minute to process.
code:
router.get("/downloadAll", (req, res) => {
new Promise((resolve, reject) => {
const promise = [];
promise.push(Dashboard.DUser());
promise.push(Dashboard.DDUser());
promise.push(Dashboard.DDLUser());
promise.push(Dashboard.Din());
promise.push(Dashboard.Str());
promise.push(Dashboard.R());
promise.push(Dashboard.Q());
Promise.all(promise).catch(err => err)
.then(results => {
const worker = new Worker(`${process.cwd()}/src/route/modules/dashboard/worker.js`, {
workerData: { results }
});
const fileHeaders = [
{
name: "Content-Type",
value: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
}
];
res.setHeader("Content-Disposition", `attachment; filename="Stream.xlsx`);
fileHeaders.forEach(header => res.setHeader(header.name, header.value));
const readStream = new Readable({
read() {}
});
readStream.pipe(res);
worker.on("message", message => {
readStream.push(message);
});
worker.on("exit", code => {
console.log("exit", code);
resolve(true);
//if (code !== 0) reject(new Error(`stopped with ${code} exit code`));
});
});
})
.then(() => res.end())
.catch(err => console.log(err));
});
WORKER THREAD:
const { workerData, parentPort } = require("worker_threads");
const { Writable } = require("stream");
const Excel = require("exceljs");
const writableStream = new Writable();
// writableStream.on("message", () => {});
writableStream._write = (chunk, encoding, next) => {
parentPort.postMessage(chunk);
next();
};
const createWorkbook = () => {
const workbook = new Excel.stream.xlsx.WorkbookWriter({
stream: writableStream, // stream to server response
useStyles: true // not sure about this one, check with it turned off.
});
workbook.title = "Serious";
workbook.creator = "SS";
workbook.created = new Date();
return workbook;
};
const createSheet = workbook => {
workerData.results.forEach((result, index) => {
const worksheet = workbook.addWorksheet(result.title, {
properties: { outlineLevelCol: 1 }
});
worksheet.columns = Object.keys(result.data[0]).map(item => {
return { header: item, key: item };
});
result.data.forEach(row => worksheet.addRow(row).commit);
});
};
const workbook = createWorkbook();
createSheet(workbook);
workbook.commit();
The above code works fine and is fast for a small calculation. When I have a huge computation, it shows processing for 1 minute, finishes processing, and downloads the xls file. So I updated the code to:
router.get("/downloadAll", (req, res) => {
const worker = new Worker(`${process.cwd()}/src/worker/worker.js`);
const fileHeaders = [
{
name: "Content-Type",
value: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
}
];
const today = new Date();
res.setHeader(
"Content-Disposition",
`attachment; filename=Q-${today.getFullYear()}${String(today.getMonth() + 1).padStart(2, "0")}${String(
today.getDate()
).padStart(2, "0")}.xlsx`
);
fileHeaders.forEach(header => res.setHeader(header.name, header.value));
const readStream = new Readable({
read() {}
});
readStream.pipe(res);
worker.on("message", message => {
readStream.push(message);
});
worker.on("exit", code => {
console.log("exit", code);
res.end();
//if (code !== 0) reject(new Error(`stopped with ${code} exit code`));
});
});
and worker thread code:
const { workerData, parentPort } = require("worker_threads");
const { Writable } = require("stream");
const Excel = require("exceljs");
const { resolve } = require("path");
const db = require(`${process.cwd()}/src/modules/db.module`);
const Dashboard = require(`${process.cwd()}/src/route/modules/dashboard.model`);
const promise = [];
promise.push(Dashboard.DUser());
promise.push(Dashboard.DDUser());
promise.push(Dashboard.DDLUser());
promise.push(Dashboard.Din());
promise.push(Dashboard.Str());
promise.push(Dashboard.R());
promise.push(Dashboard.Q());
Promise.all(promise).catch(err => err)
.then(results => { const writableStream = new Writable();
// writableStream.on("message", () => {});
writableStream._write = (chunk, encoding, next) => {
console.log(chunk.toString());
parentPort.postMessage(chunk);
next();
};
const createWorkbook = () => {
const workbook = new Excel.stream.xlsx.WorkbookWriter({
stream: writableStream, // stream to server response
useStyles: true // not sure about this one, check with it turned off.
});
workbook.creator = "ss";
workbook.created = new Date();
return workbook;
};
const createSheet = workbook => {
results.forEach((result, index) => {
// console.log(result);
const worksheet = workbook.addWorksheet(result.title, {
properties: { outlineLevelCol: 1 }
});
worksheet.columns = Object.keys(result.data[0]).map(item => {
return { header: item, key: item };
});
result.data.forEach(row => worksheet.addRow(row).commit);
});
};
The above code does not work correctly. I can get the data from the promise callback, but when it's downloading it shows 300kb, 200b, 1 byte and ends at 0, and it does not download.
If I try to insert the promise inside createSheet then I am getting this error:
Error [ERR_UNHANDLED_ERROR]: Unhandled error. ({ message: 'queue closed', code: 'QUEUECLOSED', data: undefined })
code:
const createSheet = workbook => {
let promise = [];
/**
* get count of all the user list
*/
promise.push(Dashboard.DDPro());
Promise.all(promise)
.then(results => {
results.forEach((result, index) => {
console.log(result);
const worksheet = workbook.addWorksheet(result.title, {
properties: { outlineLevelCol: 1 }
});
worksheet.columns = Object.keys(result.data[0]).map(item => {
return { header: item, key: item };
});
result.data.forEach(row => worksheet.addRow(row).commit);
});
})
.catch(err => console.log(err));
};
Can anybody help me solve the problem?

One of my friends is trying to automate a process in which a bot will post Instagram stories as a video from a specific folder

Below is the working code in which it can post images, but is there any way I can also share videos as an Instagram story?
The error I get when I try to post a video instead of an image is:
PS D:\Softwares\programming\Insta Bot\story> node index.js
18:45:11 - info: Dry Run Activated
18:45:11 - info: Post() called! ======================
18:45:11 - debug: 1 files found in ./images/
18:45:11 - warn: Record file not found, saying yes to D:\Softwares\programming\Insta Bot\story\images\meme.mp4
18:45:11 - debug: Read File Success
18:45:11 - error: undefined
(MAIN CODE)
index.js
const logger = require("./logger.js")
const { random, sleep } = require('./utils')
require('dotenv').config();
const { IgApiClient, IgLoginTwoFactorRequiredError } = require("instagram-private-api");
const ig = new IgApiClient();
const Bluebird = require('bluebird');
const inquirer = require('inquirer');
const { CronJob } = require('cron');
const path = require("path");
const fs = require("fs");
const fsp = fs.promises;
const sharp = require("sharp");
//==================================================================================
const statePath = "./etc/state.conf";
const recordPath = "./etc/usedfiles.jsonl";
const imgFolderPath = "./images/";
const dryrun = true;
const runOnStart = true;
//==================================================================================
(async () => { // FOR AWAIT
// LOGIN TO INSTAGRAM
if (!dryrun) {
await login();
logger.info("Log In Successful");
} else {
logger.info("Dry Run Activated");
}
// SCHEDULER
// logger.silly("I'm a schedule, and I'm running!! :)");
const job = new CronJob('38 43 * * * *', post, null, true); //https://crontab.guru/
if (!runOnStart) logger.info(`Next few posts scheduled for: \n${job.nextDates(3).join("\n")}\n`);
else post();
// MAIN POST COMMAND
async function post() {
logger.info("Post() called! ======================");
let postPromise = fsp.readdir(imgFolderPath)
.then(filenames => {
if (filenames.length < 1) throw new Error(`Folder ${imgFolderPath} is empty...`)
logger.debug(`${filenames.length} files found in ${imgFolderPath}`);
return filenames;
})
.then(filenames => filenames.map(file => path.resolve(imgFolderPath + file)))
.then(filenames => pickUnusedFileFrom(filenames, filenames.length))
.then(filename => {
if (!dryrun) registerFileUsed(filename)
return filename
})
.then(fsp.readFile)
.then(async buffer => {
logger.debug("Read File Success "); //TODO move this to previous then?
return sharp(buffer).jpeg().toBuffer()
.then(file => {
logger.debug("Sharp JPEG Success");
return file
})
})
.then(async file => {
if (!dryrun) {
// await sleep(random(1000, 60000)) //TODO is this necessary?
return ig.publish.story({ file })
.then(fb => logger.info("Posting successful!?"))
}
else return logger.info("Data not sent, dryrun = true")
})
.then(() => logger.info(`Next post scheduled for ${job.nextDates()}\n`))
.catch(logger.error)
}
})();
//=================================================================================
async function login() {
ig.state.generateDevice(process.env.IG_USERNAME);
// ig.state.proxyUrl = process.env.IG_PROXY;
//register callback?
ig.request.end$.subscribe(async () => {
const serialized = await ig.state.serialize();
delete serialized.constants; // this deletes the version info, so you'll always use the version provided by the library
await stateSave(serialized);
});
if (await stateExists()) {
// import state accepts both a string as well as an object
// the string should be a JSON object
const stateObj = await stateLoad();
await ig.state.deserialize(stateObj)
.catch(err => logger.debug("deserialize: " + err));
} else {
let standardLogin = async function() {
// login like normal
await ig.simulate.preLoginFlow();
logger.debug("preLoginFlow finished");
await ig.account.login(process.env.IG_USERNAME, process.env.IG_PASSWORD);
logger.info("Logged in as " + process.env.IG_USERNAME);
process.nextTick(async () => await ig.simulate.postLoginFlow());
logger.debug("postLoginFlow finished");
}
// Perform usual login
// If 2FA is enabled, IgLoginTwoFactorRequiredError will be thrown
return Bluebird.try(standardLogin)
.catch(
IgLoginTwoFactorRequiredError,
async err => {
logger.info("Two Factor Auth Required");
const {username, totp_two_factor_on, two_factor_identifier} = err.response.body.two_factor_info;
// decide which method to use
const verificationMethod = totp_two_factor_on ? '0' : '1'; // default to 1 for SMS
// At this point a code should have been sent
// Get the code
const { code } = await inquirer.prompt([
{
type: 'input',
name: 'code',
message: `Enter code received via ${verificationMethod === '1' ? 'SMS' : 'TOTP'}`,
},
]);
// Use the code to finish the login process
return ig.account.twoFactorLogin({
username,
verificationCode: code,
twoFactorIdentifier: two_factor_identifier,
verificationMethod, // '1' = SMS (default), '0' = TOTP (google auth for example)
trustThisDevice: '1', // Can be omitted as '1' is used by default
});
},
)
.catch(e => logger.error('An error occurred while processing two factor auth', e, e.stack));
}
return
//================================================================================
async function stateSave(data) {
// here you would save it to a file/database etc.
await fsp.mkdir(path.dirname(statePath), { recursive: true }).catch(logger.error);
return fsp.writeFile(statePath, JSON.stringify(data))
// .then(() => logger.info('state saved, daddy-o'))
.catch(err => logger.error("Write error" + err));
}
async function stateExists() {
return fsp.access(statePath, fs.constants.F_OK)
.then(() => {
logger.debug('Can access state info')
return true
})
.catch(() => {
logger.warn('Cannot access state info')
return false
});
}
async function stateLoad() {
// here you would load the data
return fsp.readFile(statePath, 'utf-8')
.then(data => JSON.parse(data))
.then(data => {
logger.info("State load successful");
return data
})
.catch(logger.error)
}
}
async function registerFileUsed( filepath ) {
let data = JSON.stringify({
path: filepath,
time: new Date().toISOString()
}) + '\n';
return fsp.appendFile(recordPath, data, { encoding: 'utf8', flag: 'a+' } )
.then(() => {
logger.debug("Writing filename to record file");
return filepath
})
}
function pickUnusedFileFrom( filenames, iMax = 1000) {
return new Promise((resolve, reject) => {
let checkFileUsed = async function ( filepath ) {
return fsp.readFile(recordPath, 'utf8')
.then(data => data.split('\n'))
.then(arr => arr.filter(Boolean))
.then(arr => arr.map(JSON.parse))
.then(arr => arr.some(entry => entry.path === filepath))
}
let trythis = function( iMax, i = 1) {
let file = random(filenames);
checkFileUsed(file)
.then(async used => {
if (!used) {
logger.info(`Unused file found! ${file}`);
resolve(file);
} else if (i < iMax) {
logger.debug(`Try #${i}: File ${file} used already`);
await sleep(50);
trythis(iMax, ++i)
} else {
reject(`I tried ${iMax} times and all the files I tried were previously used`)
}
})
.catch(err => {
logger.warn("Record file not found, saying yes to " + file);
resolve(file);
})
}( iMax );
})
}

How to mock Tedious Module SQL Connection functions in JEST

I am using Azure Functions written in Node.js.
I have logic to insert into the DB after all actions are completed. This gets called from the main index.js after some API calls. So, from the test class I'm expecting to mock the database methods, and I can't understand mocking much!
Below is the code for the database logic.
'use strict';
const { Connection, Request, TYPES } = require('tedious');
const config = {
server: process.env.myDB_Server,
authentication: {
type: 'default',
options: {
userName: process.env.myDB_User,
password: process.env.myDB_Pwd
}
},
options: {
encrypt: true,
database: process.env.myDB_Name
}
};
const myDB = process.env.myDB;
module.exports = async(context, myPayload, last_Modified_By, status, errorCode, errorMsg, errorDescription) => {
try {
context.log('inside azureTable function');
let connection = new Connection(config);
connection.on('connect', function(err1) {
if (err1) {
context.log('Error connection.OnConnect to DB:::', err1.message);
//logger.error('Error connection.OnConnect to DB::', err1);
let dbStatus = {};
dbStatus["status"] = 400;
dbStatus["message"] = err1.message;
context.res.body["dbStatus"] = dbStatus;
context.done();
} else {
context.log('Database Connection Successful.');
var request = new Request("INSERT INTO " + myDB + " (Correlation_Id,Created_Date,LastModified_Date,Last_Modified_By,Status_CD,Error_Code,Error_Msg,Error_Description,Payload) VALUES (#correlationId,CURRENT_TIMESTAMP,CURRENT_TIMESTAMP,#Last_Modified_By,#Status_CD,#Error_Code,#Error_Msg,#Error_Description,#Payload);", function(err2) {
if (err2) {
context.log('Error inserting records to DB::', err2.message);
//logger.error('Error inserting records to DB::' + err2.message);
let dbStatus = {};
dbStatus["status"] = 400;
dbStatus["message"] = err2.message;
context.res.body["dbStatus"] = dbStatus;
context.done();
}
});
request.addParameter('correlationId', TYPES.NVarChar, JSON.parse(myPayload).correlationId);
request.addParameter('Last_Modified_By', TYPES.NVarChar, last_Modified_By);
request.addParameter('Status_CD', TYPES.NVarChar, status);
request.addParameter('Error_Code', TYPES.Int, errorCode);
request.addParameter('Error_Msg', TYPES.NVarChar, errorMsg);
request.addParameter('Error_Description', TYPES.NVarChar, errorDescription);
request.addParameter('Payload', TYPES.NVarChar, myPayload);
// Close the connection after the final event emitted by the request, after the callback passes
request.on("requestCompleted", function(rowCount, more) {
context.log('Records Successfully inserted into DB');
connection.close();
let dbStatus = {};
dbStatus["status"] = 201;
dbStatus["message"] = "Records Successfully inserted into DB";
context.res.body["dbStatus"] = dbStatus;
context.done();
});
connection.execSql(request);
}
});
connection.connect();
} catch (err) {
context.log('Error in main function::', err.message);
//logger.error('Error in main function::' + err.message);
let dbStatus = {};
dbStatus["status"] = 400;
dbStatus["message"] = err.message;
context.res.body["dbStatus"] = dbStatus;
context.done();
}
};
How can I mock connection.on('connect') or request = new Request without actually hitting the DB?
I tried this, but it's going to the actual connection.
index.test.js
test('return 500 when db connection fails', async() => {
const tedious = require('tedious');
const connectionMock = jest.spyOn(tedious, 'connect');
connectionMock.mockImplementation(() => {
return {
}
});
//calling index js
}, 15000);
test('return 500 when db connection fails', async() => {
const tedious = require('tedious');
const connectionMock = jest.spyOn(tedious, 'Connection');
connectionMock.mockImplementation(() => {
{
throw new Error('some err');
}
});
//calling index js
}, 15000);
After going through some docs, I tried the below with no luck. Jest is not setting the return value and the test times out.
jest.mock('tedious', () => ({
Connection: jest.fn(() => ({
connect: jest.fn().mockReturnValue('err'),
on: jest.fn().mockReturnValue('err')
}))
}))
/* jest.mock('tedious', () => ({
Connection: jest.fn(() => ({
connect: jest.fn(() => (connect, cb) => cb(null)),
on: jest.fn(() => (connect, cb) => cb('err'))
}))
})) */
Finally I figured it out.
The issue was the mocking params not being set correctly. I had unnecessarily used jest.fn() for the inner methods, which actually doesn't help.
Here is the final solution:
jest.mock('tedious', () => ({
Connection: jest.fn(() => ({
connect: () => {},
on: (connect, cb) => cb(),
close: () => {},
execSql: () => {}
})),
TYPES: jest.fn(),
Request: jest.fn(() => ({
constructor: (sqlString, cb) => cb('err', null, null),
addParameter: (name, type, value) => {},
on: (requestCompleted, cb) => cb('rowCount', 'more')
}))
}))
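With that mock in place at the top of the test file, a rough sketch of a test that exercises the DB module could look like this; the require path './insertStatus' and the fake Azure Functions context shape are assumptions based on the snippet above:
const insertStatus = require('./insertStatus') // assumed path to the DB module shown earlier

test('writes a 201 dbStatus when the tedious callbacks succeed', async () => {
  // minimal fake context matching what the module touches
  const context = { log: jest.fn(), done: jest.fn(), res: { body: {} } }
  const payload = JSON.stringify({ correlationId: 'abc-123' })
  await insertStatus(context, payload, 'tester', 'OK', null, null, null)
  // the mocked on('connect') and on('requestCompleted') callbacks drive this path
  expect(context.res.body.dbStatus.status).toBe(201)
  expect(context.done).toHaveBeenCalled()
}, 15000)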

Error after some insertions: MongoNetworkError: connection 812 to 127.0.0.1:27017 closed in Node.js

I want to insert around 60 million records in MongoDB using a Node.js script, so I have created a connection and reuse it like this:
connection.js
const MongoClient = require("mongodb").MongoClient,
{ mongourl,dbName } = require('../../env');
let db;
let mongoobject;
const option = {
useUnifiedTopology: true,
useNewUrlParser: true,
socketTimeoutMS: 300000,
poolSize:1000,
keepAlive: 300000,
connectTimeoutMS: 300000,
};
const connectDb = (callback) => {
if (db) return callback()
MongoClient.connect( mongourl, option,
(err, database) => {
if (err) return console.log(err)
db = database.db(dbName);
mongoobject = database;
callback()
}
)
}
const getDb = () => {
return db;
}
const connectclient = () => {
return mongoobject;
}
module.exports = {
connectDb,
getDb,
connectclient
}
and my insertion function is
function saveData(){
return new Promise(function (resolve, reject) {
try {
fs.access(dirPath, fs.F_OK, (err) => {
if (err) {
console.error(err)
return
}
const startTime = new Date();
let numlines = 0;
const fileReference = {}
fs.readdir(dirPath, function (err, filenames) {
if (err) {
console.error("Directory Not Found")
return;
}
filenames.forEach(function (filename) {
const readInterface = new readline(dirPath + filename);
let promises = [];
fileReference[filename] = 0
readInterface.on('line', function (line) {
fileReference[filename]++
let countcol = line.split('\t').length,
productname = line.split("\t"),
productsku = line.split("\t"),
productprice = line.split("\t");
let product_sku, product_name, product_price;
if (countcol == 3) {
product_sku = productname.splice(0, 2).join("-").toLowerCase();
product_name = productsku.splice(0, 2).join(" ");
product_price = productprice.splice(-1, 1);
} else if (countcol == 4) {
let product_sku_ini = productsku.splice(0, 2).join("").toLowerCase(),
product_sku_end = productsku.splice(0, 1).join(" ").toLowerCase(),
product_name_ini = productname.splice(0, 2).join(""),
product_name_end = productname.splice(0, 1).join(" ");
product_price = productprice.splice(-1, 1);
product_sku = product_sku_ini + "-" + product_sku_end
product_name = product_name_ini + " " + product_name_end
delete product_sku_ini, product_sku_end, product_name_ini, product_name_end,product_sku,product_name,product_price;
}
console.info('row start processing ==>>', filename, product_sku, line);
delete countcol, productname, productsku, productprice;
if (numlines >= 80000) {
readInterface.pause();
// console.log('promises:', promises)
Promise.all(promises)
.then(response => {
numlines = 0;
promises = [];
localStorage.setItem(filename, fileReference[filename]);
console.info(`filename Batch Resolved 1 ========>> ${filename}`, localStorage.getItem(filename))
console.log("====================================================== END 1============================================")
readInterface.resume()
// showHeapUses()
// setTimeout(() => process.exit(), 500)
// console.log('resume 1 time:', (new Date().getTime()) - startTime.getTime())
})
.catch(error => {
console.info(`Error in executing`, error)
numlines = 0;
readInterface.resume()
// console.log('resume 2 time:', (new Date()) - startTime)
})
}
console.log("num line", numlines)
numlines++
if(product_sku && product_name && product_price) {
const memoryUsedMb = process.memoryUsage().heapUsed / 1024 / 1024
console.info('the program used', memoryUsedMb, 'MB')
async.waterfall([
function (callback) {
const checkskuexists = async () => {
let checksamepro = { sku:product_sku };
let check_doc_exist = db.collection(collectionName).findOne(checksamepro);
return check_doc_exist;
}
checkskuexists().then(function(result) {
if(result === null){
callback(true, 'PRODUCT_NOT_FOUND');
}else{
callback(null, result.sku);
}
});
},
function (sku, callback) {
db.collection(collectionName).updateOne({sku:sku}, {$set:{price:product_price}});
resolve();
},
],function (err, result){
if (err) {
if (err && result == 'PRODUCT_NOT_FOUND') {
prodetails = {name:product_name, sku:product_sku, price:product_price, status:'active'}
db.collection(collectionName).insertOne(prodetails, function(err, res) {
if (err) throw err;
client.close();
});
}
resolve();
}
});
delete product_sku, product_name, product_price;
}else {
console.log('product is undefined -- so skiped', line);
delete product_sku, product_name, product_price;
}
});
readInterface.on('error', function (error) {
delete readInterface, fileReference, promises;
console.error("Error in reading file: ", error);
});
readInterface.on('end', function () {
// printPerformance(startTime);
localStorage.removeItem(filename);
Promise.all(promises)
.then(response => {
console.info(`filename Batch Resolved 2 ========>> ${filename} -- Completed`)
console.log("====================================================== END 2============================================")
})
.catch(error => {
console.info(`Error in executing`, error)
})
delete readInterface, fileReference, promises;
});
});
});
});
} catch (error) {
reject("ERROR GOES HERE ", error)
}
});
}
The error I am getting is:
MongoNetworkError: connection 812 to 127.0.0.1:27017 closed
at /var/www/html/reg-dealers-mongodb-script/node_modules/mongodb/lib/cmap/connection.js:68:15
at Map.forEach (<anonymous>)
at Socket.<anonymous> (/var/www/html/reg-dealers-mongodb-script/node_modules/mongodb/lib/cmap/connection.js:67:20)
at Socket.emit (events.js:314:20)
at Socket.EventEmitter.emit (domain.js:483:12)
at TCP.<anonymous> (net.js:675:12)
This comes after some insertions, like 10k or 20k and sometimes 100k. Only the connection number (812) differs; the rest of the error is the same. Any idea why this is happening and how to solve the issue?
Your insertion function is too big to follow. But from the error, it is clear that your insertion function is pulling new mongo connections from the pool.
Generally, when a single connection is tied up with a blocking operation, other available connections from the pool are used to handle the incoming requests that need the db. Since you have defined a pool size of 1000, that's why you are seeing connection 812 closed.
It is not a wise idea to insert 60 million records at once. Instead, divide them into smaller parts, organize your DB architecture, and follow the recommended ways of saving them (collection max size, read/write ops, indexing, etc.). When you need to save multiple documents, you should use the below Mongo function:
db.collection.insertMany(
[ <document 1> , <document 2>, ... ],
{
writeConcern: <document>,
ordered: <boolean>
}
)
For more details check this.
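In a Node.js script that could look roughly like the sketch below; the batch size and where you call flushBatch from your file-reading loop are placeholders to adapt, and db/collectionName are the same variables used in your code:
const BATCH_SIZE = 1000; // placeholder, tune to your workload
let batch = [];

// queue one parsed row; flush to MongoDB whenever the batch fills up
async function queueDoc(doc) {
  batch.push(doc);
  if (batch.length >= BATCH_SIZE) {
    await flushBatch();
  }
}

async function flushBatch() {
  if (batch.length === 0) return;
  const docs = batch;
  batch = [];
  // ordered: false lets the server keep inserting if a single document fails
  await db.collection(collectionName).insertMany(docs, { ordered: false });
}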

Bulk update to Postgres with Node.js performance issue

I'm facing a performance issue while trying to do a bulk update in Postgres. It's taking more than 180 seconds to update around 23,000 records. Please find the code below. I'm using the pg-promise library. Is there anything I could do to improve the performance?
const pgp = require('pg-promise')();
const postgresDBConfig = {
host: Config.postgresDBHost,
port: Config.postgresDBPort,
database: Constants.postgresDBName,
user: Config.postgresDBUser,
password: 'pswd'
};
export async function getTransactionDetails(): Promise<any> {
return new Promise<any>(async function (resolve, reject) {
try {
let db = pgp(postgresDBConfig);
db.connect();
let query = "SELECT * FROM table_name";
db.any(query)
.then(data => {
console.log("Executed successfully::");
resolve(data);
})
.catch(error => {
console.log('ERROR:', error);
})
} catch (error) {
log.error("Error::" + error);
throw error;
}
});
}
export async function updateStatus(result: any, status: string) {
try {
let db = pgp(postgresDBConfig);
//db.connect();
let updateData = [];
_.forEach(result, function (row) {
let updateInfo = {};
updateInfo["sessionid"] = row.sessionid;
updateInfo["status"] = status;
updateData.push(updateInfo);
});
console.log("updateData::" + updateData.length);
const tableName = new pgp.helpers.TableName('table_name', 'schema_name');
let columnset = new pgp.helpers.ColumnSet(['?sessionid', 'status'], { table: tableName });
let update = pgp.helpers.update(updateData, columnset);
db.none(update).then(() => {
console.log("Updated successfully");
})
.catch(error => {
console.log("Error updating the status" + error);
});
}
catch (error) {
log.error("Error in function updateStatus::" + error);
throw error;
}
}
The code exhibits problems all over the place:
You should initialize the database object only once.
You should not use db.connect() at all, which you also use incorrectly for the async code.
You again incorrectly use an async block, skipping await, so it doesn't execute correctly.
You do not append any UPDATE logic clause, so it is updating everything all over again, unconditionally, which may be resulting in the delayed mess that you're in.
Here's an improved example, though it may need some more work from your side...
const pgp = require('pg-promise')();
const postgresDBConfig = {
host: Config.postgresDBHost,
port: Config.postgresDBPort,
database: Constants.postgresDBName,
user: Config.postgresDBUser,
password: 'pswd'
};
const db = pgp(postgresDBConfig);
const tableName = new pgp.helpers.TableName('table_name', 'schema_name');
const columnSet = new pgp.helpers.ColumnSet(['?sessionid', 'status'], {table: tableName});
export async function getTransactionDetails(): Promise<any> {
try {
const res = await db.any('SELECT * FROM table_name');
console.log('Executed successfully::');
return res;
} catch (error) {
console.log('ERROR:', error);
throw error;
}
}
export async function updateStatus(result: any, status: string) {
try {
let updateData = [];
_.forEach(result, row => {
let updateInfo = {};
updateInfo["sessionid"] = row.sessionid;
updateInfo["status"] = status;
updateData.push(updateInfo);
});
console.log('updateData::', updateData.length);
const update = pgp.helpers.update(updateData, columnSet) +
' WHERE v.sessionid = t.sessionid';
await db.none(update);
console.log('Updated successfully');
}
catch (error) {
console.log('Error in function updateStatus:', error);
throw error;
}
}
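A quick usage sketch of the two functions, assuming this runs as a one-off script (the 'PROCESSED' status value is just a placeholder):
(async () => {
    const rows = await getTransactionDetails();
    await updateStatus(rows, 'PROCESSED');
    pgp.end(); // shut down the connection pool when the script is done
})();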
