Force NodeJS to wait pipe stream in loop? - node.js

I'm trying to import .csv files into SQLite using NodeJS:
function get_files_array(path) {
let arr = [];
const files = fs.readdirSync(path);
for (const file of files) {
arr.push(path + file);
}
return arr;
}
let file_path = "./insert.sql";
let files_array = "./lots_of_csv/"
for (const file of get_files_array(files_array)) {
let csv2sql = CSV2SQL({
tableName: table_name,
});
let rstream = fs.createReadStream(file);
let wstream = fs.createWriteStream(file_path);
let stream = rstream.pipe(csv2sql).pipe(wstream);
stream.on("finish", () => {
const dataSql = fs.readFileSync(file_path).toString();
db.run("BEGIN TRANSACTION;");
db.run(dataSql, [], (err) => {
if (err) return console.error(err.message);
});
db.run("COMMIT;");
db.all("select count(*) from table_name", [], (err, rows) => {
if (err) return console.error(err.message);
rows.forEach((row) => {
console.log(row);
});
});
});
}
The problem is: as soon as it enters the stream.on("finish" line, it will start executing the next file in the loop simultaneously and crash.

Related

Node.js split file lines

I want to write a script that divides the lines read from the file into packages of 25, unfortunately the sample package returns 40 codes. I would like to do so that, for example, he divided me into packages of 25 items. I mean, I have, for example, 60 codes, this creates 2 packages of 25, and one with 10 codes. Unfortunately, I can't handle it.
const fs = require('fs');
fs.readFile('code.txt', function (err, data) {
if (err) throw err;
const array = data.toString().split("\n");
let count = 0;
let items = [];
for (let i in array) {
items.push({
PutRequest: {
Item: {
code: array[i]
}
}
});
let params = {
RequestItems: {
'TABLE_NAME': items
}
};
if (count === 25) {
dynamoDB.batchWrite(params, function (err, data) {
if (err) {
console.log(err);
} else {
count = 0;
items = [];
}
});
}else{
count++;
}
}
});
code.txt content
https://0bin.net/paste/NA8-4hkq#1Ohwt5uUkQqE0YscwnxTX2gxEqlvAUVKp1JRipBCsZg
Any idea what I do wrong?
Your dynamoDB.batchWrite() is asynchronous. Thus its callback is executed only after the loop has completed. So items and count are never reset ...
The easiest would be, if you could switch to an promise based approach like the following
const BATCHSIZE = 25;
const fs = require('fs').promises;
async function batchLoad() {
const lines = (await fs.readFile("code.txt", "utf-8")).split("\n");
while (lines.length > 0) {
const items = lines.splice(0, BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
const params = { RequestItems: { TABLE_NAME: items}};
await new Promise((resolve, reject) => {
dynamoDb.batchWrite(params, (err) => {
if (err) return reject(err);
resolve();
});
});
}
}
A callback based approach could look like this
const BATCHSIZE = 25;
fs.readFile("code.txt", "utf-8", (err, data) => {
const lines = data.split("\n");
function writeBatch() {
if (!lines.length) return;
const items = lines.splice(0, BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
const params = { RequestItems: { TABLE_NAME: items}};
dynamoDb.batchWrite(params, err => {
if (err) ...
else writeBatch();
});
}
writeBatch();
}
The function writeBatch takes a certain number of lines from your original array and writes them into the database. Only afer the write into the DB was successful, it recursively calls itself and handles the next batch. But be aware, that this approach may exceed the maximum call stack size and throw an error.
You can also make either of this approaches not manipulate the lines array (which may be quite expensive), but just get out the current slice
const BATCHSIZE = 25;
const fs = require('fs').promises;
async function batchLoad() {
const lines = (await fs.readFile("code.txt", "utf-8")).split("\n");
let currentIndex = 0;
while (currentIndex < lines.length) {
const items = lines.slice(currentIndex, currentIndex + BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
const params = { RequestItems: { TABLE_NAME: items}};
await new Promise((resolve, reject) => {
dynamoDb.batchWrite(params, (err) => {
if (err) return reject(err);
resolve();
});
});
currentIndex += BATCHSIZE;
}
}
and
const BATCHSIZE = 25;
fs.readFile("code.txt", "utf-8", (err, data) => {
const lines = data.split("\n");
function writeBatch(currentIndex) {
if (currentIndex >= lines.length) return;
const items = lines.slice(currentIndex, currentIndex + BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
const params = { RequestItems: { TABLE_NAME: items}};
dynamoDb.batchWrite(params, err => {
if (err) ...
else writeBatch(currentIndex + BATCHSIZE);
});
}
writeBatch(0);
}
To prevent stumbling into a maximum callstack exception you may also add the next batch to the eventloop and not call it recursively. Ie
dynamoDb.batchWrite(params, err => {
if (err) ...
else setTimeout(()=> { writeBatch(currentIndex + BATCHSIZE);}, 0);
});
This way you won't build up a massive callstack from recursive calls.
To keep track of how many records are already saved to the db you could simply store the current counter in a file. When you restart the process, load that file and check how many lines to skip. Don't forget to remove the file, once all records have been saved ... For example with the first approach:
const BATCHSIZE = 25;
const fs = require('fs').promises;
async function batchLoad() {
const lines = (await fs.readFile("code.txt", "utf-8")).split("\n");
const skipLines = 0;
try {
skipLines = +(await fs.readFile("skip.txt", "utf-8"));
if (isNaN(skipLines)) skipLines = 0;
lines.splice(0, skipLines);
} catch (e) {
skipLines = 0;
}
while (lines.length > 0) {
const items = lines.splice(0, BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
const params = { RequestItems: { TABLE_NAME: items}};
await new Promise((resolve, reject) => {
dynamoDb.batchWrite(params, (err) => {
if (err) return reject(err);
resolve();
});
});
skipLines += BATCHSIZE;
await fs.writeFile("skip.txt", `${skipLines}`);
}
try {
await fs.unlink("skip.txt");
} catch (e) {
}
}

readFile synchronously nodejs

I am new to nodejs and just started learning. I need to read 5 json files and place them in an array. I have created 2 functions: readDirectory and processFile.
let transactionArray = [];
router.get('/', (req,res) => {
//joining path of directory
const directoryPath = path.join(__dirname, '../data');
readDirectory(directoryPath);
res.send(JSON.stringify(transactionArray))
})
readDirectory will get the dir and will read the filenames.
function readDirectory(directoryPath){
//passsing directoryPath and callback function
fs.readdir(directoryPath, function (err, files) {
//handling error
if (err) {
return console.log('Unable to scan directory: ' + err);
}
//listing all files using map
let fileSummary = files.map(file => {
//get the filename
let categoryName = ''
if (file.includes('category1')) {
categoryName = 'category1'
} else if (file.includes('category2')) {
categoryName = 'category2'
} else {
categoryName = 'Others'
}
// read the file
const filePath = directoryPath +'/'+ file
fs.readFile(filePath, 'utf8', (err, fileContents) => {
if (err) {
console.error(err)
return
}
try {
let data = JSON.parse(fileContents, categoryName)
processFile(data, categoryName);
} catch(err) {
console.error(err)
}
})
})
});
}
Then it will read the file using function processFile.
function processFile(data, categoryName)
{
let paymentSource = ''
if (categoryName == 'category1'){
paymentSource = categoryName +': '+ categoryName +' '+ data.currency_code
} else if (categoryName == 'category2') {
paymentSource = categoryName +': '+ data.extra.payer +'-'+ data.currency_code
} else {
paymentSource = 'Others'
}
let transactionDetails = new Transaction(
data.id,
data.description,
categoryName,
data.made_on,
data.amount,
data.currency_code,
paymentSource)
transactionArray.push(transactionDetails)
console.log(transactionArray);
}
The console log is something like this:
[{Transaction1}] [{Transaction1},{Transaction2}] [{Transaction1},{Transaction2},{Transaction3}]
but the result on the UI is only []
During debug, I noticed that it is not reading synchronously so I tried using readFileSync but it did not work. How can I read both functions synchronously so it will not give an empty array?
Do some playing around to understand what the fs functions do when they have callbacks, and when they're synchronous. From the code that you have we have make a few changes so that you don't have to use the synchronous functions from the file system library.
First of all you need to wait for all the asynchronous tasks to complete before returning response.
router.get('/', async (req, res) => {
// joining path of directory
const directoryPath = path.join(__dirname, '../data')
readDirectory(directoryPath).then(() => {
res.send(JSON.stringify(transactionArray))
}).catch(err => {
res.status(500).json(err)
})
})
Secondly, to keep the code as is as to teach you something about promises, lets wrap the first function in a promise.
function readDirectory (directoryPath) {
return new Promise((resolve, reject) => {
// passsing directoryPath and callback function
fs.readdir(directoryPath, function (err, files) {
// handling error
if (err) {
return console.log('Unable to scan directory: ' + err)
}
// listing all files using map
const fileSummary = Promise.all(
files.map(file => {
return new Promise((resolve, reject) => {
// get the filename
let categoryName = ''
if (file.includes('category1')) {
categoryName = 'category1'
} else if (file.includes('category2')) {
categoryName = 'category2'
} else {
categoryName = 'Others'
}
// read the file
const filePath = directoryPath + '/' + file
fs.readFile(filePath, 'utf8', (err, fileContents) => {
if (err) {
console.error(err)
reject(err)
}
try {
const data = JSON.parse(fileContents, categoryName)
processFile(data, categoryName).then(data => {
data()
})
} catch (err) {
console.error(err)
reject(err)
}
})
})
})
).then(() => {
resolve()
}).catch(err => {
reject(err)
})
})
})
}
Please refer to the bible (MDN) for javascript about promises -> https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
And finally wrap the processFile function in a promise
function processFile (data, categoryName) {
return new Promise((resolve, reject) => {
let paymentSource = ''
if (categoryName == 'category1') {
paymentSource = categoryName + ': ' + categoryName + ' ' + data.currency_code
} else if (categoryName == 'category2') {
paymentSource = categoryName + ': ' + data.extra.payer + '-' + data.currency_code
} else {
paymentSource = 'Others'
}
const transactionDetails = new Transaction(
data.id,
data.description,
categoryName,
data.made_on,
data.amount,
data.currency_code,
paymentSource)
transactionArray.push(transactionDetails)
console.log(transactionArray)
resolve()
})
}
What the heck am I doing? I'm just making your code execute asynchronous task, but wait for them to be completed before moving on. Promises are a way to handle this. You can easily pull this off with the FS synchronous functions, but this way you can learn about promises!

Return result from python script in Node JS child process

Im trying to return the value from a python script from a nodejs child process and i just cant seem to get it to work, it prints in the console using console.log correctly as it should but only returns undefined, i was wondering if there is a way to directly return that value, or to parse the console.log results into a string.
var sys = require('util');
module.exports = function() {
this.getPlay = function getPlaylist(name) {
const childPython = spawn('python' ,['main.py', name]);
var result = '';
childPython.stdout.on(`data` , (data) => {
result += data.toString();
});
childPython.on('exit' , () => {
console.log(result);
});
}};
Python script is empty for now and prints "Hello 'name' "
Edit:
I tried to use promises and here is what i have:
(async function(){
function test(name) {
return new Promise((resolve , reject) => {
const childPython = spawn('python' ,['main.py', "He"]);
var result = '';
childPython.stdout.on(`data` , (data) => {
result += data.toString();
});
childPython.on('close' , function(code) {
t = result
resolve(result)
});
childPython.on('error' , function(err){
reject(err)
});
})};
var t;
await test(name);
console.log(t);
return t;
})();
Define it like this.
function getPlaylist(name) {
return new Promise((resolve , reject) => {
const childPython = spawn('python' ,['main.py', name]);
var result = '';
childPython.stdout.on(`data` , (data) => {
result += data.toString();
});
childPython.on('close' , function(code) {
resolve(result)
});
childPython.on('error' , function(err){
reject(err)
});
})
};
Remeber to use try...catch for it it gets rejected. https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/try...catch
async function runTest() {
try {
const playList = await getPlaylist();
console.log(playList);
} catch (err) {
}
}
runTest()
const {spawn} = require('child_process');
const getPythonScriptStdout = (pythonScriptPath) => {
const python = spawn('python', [pythonScriptPath]);
return new Promise((resolve, reject) => {
let result = ""
python.stdout.on('data', (data) => {
result += data
});
python.on('close', () => {
resolve(result)
});
python.on('error', (err) => {
reject(err)
});
})
}
getPythonScriptStdout('./python.py').then((output) => {
console.log(output)
})
python.py file
print("hi from python")

How to convert all djvu files to pdf

it's answer. Just use nodejs and ddjvu from DJView lib.
There
imports
const fs = require('fs');
const os = require('os');
const {spawn} = require('child_process');
const path = require('path');
const maxProcess = os.cpus().length - 1;// count of procces - 1 for system needs
let nowPlayed = 0;
method for convert file, and delete when converted.
function chpoc(args) {
console.log(args[1] + " start converting");
spawn(`ddjvu`, ["-format=pdf", args[0], args[1] + ".pdf"]).on('close', (data) => {
console.log(args[1] + ".pdf converted");
fs.unlink(args[0], (err) => {
if (err) throw err;
console.log(args[0] + ' successfully deleted!');
nowPlayed--;
})
});
}
queue for optimize max convertions at one time
let queue = [];
function startQueue() {
if (nowPlayed < maxProcess && queue.length) {
nowPlayed++;
queue.pop()();
}
}
setInterval(startQueue, 500)
fillthe queue and start it
function workWithFile(filepath) {
const args = filepath.match(/(.*)\.djvu/)
if (args && args.length) {
queue.push(() => {
chpoc(args);
});
}
}
show errors
const eachCallback = function (err) {
err && console.error(err);
}
catalog three and finde the djvus
let filePaths = [];
function getFiles(dirPath, callback) {
fs.readdir(dirPath, function (err, files) {
if (err) return callback(err);
files.forEach((fileName) => {
setTimeout(() => {
let filePath = path.join(dirPath, fileName);
if (filePath) {
fs.stat(filePath, function (err, stat) {
if (err) return eachCallback(err);
if (stat.isDirectory()) {
getFiles(filePath, callback);
} else if (stat.isFile() && /\.djvu$/.test(filePath)) {
filePaths.push(filePath);
callback(filePath)
}
})
}
});
});
});
}
init from started dir
getFiles(__dirname, function (file) {
workWithFile(file);
});
imports
const fs = require('fs');
const os = require('os');
const {spawn} = require('child_process');
const path = require('path');
const maxProcess = os.cpus().length - 1;// count of procces - 1 for system needs
let nowPlayed = 0;
method for convert file, and delete when converted.
function chpoc(args) {
console.log(args[1] + " start converting");
spawn(`ddjvu`, ["-format=pdf", args[0], args[1] + ".pdf"]).on('close', (data) => {
console.log(args[1] + ".pdf converted");
fs.unlink(args[0], (err) => {
if (err) throw err;
console.log(args[0] + ' successfully deleted!');
nowPlayed--;
})
});
}
queue for optimize max convertions at one time let queue = [];
function startQueue() {
if (nowPlayed < maxProcess && queue.length) {
nowPlayed++;
queue.pop()();
}
}
setInterval(startQueue, 500)
fill the queue and start it
function workWithFile(filepath) {
const args = filepath.match(/(.*)\.djvu/)
if (args && args.length) {
queue.push(() => {
chpoc(args);
});
}
}
show errors
const eachCallback = function (err) {
err && console.error(err);
}
catalog three and finde the djvus
let filePaths = [];
function getFiles(dirPath, callback) {
fs.readdir(dirPath, function (err, files) {
if (err) return callback(err);
files.forEach((fileName) => {
setTimeout(() => {
let filePath = path.join(dirPath, fileName);
if (filePath) {
fs.stat(filePath, function (err, stat) {
if (err) return eachCallback(err);
if (stat.isDirectory()) {
getFiles(filePath, callback);
} else if (stat.isFile() && /\.djvu$/.test(filePath)) {
filePaths.push(filePath);
callback(filePath)
}
})
}
});
});
});
}
init from started dir
getFiles(__dirname, function (file) {
workWithFile(file);
});

Unable to get callback from function

Trying to get the variable random_song from the function functions.random_song
Function:
functions.random_song = () => {
fs.readFile('auto_playlist.txt', 'utf8', function(err, data) {
if (err) {
console.log(`${`ERR`.red} || ${`Error fetching song from auto playlist ${err}`.red}`);
}
let songs = data.split('\n');
songs.splice(-1, 1);
var random = Math.floor(Math.random()*songs.length);
let random_song = (songs[random]);
return random_song;
})
}
Attempting to callback the random song
functions.random_song(song => {
console.log(song)
})
The code just return undefined Ideas?
Your function:
functions.random_song = (callback) => {
fs.readFile('auto_playlist.txt', 'utf8', function(err, data) {
if (err) {
// Do stuff
return callback(err);
}
// Do stuff
callback(null, random_song);
})
}
And then call it:
functions.random_song((err, song) => {
// Do stuff
});
You may want to read more about using Promise/async-await instead of callback.
functions.random_song = () => {
let data = fs.readFileSync('auto_playlist.txt', 'utf8');
let songs = data.split('\n');
songs.splice(-1, 1);
var random = Math.floor(Math.random()*songs.length);
let rand_song = (songs[random]);
return rand_song;
}
console.log(functions.random_song())
got it working, thanks for the help <3
Following is the code which use Promise to handle file read and data processing task asynchronously
const fs = require('fs')
const functions = {}
functions.random_song = () => {
return new Promise((resolve, reject) => {
fs.readFile('auto_playlist.txt', 'utf8', function(err, data) {
if (err) {
console.log(err)
reject(err)
}
let songs = data.split('\n');
songs.splice(-1, 1);
var random = Math.floor(Math.random()*songs.length);
let random_song = (songs[random]);
resolve(random_song);
})
})
}
functions.random_song()
.then(song => console.log('Song Name', song))
.catch(err => console.error('Error fetching songs', err))
console.log('It will be executed before promise is resolved')
fs.readFile is asynchronus and your function will return before the file is read. Use fs.readFileSync instead. Like this:
functions.random_song = () => {
const data = fs.readFileSync('auto_playlist.txt', 'utf8');
let songs = data.split('\n');
songs.splice(-1, 1);
var random = Math.floor(Math.random()*songs.length);
let random_song = (songs[random]);
return random_song;
}

Resources