Can't get NodeJS to write anything to a writable stream - node.js

Running NodeJS (v8.16.2) locally on the command line. I scrape an e-commerce website, gather the relevant information into a data structure, and then try to write it manually into a plain-text CSV file (my records don't have a fixed set of fields) by creating a write stream. This last step isn't working.
// Other stuff
const exitHandler = function(options, exitCode) {
    if (exitCode || exitCode !== 0) console.log(exitCode);
    // Other stuff
    writeToCsv();
    if (options.exit) process.exit();
}
const writeToCsv = function() {
    let ws = fs.createWriteStream('./final-data.csv');
    const crlf = '\r\n'; // CRLF line endings
    for (let seller in finalData.sellers) {
        ws.write('Seller:,' + seller + crlf + ',Brands:');
        for (let brand of finalData.sellers[seller].brands) {
            ws.write(',' + brand);
        }
        ws.write(crlf + ',Addresses:');
        for (let addr of finalData.sellers[seller].addrs) {
            ws.write(',"' + addr + '"');
        }
        ws.write(crlf);
    }
    ws.on('finish', () => {
        console.log('Wrote all data'); // never prints this
    });
    ws.end();
}
process.on('exit', exitHandler.bind(null, { cleanup: true }));
I suspect this is because NodeJS exits before the data has been flushed to disk, but I can't figure out a way to make NodeJS flush the data synchronously.
PS: new to NodeJS
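For what it's worth, a minimal sketch of the synchronous route (an assumption about the fix, not something from the original post): 'exit' handlers run after the event loop has stopped, so async stream writes can never flush there, but building the CSV in memory and writing it with fs.writeFileSync blocks until the data reaches the file:

const fs = require('fs');

const writeToCsvSync = function() {
    const crlf = '\r\n';
    let out = '';
    for (let seller in finalData.sellers) {
        out += 'Seller:,' + seller + crlf + ',Brands:';
        for (let brand of finalData.sellers[seller].brands) out += ',' + brand;
        out += crlf + ',Addresses:';
        for (let addr of finalData.sellers[seller].addrs) out += ',"' + addr + '"';
        out += crlf;
    }
    // Synchronous write: completes before returning, so it is safe
    // inside an 'exit' handler where async I/O never gets a chance to run.
    fs.writeFileSync('./final-data.csv', out);
}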

Please check out the example below and integrate it. As per your comment, I updated the code (this uses the csv-writer package):
const createCsvWriter = require('csv-writer').createObjectCsvWriter;

async function writeDataInCSV(filePath, dynamicHeader, data) {
    const csvWriter = createCsvWriter({
        path: filePath,
        header: dynamicHeader
    });
    await csvWriter.writeRecords(data);
    console.log('The CSV file was written successfully');
}

writeDataInCSV('out.csv', dynamicHeader, Data);
Here, build the array of headers and the data, then pass both to the writeDataInCSV method.
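For reference, a sketch of what dynamicHeader and Data could look like for csv-writer (the field names are illustrative assumptions, not from the original answer): the header is an array of { id, title } objects, and each record is an object keyed by those ids.

// Hypothetical header and records for the call above:
const dynamicHeader = [
    { id: 'seller', title: 'Seller' },
    { id: 'brands', title: 'Brands' },
    { id: 'addresses', title: 'Addresses' }
];

const Data = [
    { seller: 'Acme', brands: 'BrandA; BrandB', addresses: '123 Main St' },
    { seller: 'Globex', brands: 'BrandC', addresses: '456 Side Ave' }
];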

Related

Execute Loop after ReadStream pipe has finished

Not sure if the title is quite right, as I'm pretty much stumped (in over my head)...
I'm trying to pull the headers from a CSV, as part of an automation test, to then validate those headers. I'm using csv-parser to read the CSV file.
Once I've gathered the headers, I'm doing a simple assertion to go through and assert against each one, using the string values I've entered into my test script.
However, currently the for loop executes before the CSV has been read and the headers gathered. I'm not sure how to make it wait for the read to finish before executing the loop.
const fs = require('fs');
const csv = require('csv-parser');

let headerArray = null;
const headerValues = values.split(',');
browser.pause(10000);
fs.createReadStream("" + global.downloadDir + "\\" + fs.readdirSync(global.downloadDir)[0])
    .pipe(csv())
    .on('headers', function (headers) {
        return headerArray = headers;
    });

for (var i = 0; i < headerValues.length; i++) {
    assert.equal(headerValues[i], headerArray[i]);
}
The solution is to run the for loop with your assertions inside the 'headers' event handler, e.g.:
var results = []; // we'll collect the rows from the csv into this array
var rs = fs.createReadStream("" + global.downloadDir + "\\" + fs.readdirSync(global.downloadDir)[0])
    .pipe(csv())
    .on('headers', function (headers) {
        try {
            for (var i = 0; i < headerValues.length; i++) {
                assert.equal(headerValues[i], headers[i]);
            }
        } catch (err) {
            // an assertion failed, so let's end the stream
            // (triggering the 'error' event)
            rs.destroy(err);
        }
    }).on('data', function (data) {
        results.push(data);
    }).on('end', function () {
        // the 'end' event fires when the csv has finished parsing,
        // so you can do something useful with the `results` array here
    }).on('error', function (err) {
        // otherwise, the 'error' event will likely have fired
        console.log('something went wrong:', err);
    });
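If your test runner supports async tests, the same idea can be wrapped in a Promise so the header assertions can be awaited instead of nested (a sketch under that assumption):

function readHeaders(path) {
    return new Promise(function (resolve, reject) {
        fs.createReadStream(path)
            .pipe(csv())
            .on('headers', resolve)
            .on('error', reject);
    });
}

// usage inside an async test:
// const headers = await readHeaders(filePath);
// headerValues.forEach((v, i) => assert.equal(v, headers[i]));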

Why does fs.watch only trigger when I open the file being watched?

I'm currently trying to implement a live file feed that writes data to connected web clients. This works perfectly fine when I edit the document directly; updates are sent immediately.
Instead of writing to the file manually, I created another process to handle that step. The issue I'm having is that when my process writes to this file, the changes are not being detected. Unless I explicitly open the file, the changes made are not detected.
I also know that fs.watch is inconsistent, but what would be the difference between manual editing and automation?
// function to start the process and check for changes in the file
const start = function() {
    fs.open(file, 'r', (err, fd) => {
        if (err) {
            return setTimeout(start, 1000);
        }
        fs.watch(file, (event, filename) => {
            if (event === "change") {
                console.log('change detected');
                // function that sends client messages
                sendMessage(fd);
            }
        });
    });
}
Here is the code that automates the process of writing to the file:
const fs = require('fs');
const file = 'file.txt';
const writeStream = fs.createWriteStream(file, {
    flags: "a"
});

const cleanBuffer = function(len) {
    let buf = Buffer.alloc(len);
    buf.fill('\0');
    return buf;
}

const check = function() {
    let newData = `data being written`;
    const buffer = cleanBuffer(newData.length);
    buffer.write(newData, 'ascii');
    writeStream.write(buffer);
    setTimeout(check, 10000);
}
check(); // kick off the write loop
I tried viewing the file from the file explorer, and whenever I access the folder this file is contained in, the change is detected... is this actually watching the file?
I did some research and it looks as though on Windows this is working as expected, because:
On Windows systems, this feature depends on ReadDirectoryChangesW.
I had to change the function to use fs.watchFile instead, which is working for me, although fs.watch is the recommended API where it works.
More can be read here: https://nodejs.org/docs/latest-v11.x/api/fs.html#fs_availability
The code now reflects as:
// function to start the process and check for changes in the tweets file
const start = function() {
    fs.open(file, 'r', (err, fd) => {
        if (err) {
            return setTimeout(start, 1000);
        }
        fs.watchFile(file, (curr, prev) => {
            // compare timestamps numerically: two Date objects are never
            // strictly equal, so `curr.mtime !== prev.mtime` is always true
            if (curr.mtime.getTime() !== prev.mtime.getTime()) {
                console.log('change detected');
                sendMessage(fd);
            }
        });
    });
}
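Worth noting: fs.watchFile polls the file with stat() calls (the default interval is roughly 5 seconds), so detection can lag behind the write. The polling interval is tunable via the options argument, e.g.:

// Poll every second instead of the default ~5s
// (faster detection, at the cost of more stat() calls):
fs.watchFile(file, { interval: 1000 }, (curr, prev) => {
    if (curr.mtime.getTime() !== prev.mtime.getTime()) {
        console.log('change detected');
        sendMessage(fd);
    }
});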

Node.js function not running in order. Error: Unhandled stream error in pipe

I updated the function to create the CSV file but now I'm getting an error:
In upload function
internal/streams/legacy.js:57
throw er; // Unhandled stream error in pipe.
^
Error: ENOENT: no such file or directory, open 'C:\Users\shiv\WebstormProjects\slackAPIProject\billingData\CSV\1548963844106output.csv'
var csvFilePath = '';
var JSONFilePath = '';

function sendBillingData() {
    var message = '';
    axios.get(url, {
        params: {
            token: myToken
        }
    }).then(function (response) {
        message = response.data;
        fields = billingDataFields;
        // saveFiles(message, fields, 'billingData/');
        saveFilesNew(message, fields, 'billingData/');
        var file = fs.createReadStream(__dirname + '/' + csvFilePath); // <-- make sure this path is correct
        console.log(__dirname + '/' + csvFilePath);
        uploadFile(file);
    }).catch(function (error) {
        console.log(error);
    });
}
The saveFilesNew function is:
function saveFilesNew(message, options, folder) {
    try {
        const passedData = message;
        var relevantData = '';
        if (folder == 'accessLogs/') {
            const loginsJSON = message.logins;
            relevantData = loginsJSON;
            console.log(loginsJSON);
        }
        if (folder == 'billingData/') {
            relevantData = passedData.members;
            const profile = passedData.members[0].profile;
        }
        // Save JSON to the output folder
        var date = Date.now();
        var directoryPath = folder + 'JSON/' + date + "output";
        JSONFilePath = directoryPath + '.json';
        fs.writeFileSync(JSONFilePath, JSON.stringify(message, null, 4), function(err) {
            if (err) {
                console.log(err);
            }
        });
        // parse JSON onto the CSV
        const json2csvParser = new Json2csvParser({ fields });
        const csv = json2csvParser.parse(relevantData);
        // console.log(csv);
        // function to process the CSV onto the file
        var directoryPath = folder + 'CSV/' + date + "output";
        csvFilePath = directoryPath + '.csv';
        let data = [];
        let columns = {
            real_name: 'real_name',
            display_name: 'display_name',
            email: 'email',
            account_type: 'account_type'
        };
        var id = passedData.members[0].real_name;
        console.log(id);
        console.log("messageLength is " + Object.keys(message.members).length);
        for (var i = 0; i < Object.keys(message.members).length; i++) {
            console.log("value of i is " + i);
            var display_name = passedData.members[i].profile.display_name;
            var real_name = passedData.members[i].profile.real_name_normalized;
            var email = passedData.members[i].profile.email;
            var account_type = 'undefined';
            console.log("name: " + real_name);
            if (passedData.members[i].is_owner) {
                account_type = 'Org Owner';
            } else if (passedData.members[i].is_admin) {
                account_type = 'Org Admin';
            } else if (passedData.members[i].is_bot) {
                account_type = 'Bot';
            } else account_type = 'User';
            data.push([real_name, display_name, email, account_type]);
        }
        console.log(data);
        stringify(data, { header: true, columns: columns }, (err, output) => {
            if (err) throw err;
            fs.writeFileSync(csvFilePath, output, function(err) {
                console.log(output);
                if (err) {
                    console.log(err);
                }
                console.log('my.csv saved.');
            });
        });
    } catch (err) {
        console.error(err);
    }
}
The upload file function is:
function uploadFile(file) {
    console.log("In upload function");
    const form = new FormData();
    form.append('token', botToken);
    form.append('channels', 'testing');
    form.append('file', file);
    axios.post('https://slack.com/api/files.upload', form, {
        headers: form.getHeaders()
    }).then(function (response) {
        var serverMessage = response.data;
        console.log(serverMessage);
    });
}
So I think the error is caused by Node trying to upload the file before it has been created. I feel like this has something to do with the asynchronous nature of Node.js, but I can't work out how to rectify the code. Please let me know how to correct this, and mention any improvements to the code structure/design too.
Thanks!
You don't wait for the callback provided to stringify to be executed, and that's where you create the file. (Assuming this stringify function really does accept a callback.)
Using callbacks (you could make this cleaner with promises and the neat async/await syntax, but let's stick to callbacks here), it should look more like:
function sendBillingData() {
    ...
    // this callback we'll use to know when the file writing is done, and to get the file path
    saveFilesNew(message, fields, 'billingData/', function(err, csvFilePathArgument) {
        // this we will execute when saveFilesNew calls it, not when saveFilesNew returns, see below
        uploadFile(fs.createReadStream(__dirname + '/' + csvFilePathArgument));
    });
}

// let's name this callback... "callback".
function saveFilesNew(message, options, folder, callback) {
    ...
    var csvFilePath = ...; // local variable only instead of your global
    ...
    stringify(data, { header: true, columns: columns }, (err, output) => {
        if (err) throw err; // or return callback(err);
        fs.writeFile(csvFilePath, output, function(err) { // NOT writeFileSync, or no callback needed
            console.log(output);
            if (err) {
                console.log(err);
                // callback(err); may be a useful approach for error-handling at a higher level
            }
            console.log('my.csv saved.'); // yes, NOW the CSV is saved, not before this executes! Hence:
            callback(null, csvFilePath); // no error, clean process, pass the file path
        });
    });
    console.log("This line is executed before stringify's callback is called!");
    return; // implicitly, yes, yet still synchronous and that's why your version crashes
}
Using callbacks that are called only when the expected events happen (a file is done writing, a buffer/string is done transforming...) lets JS keep executing code in the meantime. And it does keep executing, so when you need the result of an async operation, you have to tell JS to wait for it before running the code that depends on it.
Also, since you can pass data when calling back (it's just a function), here I could avoid relying on a global csvFilePath. Using higher-level variables makes things monolithic: for example, you could not move saveFilesNew into a dedicated file where you keep your toolkit of file-related functions.
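For completeness, a minimal sketch of the same flow with promises and async/await, assuming stringify really does follow the (err, result) callback convention so that it can be promisified:

const { promisify } = require('util');
const fs = require('fs');
const stringifyAsync = promisify(stringify);

async function saveFilesNew(message, options, folder) {
    // ... build `data`, `columns` and `csvFilePath` as before ...
    const output = await stringifyAsync(data, { header: true, columns: columns });
    await fs.promises.writeFile(csvFilePath, output);
    return csvFilePath; // resolves only once the file is on disk
}

async function sendBillingData() {
    const response = await axios.get(url, { params: { token: myToken } });
    const csvFilePath = await saveFilesNew(response.data, billingDataFields, 'billingData/');
    uploadFile(fs.createReadStream(__dirname + '/' + csvFilePath));
}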
Finally, if your global process is like:
function aDayAtTheOffice() {
    sendBillingData();
    getCoffee();
}
then you don't need to wait for the billing data to be processed before starting to make coffee. However, if your boss told you that you could NOT get a coffee until the billing data was settled, then your process would look like:
function aDayAtTheOffice() {
    sendBillingData(function (err) {
        // if (err) let's do nothing here: you wanted a coffee anyway, right?
        getCoffee();
    });
}
(Note that taking a potential error as the first argument and data as the second is a callback convention, nothing mandatory.)
IMHO you should read about scope (the callback argument can still be accessed at a time when the call to saveFilesNew is already done and forgotten!), and about the asynchronous nature of No... JavaScript. ;) Those are the meaningful keywords, and then Google is your buddy, your friend, your Big Brother.

NodeJS - Read Buffer line by line synchronously => toString() failed

I have been struggling, and searching, for a long time. I know there are answers about this, but none of them works.
I used fs.createReadStream and readline for this, but it relies on fs.close() to stop the FILE reading, so that approach doesn't work at all when used on a buffer: the reading just runs to the end with no way to interrupt it...
Then I used this :
const stream = require('stream');
const es = require('event-stream');

let bufferStream = new stream.PassThrough();
bufferStream.end(hexaviaFile.buffer);
bufferStream
    .pipe(require('split')())
    .pipe(es.mapSync(function(line) {
        // pause the readstream
        bufferStream.pause();
        // DO WHATEVER WITH YOUR LINE
        console.log('line content = ' + line);
        // resume the readstream, possibly from a callback
        bufferStream.resume();
    }).on('error', function(err) {
        console.log('Error while reading file. ' + err);
    }).on('end', function() {
        console.log('end event !');
    }).on('close', function() {
        console.log('close event !');
    })
);
// toString() Failed
// toString() Failed
I get the [toString() Failed] error. Searching around, it apparently appears when the buffer is larger than the maximum Node buffer size.
So I checked :
var buffer = require('buffer');
console.log('buffer.kMaxLength = ', buffer.kMaxLength); // 2147483647
console.log('hexaviaFile.buffer.byteLength = ', hexaviaFile.buffer.byteLength); // => 413567671
That's not the case, as the numbers show:
* max buffer size = 2 GB
* my buffer = 0.4 GB
I also tried some different libraries, but:
1. I want to keep memory usage as low as possible
2. I need the reading to be perfectly SYNC. In other words, I have some processing to do after the file is read, and all of the reading needs to complete before moving on to the next steps.
I don't know what to do :) Any kind of help appreciated.
Regards.
I forgot about this post. I found a way to achieve this without errors.
It's given here : https://github.com/request/request/issues/2826
1st, create a splitter to read string chunks:
const { Transform } = require('stream');

class Splitter extends Transform {
    constructor(options) {
        super(options);
        this.splitSize = options.splitSize;
        this.buffer = Buffer.alloc(0);
        this.continueThis = true;
    }

    stopIt() {
        this.continueThis = false;
    }

    _transform(chunk, encoding, cb) {
        this.buffer = Buffer.concat([this.buffer, chunk]);
        while ((this.buffer.length > this.splitSize || this.buffer.length === 1) && this.continueThis) {
            try {
                let chunk = this.buffer.slice(0, this.splitSize);
                this.push(chunk);
                this.buffer = this.buffer.slice(this.splitSize);
                if (this.buffer[0] === 26) {
                    console.log('EOF : ' + this.buffer[0]);
                }
            } catch (err) {
                console.log('ERR OCCURED => ', err);
                break;
            }
        }
        console.log('WHILE FINISHED');
        cb();
    }
}
Then pipe it to your stream:
let bufferStream = new stream.PassThrough();
bufferStream.end(hugeBuffer);
let splitter = new Splitter({ splitSize: 170 }); // in my case the lines are 170 chars long, so I process them line by line
let lineNr = 0;
bufferStream
    .pipe(splitter)
    .on('data', async function(line) {
        line = line.toString().trim();
        splitter.pause(); // pause the stream so you can perform long processing with await
        lineNr++;
        if (lineNr === 1) {
            // DO stuff with 1st line
        } else {
            splitter.stopIt(); // break the stream and stop reading, so here we only read the 1st line
        }
        splitter.resume(); // resume the stream so you can process the next chunk
    }).on('error', function(err) {
        console.log('Error while reading file. ' + err);
        // whatever
    }).on('end', async function() {
        console.log('end event');
        // Stream has ended, do whatever...
    });
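For what it's worth, with newline-delimited data, readline.createInterface accepts any Readable, so the same buffer can also be consumed line by line through a PassThrough and stopped early with rl.close() (a sketch reusing hugeBuffer from above):

const readline = require('readline');
const { PassThrough } = require('stream');

const input = new PassThrough();
input.end(hugeBuffer);

const rl = readline.createInterface({ input });
rl.on('line', (line) => {
    // process each line; call rl.close() to stop reading early
});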

Node.js express + busboy file type check

I'm trying to implement a file upload in Node.js 4.x using express with busboy.
I'm already able to upload files and store them in an Azure Blob Storage.
Now I'd like to verify the file type before storing it to Azure, and reject any file which is not valid.
I'd like to do the validation using magic numbers. I found
const fileType = require('file-type');, which determines the file type for me.
Now I'm trying to make this as efficient as possible, but here is where I'm struggling:
I want to pipe the file stream directly to Azure. But before that, I need to read the first 5 bytes from the stream into a buffer which is processed by file-type.
Reading from the stream and then piping to Azure surely does not work. After some research I found a solution: piping the file into 2 PassThrough streams. But now I'm struggling to handle those 2 streams correctly.
const fileType = require('file-type');
const pass = require('stream').PassThrough;
//...
req.busboy.on('file', function (fieldname, file, filename) {
    console.log("Uploading: " + filename);
    var b = new pass();
    var c = new pass();
    file.pipe(b);
    file.pipe(c);
    var type = null;

    b.on('readable', function() {
        b.pause();
        if (type === null) {
            var chunk = b.read(5);
            type = fileType(chunk) || false;
            b.end();
        }
    });

    b.on('finish', function() {
        if (type && ['jpg', 'png', 'gif'].indexOf(type.ext) !== -1) {
            var blobStream = blobSvc.createWriteStreamToBlockBlob(storageName,
                blobName,
                function (error) {
                    if (error) console.log('blob upload error', error);
                    else console.log('blob upload complete');
                });
            c.pipe(blobStream);
        } else {
            console.error("Rejected file of type " + type);
        }
    });
});
This solution sometimes works, and sometimes there is a "write after end" error.
Also, I think the streams are not properly closed, because normally, after a request, express logs something like this on the console:
POST /path - - ms - -
But this log message now arrives 30-60s after "blob upload complete", probably due to some timeout.
Any idea how to fix this?
You don't need to add additional streams into the mix. Just unshift() the consumed portion back onto the stream: readable.unshift() pushes the chunk back into the stream's internal buffer, so the subsequent pipe() still sees the complete file. For example:
const fileType = require('file-type');

req.busboy.on('file', function (fieldname, file, filename) {
    function readFirstBytes() {
        var chunk = file.read(5);
        if (!chunk)
            return file.once('readable', readFirstBytes);
        var type = fileType(chunk);
        if (type.ext === 'jpg' || type.ext === 'png' || type.ext === 'gif') {
            const blobStream = blobSvc.createWriteStreamToBlockBlob(
                storageName,
                blobName,
                function (error) {
                    if (error)
                        console.log('blob upload error', error);
                    else
                        console.log('blob upload complete');
                }
            );
            file.unshift(chunk);
            file.pipe(blobStream);
        } else {
            console.error('Rejected file of type ' + type);
            file.resume(); // Drain file stream to continue processing form
        }
    }
    readFirstBytes();
});
