Not sure if the title is quite right here, as I'm pretty much stumped (in over my head)...
I'm trying to pull the headers from a CSV file, as part of an automation test, to validate those headers. I'm using csv-parse to read the CSV file.
Once I've gathered the headers, I do a simple assertion against each one, using the string values I've entered in my test script.
However, the for loop currently executes before the CSV has been read and the headers gathered. I'm not sure how to wait for that to finish before running the loop.
const fs = require('fs');
const csv = require('csv-parser');
let headerArray = null;
const headerValues = values.split(',');
browser.pause(10000);
fs.createReadStream(""+global.downloadDir + "\\" + fs.readdirSync(global.downloadDir)[0])
.pipe(csv())
.on('headers', function (headers) {
return headerArray = headers
})
for(var i =0; i<headerValues.length; i++){
assert.equal(headerValues[i], headerArray[i]);
}
The solution is to run the for loop with your assertions inside the 'headers' event handler, eg:
var results = [] // we'll collect the rows from the csv into this array
var rs = fs.createReadStream(""+global.downloadDir + "\\" + fs.readdirSync(global.downloadDir)[0])
.pipe(csv())
.on('headers', function (headers) {
try {
for(var i =0; i<headerValues.length; i++){
assert.equal(headerValues[i], headers[i]);
}
} catch (err) {
// an assertion failed so let's end
// the stream (triggering the 'error' event)
rs.destroy(err)
}
}).on('data', function(data) {
results.push(data)
}).on('end', function() {
//
// the 'end' event will fire when the csv has finished parsing.
// so you can do something useful with the `results` array here...
//
}).on('error', function(err) {
// otherwise, the 'error' event will have likely fired.
console.log('something went wrong:', err)
})
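If you'd rather keep the assertions in the main body of the test, another option (a sketch, not part of the original answer) is to wrap the header read in a Promise and await it before looping:
// resolves once csv-parser emits the 'headers' event
function readHeaders(filePath) {
    return new Promise((resolve, reject) => {
        fs.createReadStream(filePath)
            .pipe(csv())
            .on('headers', resolve)
            .on('error', reject);
    });
}

// usage inside an async test function:
// const headerArray = await readHeaders(global.downloadDir + "\\" + fs.readdirSync(global.downloadDir)[0]);
// for (var i = 0; i < headerValues.length; i++) {
//     assert.equal(headerValues[i], headerArray[i]);
// }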
I'm running Node.js (v8.16.2) locally on the command line. I scrape an e-commerce website, gather the relevant information into a data structure, then try to write it to a plain-text CSV file (my records don't have a fixed set of fields) manually by creating a write stream. This last step isn't working.
// Other stuff
const exitHandler = function(options, exitCode) {
if (exitCode || exitCode !== 0) console.log(exitCode);
// Other stuff
writeToCsv();
if (options.exit) process.exit();
}
const writeToCsv = function() {
let ws = fs.createWriteStream('./final-data.csv');
const crlf = '\n\r'; // might need to reverse this
// Please ignore the weird layout
for (let seller in finalData.sellers) {
ws.write('Seller:,' + seller + crlf + ',Brands:');
for (let brand of finalData.sellers[seller].brands) {
ws.write(',' + brand);
}
ws.write(crlf + ',Addresses:');
for (let addr of finalData.sellers[seller].addrs) {
ws.write(',"' + addr + '"');
}
ws.write(crlf);
}
ws.on('finish', () => {
console.log('Wrote all data'); // never prints this
});
ws.end();
}
process.on('exit', exitHandler.bind(null,{cleanup:true}));
I suspect this is because Node.js exits before the data has been flushed to disk, but I can't figure out a way to make Node.js flush the data synchronously.
PS: new to NodeJS
Please check out the example below and integrate it. As per your comment, I updated the code:
// assumed: createCsvWriter comes from the csv-writer package's createObjectCsvWriter
const createCsvWriter = require('csv-writer').createObjectCsvWriter;

async function writeDataInCSV(filePath, dynamicHeader, data) {
    const csvWriter = createCsvWriter({
        path: filePath,
        header: dynamicHeader
    });
    await csvWriter.writeRecords(data);
    console.log('The CSV file was written successfully');
}

writeDataInCSV('out.csv', dynamicHeader, Data);
Here, set up the array of headers, build the data, and pass them to the writeDataInCSV method.
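For instance, a minimal sketch of what dynamicHeader and Data could look like for the call above (the field names here are made up; csv-writer expects header entries with id and title keys, and one data object per row keyed by those ids):
const dynamicHeader = [
    { id: 'seller', title: 'Seller' },
    { id: 'brand', title: 'Brand' },
    { id: 'address', title: 'Address' }
];

// one object per row, keyed by the ids defined in dynamicHeader
const Data = [
    { seller: 'Acme', brand: 'BrandA', address: '123 Main St' },
    { seller: 'Acme', brand: 'BrandB', address: '456 Side Ave' }
];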
I updated the function to create the CSV file but now I'm getting an error:
In upload function
internal/streams/legacy.js:57
throw er; // Unhandled stream error in pipe.
^
Error: ENOENT: no such file or directory, open 'C:\Users\shiv\WebstormProjects\slackAPIProject\billingData\CSV\1548963844106output.csv'
var csvFilePath = '';
var JSONFilePath = '';
function sendBillingData(){
var message = '';
axios.get(url, {
params: {
token: myToken
}
}).then(function (response) {
message = response.data;
fields = billingDataFields;
// saveFiles(message, fields, 'billingData/');
saveFilesNew(message, fields, 'billingData/');
var file = fs.createReadStream(__dirname + '/' + csvFilePath); // <--make sure this path is correct
console.log(__dirname + '/' + csvFilePath);
uploadFile(file);
})
.catch(function (error) {
console.log(error);
});
}
The saveFilesNew function is:
function saveFilesNew(message, options, folder){
try {
const passedData = message;
var relevantData='';
if (folder == 'accessLogs/'){
const loginsJSON = message.logins;
relevantData = loginsJSON;
console.log(loginsJSON);
}
if(folder == 'billingData/'){
relevantData = passedData.members;
const profile = passedData.members[0].profile;
}
//Save JSON to the output folder
var date = Date.now();
var directoryPath = folder + 'JSON/' + date + "output";
JSONFilePath = directoryPath + '.json';
fs.writeFileSync(JSONFilePath, JSON.stringify(message, null, 4), function(err) {
if (err) {
console.log(err);
}
});
//parse JSON onto the CSV
const json2csvParser = new Json2csvParser({ fields });
const csv = json2csvParser.parse(relevantData);
// console.log(csv);
//function to process the CSV onto the file
var directoryPath = folder + 'CSV/' + date + "output";
csvFilePath = directoryPath + '.csv';
let data = [];
let columns = {
real_name: 'real_name',
display_name: 'display_name',
email: 'email',
account_type: 'account_type'
};
var id = passedData.members[0].real_name;
console.log(id);
console.log("messageLength is" +Object.keys(message.members).length);
for (var i = 0; i < Object.keys(message.members).length; i++) {
console.log("value of i is" + i);
var display_name = passedData.members[i].profile.display_name;
var real_name = passedData.members[i].profile.real_name_normalized;
var email = passedData.members[i].profile.email;
var account_type = 'undefined';
console.log("name: " + real_name);
if(passedData.members[i].is_owner){
account_type = 'Org Owner';
}
else if(passedData.members[i].is_admin){
account_type = 'Org Admin';
}
else if(passedData.members[i].is_bot){
account_type = 'Bot'
}
else account_type = 'User';
data.push([real_name, display_name, email, account_type]);
}
console.log(data);
stringify(data, { header: true, columns: columns }, (err, output) => {
if (err) throw err;
fs.writeFileSync(csvFilePath, output, function(err) {
console.log(output);
if (err) {
console.log(err);
}
console.log('my.csv saved.');
});
});
} catch (err) {
console.error(err);
}
}
The upload file function is:
function uploadFile(file){
console.log("In upload function");
const form = new FormData();
form.append('token', botToken);
form.append('channels', 'testing');
form.append('file', file);
axios.post('https://slack.com/api/files.upload', form, {
headers: form.getHeaders()
}).then(function (response) {
var serverMessage = response.data;
console.log(serverMessage);
});
}
So I think the error is caused because Node is trying to upload the file before it has been created. I feel like this has something to do with the asynchronous nature of Node.js, but I can't work out how to rectify the code. Please let me know how to correct this, and mention any improvements to the code structure/design too.
Thanks!
You don't wait for the callback provided to stringify to be executed, and that's where you create the file. (Assuming this stringify function really does accept a callback.)
Using callbacks (you could make this cleaner with promises and the neat async/await syntax, but let's stick to callbacks here), it should look more like:
function sendBillingData() {
...
// this callback we'll use to know when the file writing is done, and to get the file path
saveFilesNew(message, fields, 'billingData/', function(err, csvFilePathArgument) {
// this we will execute when saveFilesNew calls it, not when saveFilesNew returns, see below
uploadFile(fs.createReadStream(__dirname + '/' + csvFilePathArgument))
});
}
// let's name this callback... "callback".
function saveFilesNew(message, options, folder, callback) {
...
var csvFilePath = ...; // local variable only instead of your global
...
stringify(data, { header: true, columns: columns }, (err, output) => {
if (err) throw err; // or return callback(err);
fs.writeFile(csvFilePath, output, function(err) { // NOT writeFileSync, or no callback needed
console.log(output);
if (err) {
console.log(err);
// callback(err); may be a useful approach for error-handling at a higher level
}
console.log('my.csv saved.'); // yes, NOW the CSV is saved, not before this executes! Hence:
callback(null, csvFilePath); // no error, clean process, pass the file path
});
});
console.log("This line is executed before stringify's callback is called!");
return; // implicitly, yes, yet still synchronous and that's why your version crashes
}
Using callbacks that are called only when the expected events happen (a file is done writing, a buffer/string is done transforming...) allows JS to keep executing code in the meantime. And it does keep executing code, so when you need data from asynchronous code, you have to tell JS that it must be done before your next piece executes.
Also, since you can pass data when calling back (it's just a function), here I could avoid relying on a global csvFilePath. Using higher-level variables makes things monolithic; for example, you could not move saveFilesNew to a dedicated file where you keep your toolkit of file-related functions.
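For instance, a minimal sketch of that kind of split (the file name and the simplified helper below are hypothetical, just to show the path travelling through the callback instead of a global):
// file-toolkit.js (hypothetical module for file-related helpers)
const fs = require('fs');

function saveCsv(csvFilePath, output, callback) {
    fs.writeFile(csvFilePath, output, function(err) {
        if (err) return callback(err);
        callback(null, csvFilePath); // hand the path back instead of setting a global
    });
}

module.exports = { saveCsv: saveCsv };

// main.js
const { saveCsv } = require('./file-toolkit');

saveCsv('billingData/CSV/output.csv', 'real_name,email\n', function(err, savedPath) {
    if (err) return console.error(err);
    console.log('CSV written to', savedPath);
});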
Finally, if your global process is like:
function aDayAtTheOffice() {
sendBillingData();
getCoffee();
}
then you don't need to wait for the billing data to be processed before starting to make coffee. However, if your boss told you that you could NOT get a coffee until the billing data was settled, then your process would look like:
function aDayAtTheOffice() {
sendBillingData(function (err) {
// if (err) let's do nothing here: you wanted a coffee anyway, right?
getCoffee();
});
}
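A sketch of how sendBillingData could accept that callback, reusing the pieces from earlier in this answer (url, myToken, billingDataFields, and the callback-style saveFilesNew); treat it as an illustration rather than a drop-in:
function sendBillingData(done) {
    axios.get(url, { params: { token: myToken } })
        .then(function (response) {
            saveFilesNew(response.data, billingDataFields, 'billingData/', function(err, csvFilePathArgument) {
                if (err) return done(err);
                uploadFile(fs.createReadStream(__dirname + '/' + csvFilePathArgument));
                done(null); // or call done once the upload itself has completed, if that matters
            });
        })
        .catch(done);
}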
(Note that callbacks having potential error as first arg and data as second arg is a convention, nothing mandatory.)
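A minimal sketch of that convention (the function and file names below are made up for the example):
const fs = require('fs');

// error-first callback: the error (or null) comes first, the data second
function loadGreeting(path, callback) {
    fs.readFile(path, 'utf8', function(err, contents) {
        if (err) return callback(err);    // failure: pass the error, no data
        callback(null, contents.trim());  // success: null error, then the data
    });
}

loadGreeting('./greeting.txt', function(err, greeting) {
    if (err) return console.error('failed:', err);
    console.log(greeting);
});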
IMHO you should read about scope (the callback argument can be accessed at a time when the call to saveFilesNew is already done and forgotten!) and about the asynchronous nature of No... JavaScript. ;) Those are the meaningful keywords, and then Google is your buddy, your friend, your Big Brother.
I have been struggling with this, and searching, for a long time. I know there are answers about this, but none of them works.
I used fs.createReadStream and readline for this, but it relies on fs.close() to stop reading the file, so it doesn't work at all when used on a buffer: the reading runs through everything with no possibility of interrupting it...
Then I tried this:
const stream = require('stream');
const es = require('event-stream'); // assumed: es.mapSync below comes from the event-stream package

let bufferStream = new stream.PassThrough();
bufferStream.end(hexaviaFile.buffer);
bufferStream
.pipe(require('split')())
.pipe(es.mapSync(function(line){
// pause the readstream
bufferStream.pause();
// DO WHATEVER WITH YOUR LINE
console.log('line content = ' + line);
// resume the readstream, possibly from a callback
bufferStream.resume();
}).on('error', function(err){
console.log('Error while reading file.' + err);
}).on('end', function(){
console.log('end event !');
}).on('close', function(){
console.log('close event !');
})
);
I get the [toString() Failed] error. From searching around, it apparently appears when the buffer is larger than Node's maximum buffer size.
So I checked :
var buffer = require('buffer');
console.log('buffer.kMaxLength = ', buffer.kMaxLength); // 2147483647
console.log('hexaviaFile.buffer.byteLength = ', hexaviaFile.buffer.byteLength); // => 413567671
It's not the case, as you can see from the numbers above:
* max buffer size = 2 GB
* my buffer = 0.4 GB
I also tried some different libraries to do this, but:
1. I want to keep memory usage as low as possible
2. I need the reading to be perfectly synchronous; in other words, I have some processing to do after the file is read, and I need all the reading to complete before moving on to the next steps.
I don't know what to do :) Any kind of help appreciated.
Regards.
I forgot about this post. I found a way to achieve this without errors.
It's given here: https://github.com/request/request/issues/2826
First, create a splitter Transform to read fixed-size chunks:
const { Transform } = require('stream');

class Splitter extends Transform {
constructor(options){
super(options);
this.splitSize = options.splitSize;
this.buffer = Buffer.alloc(0);
this.continueThis = true;
}
stopIt() {
this.continueThis = false;
}
_transform(chunk, encoding, cb){
this.buffer = Buffer.concat([this.buffer, chunk]);
while ((this.buffer.length > this.splitSize || this.buffer.length === 1) && this.continueThis){
try {
let chunk = this.buffer.slice(0, this.splitSize);
this.push(chunk);
this.buffer = this.buffer.slice(this.splitSize);
if (this.buffer[0] === 26){
console.log('EOF : ' + this.buffer[0]);
}
} catch (err) {
console.log('ERR OCCURED => ', err);
break;
}
}
console.log('WHILE FINISHED');
cb();
}
}
Then pipe your stream through it:
let bufferStream = new stream.PassThrough();
bufferStream.end(hugeBuffer);
let splitter = new Splitter({splitSize : 170}); // In my case I have 170 length lines, so I want to process them line by line
let lineNr = 0;
bufferStream
.pipe(splitter)
.on('data', async function(line){
line = line.toString().trim();
splitter.pause(); // pause the stream so you can perform long-running processing with await
lineNr++;
if (lineNr === 1){
// DO stuff with 1st line
} else {
splitter.stopIt(); // Break the stream and stop reading so we just read 1st line
}
splitter.resume(); // resume the stream so you can process the next chunk
}).on('error', function(err){
console.log('Error while reading file.' + err);
// whatever
}).on('end', async function(){
console.log('end event');
// Stream has ended, do whatever...
});
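As an aside, on more recent Node versions (10+) the same "process one chunk before reading the next" flow can be written with async iteration over the splitter; here is a hedged sketch reusing the Splitter class above:
const { PassThrough } = require('stream');

async function processBuffer(hugeBuffer) {
    const bufferStream = new PassThrough();
    bufferStream.end(hugeBuffer);

    const splitter = new Splitter({ splitSize: 170 }); // the Splitter class defined above
    bufferStream.pipe(splitter);

    let lineNr = 0;
    // each iteration finishes (including any awaits) before the next chunk is pulled
    for await (const chunk of splitter) {
        const line = chunk.toString().trim();
        lineNr++;
        if (lineNr === 1) {
            // do stuff with the 1st line, possibly with await
        } else {
            break; // stop reading early, like stopIt() above
        }
    }
}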
I am trying to extract images from a csv file by doing the following:
1. Parsing/streaming in a large CSV file using csv-parse and the fs createReadStream method
2. Grabbing each line for processing using stream-transform
3. Extracting the image and other row data for processing using the async waterfall method
4. Downloading and writing the image to the server using request and the fs createWriteStream method
For some reason, after the data gets piped into createWriteStream, there is some case in which an async callback never gets called. I have run this same code using only request, without piping to createWriteStream, and it works. I've also run createWriteStream with a drain event, and then somehow it works? Can anyone explain this to me?
In the code below, request is trying to pipe 14,970 images, but the createWriteStream close or finish events only fire 14,895 times, with error firing 0 times. Could this be a draining issue? Could highWaterMark be exceeded and a failed write be occurring undetected?
Here is my CSV line-reading code:
var first = true;
var parser = parse();
var transformer = transform( (line, complete) => {
if(!first)
extractData(line,complete)
else {
first = false;
complete(null);
}
},
() => {
console.log('Done: parseFile');
});
fs.createReadStream(this.upload.location).pipe(parser).pipe(transformer);
The extractData function, which doesn't always fire the required async callback:
extractData(line,complete){
var now = new Date();
var image = {
createdAt: now,
updatedAt: now
};
async.waterfall([
next => { // Data Extraction
async.forEachOf(line, (data, i, complete) => {
if(i === 2) image.src = data;
if(i === 3) image.importSrc = data;
complete(null);
}, err => {
if(err) throw err;
next(null);
});
},
next => { // Download Image
var file = fs.createWriteStream('public/'+image.src);
var sendReq = request.get(image.importSrc);
sendReq.on('response', response => {
if (response.statusCode !== 200) {
this.upload.report.image.errors++;
return next(null);
}
});
sendReq.on('error', err => {
this.upload.report.image.errors++;
next(null);
});
sendReq.pipe(file);
file.on('finish', () => {
this.upload.report.image.inserts++;
file.close(next); // Close file and callback
});
file.on('error', err => {
this.upload.report.image.errors++;
next(null);
});
}
], err => {
if(err) throw err;
complete(null);
});
}
As suggested by @mscdex, I've also tried switching out the finish event for his replacement close approach.
file.close(next); is unnecessary as the file stream is closed automatically by default. What you can do instead is to listen for the close event to know when the file descriptor for the stream has been closed. So replace the entire finish event handler with:
file.on('close', () => {
this.upload.report.image.inserts++;
next(null);
});
I'm relatively new to Node and Express.js. I'm trying to create a WebSocket server that pushes CSV data line by line, at irregular intervals that are stored in the file itself.
The CSV structure is something like this:
[timeout [ms], data1, data2, data3 ...]
I've successfully created a websocket server which communicates with the client.
I'm looking for the best solution to effectively do something like this:
1. Read a line of the CSV file
2. Send the line over WebSockets
3. Pause the reading for the period of time stored in the first value of the row
4. Resume the reading after the interval has passed, and go back to step 1.
This is how far I've got (please feel free to trash my code completely, as it might be very wrong; as I said, I'm new to this). It seems like pause() doesn't do anything.
var $ = require('jquery')
,csv = require('csv');
exports.index = function(server){
var io = require('socket.io').listen(server);
io.sockets.on('connection', function (socket) {
socket.on('startTransmission', function(msg) {
csv()
.from.path('C:/dev/node_express/csv/test.csv', { delimiter: ',', escape: '"' })
.on('record', function(row,index){
var rowArray = $.parseJSON(JSON.stringify(row));
var json = {},
that = this;
$.each(rowArray, function(i,value){
json[keys[i]] = value;
});
socket.emit('transmitDataData', json);
//this.pause(); //I guess around here is where I'd like to pause
// setTimeout(function(){
// that.resume(); //and resume here after the timeout, stored in the first value (rowArray[0])
// }, rowArray[0]);
});
});
});
};
The commented-out code unfortunately does not work: all the data is sent immediately, row after row, and the function doesn't pause.
I ran into the same sort of thing with another use case. The issue is that calling pause() on the stream pauses the underlying stream reading but not the csv record parsing, so the record event can get called with the remainder of the records that made up the last read stream chunk. I synchronized them, in my case, like this:
var rows=0, actions=0;
stream.on('record', function(row, index){
rows++;
// pause here, but expect more record events until the raw read stream is exhausted
stream.pause();
runner.do(row, function(err, result) {
// when actions have caught up to rows read, read more rows.
if (actions==rows) {
stream.resume();
}
});
});
In your case, I'd buffer the rows and release them with a timer. Here's an untested refactoring just to give you an idea of what I mean:
var $ = require('jquery'),
csv = require('csv');
exports.index = function(server){
var io = require('socket.io').listen(server);
io.sockets.on('connection', function (socket) {
socket.on('startTransmission', function(msg) {
var timer=null, buffered=[], stream=csv().from.path('C:/dev/node_express/csv/test.csv', { delimiter: ',', escape: '"' });
function transmit(row) {
socket.emit('transmitDataData', row);
}
function drain(timeout) {
if (!timer) {
timer = setTimeout(function() {
timer = null;
if (buffered.length<=1) { // get more rows ahead of time so we don't run out. otherwise, we could skip a beat.
stream.resume(); // get more rows
} else {
var row = buffered.shift();
transmit(row);
drain(row[0]);
}
}, timeout);
}
}
stream.on('record', function(row,index){
stream.pause();
if (index == 0) {
transmit(row);
} else {
buffered.push(row);
}
drain(row[0]); // assuming row[0] contains a timeout value.
});
stream.on('end', function() {
// no more rows. wait for buffer to empty, then cleanup.
});
stream.on('error', function() {
// handle error.
});
        }); // end of socket.on('startTransmission') handler
    }); // end of io.sockets.on('connection') handler
};