Read newest csv records with Node - node.js

I'd like to watch a CSV file and get the newest records since it was changed. I'm running the following shell command to build a very simple csv file and append a new line every second:
rm test.csv & x=0 && while true; do echo "${x},${x},${x}" >> test.csv; x=$(($x+1)); sleep 1; done
The following code prints all the records of the file until the first change and then just emits the dashed line, as if it's not re-reading the file:
'use strict';
var fs = require('fs'),
dataFile = __dirname + '/server/data/test.csv',
csv = require('csv');
var parser = csv.parse({delimiter: ','}, function(err, data){
console.log(data);
});
var watcher = fs.watch(dataFile);
watcher.on('change', fileChange);
function fileChange(e, fn){
if (e) console.error(e)
fs.createReadStream(dataFile).pipe(parser);
console.log('-------')
}
Shouldn't the fileChange function re-read the file on every change? My ultimate plan here is to get both the previous array of lines and the current one and use lodash's difference function to return only the differences. If there's better way, I'm open to hear it though.

My guess is that fs.createReadStream() has opened the file and it's not being closed. So on the second event fs.createReadStream() fails. No bueno.
Try using fs.readFile() instead like this:
function fileChange(e, fn){
if (e) console.error(e)
fs.readFile(dataFile, function (err, data) {
if (err) throw err;
console.log(data);
console.log('-------')
});
};
See the documentation here: http://nodejs.org/api/fs.html#fs_fs_readfile_filename_options_callback

I ended up solving the issue by stating the file on change, and reading the difference in size to the stream data:
'use strict';
var fs = require('fs'),
dataFile = __dirname + '/server/data/test.csv',
readSize = 0,
csv = require('csv');
var parser = csv.parse();
parser.on('readable', function(data){
var record;
while(record = parser.read()){
console.log(record);
}
});
var watcher = fs.watch(dataFile);
watcher.on('change', fileChange);
// fires when the watched file changes
function fileChange(e, fn){
// get these syncronously
var stats = fs.statSync(dataFile);
// if it's smaller, wait half a second
if (stats.size <= readSize) {
setTimeout(fileChange, 500);
}
// read the stream offset
var stream = fs.createReadStream(dataFile, {start: readSize, end: stats.size});
stream.on('data', function(chunk){
parser.write(chunk.toString());
});
readSize = stats.size;
}
Any feedback on why this may not work would be appreciated.

Related

Does csv-parse allow you to read from file?

I'm learning how to use the csv-parse module for nodejs. I wrote this code and it works perfectly:
var fs = require('fs');
const fileName = './spreadsheet.csv';
const assert = require('assert');
const { parse } = require('csv-parse');
const records = [];
// Initialize the parser
const parser = parse({
delimiter: ','
});
// Use the readable stream api to consume records
parser.on('readable', function(){
let record;
while ((record = parser.read()) !== null) {
records.push(record);
}
});
// Catch any error
parser.on('error', function(err){
console.error(err.message);
});
fs.readFile(fileName, 'utf8', function (err, f) {
if (err) {
return console.error(err);
}
const rows = f.split("\r\n");
for(let x in rows) {
parser.write(rows[x]+"\n");
}
parser.end();
console.log(records);
});
But right now, I depend on the fs module and fs.readFile to consume my csv file. Does the csv-parse have an option to read ffrom file? I ask because as you can see in my code, I ahve to specify my own line-break characters, which could differ between csv files. I thought maybe the csv-parse module would have something that can more readily address such a situation?
The parser object will do most of the work for you. It is expecting the data to arrive on its stream interface and it will do everything else. All you have to do is open a stream and the pipe it to the parser like this:
fs.createReadStream(fileName).pipe(parser);
And, here it is combined with your code:
const fs = require('fs');
const fileName = './spreadsheet.csv';
const { parse } = require('csv-parse');
const records = [];
// Initialize the parser
const parser = parse({
delimiter: ','
});
// Use the readable stream api to consume records
parser.on('readable', function(){
let record;
while ((record = parser.read()) !== null) {
records.push(record);
}
});
// Catch any error
parser.on('error', function(err){
console.error(err.message);
});
parser.on('end', function() {
console.log(records);
});
// open the file and pipe it into the parser
fs.createReadStream(fileName).pipe(parser);
P.S. It's amazing that such a simple example of getting the CSV data from a file is not shown in the documentation (at least not anywhere I could find it). I'm also surprised, they don't offer an option where they will automatically read the data from the stream, instead requiring you to implement the readable event handler. Odd, for such an otherwise complete package.

Javascript: Write and complete an existing file

I try to write some nodejs code that completes an existing file. But each time I start the script, it over writes the content of my test file.
var fs = require('fs');
var writer = fs.createWriteStream('test.txt', {flags: 'w'});
writer.on('finish', function() {
console.log('data has been saved successfully');
});
function writeInList(id) {
console.log(id);
writer.write(id+' \n');
}
for (var id = 0; id<10; id++){
writeInList(id);
}
writer.end();
Of course I have searched for a solution:
Writing large files with Node.js
But I'm not able to make it run. Could anybody help me please?

Writing buffer response from resemble.js to file

I'm using node-resemble-js to compare two PNG images.
The comparison happens without issue and I get a successful/relevant response however I'm having trouble outputting the image diff.
var express = require('express');
var fs = require('fs');
var resemble = require('node-resemble-js');
var router = express.Router();
router.get('/compare', function(req, res, next) {
compareImages(res);
});
var compareImages = function (res) {
resemble.outputSettings({
largeImageThreshold: 0
});
var diff = resemble('1.png')
.compareTo('2.png')
.ignoreColors()
.onComplete(function(data){
console.log(data);
var png = data.getDiffImage();
fs.writeFile('diff.png', png.data, null, function (err) {
if (err) {
throw 'error writing file: ' + err;
}
console.log('file written');
});
res.render('compare');
});
};
module.exports = router;
It writes to diff.png as expected however it's not creating a valid image.
Any ideas where I'm going wrong? Feel like I'm pretty close but just unsure of final piece.
Thanks
Looks like there is a pack() method that needs to be called, which does some work and then streamifies the data. In that case you can buffer the stream and then call writeFile like this:
var png = data.getDiffImage();
var buf = new Buffer([])
var strm = png.pack()
strm.on('data', function (dat) {
buf = Buffer.concat([buf, dat])
})
strm.on('end', function() {
fs.writeFile('diff.png', buf, null, function (err) {
if (err) {
throw 'error writing file: ' + err;
}
console.log('file written');
})
})
or you can just pipe it like this, which is a little simpler:
png.pack().pipe(fs.createWriteStream('diff.png'))
Honestly, your approach made sense to me (grab the Buffer and write it) but I guess that data Buffer attached to what comes back from getDiffImage isn't really the final png. Seems like the docs are a bit thin but there's some info here: https://github.com/lksv/node-resemble.js/issues/4

How to read file to variable in NodeJs?

i'm pretty new into NodeJs. And i am trying to read a file into a variable.
Here is my code.
var fs = require("fs"),
path = require("path"),
util = require("util");
var content;
console.log(content);
fs.readFile(path.join(__dirname,"helpers","test.txt"), 'utf8',function (err,data) {
if (err) {
console.log(err);
process.exit(1);
}
content = util.format(data,"test","test","test");
});
console.log(content);
But every time i run the script i get
undefined and undefined
What am i missing? Help please!
As stated in the comments under your question, node is asynchronous - meaning that your function has not completed execution when your second console.log function is called.
If you move the log statement inside the the callback after reading the file, you should see the contents outputted:
var fs = require("fs"),
path = require("path"),
util = require("util");
var content;
console.log(content);
fs.readFile(path.join(__dirname, "helpers", "test.txt"), 'utf8', function (err, data) {
if (err) {
console.log(err);
process.exit(1);
}
content = util.format(data, "test", "test", "test");
console.log(content);
});
Even though this will solve your immediately problem, without an understanding of the async nature of node, you're going to encounter a lot of issues.
This similar stackoverflow answer goes into more details of what other alternatives are available.
The following code snippet uses ReadStream. It reads your data in separated chunks, if your data file is small it will read the data in a single chunk. However this is a asynchronous task. So if you want to perform any task with your data, you need to include them within the ReadStream portion.
var fs = require('fs');
var readStream = fs.createReadStream(__dirname + '/readMe.txt', 'utf8');
/* include the file directory and file name instead of <__dirname + '/readMe.txt'> */
var content;
readStream.on('data', function(chunk){
content = chunk;
performTask();
});
function performTask(){
console.log(content);
}
There is also another easy way by using synchronous task. As this is a synchronous task, you do not need to worry about its executions. The program will only move to the next line after execution of the current line unlike the asynchronous task.
A more clear and detailed answer is provided in the following link:
Get data from fs.readFile
var fs = require('fs');
var content = fs.readFileSync('readMe.txt','utf8');
/* include your file name instead of <'readMe.txt'> and make sure the file is in the same directory. */
or easily as follows:
const fs = require('fs');
const doAsync = require('doasync');
doAsync(fs).readFile('./file.txt')
.then((data) => console.log(data));

Piping multiple file streams using Node.js

I want to stream multiple files, one after each other, to the browser. To illustrate, think of having multiple CSS files which shall be delivered concatenated as one.
The code I am using is:
var directory = path.join(__dirname, 'css');
fs.readdir(directory, function (err, files) {
async.eachSeries(files, function (file, callback) {
if (!endsWith(file, '.css')) { return callback(); } // (1)
var currentFile = path.join(directory, file);
fs.stat(currentFile, function (err, stats) {
if (stats.isDirectory()) { return callback(); } // (2)
var stream = fs.createReadStream(currentFile).on('end', function () {
callback(); // (3)
});
stream.pipe(res, { end: false }); // (4)
});
}, function () {
res.end(); // (5)
});
});
The idea is that I
filter out all files that do not have the file extension .css.
filter out all directories.
proceed with the next file once a file has been read completely.
pipe each file to the response stream without closing it.
end the response stream once all files have been piped.
The problem is that only the first .css file gets piped, and all remaining files are missing. It's as if (3) would directly jump to (5) after the first (4).
The interesting thing is that if I replace line (4) with
stream.on('data', function (data) {
console.log(data.toString('utf8'));
});
everything works as expected: I see multiple files. If I then change this code to
stream.on('data', function (data) {
res.write(data.toString('utf8'));
});
all files expect the first are missing again.
What am I doing wrong?
PS: The error happens using Node.js 0.8.7 as well as using 0.8.22.
UPDATE
Okay, it works if you change the code as follows:
var directory = path.join(__dirname, 'css');
fs.readdir(directory, function (err, files) {
var concatenated = '';
async.eachSeries(files, function (file, callback) {
if (!endsWith(file, '.css')) { return callback(); }
var currentFile = path.join(directory, file);
fs.stat(currentFile, function (err, stats) {
if (stats.isDirectory()) { return callback(); }
var stream = fs.createReadStream(currentFile).on('end', function () {
callback();
}).on('data', function (data) { concatenated += data.toString('utf8'); });
});
}, function () {
res.write(concatenated);
res.end();
});
});
But: Why? Why can't I call res.write multiple times instead of first summing up all the chunks, and then write them all at once?
Consider also using multistream, that allows you to combine and emit multiple streams one after another.
The code was perfectly fine, it was the unit test that was wrong ...
Fixed that, and now it works like a charme :-)
May help someone else:
const fs = require("fs");
const pth = require("path");
let readerStream1 = fs.createReadStream(pth.join(__dirname, "a.txt"));
let readerStream2 = fs.createReadStream(pth.join(__dirname, "b.txt"));
let writerStream = fs.createWriteStream(pth.join(__dirname, "c.txt"));
//only readable streams have "pipe" method
readerStream1.pipe(writerStream);
readerStream2.pipe(writerStream);
I also checked Rocco's answer and its working like a charm:
//npm i --save multistream
const multi = require('multistream');
const fs = require('fs');
const pth = require("path");
let streams = [
fs.createReadStream(pth.join(__dirname, "a.txt")),
fs.createReadStream(pth.join(__dirname, "b.txt"))
];
let writerStream = fs.createWriteStream(pth.join(__dirname, "c.txt"));
//new multi(streams).pipe(process.stdout);
new multi(streams).pipe(writerStream);
and to send the results to client:
const multi = require('multistream');
const fs = require('fs');
const pth = require("path");
const exp = require("express");
const app = exp();
app.listen(3000);
app.get("/stream", (q, r) => {
new multi([
fs.createReadStream(pth.join(__dirname, "a.txt")),
fs.createReadStream(pth.join(__dirname, "b.txt"))
]).pipe(r);
});

Resources