NodeJS - read and write file causes corruption

I'm kinda new to NodeJS and I'm working on a simple file encoder.
I planned to change the very first 20kb of a file and just copy the rest of it.
So I used the following code, but it changed some bytes in the rest of the file.
Here is my code:
var fs = require('fs');
var config = require('./config');

fs.open(config.encodeOutput, 'w', function(err, fw) {
    if(err) {
        console.log(err);
    } else {
        fs.readFile(config.source, function(err, data) {
            var start = 0;
            var buff = readChunk(data, start);
            while(buff.length) {
                if(start < config.encodeSize) {
                    var buffer = makeSomeChanges(buff);
                    writeChunk(fw, buffer);
                } else {
                    writeChunk(fw, buff);
                }
                start += config.ENCODE_BUFFER_SIZE;
                buff = readChunk(data, start);
            }
        });
    }
});

function readChunk(buffer, start) {
    return buffer.slice(start, start + config.ENCODE_BUFFER_SIZE);
}

function writeChunk(fd, chunk) {
    fs.writeFile(fd, chunk, {encoding: 'binary', flag: 'a'});
}
I opened the encoded file and compared it with the original file.
I even commented these parts:
//if(start < config.encodeSize) {
//    var buffer = makeSomeChanges(buff);
//    writeChunk(fw, buffer);
//} else {
    writeChunk(fw, buff);
//}
So my program just copies the file, but it still changes some bytes.
What is wrong?

So I checked the pattern and realized some bytes were not in the right place, and I guessed it was because I was using the async write function.
I changed fs.writeFile() to fs.writeFileSync() and everything is working fine now.

Since you were using asynchronous IO, you should have been waiting on a queue of operations: multiple writes happening at the same time are likely to end up corrupting your file. This explains why switching to synchronous IO fixed the issue; that way, a further write cannot start before the previous one has completed.
However, using synchronous APIs when asynchronous ones are available is a poor choice, because your program is blocked while it writes to the file. You should go for async and build a queue of writes to wait on.
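For example, here is a minimal sketch of serializing the writes with promises. It assumes a Node version where fs.promises is available; makeSomeChanges is the transform from the question, and the function and parameter names are placeholders.
const fs = require('fs');

// Hedged sketch: read the source once, then await each write so the chunks
// land in order. makeSomeChanges is the transform from the question; the
// other names are placeholders.
async function encodeFile(source, destination, encodeSize, bufferSize) {
    const data = await fs.promises.readFile(source);
    const out = await fs.promises.open(destination, 'w');
    try {
        for (let start = 0; start < data.length; start += bufferSize) {
            let chunk = data.slice(start, start + bufferSize);
            if (start < encodeSize) chunk = makeSomeChanges(chunk);
            // Waiting for the previous write before starting the next one
            // is what prevents the out-of-order corruption.
            await out.write(chunk);
        }
    } finally {
        await out.close();
    }
}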

Related

Replay a log file with NodeJS as if it were happening in real-time

I have a log file with about 14,000 aircraft position data points captured from a system called Flarm; it looks like this:
{"addr":"A","time":1531919658.578100,"dist":902.98,"alt":385,"vs":-8}
{"addr":"A","time":1531919658.987861,"dist":914.47,"alt":384,"vs":-7}
{"addr":"A","time":1531919660.217471,"dist":925.26,"alt":383,"vs":-7}
{"addr":"A","time":1531919660.623466,"dist":925.26,"alt":383,"vs":-7}
What I need to do is find a way to 'play' this file back in real time (as if it were occurring right now, even though it's pre-recorded) and emit an event whenever a log entry 'occurs'. The file is not being added to; it's pre-recorded, and the playback would happen at a later stage.
The reason for doing this is that I don't have access to the receiving equipment when I'm developing.
The only way I can think to do it is to set a timeout for every log entry, but that doesn't seem like the right way to do it. Also, this process would have to scale to longer recordings (this one was only an hour long).
Are there other ways of doing this?
If you want to "play them back" with the actual time difference, a setTimeout is pretty much what you have to do.
const processEntry = (entry, index) => {
    index++;
    const nextEntry = getEntry(index);
    if (nextEntry == null) return;
    const timeDiff = nextEntry.time - entry.time;
    emitEntryEvent(entry);
    setTimeout(processEntry, timeDiff, nextEntry, index);
};

processEntry(getEntry(0), 0);
This emits the current entry and then sets a timeout based on the difference until the next entry.
getEntry could either fetch lines from a prefilled array or fetch lines individually based on the index. In the latter case, only two lines of data would be in memory at the same time.
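For instance, a minimal sketch of getEntry backed by a prefilled array (the file path is a placeholder; note that the timestamps in the sample data are in seconds, so the difference would need converting to milliseconds before being passed to setTimeout):
const fs = require('fs');

// Hedged sketch: read the whole log up front and serve entries by index.
// The file path is a placeholder.
const entries = fs.readFileSync('./data/flarm.log', 'utf8')
    .split('\n')
    .filter(Boolean)
    .map(line => JSON.parse(line));

const getEntry = (index) => (index < entries.length ? entries[index] : null);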
Got it working in the end! setTimeout turned out to be the answer, and combined with the input of Lucas S. this is what I ended up with:
const EventEmitter = require('events');
const fs = require('fs');
const readable = fs.createReadStream("./data/2018-07-18_1509log.json", {
    encoding: 'utf8',
    fd: null
});

function read_next_line() {
    var chunk;
    var line = '';
    // While this is a thing we can do, assign chunk
    while ((chunk = readable.read(1)) !== null) {
        // If chunk is a newline character, return the line
        if (chunk === '\n') {
            return JSON.parse(line);
        } else {
            line += chunk;
        }
    }
    return false;
}

var lines = [];
var nextline;

const processEntry = () => {
    // If lines is empty, read a line
    if (lines.length === 0) lines.push(read_next_line());
    // Quit here if we've reached the last line
    if ((nextline = read_next_line()) == false) return true;
    // Else push the just read line into our array
    lines.push(nextline);
    // Get the time difference in milliseconds
    var delay = Number(lines[1].time - lines[0].time) * 1000;
    // Remove the first line
    lines.shift();
    module.exports.emit('data', lines[0]);
    // Repeat after the calculated delay
    setTimeout(processEntry, delay);
}

var ready_to_start = false;

// When the stream becomes readable, allow starting
readable.on('readable', function() {
    ready_to_start = true;
});

module.exports = new EventEmitter;
module.exports.start = function() {
    if (ready_to_start) processEntry();
    if (!ready_to_start) return false;
}
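For reference, a hedged sketch of how this module might be consumed, assuming the code above is saved as replay.js (the filename is an assumption):
// Hypothetical consumer of the module above; './replay' is an assumed path.
const replay = require('./replay');

replay.on('data', function(entry) {
    console.log('dist %d m, alt %d m at %d', entry.dist, entry.alt, entry.time);
});

// start() does nothing (and returns false) until the stream has become readable.
replay.start();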
Assuming you want to visualize the flight logs, you can use fs.watch as below to watch the log file for changes:
fs.watch('somefile', function (event, filename) {
    console.log('event is: ' + event);
    if (filename) {
        console.log('filename provided: ' + filename);
    } else {
        console.log('filename not provided');
    }
});
The code excerpt is from here. For more information on fs.watch(), check out here.
Then, for seamless updates on the frontend, you can set up a WebSocket to your server, watch the log file there, and send each newly added row over that socket to the frontend.
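A minimal sketch of that idea, assuming the ws package is installed; the file path and port are placeholders:
const fs = require('fs');
const WebSocket = require('ws');

const file = './data/flight.log';      // placeholder path
const wss = new WebSocket.Server({ port: 8080 });
let offset = fs.existsSync(file) ? fs.statSync(file).size : 0;

// Watch the log file and push only the newly appended bytes to every client.
fs.watch(file, () => {
    const size = fs.statSync(file).size;
    if (size <= offset) return;        // nothing new (or the file was truncated)
    const stream = fs.createReadStream(file, { start: offset, end: size - 1, encoding: 'utf8' });
    offset = size;
    stream.on('data', (chunk) => {
        for (const client of wss.clients) {
            if (client.readyState === WebSocket.OPEN) client.send(chunk);
        }
    });
});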
After you get the data on the frontend you can visualize it there. While I haven't done any flight visualization project before, I've used D3js to visualize other stuff (sound, numerical data, metric analysis, etc.) a couple of times and it did the job every time.

How to reset nodejs stream?

How to reset nodejs stream?
How to read stream again in nodejs?
Thanks in advance!
var fs = require('fs');
var lineReader = require('line-reader');

// proxy.txt = only 3 lines
var readStream = fs.createReadStream('proxy.txt');

lineReader.open(readStream, function (err, reader) {
    for(var i = 0; i < 6; i++) {
        reader.nextLine(function(err, line) {
            if(err) {
                readStream.reset(); // ???
            } else {
                console.log(line);
            }
        });
    }
});
There are two ways of solving your problem. As someone commented before, you could simply wrap all of that in a function and, instead of resetting, just read the file again.
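A hedged sketch of that first option, using Node's core readline module rather than line-reader (the file name and callbacks are placeholders):
var fs = require('fs');
var readline = require('readline');

// Hedged sketch: instead of resetting a stream, wrap the whole read in a
// function and call it again whenever you need to start over.
function readAllLines(path, onLine, onDone) {
    var rl = readline.createInterface({ input: fs.createReadStream(path) });
    rl.on('line', onLine);
    if (onDone) rl.on('close', onDone);
}

// "Resetting" is then just another call:
readAllLines('proxy.txt', console.log, function() {
    readAllLines('proxy.txt', console.log);
});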
Of course this won't work well with HTTP requests, for example. The other way, provided you accept a much bigger memory usage, is to simply accumulate your data.
What you'd need is to implement some sort of "rewindable stream" - this means that you'd essentially need to implement a Transform stream that would keep a list of all the buffers and write them to a piped stream on a rewind method.
Take a look at the Node API for streams here; the methods should look somewhat like this:
const { Transform, PassThrough } = require('stream');

class Rewindable extends Transform {
    constructor() {
        super();
        this.accumulator = [];
    }
    _transform(buf, enc, cb) {
        this.accumulator.push(buf);
        cb();
    }
    rewind() {
        var stream = new PassThrough();
        this.accumulator.forEach((chunk) => stream.write(chunk));
        return stream;
    }
}
And you would use this like this:
var readStream = fs.createReadStream('proxy.txt');
var rewindableStream = readStream.pipe(new Rewindable());

(...).on("whenever-you-want-to-reset", () => {
    var rewound = rewindableStream.rewind();
    /// and do whatever you like with your stream.
});
Actually I think I'll add this to my scramjet. :)
Edit
I released this logic in the rereadable-stream npm package. The advantage over the stream depicted here is that you can control the buffer length and get rid of data that has already been read.
At the same time you can keep a window of count items and tail a number of chunks backwards.

How to read large binary files in node js without a blocking loop?

I am trying to learn some basics of event driven programming. So for an exercise I am trying to write a program that reads a large binary file and does something with it but without ever making a blocking call. I have come up with the following:
var fs = require('fs');
var BUFFER_SIZE = 1024;
var path_of_file = "somefile";

fs.open(path_of_file, 'r', (error_opening_file, fd) =>
{
    if (error_opening_file)
    {
        console.log(error_opening_file.message);
        return;
    }
    var buffer = new Buffer(BUFFER_SIZE);
    fs.read(fd, buffer, 0, BUFFER_SIZE, 0, (error_reading_file, bytesRead, buffer) =>
    {
        if (error_reading_file)
        {
            console.log(error_reading_file.message);
            return;
        }
        // do something e.g. print or write to another file
    })
})
I know I need some kind of loop to read the complete file, but in the above code I am reading just the first 1024 bytes, and I cannot work out how to continue reading the file without using a blocking loop. How could we do it?
Use fs.createReadStream instead. This will call your callback over and over again until it has finished reading the file, so you don't have to block.
var fs = require('fs');
var readStream = fs.createReadStream('./test.exe');

readStream.on('data', function (chunk) {
    console.log(chunk.length);
})
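If you would rather stay with fs.read, the usual non-blocking pattern is to issue the next read from inside the completion callback rather than in a loop; a minimal sketch (the file path is a placeholder):
var fs = require('fs');
var BUFFER_SIZE = 1024;

fs.open('somefile', 'r', function(err, fd) {
    if (err) {
        console.log(err.message);
        return;
    }
    var buffer = Buffer.alloc(BUFFER_SIZE);
    var position = 0;

    function readNext() {
        fs.read(fd, buffer, 0, BUFFER_SIZE, position, function(err, bytesRead) {
            if (err) {
                console.log(err.message);
                return;
            }
            if (bytesRead === 0) {
                fs.close(fd, function() {});   // reached the end of the file
                return;
            }
            // do something with buffer.slice(0, bytesRead)
            position += bytesRead;
            readNext();                        // issue the next read; nothing blocks
        });
    }

    readNext();
});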

How do I loop until a file with specific data inside is found in Node.js?

I'm learning a lot about Node.js by rewriting some utility tools I had in C# for the fun of it. I have either found something that is not a good idea to write in Node.js or I'm completely missing a concept that will make it work.
The goal of the program: Search a directory of files for a file with data that matches some criteria. The files are gzipped XML, and for the time being I'm just looking for one tag. Here's what I tried (files is an array of file names):
while (files.length > 0) {
    var currentPath = rootDir + "\\" + files.pop();
    var fileContents = fs.readFileSync(currentPath);
    zlib.gunzip(fileContents, function(err, buff) {
        if (buff.toString().indexOf("position") !== -1) {
            console.log("The file '%s' has an odometer reading.", currentPath);
            return;
        }
    });
    if (files.length % 1000 === 0) {
        console.log("%d files remain...", files.length);
    }
}
I was nervous about this when I wrote it. It's clear from the console output that all of the gunzip operations are asynchronous and don't run until the while loop is complete. That means when I finally do get some output, currentPath doesn't have the value it had when the file was read, so the program is useless. I don't see a synchronous way to decompress the data with the zlib module. I don't see a way to store the context (currentPath would do) so the callback has the right value. I originally tried streams, piping a file stream to a gunzip stream, but I had a similar problem in that all of my callbacks happened after the loop had completed and I'd lost useful context.
It's been a long day and I'm out of ideas for how to structure this. The loop is a synchronous thing, and my asynchronous stuff depends on its state. That is bad. What am I missing? If the files weren't gzipped, this would be easy because of readFileSync().
Wow. I didn't really expect no answers at all. I got in a time crunch but I spent the last couple of days looking over Node.js, hypothesizing why certain things were working like they did, and learning about control flow.
So the code as-is doesn't work because I need a closure to capture the value of currentPath. Boy does Node.js like closures and callbacks. So a better structure for the application would look like this:
function checkFile(currentPath) {
    var fileContents = fs.readFileSync(currentPath);
    zlib.gunzip(fileContents, function(err, buff) {
        if (buff.toString().indexOf("position") !== -1) {
            console.log("The file '%s' has an odometer reading.", currentPath);
            return;
        }
    });
}

while (files.length > 0) {
    var currentPath = rootDir + "\\" + files.shift();
    checkFile(currentPath);
}
But it turns out that's not very Node, since there's so much synchronous code. To do it asynchronously, I need to lean on more callbacks. The program turned out longer than I expected so I'll only post part of it for brevity, but the first bits of it look like this:
function checkForOdometer(currentPath, callback) {
    fs.readFile(currentPath, function(err, data) {
        unzipFile(data, function(hasReading) {
            callback(currentPath, hasReading);
        });
    });
}

function scheduleCheck(filePath, callback) {
    process.nextTick(function() {
        checkForOdometer(filePath, callback);
    });
}

var withReading = 0;
var totalFiles = 0;

function series(nextPath) {
    if (nextPath) {
        var fullPath = rootDir + nextPath;
        totalFiles++;
        scheduleCheck(fullPath, function(currentPath, hasReading) {
            if (hasReading) {
                withReading++;
                console.log("%s has a reading.", currentPath);
            }
            series(files.shift());
        });
    } else {
        console.log("%d files searched.", totalFiles);
        console.log("%d had a reading.", withReading);
    }
}

series(files.shift());
The reason for the series control flow is that if I set up the obvious parallel search, I end up running out of process memory, probably from having 60,000+ buffers' worth of data sitting in memory:
while (files.length > 0) {
    var currentPath = rootDir + files.shift();
    checkForOdometer(currentPath, function(callbackPath, hasReading) {
        //...
    });
}
I could probably set it up to schedule batches of, say, 50 files in parallel and wait to schedule 50 more when those are done. Setting up the series control flow seemed just as easy.
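For the record, a hedged sketch of that batching idea, reusing the checkForOdometer helper and the files, rootDir, totalFiles, and withReading variables from above (the batch size is illustrative):
// Hedged sketch of the batching idea: run up to 50 checks at once and only
// start the next batch when every check in the current batch has called back.
var BATCH_SIZE = 50;

function runBatch() {
    if (files.length === 0) {
        console.log("%d files searched.", totalFiles);
        console.log("%d had a reading.", withReading);
        return;
    }
    var batch = files.splice(0, BATCH_SIZE);
    var pending = batch.length;
    batch.forEach(function(name) {
        totalFiles++;
        checkForOdometer(rootDir + name, function(currentPath, hasReading) {
            if (hasReading) {
                withReading++;
                console.log("%s has a reading.", currentPath);
            }
            if (--pending === 0) runBatch();   // schedule the next batch
        });
    });
}

runBatch();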

How to do `tail -f logfile.txt`-like processing in node.js?

tail -f logfile.txt outputs the last 10 lines of logfile.txt, and then continues to output appended data as the file grows.
What's the recommended way of doing the -f part in node.js?
The following outputs the entire file (ignoring the "show the last 10 lines") and then exits.
var fs = require('fs');
var rs = fs.createReadStream('logfile.txt', { flags: 'r', encoding: 'utf8' });

rs.on('data', function(data) {
    console.log(data);
});
I understand the event loop is exiting because after the stream's end and close events there are no more events; I'm curious about the best way of continuing to monitor the stream.
The canonical way to do this is with fs.watchFile.
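For example, a minimal sketch of a tail built on fs.watchFile, reading only the bytes appended since the last change (the file name and polling interval are placeholders):
var fs = require('fs');
var file = 'logfile.txt';   // placeholder

// fs.watchFile hands the listener the old and new fs.Stats, so reading the
// byte range between prev.size and curr.size streams just the appended data.
fs.watchFile(file, { interval: 1000 }, function(curr, prev) {
    if (curr.size <= prev.size) return;   // nothing appended (or file truncated)
    fs.createReadStream(file, {
        encoding: 'utf8',
        start: prev.size,
        end: curr.size - 1
    }).on('data', function(data) {
        process.stdout.write(data);
    });
});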
Alternatively, you could just use the node-tail module, which uses fs.watchFile internally and has already done the work for you. Here is an example of using it straight from the documentation:
Tail = require('tail').Tail;
tail = new Tail("fileToTail");

tail.on("line", function(data) {
    console.log(data);
});
The Node.js API documentation on fs.watchFile states:
Stability: 2 - Unstable. Use fs.watch instead, if available.
Funny though that it says almost the exact same thing for fs.watch:
Stability: 2 - Unstable. Not available on all platforms.
In any case, I went ahead and did yet another small webapp, TailGate, that will tail your files using the fs.watch variant.
Feel free to check it out here:
TailGate on github.
You can try to use fs.read instead of a ReadStream:
var fs = require('fs');

var buf = new Buffer(16);
buf.fill(0);

function read(fd)
{
    fs.read(fd, buf, 0, buf.length, null, function(err, bytesRead, buf1) {
        console.log(buf1.toString());
        if (bytesRead != 0) {
            read(fd);
        } else {
            setTimeout(function() {
                read(fd);
            }, 1000);
        }
    });
}

fs.open('logfile', 'r', function(err, fd) {
    read(fd);
});
Note that read calls the callback even if there is no data and it has just reached the end of the file. Without the timeout you'll get 100% CPU usage here. You could try to use fs.watchFile to get new data immediately.
Substack has a file slice module that behaves exactly like tail -f: slice-file can stream updates after the initial slice of 10 lines.
var sf = require('slice-file');
var xs = sf('/var/log/mylogfile.txt');
xs.follow(-10).pipe(process.stdout);
Source: https://github.com/substack/slice-file#follow
https://github.com/jandre/always-tail seems a great option if you have to worry about log rotation; example from the readme:
var Tail = require('always-tail');
var fs = require('fs');

var filename = "/tmp/testlog";
if (!fs.existsSync(filename)) fs.writeFileSync(filename, "");

var tail = new Tail(filename, '\n');

tail.on('line', function(data) {
    console.log("got line:", data);
});

tail.on('error', function(data) {
    console.log("error:", data);
});

tail.watch();
