fs.read has a different behaviour - node.js

Any idea why fs.read cannot behave as fs.readSync?
My code is very simple: it just reads the songs file chunk by chunk. I find that with the fs.readSync function the song file can be read out 512 bytes every time, while with the fs.read function there is no log info printed out, and if I delete the while(readPosition < fileSize) loop, it executes only one time.
// --- Question code: read a song file chunk by chunk. ---
// The synchronous loop works; the asynchronous loop never prints anything
// (see the comments before the second while-loop for why).
var chunkSize = 512; //the chunk size that will be read every time
var readPostion = 0; //the first byte which will be read from the file.
var fileSize =0;
var fs=require('fs');
//var Buffer = require("buffer");
//var songsBuf = Buffer.alloc(512);
// NOTE(review): new Buffer(size) is deprecated and returns uninitialised
// memory; Buffer.alloc(chunkSize) is the safe modern replacement.
var songsBuf = new Buffer(chunkSize);
fs.open('/media/sdcard/song.mp3','r',function(err,fd){
if(err)
throw err;
console.log("The file had been opened");
// This `var` declares a NEW fileSize local to this callback, shadowing the
// outer fileSize (which stays 0). Both loops below use this local copy.
var fileSize = fs.fstatSync(fd).size;
console.log("The total size of this file is:%d Bytes",fileSize);
console.log("Start to read the file chunk by chunk");
//read the file in sync mode
while(readPostion<fileSize)
{
fs.readSync(fd,songsBuf,0,chunkSize,readPostion);
// Shrink the chunk when fewer than chunkSize bytes remain. Note this is
// checked AFTER the read above, so the final readSync may request bytes
// past the end of the file.
if(readPostion+chunkSize>fileSize)
chunkSize = fileSize-readPostion;
readPostion+=chunkSize;
console.log("the read position is %d",readPostion);
console.log("The chunk size is %d",chunkSize);
console.log(songsBuf);
}
//the code above can readout the file chunk by chunk but the below one cannot
//read the file in Async mode.
// WHY NOTHING IS LOGGED: the sync loop above has already advanced
// readPostion to fileSize, so this condition is false and the body never
// runs. Even with the sync loop removed, readPostion is only updated
// inside the fs.read callback, which cannot fire while this while-loop
// keeps the call stack busy — the loop would spin forever. Also note the
// position argument to fs.read is hard-coded to 1, not readPostion.
while(readPostion<fileSize)
{
// console.log("ff");
fs.read(fd,songsBuf,0,chunkSize,1,function(err,byteNum,buffer){
if(err)
throw err;
console.log("Start to read from %d byte",readPostion);
console.log("Total bytes are %d",byteNum);
console.log(buffer);
if(readPostion+chunkSize>fileSize)
chunkSize = fileSize-readPostion; //if the files to read is smaller than one chunk
readPostion+=chunkSize;
});
}
// NOTE(review): this close runs as soon as the fs.open callback returns,
// potentially before any pending async reads complete.
fs.close(fd);
});

You can do this with the async library.
// Read the file chunk by chunk with async.whilst: each iteration performs
// one fs.read and only continues to the next iteration once that read's
// callback has fired — which is exactly what a plain while-loop cannot do.
async.whilst(
    function () { return readPostion < fileSize },
    function (callback) {
        // BUG FIX: the position argument must be readPostion; the original
        // passed a hard-coded 1 and therefore re-read the same bytes.
        fs.read(fd, songsBuf, 0, chunkSize, readPostion, function (err, byteNum, buffer) {
            if (err) return callback(err)
            console.log("Start to read from %d byte",readPostion);
            console.log("Total bytes are %d",byteNum);
            console.log(buffer);
            if(readPostion + chunkSize > fileSize)
                chunkSize = fileSize - readPostion; //if the files to read is smaller than one chunk
            readPostion += chunkSize
            // BUG FIX: the original passed `songBuffer`, an undefined name;
            // the buffer variable in scope is songsBuf.
            callback(null, songsBuf)
        })
    },
    function (err, n) {
        if (err) console.error(err)
        // BUG FIX: modern Node requires a callback for async fs.close.
        fs.close(fd, function () {})
        // Do something with songsBuf here
    }
)

Related

Node Async ReadStream from SFTP connection

So I'm creating a class and ultimately want to create a method that takes a file on an SFTP server and produces a readstream that can be piped into other streams / functions. I'm most of the way there, except my readStream is acting strangely. Here's the relevant code:
const Client = require('ssh2').Client,
Readable = require('stream').Readable,
async = require('async');
/**
* Class Definition stuff
* ......
*/
// Builds a Readable stream over a remote SFTP file.
// BUG (explained by the author further down, L134 of this page): _read()
// is invoked every time the stream wants more data, and this implementation
// starts a brand-new SFTP session and restarts the whole read loop on each
// call — which is why the same beginning of the file is pushed repeatedly.
getStream(get) {
const self = this;
const rs = new Readable;
rs._read = function() {
const read = this;
// Everything below runs again on EVERY _read call.
self.conn.on('ready', function(){
self.conn.sftp(function(err,sftp) {
// NOTE(review): `return err` inside a callback silently swallows the
// error; it is not propagated to the stream.
if(err) return err;
sftp.open(get, 'r', function(err, fd){
sftp.fstat(fd, function(err, stats) {
let bufferSize = stats.size,
chunkSize = 512,//bytes
buffer = new Buffer(bufferSize),
bytesRead = 0;
// Sequentially read chunkSize-byte slices until the whole file has
// been consumed, pushing each chunk into the Readable.
async.whilst(
function () {
return bytesRead < bufferSize;
},
function (done) {
sftp.read(fd, buffer, bytesRead, chunkSize, bytesRead,
function (err, bytes, buff) {
if (err) return done(err);
// console.log(buff.toString('utf8'));
read.push(buff);
bytesRead += bytes;
done();
});
},
function (err) {
if (err) console.log(err);
// Signal end-of-stream and release the remote handle.
read.push(null);
sftp.close(fd);
}
);
});
});
});
}).connect(self.connectionObj);
}
return rs;
}
Elsewhere, I would call this method like so:
let sftp = new SFTP(credentials);
sftp.getStream('/path/file.csv')
.pipe(toStuff);
.pipe(toOutput);
So, long story short. During the SFTP.read operation read.push(buff) keeps pushing the same first part of the file over and over. However, when I console.log(buff) it correctly streams the full file?
So I'm scratching my head wondering what I'm doing wrong with the read stream that it's only pushing the beginning of the file and not continuing on to the next part of the buffer.
Here's the docs on SSH2 SFTP client: https://github.com/mscdex/ssh2-streams/blob/master/SFTPStream.md
I used this SO question as inspiration for what I wrote above: node.js fs.read() example
This is similar/related: Reading file from SFTP server using Node.js and SSH2
Ok, after a lot of trouble, I realized I was making a couple of mistakes. First, the _read function is called every time the stream is ready to read more data, which means the SFTP connection was being started every time _read was called. This also meant the sftp.read() function was starting over each time, resetting the starting point back to the beginning.
I needed a way to first setup the connection, then read and stream the file data, so I chose the library noms. Here's the final code if anyone is interested:
// Fixed version: the SFTP connection is established ONCE (in the "before"
// function below) and _read only pulls successive chunks, so the read
// position advances correctly between calls.
getStream (get) {
const self = this;
// Shared state captured by both noms callbacks below.
let connection,   // the sftp session, set once in the before-function
fileData,         // the remote file descriptor
buffer,           // read buffer sized to the whole file
totalBytes = 0,   // file size from fstat
bytesRead = 0;    // running read offset
// NOTE(review): `nom` is presumably bound via require('noms') elsewhere in
// this file — confirm the binding name.
return nom(
// _read function
function(size, next) {
const read = this;
// Check if we're done reading
if(bytesRead === totalBytes) {
connection.close(fileData);
connection.end();
self.conn.end();
console.log('done');
// push(null) signals end-of-stream to consumers.
return read.push(null);
}
// Make sure we read the last bit of the file
if ((bytesRead + size) > totalBytes) {
size = (totalBytes - bytesRead);
}
// Read each chunk of the file
connection.read(fileData, buffer, bytesRead, size, bytesRead,
function (err, byteCount, buff, pos) {
// console.log(buff.toString('utf8'));
// console.log('reading');
// NOTE(review): err is not checked here — a failed read would still
// advance bytesRead and push.
bytesRead += byteCount;
read.push(buff);
next();
}
);
},
// Before Function
function(start) {
// setup the connection BEFORE we start _read
self.conn.on('ready', function(){
self.conn.sftp(function(err,sftp) {
if(err) return err;
sftp.open(get, 'r', function(err, fd){
sftp.fstat(fd, function(err, stats) {
connection = sftp;
fileData = fd;
totalBytes = stats.size;
buffer = new Buffer(totalBytes);
console.log('made connection');
// start() releases the stream to begin calling _read.
start();
});
});
});
}).connect(self.connectionObj);
})
}
Always looking for feedback. This doesn't run quite as fast as I'd hope, so let me know if you have ideas on speeding up the stream.

Read a file one character at a time in node.js?

Is there a way to read one symbol at a time in nodejs from file without storing the whole file in memory?
I found an answer for lines
I tried something like this but it doesn't help:
// NOTE(review): `bufferSize` is not a recognised fs.createReadStream
// option — the chunk-size knob is `highWaterMark` (mentioned in a later
// answer on this page) — which is presumably why this has no effect and
// multi-byte chunks are still delivered.
const stream = fs.createReadStream("walmart.dump", {
encoding: 'utf8',
fd: null,
bufferSize: 1,
});
stream.on('data', function(sym){
console.log(sym);
});
Readable stream has a read() method, where you can pass the length, in bytes, of every chunk to be read. For example:
// Open the dump as a utf8 stream and drain it one character at a time by
// calling read(1) inside the 'readable' handler.
var source = fs.createReadStream("walmart.dump", {
  encoding: 'utf8',
  fd: null,
});
source.on('readable', function () {
  // read(1) returns one character, or null when the internal buffer is
  // drained (another 'readable' event fires once more data arrives).
  for (var piece = source.read(1); piece !== null; piece = source.read(1)) {
    console.log(piece); // piece is one byte
  }
});
Here's a lower-level way to do it: fs.read(fd, buffer, offset, length, position, callback)
using:
const fs = require('fs');
// open file for reading, returns file descriptor
const fd = fs.openSync('your-file.txt','r');
// Reads a single byte from the already-opened descriptor `fd` at the given
// position and hands the 1-byte buffer to `cb(err, buffer)`.
function readOneCharFromFile(position, cb){
  // only need to store one byte (one character).
  // FIX: Buffer.alloc replaces the deprecated, uninitialised new Buffer(1).
  const b = Buffer.alloc(1);
  fs.read(fd, b, 0, 1, position, function(err,bytesRead, buffer){
    console.log('data => ', String(buffer));
    cb(err,buffer);
  });
}
you will have to increment the position, as you read the file, but it will work.
here's a quick example of how to read a whole file, character by character
Just for fun I wrote this complete script to do it, just pass in a different file path, and it should work
const async = require('async');
const fs = require('fs');
const path = require('path');
// Reads a file byte-by-byte starting at `position` until fs.read reports
// zero bytes (EOF), then invokes cb(err, allBytesBuffer).
function read(fd, position, cb) {
  let isByteRead = null;       // null = not attempted yet; true/false = last read result
  // FIX: Buffer.alloc replaces the deprecated new Buffer(0).
  let ret = Buffer.alloc(0);   // accumulator for every byte read so far
  async.whilst(
    function () {
      // keep looping until a read comes back with 0 bytes
      return isByteRead !== false;
    },
    function (cb) {
      readOneCharFromFile(fd, position++, function (err, bytesRead, buffer) {
        if(err){
          return cb(err);
        }
        isByteRead = !!bytesRead;
        if(isByteRead){
          ret = Buffer.concat([ret,buffer]);
        }
        cb(null);
      });
    },
    function (err) {
      // deliver accumulated bytes (or the first error) to the caller
      cb(err, ret);
    }
  );
}
// Reads exactly one byte from `fd` at `position`; cb receives
// (err, bytesRead, buffer) straight from fs.read.
function readOneCharFromFile(fd, position, cb) {
  // only need to store one byte (one character).
  // FIX: Buffer.alloc replaces the deprecated, uninitialised new Buffer(1).
  const b = Buffer.alloc(1);
  fs.read(fd, b, 0, 1, position, cb);
}
/// use your own file here
const file = path.resolve(__dirname + '/fixtures/abc.txt');
const fd = fs.openSync(file, 'r');
// start reading at position 0, position will be incremented
read(fd, 0, function (err, data) {
if (err) {
console.error(err.stack || err);
}
else {
// `data` holds every byte read, concatenated in order.
console.log('data => ', String(data));
}
// Close the descriptor whether the read succeeded or failed.
fs.closeSync(fd);
});
As you can see we increment the position integer every time we read the file. Hopefully the OS keeps the file in memory as we go. Using async.whilst() is OK, but I think for a more functional style it's better not to keep the state in the top of the function (ret and isByteRead). I will leave it as an exercise to the reader to implement this without using those stateful variables.

How can I use fs.read() in Node js

I try to use Nodejs fs.read Method in Mac OS. However it doesn't work..
I use below source code
// Question code: fails with "Error: EBADF, read" — see the answers below.
var fs = require('fs');
fs.open('helloworld.txt', 'r', function(err, fd) {
fs.fstat(fd, function(err, stats) {
var bufferSize=stats.size ,
chunkSize=512,
buffer=new Buffer(bufferSize),
bytesRead = 0;
// This loop only SCHEDULES the async reads; their callbacks run later.
while (bytesRead < bufferSize) {
if ((bytesRead + chunkSize) > bufferSize) {
chunkSize = (bufferSize - bytesRead);
}
fs.read(fd, buffer, bytesRead, chunkSize, bytesRead, testCallback);
bytesRead += chunkSize;
}
// Runs before any read callback has fired, so the buffer is still empty.
console.log(buffer.toString('utf8'));
});
// BUG (per the accepted answer): fstat/read are asynchronous, so this
// close executes before they complete — the reads then hit a closed
// descriptor and report EBADF.
fs.close(fd);
});
var testCallback = function(err, bytesRead, buffer){
console.log('err : ' + err);
};
Actually, I use some example in stackoverflow.
When I execute the source,
err : Error: EBADF, read
this err is returned.
However if I use readFile method, it works well.
// fs.readFile manages open/read/close internally, so there is no
// descriptor for the caller to close too early — which is why this works.
fs.readFile('helloworld.txt', function (err, data) {
if (err) throw err;
console.log(data.toString('utf8'));
});
result is
Hello World!
Of course, it's same file.
Please, let me know what the problem is.
Thank you.
The difference is not in the functions you are using, but in the way you are using them.
All those fs.* functions you are using are asynchronous, which means they do not block — the rest of your code keeps running before they finish. So, when you run fs.close, the others have not finished yet.
You should close it inside the fs.stat block:
// Answer code: fs.close is moved inside the fstat callback so the file is
// no longer closed before fstat runs.
var fs = require('fs');
fs.open('helloworld.txt', 'r', function(err, fd) {
fs.fstat(fd, function(err, stats) {
var bufferSize=stats.size ,
chunkSize=512,
buffer=new Buffer(bufferSize),
bytesRead = 0;
while (bytesRead < bufferSize) {
if ((bytesRead + chunkSize) > bufferSize) {
chunkSize = (bufferSize - bytesRead);
}
fs.read(fd, buffer, bytesRead, chunkSize, bytesRead, testCallback);
bytesRead += chunkSize;
}
// NOTE(review): fs.read itself is still asynchronous, so this log and the
// close below can still run before the queued reads have completed —
// confirm with fs.readSync if a fully ordered version is required.
console.log(buffer.toString('utf8'));
fs.close(fd);
});
});
var testCallback = function(err, bytesRead, buffer){
console.log('err : ' + err);
};
fs.read and fs.fstat are async functions.
And just after calling, fstat, you are closing the file (fs.close(fd);).
That could be the reason for this error.
The error in the code is that you are currently using asynchronous functions in your application, which simply means that the next operation can start before the previous one finishes — i.e. fs.close() closes your file before the read operations have completed. You can simply replace them with the synchronous functions provided by the same fs module from node.js.

Node.js: Read large text file only partialy

I have to read a very large csv file (> 80MB and growing).
I usually only have to parse the last 1% of the file. But getting to that part takes a few minutes.
Is there a way that I only start reading on line N?
Or alternatively could I read the stream from end to start?
I'm currently using fast-csv to read the file:
// convert csv into postgres copy file
// Streams the whole CSV through fast-csv, keeping only rows whose first
// column (a unix timestamp) is after 2014-01-01, then writes the
// transformed rows to `outpath`. Note the entire file is still scanned —
// which is exactly the slowness the question is about.
csv.fromPath(filepath, {
headers: false
}).transform(function(data) {
// check if record meets condition
// NOTE(review): parseInt without a radix — pass 10 explicitly if the
// column may carry leading zeros or whitespace.
var dt = parseInt(data[0]);
var date = new Date(dt * 1000);
var mom = moment(date);
if (mom.isAfter('2014-01-01 00:00')) {
// transform data and return object
return transform(data);
}
// Returning null drops the row from the output stream.
return null;
}).pipe(csv.createWriteStream({
headers: true
})).pipe(fs.createWriteStream(outpath, {
encoding: "utf8"
})).on('finish', function() {
// do postgres import
});
Using a combination of node's fs.stat, fs.open, fs.read, you could find the size of the file and just read the last 1% into a buffer:
// Reads only the last 1% of a (possibly very large) file into a buffer,
// using fs.stat to find the size and fs.read with an explicit position.
var fs = require('fs');
var filename = 'csv.csv';
fs.stat(filename, function(err, stat) {
  if(err) throw err;
  var bytesToRead = Math.ceil(0.01 * stat.size); // last 1%
  var startingPosition = stat.size - bytesToRead;
  // FIX: Buffer.alloc replaces the deprecated, uninitialised new Buffer().
  var readBuffer = Buffer.alloc(bytesToRead);
  fs.open(filename, 'r', function(err, fd){
    if(err) throw err;
    fs.read(fd, readBuffer, 0, bytesToRead, startingPosition,
      function(err, bytesRead){
        if(err) throw err;
        console.log(readBuffer.toString());
        // FIX: the original never closed fd and leaked the descriptor.
        fs.close(fd, function () {});
      });
  });
});
You couldn't start reading from line N because you would have to read it all to know where the newline characters are.

ReadStream: Internal buffer does not fill up anymore

I have a fs.ReadStream object that points to a pretty big file. Now I would like to read 8000 bytes from the ReadStream, but the internal buffer is only 6000 bytes. So my approach would be to read those 6000 bytes and wait for the internal buffer to fill up again by using a while-loop that checks whether the internal buffer length is not 0 anymore.
Something like this:
// Question code: tries to read `length` bytes from a wrapped ReadStream,
// busy-waiting for the stream's internal buffer to refill.
BinaryObject.prototype.read = function(length) {
var value;
// Check whether we have enough data in the internal buffer
if (this.stream._readableState.length < length) {
// Not enough data - read the full internal buffer to
// force the ReadStream to fill it again.
// NOTE(review): this calls BinaryObject's own read() recursively, not
// this.stream.read() — confirm which was intended.
value = this.read(this.stream._readableState.length);
// BUG (the subject of the question): this loop keeps the call stack
// busy, so the events that would refill the stream can never run and
// the condition never changes — the script idles here forever.
while (this.stream._readableState.length === 0) {
// Wait...?
}
// We should have some more data in the internal buffer
// here... Read the rest and add it to our `value` buffer
// ... something like this:
//
// value.push(this.stream.read(length - value.length))
// return value
} else {
value = this.stream.read(length);
this.stream.position += length;
return value;
}
};
The problem is, that the buffer is not filled anymore - the script will just idle in the while loop.
What is the best approach to do this?
It's quite simple. You don't need to do any buffering on your side:
// Pull fixed-size chunks straight off the stream with read(CHUNK_SIZE);
// no manual buffering is needed on the caller's side.
var fs = require('fs'),
    rs = fs.createReadStream('/path/to/file');

var CHUNK_SIZE = 8192;

rs.on('readable', function () {
  // read() yields null once fewer than CHUNK_SIZE bytes are buffered;
  // another 'readable' event fires when more data arrives.
  for (var piece = rs.read(CHUNK_SIZE); piece !== null; piece = rs.read(CHUNK_SIZE)) {
    console.log('got %d bytes of data', piece.length);
  }
});
rs.on('end', function () {
  console.log('end');
});
If CHUNK_SIZE is larger than the internal buffer, node will return null and buffer some more before emitting readable again. You can even configure the initial size of the buffer by passing:
var rs = fs.createReadStream('/path/to/file', {highWatermark: CHUNK_SIZE});
Below is the sample for reading file in streams.
// Sample: two ways to consume a read stream.
var fs = require('fs'),
readStream = fs.createReadStream(srcPath);
// Flowing mode: chunks are pushed to the 'data' handler as they arrive.
readStream.on('data', function (chunk) {
console.log('got %d bytes of data', chunk.length);
});
// Paused mode: pull chunks manually with read() when 'readable' fires.
// NOTE(review): attaching a 'data' handler normally switches the stream
// into flowing mode, so combining both handlers as shown here may leave
// little for read() to return — confirm before copying this pattern.
readStream.on('readable', function () {
var chunk;
while (null !== (chunk = readStream.read())) {
console.log('got %d bytes of data', chunk.length);
}
});
readStream.on('end', function () {
console.log('got all bytes of data');
});

Resources