Why is the data not being written to the file? - node.js

I am trying to create a file with dummy data. Since the file will be huge, with 32^5 data points, I am using a write stream. But I cannot see any data being written to the file. What could be the reason for this?
const faker = require('faker');
const fs = require('fs');
const os = require('os');

const MAX_DATA_POINTS = Math.pow(32, 5);
const stream = fs.createWriteStream('sample-data.csv', { flags: 'a' });

for (let i = 0; i < MAX_DATA_POINTS; i++) {
  console.log(i);
  stream.write(`${faker.name.findName()}, ${i} ${os.EOL}`);
}

console.log(`Written ${MAX_DATA_POINTS} .. `);

The write method returns false if the stream wants the calling code to wait for the 'drain' event before writing more data, and true otherwise. It is quite possible that the stream is not draining, and your calls to write() are just buffering chunks and returning false.
You will need to wait until all the buffered chunks have been drained (accepted for delivery by the operating system). When that happens, the 'drain' event is emitted.
Note: It is recommended that once write() returns false, no more chunks be written until the 'drain' event is emitted.
You could modify your dummy-CSV generator along these lines:
const faker = require('faker');
const fs = require('fs');
const os = require('os');

const MAX_DATA_POINTS = Math.pow(32, 5);
let c = 0;
const stream = fs.createWriteStream('sample-data.csv', { flags: 'a' });

function write() {
  let ok = true;
  do {
    c++;
    // write() returns false once the internal buffer is full
    ok = stream.write(`${faker.name.findName()}, ${c} ${os.EOL}`);
  } while (c < MAX_DATA_POINTS && ok);
  if (c < MAX_DATA_POINTS) {
    // resume writing only after the buffered data has been flushed
    stream.once('drain', write);
  } else {
    stream.end(); // all rows queued; close the file
  }
}
write();
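For what it's worth, on newer Node versions (11.13+) the same backpressure handling can be written with async/await and events.once; this is just a sketch of the equivalent logic, not part of the original answer:

const { once } = require('events');
const faker = require('faker');
const fs = require('fs');
const os = require('os');

const MAX_DATA_POINTS = Math.pow(32, 5);
const stream = fs.createWriteStream('sample-data.csv', { flags: 'a' });

(async () => {
  for (let i = 0; i < MAX_DATA_POINTS; i++) {
    // write() returns false when the internal buffer is full;
    // pause the loop until the stream emits 'drain'.
    if (!stream.write(`${faker.name.findName()}, ${i} ${os.EOL}`)) {
      await once(stream, 'drain');
    }
  }
  stream.end();
})();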

Related

How can I set an interval or timeout

I am getting this error:
Error: 'The write action you are performing on the channel has hit the write rate limit.'
How can I make my loop send at a slower rate? It seems to send everything at once, even though I'm incrementing through the lines in a loop one by one, and it still throttles and breaks the send rate. Is using an interval or timeout a good idea? I'm not sure how I should set it up.
A simple index.js using Node.js:
const autosend = require("discord-autosender");
const fs = require("fs");

function send() {
  var channelID = "";
  var tokenID = "";
  const data = fs.readFileSync('mr_robot.txt', 'UTF-8');
  const lines = data.split(/\r?\n/);
  for (let l_indx = 0; l_indx < lines.length; l_indx++) {
    var message = lines[l_indx];
    autosend.Post(message, channelID, tokenID);
  }
}
send();
Mr.Robot.txt
A dog of that house shall move me to stand. I
will take the wall of any man or maid of Montague’s.
That shows thee a weak slave, for the weakest
goes to the wall.
’Tis true, and therefore women, being the
weaker vessels, are ever thrust to the wall. Therefore
I will push Montague’s men from the wall and
thrust his maids to the wall.
The quarrel is between our masters and us
their men.
You can combine Promise, setTimeout, and async/await to slow down the loop:

const autosend = require("discord-autosender");
const fs = require("fs");

function send() {
  var channelID = '';
  var tokenID = '';
  const data = fs.readFileSync('mr_robot.txt', 'UTF-8');
  const lines = data.split(/\r?\n/);

  const newFun = async () => {
    for (let l_indx = 0; l_indx < lines.length; l_indx++) {
      var message = lines[l_indx];
      // pause for one second before each send
      await new Promise((resolve) => setTimeout(resolve, 1000));
      autosend.Post(message, channelID, tokenID);
    }
  };
  newFun();
}
send();
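If you prefer, the delay can be pulled out into a small helper; this is just a sketch of the same idea (sendSlowly and the one-second delay are my own naming and assumption):

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function sendSlowly(lines, channelID, tokenID) {
  for (const line of lines) {
    await sleep(1000);                        // wait before each send
    autosend.Post(line, channelID, tokenID);  // same call as above
  }
}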

Set rate using RxJS5

I have this code, which reads data from a .csv file, converts it to JSON, and logs the data:
const fs = require('fs');
const path = require('path');
const sd = path.resolve(__dirname + '/fixtures/SampleData.csv');
const strm = fs.createReadStream(sd).setEncoding('utf8');
const Rx = require('rxjs/Rx');
const csv2json = require('csv2json');

const dest = strm
  .pipe(csv2json({
    separator: ','
  }));

dest.on('error', function (e) {
  console.error(e.stack || e);
});

const obs = Rx.Observable.fromEvent(dest, 'data')
  .flatMap(d => Rx.Observable.timer(100).mapTo(d));

obs.subscribe(v => {
  console.log(String(v));
});
What this code does is log all the data after a single 100 ms delay. What I actually want is to delay each line of data and log each line after a small delay.
The above code doesn't achieve that - what is the best way to control the rate at which the data is logged?
Hypothesis: All the lines of data come in at approximately the same time, so all are delayed 100 ms, and so they end up getting printed at pretty much the same time. I need to only start delaying the next line after the previous has been logged.
The following code seems to do the same thing as using the timer above:
const obs = Rx.Observable.fromEvent(dest, 'data')
  .delay(100);
"All the lines of data come in at approximately the same time, so all are delayed 100 ms, and so they end up getting printed at pretty much the same time. I need to only start delaying the next line after the previous has been logged."
Your hypothesis is correct.
Solution
Swap out the .flatMap() in your original solution for .concatMap():
Rx.Observable.from([1, 2, 3, 4])
  .mergeMap(i => Rx.Observable.timer(500).mapTo(i))
  .subscribe(val => console.log('mergeMap value: ' + val));

Rx.Observable.from([1, 2, 3, 4])
  .concatMap(i => Rx.Observable.timer(500).mapTo(i))
  .subscribe(val => console.log('concatMap value: ' + val));

<script src="https://cdnjs.cloudflare.com/ajax/libs/rxjs/5.0.3/Rx.js"></script>
This will ensure that every emission completes before the next emission is subscribed to and starts delaying its value.
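Applied to the CSV pipeline from the question, the change is just the operator (a sketch, assuming the same dest stream as above):

const obs = Rx.Observable.fromEvent(dest, 'data')
  // concatMap waits for each inner timer to complete before subscribing
  // to the next one, so lines are logged one at a time, 100 ms apart.
  .concatMap(d => Rx.Observable.timer(100).mapTo(d));

obs.subscribe(v => {
  console.log(String(v));
});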
I couldn't find the functionality I needed in the RxJS library (it may well be there; let me know if there is a better, more idiomatic way).
So I wrote this, which seems to do the job:
const fs = require('fs');
const path = require('path');
const sd = path.resolve(__dirname + '/fixtures/SampleData.csv');
const strm = fs.createReadStream(sd).setEncoding('utf8');
const Rx = require('rxjs/Rx');
const csv2json = require('csv2json');

const p = Rx.Observable.prototype;

p.eachWait = function (timeout) {
  const source = this;
  const values = [];
  let flipped = true;

  const onNext = function (sub) {
    flipped = false;
    setTimeout(() => {
      var c = values.pop();
      if (c) sub.next(c);
      if (values.length > 0) {
        onNext(sub);
      } else {
        flipped = true;
      }
    }, timeout);
  };

  return Rx.Observable.create(sub => {
    return source.subscribe(
      function next(v) {
        values.unshift(v);
        if (flipped) {
          onNext(sub);
        }
      },
      sub.error.bind(sub),
      sub.complete.bind(sub)
    );
  });
};

const dest = strm
  .pipe(csv2json({
    separator: ','
  }));

dest.on('error', function (e) {
  console.error(e.stack || e);
});

const obs = Rx.Observable.fromEvent(dest, 'data')
  .eachWait(1000);

obs.subscribe(v => {
  console.log(String(v));
});
I assume this is about as performant as you can make it: only one timer should be running at any given moment.

wait for previous stream to be empty before allowing reading

Say I have a file that contains a list of integers, one per line. I use fs.createReadStream and pipe that into split (so that each chunk is an integer). Then I pipe that into a duplex stream that is supposed to add the numbers and write the sum by piping into fs.createWriteStream.
var fs = require('fs');
var stream = require('stream');
var split = require('split');

var addIntegers = new stream.Duplex();
addIntegers.sum = 0;

addIntegers._read = function (size) {
  this.push(this.sum + '\n');
};

addIntegers._write = function (chunk, encoding, done) {
  this.sum += +chunk;
  done();
};

fs.createReadStream('list-of-integers.txt')
  .pipe(split())
  .pipe(addIntegers)
  .pipe(fs.createWriteStream('sum.txt'));
When I run this, sum.txt just gets continually filled with zeroes and the program never terminates (as expected). How do I wait for the input stream (split) to be empty before allowing the output stream (fs.createWriteStream) to read from addIntegers?
I figured it out.
I decided to use a Transform stream instead (thanks mscdex) because it has a method (_flush) that gets called after all written data is consumed. The working code is below. Don't forget to npm i split :)
var fs = require('fs');
var stream = require('stream');
var split = require('split');

var addIntegers = new stream.Transform();
addIntegers.sum = 0;

addIntegers._transform = function (chunk, encoding, done) {
  this.sum += +chunk;
  done();
};

addIntegers._flush = function (done) {
  this.push(this.sum + '\n');
  done(); // signal that flushing is complete
};

fs.createReadStream('list-of-integers.txt')
  .pipe(split())
  .pipe(addIntegers)
  .pipe(fs.createWriteStream('sum.txt'));
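As a side note, not part of the original answer: on Node 10+ the same wiring can be done with stream.pipeline, which gives a single callback for completion or errors (reusing the requires and addIntegers defined above):

var pipeline = require('stream').pipeline;

pipeline(
  fs.createReadStream('list-of-integers.txt'),
  split(),
  addIntegers,
  fs.createWriteStream('sum.txt'),
  function (err) {
    if (err) console.error('pipeline failed:', err);
    else console.log('sum written to sum.txt');
  }
);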

Is it possible to register multiple listeners to a child process's stdout data event? [duplicate]

I need to run two commands in series that both need to read data from the same stream.
After piping a stream into another, its buffer is emptied, so I can't read data from that stream again, which means this doesn't work:
var spawn = require('child_process').spawn;
var fs = require('fs');
var request = require('request');

var inputStream = request('http://placehold.it/640x360');
var identify = spawn('identify', ['-']);

inputStream.pipe(identify.stdin);

var chunks = [];
identify.stdout.on('data', function (chunk) {
  chunks.push(chunk);
});

identify.stdout.on('end', function () {
  var size = getSize(Buffer.concat(chunks)); // width
  var convert = spawn('convert', ['-', '-scale', size * 0.5, 'png:-']);
  inputStream.pipe(convert.stdin);
  convert.stdout.pipe(fs.createWriteStream('half.png'));
});

function getSize(buffer) {
  return parseInt(buffer.toString().split(' ')[2].split('x')[0]);
}
Request complains about this
Error: You cannot pipe after data has been emitted from the response.
and changing the inputStream to fs.createWriteStream yields the same issue of course.
I don't want to write into a file but reuse in some way the stream that request produces (or any other for that matter).
Is there a way to reuse a readable stream once it finishes piping?
What would be the best way to accomplish something like the above example?
You have to create a duplicate of the stream by piping it into two streams. You can create the copies with a PassThrough stream, which simply passes its input through to its output.
const spawn = require('child_process').spawn;
const PassThrough = require('stream').PassThrough;

const a = spawn('echo', ['hi user']);
const b = new PassThrough();
const c = new PassThrough();

a.stdout.pipe(b);
a.stdout.pipe(c);

let count = 0;
b.on('data', function (chunk) {
  count += chunk.length;
});
b.on('end', function () {
  console.log(count);
  c.pipe(process.stdout);
});
Output:
8
hi user
The first answer only works if the streams take roughly the same amount of time to process the data. If one takes significantly longer, the faster one will request new data, consequently overwriting the data still being used by the slower one (I had this problem after trying to solve it with a duplicate stream).
The following pattern worked very well for me. It uses a library based on Stream2 streams, Streamz, along with Promises to synchronize async streams via a callback. Using the familiar example from the first answer:
var spawn = require('child_process').spawn;
var pass = require('stream').PassThrough;
var streamz = require('streamz').PassThrough;
var Promise = require('bluebird');

var a = spawn('echo', ['hi user']);
var b = new pass();
var c = new pass();

a.stdout.pipe(streamz(combineStreamOperations));

function combineStreamOperations(data, next) {
  Promise.join(b, c, function (b, c) { // perform n operations on the same data
    next(); // request more
  });
}

var count = 0;
b.on('data', function (chunk) { count += chunk.length; });
b.on('end', function () { console.log(count); c.pipe(process.stdout); });
You can use this small npm package I created:
readable-stream-clone
With this you can reuse readable streams as many times as you need
For the general problem, the following code works fine:
var PassThrough = require('stream').PassThrough;

var a = new PassThrough();
var b1 = new PassThrough();
var b2 = new PassThrough();

a.pipe(b1);
a.pipe(b2);

b1.on('data', function (data) {
  console.log('b1:', data.toString());
});
b2.on('data', function (data) {
  console.log('b2:', data.toString());
});

a.write('text');
I have a different solution that writes to two streams simultaneously. Naturally, the time to write will be the sum of the two times, but I use it to answer a download request where I also want to keep a copy of the downloaded file on my server (actually, I use an S3 backup, so I cache the most-used files locally to avoid multiple transfers).
/**
 * A utility class made to write to a file while answering a file download request
 */
class TwoOutputStreams {
  constructor(streamOne, streamTwo) {
    this.streamOne = streamOne;
    this.streamTwo = streamTwo;
  }

  setHeader(header, value) {
    if (this.streamOne.setHeader)
      this.streamOne.setHeader(header, value);
    if (this.streamTwo.setHeader)
      this.streamTwo.setHeader(header, value);
  }

  write(chunk) {
    this.streamOne.write(chunk);
    this.streamTwo.write(chunk);
  }

  end() {
    this.streamOne.end();
    this.streamTwo.end();
  }
}
You can then use this as a regular OutputStream:
const twoStreamsOut = new TwoOutputStreams(fileOut, responseStream)
and pass it to your method as if it were a response or a fileOutputStream.
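For example, a rough sketch of a download handler (the handler, paths, and header value below are hypothetical, only to show the combined object standing in for the response):

const fs = require('fs');

function handleDownload(req, res) {
  // Hypothetical paths: keep a local copy while streaming the file to the client.
  const fileOut = fs.createWriteStream('/tmp/cached-copy');
  const out = new TwoOutputStreams(fileOut, res);

  out.setHeader('Content-Type', 'application/octet-stream');

  const source = fs.createReadStream('/tmp/original-file');
  source.on('data', (chunk) => out.write(chunk));
  source.on('end', () => out.end());
}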
If you have async operations on the PassThrough streams, the answers posted here won't work.
A solution that works for async operations involves buffering the stream content and then creating streams from the buffered result.
To buffer the result you can use concat-stream:
const Promise = require('bluebird');
const concat = require('concat-stream');

const getBuffer = function (stream) {
  return new Promise(function (resolve, reject) {
    var gotBuffer = function (buffer) {
      resolve(buffer);
    };
    var concatStream = concat(gotBuffer);
    stream.on('error', reject);
    stream.pipe(concatStream);
  });
};
To create streams from the buffer you can use:
const { Readable } = require('stream');

const getBufferStream = function (buffer) {
  const stream = new Readable();
  stream.push(buffer);
  stream.push(null);
  return Promise.resolve(stream);
};
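Putting the two helpers together could look like this (a sketch; sourceStream and the second consumer are placeholders):

getBuffer(sourceStream)
  .then(function (buffer) {
    // create as many independent readable copies as needed
    return Promise.all([getBufferStream(buffer), getBufferStream(buffer)]);
  })
  .then(function (streams) {
    streams[0].pipe(process.stdout);    // first consumer
    streams[1].pipe(someOtherWritable); // second consumer (placeholder)
  });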
What about piping into two or more streams, but not at the same time?
For example:
var fs = require('fs');
var PassThrough = require('stream').PassThrough;

var mybinaryStream = stream.start(); // placeholder for a never-ending audio stream
var file1 = fs.createWriteStream('file1.wav', { encoding: 'binary' });
var file2 = fs.createWriteStream('file2.wav', { encoding: 'binary' });
var mypass = new PassThrough();

mybinaryStream.pipe(mypass);
mypass.pipe(file1);

setTimeout(function () {
  mypass.pipe(file2);
}, 2000);
The above code does not produce any errors, but file2 is empty.

Nodejs: Transport stream is not pumping data out completely?

I was trying to learn streaming in Node.js by writing a small script, but after executing it the last stream does not push all of the data.
var stream = require('stream');
var fs = require('fs');
var util = require('util');

function Newliner() {
  stream.Transform.call(this);
}
util.inherits(Newliner, stream.Transform);

Newliner.prototype._transform = function(chunk, enc, done) {
  var split = 0;
  for (var i = 0; i < chunk.length; i++) {
    if (chunk[i] == 10) {
      this.push(chunk.slice(split, i));
      split = i + 1;
    }
  }
}

function Greper(options) {
  stream.Transform.call(this);
  this.regex = new RegExp(options);
}
util.inherits(Greper, stream.Transform);

Greper.prototype._transform = function(chunk, enc, done) {
  this.push(chunk); // Even this is not working.
  /*
  var a = chunk.toString();
  if (this.regex.test(a)) {
    this.push(chunk);
  }
  */
}

var n = new Newliner();
var g = new Greper("line");
var f = fs.createReadStream('a.txt');

f.pipe(n).pipe(g).pipe(process.stdout);
The input file a.txt is:
This is line one.
Another line.
Third line.
When I execute it, only one line is displayed. What is the reason for this?
$ node test.js
This is line one.
Note: When I pipe the file read stream directly into 'g', it works correctly.
You need to call the callback of the _transform() function when you are done processing the chunk. From the documentation:
callback (Function) Call this function (optionally with an error argument) when you are done processing the supplied chunk.
No more data will be pushed into the stream until the callback is called. If you don't call it, then the chunk will not be considered as processed… which is why your program stops after processing only one line.
Just add:
done();
at the end of the Newliner.prototype._transform and Greper.prototype._transform functions.
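With the callback added, the two transforms look like this (the regex filter in Greper is restored from the commented-out code in the question):

Newliner.prototype._transform = function(chunk, enc, done) {
  var split = 0;
  for (var i = 0; i < chunk.length; i++) {
    if (chunk[i] == 10) {            // 10 is the byte value of '\n'
      this.push(chunk.slice(split, i));
      split = i + 1;
    }
  }
  done(); // this chunk is fully processed; let the next one in
}

Greper.prototype._transform = function(chunk, enc, done) {
  var a = chunk.toString();
  if (this.regex.test(a)) {
    this.push(chunk);
  }
  done(); // signal completion even when nothing is pushed
}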
