readstream pipe does not close - node.js

I am using sax-js to read large xml files. I cannot get the program to exit when the parser is finished. Here is the shape of the script, with parser logic removed.
var fs = require('fs');
var sax = require('sax');

var feedFile = 'foo.xml';

var saxStream = sax.createStream(true)
  .on('opentag', function(node) { /* do stuff */ })
  .on('end', function() {
    console.log("parser end event");
  });

var options = {
  flags: 'r',
  encoding: 'utf8',
  mode: 0666,
  bufferSize: 1024
};

fs.createReadStream(feedFile, options)
  .on('error', function(err) {
    throw err;
  })
  .on('end', function() {
    console.log("read stream end event");
  })
  .pipe(saxStream);
Everything works properly, and the console logs the messages in this order:
read stream end event
parser end event
And then the process should exit. It does not. What am I missing?
EDIT: What I am missing.
There was a resource still open, so Node could not exit. The temptation is to start calling process.exit() somewhere, anywhere; I tried that here and it broke things.
My script could not exit because it held an open MongoDB connection (managed by mongoose). Here, in essence (not in detail), is what fixed it:
saxStream.on('end', function() {
  console.log("parser end event");
  mongoose.disconnect();
});
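Here is a minimal standalone sketch (not from the original post) of the same failure mode: any open handle keeps the event loop alive, and Node exits only once the last one is released.
var fs = require('fs');

// any open handle (timer, socket, DB connection, ...) keeps node running
var timer = setInterval(function() {}, 1000);

fs.createReadStream(__filename)
  .on('end', function() {
    clearInterval(timer); // release the last handle; the process can now exit
  })
  .resume(); // flow the data so 'end' fires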

Related

How to cancel a requestjs download

I have code similar to the following stripped-down example:
const req = request('http://www.my.url.here.com/file.bin') // 80 MB file
const decipher = ....  // decipher from node.js's crypto module
const output = fs.createWriteStream('result.zip');
const archive = archiver('zip', { zlib: { level: 9 } });
archive.pipe(output);

const stream = req
  .pipe(decipher)
  .on('error', (error) => {
    console.error(`Error deciphering file`)
    req.abort()        // Does nothing
    decipher.unpipe()  // Unpiping to prevent the next step producing an [ERR_STREAM_WRITE_AFTER_END] error
    stream.end()       // End the stream, as an error does not end it automatically
  })

archive.append(stream, { name: 'file.bin' });
Once an error occurs deciphering the file, I don't want to download any more data. But I've noticed that in these scenarios req.abort() does nothing.
In the end I have a partially decrypted file in the archive, but it's still ~80 MB; i.e. the entire file was downloaded despite the error (which I set up to fire near the start of the file).
Why would this occur? How can I prevent the entire file from downloading?
You can destroy the underlying socket. You can get the socket in the socket or response event.
const req = request(options);
req.on('response', function(response) {
  // ...
  response.socket.end(); // or response.socket.destroy();
  // ...
});
req.pipe(...);
For your case, modified a bit (this is basically a theory), you can do:
const req = request(options);
let sock = null;

req.on('socket', function(socket) {
  sock = socket;
}).on('error', () => {
  sock.destroy(); // or sock.end();
});

req.pipe(...);
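As a hedged sketch tying the two together (the URL is a placeholder and a failing Transform stands in for the real decipher step), it would look roughly like this:
const request = require('request');
const { Transform } = require('stream');

const req = request('http://example.com/file.bin'); // placeholder URL
let sock = null;
req.on('socket', (socket) => { sock = socket; });

// stand-in for the real decipher step, failing on the first chunk
const decipher = new Transform({
  transform(chunk, encoding, callback) {
    callback(new Error('decipher failed'));
  }
});

req.pipe(decipher).on('error', (error) => {
  console.error('Error deciphering file:', error.message);
  if (sock) sock.destroy(); // tear down the TCP connection so no more data is downloaded
});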

Deleting file in node.js not working

I am using Node.js with Express.
I am trying to delete a file after sending it to the client with Express.
function deleteFile(file) {
  fs.unlink(file, function (err) {
    if (err) {
      logger.error(err);
    }
  });
}
app.get("/deleteFileAfterDownload", function (req, res) {
  var fileName = "a.pdf";
  var stream = fs.createReadStream(fileName);
  var streamClosed = false;

  req.on('end', function() {
    if (!streamClosed) {
      stream.emit('close');
      // I tried stream.destroy() but that is also not working
    }
  });

  stream.on('close', function () {
    streamClosed = true;
    deleteFile(fileName);
  });

  req.on('data', function() {});

  stream.pipe(res);
});
But the file is not getting deleted. It seems the process is still using the file, because the file is only deleted after I end the process.
Can anybody tell me why?
If I am doing it wrong, please tell me a good way.
Please add a log in deleteFile to make sure it is called.
Try simplifying it:
var fileName = "a.pdf";
var stream = fs.createReadStream(fileName);
stream.pipe(res);
res.once("finish", function () {
  deleteFile(fileName);
});
The previous example only deletes the file if the download finished; if you want to delete the file unconditionally, try the following:
var fileName = "a.pdf";
var stream = fs.createReadStream(fileName);
stream.pipe(res).once("close", function () {
  stream.close();
  deleteFile(fileName);
});
stream.close() is important here, because the read stream is not closed automatically if the pipe is aborted.
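As another option (a sketch reusing app, logger, and deleteFile from the question), Express's res.download() accepts a callback that runs once the transfer completes or fails, which is a natural place to delete the file:
app.get("/deleteFileAfterDownload", function (req, res) {
  var fileName = "a.pdf";
  res.download(fileName, function (err) {
    if (err) {
      logger.error(err); // the response may already be partially sent at this point
    }
    deleteFile(fileName); // delete whether or not the transfer succeeded
  });
});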

How can I resume after an error event in a piped stream in Node.js?

After I emit an error event in MyWritableStream, data transmission stops. What do I need to do to resume the data transfer?
var readable = fs.createReadStream('test.txt');
var writable = new MyWritableStream();

writable.on('error', function(error) {
  console.log('error', error);
  // How can I resume?
});

writable.on('finish', function() {
  console.log('finished');
});

readable.pipe(writable);
I know this question is old, but you might wanna check out https://github.com/miraclx/xresilient
I built this for this exact same reason (works best with seekable streams).
You define a function that returns a readable stream, and the library measures the number of bytes that have passed through until an error occurs.
Once the readable stream encounters an error event, it calls the defined function again with the number of bytes read so far, so you can index into the stream source.
Example:
const fs = require('fs');
const xresilient = require('xresilient');

const readable = xresilient(({bytesRead}) => {
  return generateSeekableStreamSomehow({start: bytesRead});
}, {retries: 5});

const writable = fs.createWriteStream('file.test');

readable.pipe(writable);
File streams are indexable with the start option of the fs.createReadStream() function.
HTTP Requests are indexable with the Range HTTP Header.
Check it out.
https://www.npmjs.com/package/xresilient
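For example, here is a hedged sketch of what that retry function could look like for a local file ('source.bin' is a placeholder), using the start option mentioned above:
const fs = require('fs');
const xresilient = require('xresilient');

// reopen the file at the byte offset where the previous attempt stopped
const readable = xresilient(({bytesRead}) => {
  return fs.createReadStream('source.bin', {start: bytesRead});
}, {retries: 5});

readable.pipe(fs.createWriteStream('file.test'));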
I am not sure if this is normal practice, but I can't see another solution for now, and it works for me. If you can suggest a more accurate solution, please do.
We can track the readable stream instance using the pipe event on the writable one:
var util = require('util');
var Writable = require('stream').Writable;

function WriteableStream(options) {
  Writable.call(this, options);
  this.source = null;
  var instance = this;

  this.on('pipe', function(source) {
    instance.source = source;
  });
}
util.inherits(WriteableStream, Writable);
So when we emit an error event and the readable stream is unpiped automatically, we can re-pipe it ourselves:
WriteableStream.prototype._write = function(chunk, encoding, done) {
  this.emit('error', new Error('test')); // unpipes readable
  done();
};

WriteableStream.prototype.resume = function() {
  this.source.pipe(this); // re-pipes readable
};
Finally, we will use it the following way:
var readable = fs.createReadStream(file);
var writeable = new WriteableStream();

writeable.on('error', function(error) {
  console.log('error', error);
  writeable.resume();
});

readable.pipe(writeable);

callback to handle completion of pipe

I am using the following Node.js code to download documents from some URL and save them to disk.
I want to be informed when the document has been downloaded. I have not seen any callback with pipe. Or, is there any 'end' event that can be captured on completion of the download?
request(some_url_doc).pipe(fs.createWriteStream('xyz.doc'));
Streams are EventEmitters, so you can listen to certain events. As you said, there is a finish event (previously end) on the writable side of the pipe.
var stream = request(...).pipe(...);
stream.on('finish', function () { ... });
For more information about which events are available you can check the stream documentation page.
Based on the Node.js documentation, http://nodejs.org/api/stream.html#stream_event_finish,
you should handle the writable stream's finish event.
var writable = getWriteable();
var readable = getReadable();
readable.pipe(writable);
writable.on('finish', function(){ ... });
Here is a code snippet for piping content from the web via http(s) to the filesystem. As @starbeamrainbowlabs noticed, the finish event does the job:
var tmpFile = "/tmp/somefilename.doc";
var ws = fs.createWriteStream(tmpFile);

ws.on('finish', function() {
  // pipe done here, do something with file
});

var client = url.slice(0, 5) === 'https' ? https : http;
client.get(url, function(response) {
  return response.pipe(ws);
});
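On newer Node versions (10+), stream.pipeline() offers a single completion callback that also forwards errors from any stream in the chain; a minimal sketch with placeholder file names:
const fs = require('fs');
const { pipeline } = require('stream');

pipeline(
  fs.createReadStream('source.doc'),  // placeholder source
  fs.createWriteStream('xyz.doc'),
  function (err) {
    if (err) {
      console.error('Pipe failed:', err);
    } else {
      console.log('Pipe succeeded, file fully written');
    }
  }
);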
I found a slightly different solution to my problem in this context; thought it worth sharing.
Most of the examples create read streams from a file, but in my case the read stream has to be created from a JSON string coming from a message pool.
var through2 = require('through2');

var jsonStream = through2.obj(function(chunk, encoding, callback) {
  this.push(JSON.stringify(chunk, null, 4) + '\n');
  callback();
});

// message.value --> value/text to write in write.txt
jsonStream.write(JSON.parse(message.value));

var writeStream = sftp.createWriteStream("/path/to/write/write.txt");

// "close" event didn't work for me!
writeStream.on('close', function () {
  console.log("- done!");
  sftp.end();
});

// "finish" event didn't work for me either!
writeStream.on('finish', function () {
  console.log("- done!");
  sftp.end();
});

// finally this worked for me!
jsonStream.on('data', function(data) {
  var toString = Object.prototype.toString.call(data);
  console.log('type of data:', toString);
  console.log("- file transferred");
});

jsonStream.pipe(writeStream);
Here's a solution that handles errors in requests and calls a callback after the file is written:
request(opts)
  .on('error', function(err) { return callback(err); })
  .pipe(fs.createWriteStream(filename))
  .on('finish', function () {
    return callback();
  });

Setting process.stdout to a file per node.js core cluster worker

I'm trying to use node core's cluster feature.
I would like the stdout and stderr streams to output to a file, one for each worker id.
Something much like the following:
var fs = require('fs'),
    env = process.env,
    workerId = env.NODE_WORKER_ID || env.NODE_UNIQUE_ID;

process.stdout = fs.createWriteStream(__dirname + '/app#' + workerId + '.log', {
  encoding: 'utf8'
});
Unfortunately, it does not seem possible to replace process.stdout like this.
Is there a way to achieve this, or should this be done differently? Currently when I run my cluster I am getting all output from all processes in one console, which is extremely messy.
I ended up doing the following:
// stdoutFile / stderrFile are per-worker log paths, and stdout / stderr are references
// to process.stdout / process.stderr, defined earlier in the original code (not shown)

// create a new stdout file stream
var stdoutFS = fs.createWriteStream(stdoutFile, {
  encoding: 'utf8',
  flags: 'a+'
});

// create a new stderr file stream
var stderrFS = fs.createWriteStream(stderrFile, {
  encoding: 'utf8',
  flags: 'a+'
});

// pipe stdout to a worker file
var unhookStdout = hookWriteStream(stdout, function(string, encoding, fd) {
  stdoutFS.write(string, encoding || 'utf8');
});
console.log('\n\nPrepared new stdout hook to worker file.');

// pipe stderr to a worker file
var unhookStderr = hookWriteStream(stderr, function(string, encoding, fd) {
  stderrFS.write(string, encoding || 'utf8');
});
console.log('Prepared new stderr hook to worker file.');

// unhook when things go wrong
stdoutFS.once('close', function() {
  unhookStdout();
  console.log('Unhooked stdout.');
});
stdoutFS.once('error', function(err) {
  unhookStdout();
  console.error('Error: Unhooked stdout due to error %j.', err);
});
stderrFS.once('close', function() {
  unhookStderr();
  console.log('Unhooked stderr.');
});
stderrFS.once('error', function(err) {
  unhookStderr();
  console.error('Error: Unhooked stderr due to error %j.', err);
});
function hookWriteStream(stream, callback) {
  var oldWrite = stream.write;

  stream.write = (function(write) {
    return function(string, encoding, fd) {
      write.apply(stream, arguments);
      callback(string, encoding, fd);
    };
  })(stream.write);

  return function() {
    stream.write = oldWrite;
  };
}
It may not be very elegant, but so far this is the best solution I've found.
Looks like my idea works to some degree. As long as most of the logging is done using console.log and not written directly to stdout, you'll be good.
Like I said in the comments below, use a script like this:
fs = require 'fs'
{exec} = require 'child_process'

execAndPipe = (execString) ->
  piper = exec execString

  piper.stdout.on 'data', (data) ->
    if data[0...'PROCESS'.length] == 'PROCESS'
      # extract the worker ID and output
      # to a corresponding file

  piper.stderr.on 'data', (data) ->
    if data[0...'PROCESS'.length] == 'PROCESS'
      # extract the worker ID and output
      # to a corresponding file

task 'run', 'Run the server', ->
  execAndPipe 'node blah.js'
to run your server. Then just redefine console.log like:
console.log = function (d) {
  process.stdout.write('PROCESS' + WORKERID + d + '\n');
};
I kinda doubt you'll be able to rebind stdout directly, so this might be amongst your best options.
If you don't want anything to output to the console at ALL, you could rebind console.log like:
console.log = function (d) {
  var str = fs.createWriteStream(__dirname + '/app#' + workerId + '.log', {
    encoding: 'utf8',
    flags: 'a'
  });
  str.end(d + '\n'); // append the message to this worker's log file instead of the console
};
And forget about the external script.
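Another approach worth sketching (hedged, and not from the answers above): the cluster module's silent option turns each worker's stdout/stderr into pipes in the master process, which can then be redirected to per-worker files without touching process.stdout inside the workers.
var cluster = require('cluster');
var fs = require('fs');

if (cluster.isMaster) {
  cluster.setupMaster({ silent: true }); // workers' stdout/stderr become pipes in the master
  for (var i = 0; i < 2; i++) {
    var worker = cluster.fork();
    var log = fs.createWriteStream(__dirname + '/app#' + worker.id + '.log', { flags: 'a+' });
    worker.process.stdout.pipe(log);
    worker.process.stderr.pipe(log);
  }
} else {
  console.log('hello from worker ' + cluster.worker.id); // ends up in that worker's own file
}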
