I am using the following Node.js code to download documents from a URL and save them to disk.
I want to be notified when a document has finished downloading. I have not seen any callback with pipe. Is there an 'end' event that can be captured on completion of the download?
request(some_url_doc).pipe(fs.createWriteStream('xyz.doc'));
Streams are EventEmitters, so you can listen for certain events. As you said, there is a finish event (previously end) on the stream returned by pipe().
var stream = request(...).pipe(...);
stream.on('finish', function () { ... });
For more information about which events are available, you can check the stream documentation page.
Based on the Node.js documentation (http://nodejs.org/api/stream.html#stream_event_finish), you should handle the writable stream's finish event:
var writable = getWriteable(); // placeholder: obtain any writable stream
var readable = getReadable();  // placeholder: obtain any readable stream
readable.pipe(writable);
writable.on('finish', function(){ ... });
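For instance, here is a minimal concrete sketch of the same pattern (file paths are hypothetical), copying one file to another and reporting completion:
var fs = require('fs');

var readable = fs.createReadStream('input.txt');   // hypothetical source file
var writable = fs.createWriteStream('output.txt'); // hypothetical destination

readable.pipe(writable);

// 'finish' fires on the writable side once all data has been flushed
writable.on('finish', function () {
    console.log('copy complete');
});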
Here is a code snippet for piping content from the web via http(s) to the filesystem. As #starbeamrainbowlabs noticed, the finish event does the job:
var fs = require('fs');
var http = require('http');
var https = require('https');

var tmpFile = "/tmp/somefilename.doc";
var ws = fs.createWriteStream(tmpFile);
ws.on('finish', function() {
    // pipe done here, do something with the file
});
// 'url' is assumed to hold the document URL; pick the client by scheme
var client = url.slice(0, 5) === 'https' ? https : http;
client.get(url, function(response) {
    response.pipe(ws);
});
I found a somewhat different solution to my problem in this context, which I thought was worth sharing.
Most examples create read streams from a file, but in my case the read stream has to be created from a JSON string coming from a message pool.
var jsonStream = through2.obj(function(chunk, encoding, callback) {
this.push(JSON.stringify(chunk, null, 4) + '\n');
callback();
});
// message.value --> value/text to write in write.txt
jsonStream.write(JSON.parse(message.value));
var writeStream = sftp.createWriteStream("/path/to/write/write.txt");
//"close" event didn't work for me!
writeStream.on( 'close', function () {
    console.log( "- done!" );
    sftp.end();
});
//"finish" event didn't work for me either!
writeStream.on( 'finish', function () {
    console.log( "- done!" );
    sftp.end();
});
// finally this worked for me!
jsonStream.on('data', function(data) {
var toString = Object.prototype.toString.call(data);
console.log('type of data:', toString);
console.log( "- file transferred" );
});
jsonStream.pipe( writeStream );
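For what it's worth, one likely reason neither 'close' nor 'finish' fired above is that jsonStream is never ended, so the writable side never reaches its finished state. A minimal sketch of that idea, assuming the same sftp connection and message object as above:
jsonStream.pipe( writeStream );
jsonStream.write(JSON.parse(message.value));
jsonStream.end(); // signal no more data; lets 'finish'/'close' fire downstream

writeStream.on('finish', function () {
    console.log( "- done!" );
    sftp.end();
});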
Here's a solution that handles errors in requests and calls a callback after the file is written:
request(opts)
.on('error', function(err){ return callback(err)})
.pipe(fs.createWriteStream(filename))
.on('finish', function () {
// 'finish' passes no arguments; reaching here means the file was written
return callback();
});
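Note that this only listens for errors on the request itself; an error on the write stream (for example, a permissions problem) would still go unnoticed. A slightly more defensive sketch (in real code you would also guard against calling the callback twice):
var ws = fs.createWriteStream(filename);
request(opts)
    .on('error', function (err) { return callback(err); }) // request/network errors
    .pipe(ws);
ws.on('error', function (err) { return callback(err); })   // filesystem errors
    .on('finish', function () { return callback(); });     // all data flushed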
Related
I am making use of socket.io-client and socket.io-stream to make a request and then stream some data. I have the following code that handles this logic.
Client Server Logic
router.get('/writeData', function(req, res) {
var io = req.app.get('socketio');
var nameNodeSocket = io.connect(NAMENODE_ADDRESS, { reconnect: true });
var nameNodeData = {};
async.waterfall([
checkForDataNodes,
readFileFromS3
], function(err, result) {
if (err !== null) {
res.json(err);
}else{
res.json("Finished Writing to DN's");
}
});
function checkForDataNodes(cb) {
nameNodeSocket.on('nameNodeData', function(data) {
nameNodeData = data;
console.log(nameNodeData);
cb(null, nameNodeData);
});
if (nameNodeData.numDataNodes === 0) {
cb("No datanodes found");
}
}
function readFileFromS3(nameNodeData, cb) {
for (var i in nameNodeData['blockToDataNodes']) {
var IP = nameNodeData['blockToDataNodes'][i]['ipValue'];
var dataNodeSocket = io.connect('http://'+ IP +":5000");
var ss = require("socket.io-stream");
var stream = ss.createStream();
var byteStartRange = nameNodeData['blockToDataNodes'][i]['byteStart'];
var byteStopRange = nameNodeData['blockToDataNodes'][i]['byteStop'];
paramsWithRange['Range'] = "bytes=" + byteStartRange.toString() + "-" + byteStopRange.toString();
//var file = require('fs').createWriteStream('testFile' + i + '.txt');
var getFileName = nameNodeData['blockToDataNodes'][i]['key'].split('/');
var fileData = {
'mainFile': paramsWithRange['Key'].split('/')[1],
'blockName': getFileName[1]
};
ss(dataNodeSocket).emit('sendData', stream, fileData);
s3.getObject(paramsWithRange).createReadStream().pipe(stream);
//dataNodeSocket.disconnect();
}
cb(null);
}
});
Server Logic (that gets the data)
var dataNodeIO = require('socket.io')(server);
var ss = require("socket.io-stream");
dataNodeIO.on('connection', function(socket) {
console.log("Successfully connected!");
ss(socket).on('sendData', function(stream, data) {
var IP = data['ipValue'];
var blockName = data['blockName'];
var mainFile = data['mainFile'];
dataNode.makeDir(mainFile);
dataNode.addToReport(mainFile, blockName);
stream.pipe(fs.createWriteStream(mainFile + '/' + blockName));
});
});
How can I properly disconnect the connections in the function readFileFromS3? I have noticed that calling dataNodeSocket.disconnect() at the end does not work, as I then cannot verify that the data was received on the second server. But if I comment it out, I can see the data being streamed to the second server.
My objective is to close the connections on the client server side.
It appears that the main problem with closing the socket is that you weren't waiting for the stream to finish writing before trying to close the socket. Because the writing is asynchronous and finishes some time later, you were trying to close the socket before the data had been written.
Also, because you were putting asynchronous operations inside a for loop, you were running all your operations in parallel, which may not be exactly what you want: it makes error handling more difficult and increases server load.
Here's the code I would suggest that does the following:
Create a function streamFileFromS3() that streams a single file and returns a promise that will notify when it's done.
Use await in a for loop with that streamFileFromS3() to serialize the operations. You don't have to serialize them, but then you would have to change your error handling to figure out what to do if one errors while the others are already running, and you'd have to be more careful about concurrency issues (a parallel sketch is shown after the code below).
Use try/catch to catch any errors from streamFileFromS3().
Add error handling on the stream.
Change all occurrences of data['propertyName'] to data.propertyName. The only time you need to use brackets is if the property name contains a character that is not allowed in a JavaScript identifier, or if the property name is in a variable. Otherwise, dot notation is preferred.
Add socket.io connection error handling logic for both socket.io connections.
Set the returned status to 500 when there's an error processing the request.
So, here's the code for that:
const ss = require("socket.io-stream");
router.get('/writeData', function(req, res) {
const io = req.app.get('socketio');
function streamFileFromS3(ip, data) {
return new Promise((resolve, reject) => {
const dataNodeSocket = io.connect(`http://${ip}:5000`);
dataNodeSocket.on('connect_error', reject);
dataNodeSocket.on('connect_timeout', () => {
reject(new Error(`timeout connecting to http://${ip}:5000`));
});
dataNodeSocket.on('connect', () => {
// dataNodeSocket connected now
const stream = ss.createStream().on('error', reject);
paramsWithRange.Range = `bytes=${data.byteStart}-${data.byteStop}`;
const filename = data.key.split('/')[1];
const fileData = {
'mainFile': paramsWithRange.Key.split('/')[1],
'blockName': filename
};
ss(dataNodeSocket).emit('sendData', stream, fileData);
// get S3 data and pipe it to the socket.io stream
s3.getObject(paramsWithRange).createReadStream().on('error', reject).pipe(stream);
stream.on('close', () => {
dataNodeSocket.disconnect();
resolve();
});
});
});
}
function connectError(msg) {
res.status(500).send(`Error connecting to ${NAMENODE_ADDRESS}`);
}
const nameNodeSocket = io.connect(NAMENODE_ADDRESS, { reconnect: true });
nameNodeSocket.on('connect_error', connectError).on('connect_timeout', connectError);
nameNodeSocket.on('nameNodeData', async (nameNodeData) => {
try {
for (let item of nameNodeData.blockToDataNodes) {
await streamFileFromS3(item.ipValue, item);
}
res.json("Finished Writing to DN's");
} catch(e) {
res.status(500).json(e);
}
});
});
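If you did prefer the parallel behaviour mentioned in the suggestions above, a minimal sketch would replace the for/await loop with Promise.all(); note that the shared paramsWithRange then becomes a real concurrency hazard:
nameNodeSocket.on('nameNodeData', async (nameNodeData) => {
    try {
        // start all transfers at once and wait for them all to finish
        await Promise.all(nameNodeData.blockToDataNodes.map(
            item => streamFileFromS3(item.ipValue, item)
        ));
        res.json("Finished Writing to DN's");
    } catch(e) {
        res.status(500).json(e);
    }
});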
Other notes:
I don't know what paramsWithRange is, as it is not declared here. When everything ran in parallel, it was shared among all the connections, which is asking for a concurrency issue. In the serialized implementation it is probably safe to share, but the way it is now bothers me: it's a concurrency issue waiting to happen.
I am using Node.js with Express.
I am trying to delete a file after sending it to the client with Express.
function deleteFile (file) {
fs.unlink(file, function (err) {
if (err) {
logger.error(err);
}
});
}
app.get("/deleteFileAfterDownload", function (req, res){
var fileName = "a.pdf"
var stream = fs.createReadStream(fileName);
var streamClosed = false;
req.on('end',function(){
if (!streamClosed){
stream.emit('close');
// I tried stream.destroy() but that is also not working
}
});
stream.on('close', function () {
streamClosed = true;
deleteFile(fileName);
});
req.on('data', function(){});
stream.pipe(res);
});
But the file is not getting deleted. It seems the process is still holding the file open, because the file is only deleted once I end the process.
Can anybody tell me why?
If I am doing it wrong, please tell me a good way.
Please add a log in deleteFile to make sure it is called.
Then try simplifying it:
var fileName = "a.pdf"
var stream = fs.createReadStream(fileName);
stream.pipe(res);
res.once("finish", function () {
deleteFile(fileName);
});
The previous example only deletes the file if the download finished.
If you want to delete the file unconditionally, try the following:
var fileName = "a.pdf";
var stream = fs.createReadStream(fileName);
stream.pipe(res).once("close", function () {
stream.close();
deleteFile(fileName);
});
stream.close() is important here, because the stream is not closed automatically if the pipe is aborted.
I am trying to extract images from a csv file by doing the following:
Parsing/streaming in a large csv file using csv-parse and the fs createReadStream method
Grabbing each line for processing using stream-transform
Extraction of image and other row data for processing using the async waterfall method.
Download and write image to server using request and the fs createWriteStream method
For some reason, after the data gets piped into createWriteStream, there is some case in which an async callback never gets called. I have run this same code using only request, without piping to createWriteStream, and it works. I've also run createWriteStream with a drain event, and then somehow it works? Can anyone explain this to me?
In the code below, request is trying to pipe 14,970 images, but the createWriteStream close or finish events only fire 14,895 times, with error firing 0 times. Could this be a draining issue? Could highWaterMark be exceeded, with a write failure occurring undetected?
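For reference, pipe() already applies backpressure internally, so a full write buffer should pause the source rather than silently drop writes. A minimal sketch of what that drain handling looks like when done by hand (all names hypothetical):
function writeChunk(ws, chunk, done) {
    // write() returns false once the internal buffer passes highWaterMark;
    // the writer should then wait for 'drain' before writing again
    if (ws.write(chunk)) {
        process.nextTick(done);
    } else {
        ws.once('drain', done);
    }
}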
Here is my CSV line-reading code:
var fs = require('fs');
var parse = require('csv-parse');            // assumes the pre-v5 csv-parse API
var transform = require('stream-transform'); // stream-transform module

var first = true;
var parser = parse();
var transformer = transform( (line, complete) => {
if(!first)
extractData(line, complete); // process every line except the header row
else {
first = false;
complete(null);
}
},
() => {
console.log('Done: parseFile');
});
fs.createReadStream(this.upload.location).pipe(parser).pipe(transformer);
Here is the extractData function whose required async callback doesn't always get called:
extractData(line,complete){
var now = new Date();
var image = {
createdAt: now,
updatedAt: now
};
async.waterfall([
next => { // Data Extraction
async.forEachOf(line, (data, i, complete) => {
if(i === 2) image.src = data;
if(i === 3) image.importSrc = data;
complete(null);
}, err => {
if(err) throw err;
next(null);
});
},
next => { // Download Image
var file = fs.createWriteStream('public/'+image.src);
var sendReq = request.get(image.importSrc);
sendReq.on('response', response => {
if (response.statusCode !== 200) {
this.upload.report.image.errors++;
return next(null);
}
});
sendReq.on('error', err => {
this.upload.report.image.errors++;
next(null);
});
sendReq.pipe(file);
file.on('finish', () => {
this.upload.report.image.inserts++;
file.close(next); // Close file and callback
});
file.on('error', err => {
this.upload.report.image.errors++;
next(null);
});
}
], err => {
if(err) throw err;
complete(null);
});
}
As suggested by #mscdex, I've also tried switching finish out for his replacement close approach:
file.close(next); is unnecessary as the file stream is closed automatically by default. What you can do instead is to listen for the close event to know when the file descriptor for the stream has been closed. So replace the entire finish event handler with:
file.on('close', () => {
this.upload.report.image.inserts++;
next(null);
});
I am trying to register an event listener for the end of the data in a pipe transformation. I tried registering the event on all the streams in the pipe:
a) my custom transform stream (StreamToBuffer)
b) the standard file read stream
c) the standard gunzip stream
But unfortunately, none of them works (see the code below). As far as I can tell, only the 'data' event fires, but that does not help.
What I need is to continue processing the tailBuffer in the StreamToBuffer class after the transformation is finished.
Can you suggest how to achieve this?
The code (simplified for brevity):
function samplePipe() {
var streamToBuffer = new StreamToBuffer();
var readStream = fs.createReadStream(bgzFile1, { flags: 'r',
encoding: null,
fd: null,
mode: '0666',
autoClose: true
});
var gunzipTransform = zlib.createGunzip();
readStream.on('end', function() {
//not fired
console.log('end event readStream');
});
streamToBuffer.on('end', function() {
//not fired
console.log('end event streamBuffer');
});
gunzipTransform.on('end', function() {
//not fired
console.log('end event gunzipTransform');
});
readStream
.pipe(gunzipTransform)
.pipe(streamToBuffer)
;
}
StreamToBuffer:
function StreamToBuffer() {
stream.Transform.call(this);
this.tailBuffer = new Buffer(0);
}
util.inherits(StreamToBuffer, stream.Transform);
StreamToBuffer.prototype._transform = function(chunk, encoding, callback) {
this.tailBuffer = Buffer.concat([this.tailBuffer, chunk]);
console.log('streamToBuffer');
// NOTE: callback() is never called here -- this is the bug identified in the EDIT below
}
StreamToBuffer.prototype._flush = function(callback) {
callback();
}
module.exports = StreamToBuffer;
EDITED:
After playing a little with passing a callback function to the StreamToBuffer constructor, I discovered the mistake: the missing callback(); in the _transform() method. After adding it, the 'end' event listener works, at least on the standard read stream.
StreamToBuffer.prototype._transform = function(chunk, encoding, callback) {
this.tailBuffer = Buffer.concat([this.tailBuffer, chunk]);
console.log('streamToBuffer');
callback();
}
Another way is to pass a callback function to the StreamToBuffer constructor and then call it in the _flush method. This has the advantage that we can be sure the transformation is complete.
function samplePipe() {
var streamToBuffer = new StreamToBuffer(processBuffer);
.....
}
function processBuffer(buffer) {
console.log('processBuffer');
}
StreamToBuffer:
function StreamToBuffer(callback) {
stream.Transform.call(this);
this.tailBuffer = new Buffer(0);
this.finishCallback = callback;
}
util.inherits(StreamToBuffer, stream.Transform);
StreamToBuffer.prototype._transform = function(chunk, encoding, callback) {
this.tailBuffer = Buffer.concat([this.tailBuffer, chunk]);
console.log('streamToBuffer');
callback();
}
StreamToBuffer.prototype._flush = function(callback) {
console.log('flushed');
callback();
this.finishCallback(this.tailBuffer);
}
module.exports = StreamToBuffer;
Although I have not received any answers yet (thanks for the other comments, anyway), I think this question can be useful for people like me who are learning Node. If you know a better solution, please answer. Thank you.
For a writable stream, try the finish event instead of the end event (readable streams emit 'end', writable streams emit 'finish'):
streamToBuffer.on('finish', function() {
// finish event fired
});
I am using sax-js to read large XML files. I cannot get the program to exit when the parser is finished. Here is the shape of the script, with the parser logic removed.
var fs = require('fs');
var sax = require('sax');
var feedFile = 'foo.xml';
var saxStream = sax.createStream(true)
.on('opentag', function(node) { /* do stuff */ })
.on('end', function() {
console.log("parser end event");
});
var options = {
flags: 'r',
encoding: 'utf8',
mode: 0666,
bufferSize: 1024
};
fs.createReadStream(feedFile, options)
.on('error', function(err) {
throw err;
})
.on('end', function() {
console.log("read stream end event");
})
.pipe(saxStream);
Everything works properly, and the console gets messages like this:
read stream end event
parser end event
And then the process should exit. It does not. What am I missing?
EDIT: What I was missing.
There was a resource still open, so Node could not exit. The temptation is to start calling process.exit() somewhere, anywhere; I tried that myself in this case and it broke things.
My script could not exit because I had an open MongoDB connection (managed by mongoose). Here is essentially (not in detail) what fixed it:
saxStream.on('end', function() {
console.log("parser end event");
mongoose.disconnect();
});
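As an aside, if you need to track down which resource is keeping a process alive, one option is the undocumented process._getActiveHandles() (an internal, debug-only API that may change between Node versions):
saxStream.on('end', function() {
    // undocumented, debug-only: list whatever is still keeping the event loop alive
    console.log(process._getActiveHandles());
});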