I picked up some old stream code recently (written when 8.x was LTS) and attempted to update it to 12.x. This led to an interesting break in the way I dealt with ENOENT file errors.
Here's a simplification:
const { createServer } = require('http')
const { createReadStream } = require('fs')
const PORT = 3000
const server = createServer((req, res) => {
res.writeHead(200, {
'Content-Type': 'application/json'
})
const stream = createReadStream(`not-here.json`, {encoding: 'utf8'})
stream.on('error', err => {
stream.push(JSON.stringify({data: [1,2,3,4,5]}))
stream.push(null)
})
stream.pipe(res)
})
server.listen(PORT)
server.on('listening', () => {
console.log(`Server running at http://localhost:${PORT}/`)
})
In Node 8, the above code works fine. I'm able to intercept the error, write something to the stream and let it close normally.
In Node 10+ (tested 10, 12, and 13) the stream is already destroyed when my error callback is called. I can't push new things on the stream and handle the error gracefully for the client side.
Was this an intentional change and can I still handle this error in a nice way for the clint side?
One possibility. Open the file yourself and only create the stream with that already successfully opened file. That will allow you to handle ENOENT (or any other errors upon opening the file) before you get into the messy stream error handling mechanics. The stream architecture seems most aligned with aborting upon error, not recovering with some alternate behavior.
const { createServer } = require('http');
const fs = require('fs');
const PORT = 3000;
const server = createServer((req, res) => {
res.writeHead(200, {'Content-Type': 'application/json'});
fs.open('not-here.json', {encoding: 'utf8'}, (err, fd) => {
if (err) {
// send alternative response here
res.end(JSON.stringify({data: [1,2,3,4,5]}));
} else {
const stream = fs.createReadStream(null, {fd, encoding: 'utf8'});
stream.pipe(res);
}
});
});
server.listen(PORT);
server.on('listening', () => {
console.log(`Server running at http://localhost:${PORT}/`)
});
You could also try experimenting with the autoDestroy or autoClose options on your stream to see if any of those flags will allow the stream to still be open for you to push data into it, even if the file created an error opening or reading. The doc on those flags is not very complete so some combination of programming experiements and studying the code would be required to see if they could be manipulated to still add data to the stream after your stream got an error.
The answer by jfriend00 pointed me in the right direction.
Here are two different ways I solved this. I wanted a function that returned a stream rather than handle the error in the req handler function. This is more like what I'm actually doing in real code.
Handling error from stream:
Just like above except I took care to manually destroy the stream. Does this correctly take care of the internal file descriptor? I think it does.
const server = createServer((req, res) => {
res.writeHead(200, {
'Content-Type': 'application/json'
})
getStream().pipe(res)
})
function getStream() {
const stream = createReadStream(`not-here.json`, {
autoClose: false,
encoding: 'utf8'
})
stream.on('error', err => {
// handling "no such file" errors
if (err.code === 'ENOENT') {
// push JSON data to stream
stream.push(JSON.stringify({data: [1,2,3,4,5]}))
// signal the end of stream
stream.push(null)
}
// destory/close the stream regardless of error
stream.destroy()
console.error(err)
})
return stream
}
Handling the error during file open:
Like jfriend00 suggests.
const { promisify } = require('util')
const { Readable } = require('stream')
const { open, createReadStream } = require('fs')
const openAsync = promisify(open)
const server = createServer(async (req, res) => {
res.writeHead(200, {
'Content-Type': 'application/json'
})
const stream = await getStream()
stream.pipe(res)
})
async function getStream() {
try {
const fd = await openAsync(`not-here.json`)
return createReadStream(null, {fd, encoding: 'utf8'})
} catch (error) {
console.log(error)
// setup new stream
const stream = new Readable()
// push JSON data to stream
stream.push(JSON.stringify({data: [1,2,3,4,5]}))
// signal the end of stream
stream.push(null)
return stream
}
}
I still like handling in the stream better but would love to hear reasons why you might do it one way or the other.
Related
I'm learning Node.js and while touching events and streams topic I encountered this example:
const http = require('http');
const fs = require('fs');
http
.createServer((req, res) => {
const fileStream = fs.createReadStream('./some-data.txt', 'utf8');
fileStream.on('open', () => {
fileStream.pipe(res);
});
fileStream.on('error', (err) => {
res.end(err);
});
})
.listen(5000);
While I understand the logic behind this code (server transfers some big data in chunks with the help of streams), I don't understand the benefit of using 'open' event.
Same code, where I just pipe data from read stream into write one (res object) right after creating fileStream without listening to 'open', produces exactly same result. What is the difference and in which cases should I listen to 'open'?
const http = require('http');
const fs = require('fs');
http
.createServer((req, res) => {
const fileStream = fs.createReadStream('./some-data.txt', 'utf8');
fileStream.pipe(res);
fileStream.on('error', (err) => {
res.end(err);
});
})
.listen(5000);
In this particular case, it does not really make any difference if you use the open event or not. If the file does not exist, the error event will be emitted anyway and you will send it via res.end(err.message) (not that res.end(err) fails because the err is an object).
The open event tells us that the stream is open so that we can handle that in the code.
I am looking at a few, currently widely used request libraries and how I could use them to automate file downloads + make them reliable enough.
I stumbled over download (npm), but since its based on got (npm) I thought I would try got first directly.
Problem
One problem I could encounter while downloading a file, is that the source file (on the server) could be overwritten during download. When I try reproduce this behaviour with got, got just stops the download process without rising any errors.
What I have so far
The only solution I could come up with, was to use got.stream - piping the request into a FileWriter, and compare total with transferred after the request has ended.
const app = require('express')();
const fs = require('fs');
const stream = require('stream');
const { promisify } = require('util');
const got = require('got');
const pipeline = promisify(stream.pipeline);
app.use('/files', require('express').static('files'));
app.listen(8080);
(async () => {
try {
let progress = null;
// Setup Got Request + EventHandlers
const request = got.stream('http://localhost:8080/files/1gb.test')
.on('downloadProgress', (p) => { progress = p; })
.on('end', () => {
console.log("GOT END");
console.log(progress && progress.transferred === progress.total ? "COMPLETE" : "NOTCOMPLETE");
})
// this does not get fired when source file is overwritten
.on('error', (e) => {
console.log("GOT ERROR");
console.log(e.message);
});
// WriteStream + EventHandlers
const writer = fs.createWriteStream('./downloaded/1gb_downloaded.test')
.on('finish', () => {
console.log("WRITER FINISH");
})
.on('error', (error) => {
console.log("WRITER ERROR", error.message);
})
.on('end', () => {
console.log("WRITER END");
})
.on('close', () => {
console.log("WRITER CLOSE");
});
await pipeline(request, writer);
} catch (e) {
console.error(e.name, e.message);
}
})();
Where do the files come from
In the real world the files i am trying to download are coming from a server which I do not have access to, I don't own it. I don't have any information how this server is setup. However I added a simple local express server to the example code above to try things out.
const app = require('express')();
app.use('/files', require('express').static('files'));
app.listen(8080);
Question
Is this solution reliable enough to detect a "none-finished" download ( so for the case the source file gets overwritten during download ) ? Or are there any othere events I could listen to which I missed ?
The got Request stream emits a error event whenever something goes wrong.
const request = got('http://localhost:8080/files/1gb.test')
.on('downloadProgress', (p) => { progress = p; })
.on('end', (e) => {
console.log("GOT END");
})
.on('error', (err) => {
// Handle error here
});
Various properties in the error object are available here
progress.total will not be available unless the server explicity sets the Content-Length (Most servers do, but you might want to have a look out for that)
It seems there is no inbuilt way to safely check if a download has been completed 100% using got. I came to the conclusion that my best option for now would be to use the NodeJS Module http, which includes, on the ClientRequest Object, an aborted property. When the ReadStream emits an end event I can check whether aborted is true or false.
I tested this method in the case when the source file gets overwritten during download, it works !
const http = require('http');
const app = require('express')();
const fs = require('fs');
app.use('/files', require('express').static('files'));
app.listen(8080);
http.get('http://localhost:8080/files/1gb.test', function (response) {
// WriteStream + EventHandlers
const writer = fs.createWriteStream('./downloaded/1gb_downloaded.test')
.on('finish', () => {
console.log("WRITER FINISH");
})
.on('error', (error) => {
console.log("WRITER ERROR", error.message);
})
.on('end', () => {
console.log("WRITER END");
})
.on('close', () => {
console.log("WRITER CLOSE");
});
// ReadStream + EventHandlers
response
.on('error', (e) => {
console.log("READER ERROR", e.message)
})
.on('end', () => {
console.log("READER END")
console.log(response.aborted ? "DOWNLOAD NOT COMPLETE" : "DOWNLOAD COMPLETE")
})
.on('close', () => {
console.log("READER CLOSE")
})
response.pipe(writer);
});
On the upside, this gives me -1 on dependencies :) , since I don't need got.
On the downside, this just assures me, that a running download was not aborted due to the source file being overwritten. When using http module I need to include more error handling when for example the file was not found to begin with, which had been more convenient using a request library like axios or got.
UPDATE
Realizing that the ReadableStream from http has something like the aborted property made me wonder why none of the request wrapper libraries like got does offer something similar. So I tried axios again, with :
axios({
method: 'get',
url: 'http://localhost:8080/files/1gb.test',
responseType: 'stream'
}).then( function ( response ) {
});
Here the ReadableStream comes in response.data and it has the same aborted property ! 🎉 .
I am trying to fetch pdf url as stream from axios. I need to further upload that file to another location and return the hash of the uploaded file. I have third party function which accepts the stream, and upload file to target location. How can I use same stream to get the hash of the file?
I am trying to run below code:
const getFileStream = await axios.get<ReadStream>(externalUrl, {
responseType: "stream"
});
const hashStream = crypto.createHash("md5");
hashStream.setEncoding("hex");
const pHash = new Promise<string>(resolve => {
getFileStream.data.on("finish", () => {
resolve(hashStream.read());
});
});
const pUploadedFile = externalUploader({
stream: () => getFileStream.data
});
getFileStream.data.pipe(hashStream);
const [hash, uploadedFile] = await Promise.all([pHash, pUploadedFile]);
return { hash, id: uploadedFile.id };
After running this code, when I download the same pdf, I am getting corrupted file
You can reuse the same axios getFileStream.data to pipe to multiple sinks as long as they are consumed simultaneously.
Below is an example of downloading a file using an axios stream and "concurrently" calculating the MD5 checksum of the file while uploading it to a remote server.
The example will output stdout:
Incoming file checksum: 82c12f208ea18bbeed2d25170f3669a5
File uploaded. Awaiting server response...
File uploaded. Done.
Working example:
const { Writable, Readable, Transform, pipeline } = require('stream');
const crypto = require('crypto');
const https = require('https');
const axios = require('axios');
(async ()=>{
// Create an axios stream to fetch the file
const axiosStream = await axios.get('https://upload.wikimedia.org/wikipedia/commons/thumb/8/86/Map_icon.svg/128px-Map_icon.svg.png', {
responseType: "stream"
});
// To re-upload the file to a remote server, we can use multipart/form-data which will require a boundary key
const key = crypto.randomBytes(16).toString('hex');
// Create a request to stream the file as multipart/form-data to another server
const req = https.request({
hostname: 'postman-echo.com',
path: '/post',
method: 'POST',
headers: {
'content-type': `multipart/form-data; boundary=--${key}`,
'transfer-encoding': 'chunked'
}
});
// Create a promise that will be resolved/rejected when the remote server has completed the HTTP(S) request
const uploadRequestPromise = new Promise(resolve => req.once('response', (incomingMessage) => {
incomingMessage.resume(); // prevent response data from queuing up in memory
incomingMessage.on('end', () => {
if(incomingMessage.statusCode === 200){
resolve();
}
else {
reject(new Error(`Received status code ${incomingMessage.statusCode}`))
}
});
}));
// Construct the multipart/form-data delimiters
const multipartPrefix = `\r\n----${key}\r\n` +
'Content-Disposition: form-data; filename="cool-name.png"\r\n' +
'Content-Type: image/png\r\n' +
'\r\n';
const multipartSuffix = `\r\n----${key}--`;
// Write the beginning of a multipart/form-data request before streaming the file content
req.write(multipartPrefix);
// Create a promise that will be fulfilled when the file has finished uploading
const uploadStreamFinishedPromise = new Promise(resolve => {
pipeline(
// Use the axios request as a stream source
axiosStream.data,
// Piggyback a nodejs Transform stream because of the convenient flush() call that can
// add the multipart/form-data suffix
new Transform({
objectMode: false,
transform( chunk, encoding, next ){
next( null, chunk );
},
flush( next ){
this.push( multipartSuffix );
next();
}
}),
// Write the streamed data to a remote server
req,
// This callback is executed when all data from the stream pipe has been processed
(error) => {
if( error ){
reject( error );
}
else {
resolve();
}
}
)
});
// Create a MD5 stream hasher
const hasher = crypto.createHash("md5");
// Create a promise that will be resolved when the hash function has processed all the stream
// data
const hashPromise = new Promise(resolve => pipeline(
// Use the axios request as a stream source.
// Note that it's OK to use the same stream to pipe into multiple sinks. In this case, we're
// using the same axios stream for both calculating the haas, and uploading the file above
axiosStream.data,
// The has function will process stream data
hasher,
// This callback is executed when all data from the stream pipe has been processed
(error) => {
if( error ){
reject( error );
}
else {
resolve();
}
}
));
/**
* Note that there are no 'awaits' before both stream sinks have been established. That is
* important since we want both sinks to process data from the beginning of stream
*/
// We must wait to call the hash function's digest() until all the data has been processed
await hashPromise;
const hash = hasher.digest("hex");
console.log("Incoming file checksum:", hash);
await uploadStreamFinishedPromise;
console.log("File uploaded. Awaiting server response...");
await uploadRequestPromise;
console.log("File uploaded. Done.");
})()
.catch( console.error );
Is there a way that using express a route consumer can send an input stream to the endpoint and read it?
In short I want the endpoint user upload a file by streaming it instead of the multipart/form way. Something like:
app.post('/videos/upload', (request, response) => {
const stream = request.getInputStream();
const file = stream.read();
stream.on('done', (file) => {
//do something with the file
});
});
Is it possible to do it?
In Express, the request object is an enhanced version of http.IncomingMessage, which "...implements the Readable Stream interface".
In other words, request is already a stream:
app.post('/videos/upload', (request, response) => {
request.on('data', data => {
...do something...
}).on('close', () => {
...do something else...
});
});
If your intention is to first read the entire file into memory (probably not), you can also use bodyParser.raw():
const bodyParser = require('body-parser');
...
app.post('/videos/upload', bodyParser.raw({ type : '*/*' }), (request, response) => {
let data = req.body; // a `Buffer` containing the entire uploaded data
...do something...
});
My client sends an image file to the server. It works 5 times and then it suddenly stops. I am pretty new using streams and pipe so I am not sure what I am doing wrong.
Server Code
http.createServer(function(req, res) {
console.log("File received");
// This opens up the writeable stream to `output`
var name = "./test"+i+".jpg";
var writeStream = fs.createWriteStream(name);
// This pipes the POST data to the file
req.pipe(writeStream);
req.on('end', function () {
console.log("File saved");
i++;
});
// This is here incase any errors occur
writeStream.on('error', function (err) {
console.log(err);
});
}).listen(3000);
Client code
var request = require('request');
var fs = require('fs');
setInterval(function () {
var readStream = fs.createReadStream('./test.jpg');
readStream.on('open', function () {
// This just pipes the read stream to the response object (which goes to the client)
readStream.pipe(request.post('http://192.168.1.100:3000/test'));
console.log("Send file to server");
});
}, 1000);
Behaves like a resource exhaustion issue. Not sure which calls throw errors and which just return. Does the server connect on the 6th call? Does the write stream open? Does the pipe open?
Try ending the connection and closing the pipe after the image is saved. Maybe close the write stream too, don't remember if node garbage collects file descriptors.
I had to do the following on the server side to make this work :
res.statusCode = 200;
res.end();