Node.js - Wait until file is completely written - node.js

I'm using child_process to run wkhtmltopdf to build a PDF from an html document. I want to wait until wkhtmltopdf is finished processing the document into a PDF before I proceed. I think that reading from stdout to capture when wkhtmltopdf sends its done signal is the best way to do this, but the following code reports stdout to be empty at res.send(). How do I set up an event to fire when stdout gives me data?
Code:
var buildPdf = function(error) {
  var pdfLetter;
  var child = exec('wkhtmltopdf temp.html compensation.pdf', function(error, stdout, stderr) {
    if (error)
      res.send({
        err: error.message
      });
    else
      res.send({
        output: stdout.toString()
      });
    // sendEmail();
  });
};

You've encountered a wkhtmltopdf gotcha. It doesn't write status information to STDOUT; it writes it to STDERR.
$ node -e \
"require('child_process').exec( \
'wkhtmltopdf http://stackoverflow.com/ foo.pdf', \
function(err, stdout, stderr) { process.stdout.write( stderr ); } );"
Loading pages (1/6)
content-type missing in HTTP POST, defaulting to application/octet-stream
Counting pages (2/6)
Resolving links (4/6)
Loading headers and footers (5/6)
Printing pages (6/6)
Done
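So the fix for the code in the question is to send stderr rather than stdout back to the client. A minimal sketch, assuming the same Express res from the question:
var exec = require('child_process').exec;

exec('wkhtmltopdf temp.html compensation.pdf', function(error, stdout, stderr) {
  if (error) {
    res.send({ err: error.message });
  } else {
    // wkhtmltopdf's progress lines and the final "Done" arrive on stderr.
    res.send({ output: stderr.toString() });
  }
});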

I've just finished getting this to work via Node.js on Heroku and wanted to post since I had to get over a couple of tiny hurdles.
// Spin up a new child_process to handle wkhtmltopdf.
var spawn = require('child_process').spawn;

// stdin/stdout, but see below for writing to tmp storage.
var wkhtmltopdf = spawn('./path/to/wkhtmltopdf', ['-', '-']);

// Capture stdout (the generated PDF contents) and append it to the response.
wkhtmltopdf.stdout.on('data', function (data) {
  res.write(data);
});

// On process exit, determine the proper response depending on the code.
wkhtmltopdf.on('close', function (code) {
  if (code === 0) {
    res.end();
  } else {
    res.status(500).send('Super helpful explanation.');
  }
});

res.header('Content-Type', 'application/octet-stream');
res.header('Content-Disposition', 'attachment; filename=some_file.pdf');
res.header('Expires', '0');
res.header('Cache-Control', 'must-revalidate, post-check=0, pre-check=0');

// Write some markup to wkhtmltopdf and then end its stdin. The 'close'
// handler above will be triggered and the response will get sent to the user.
wkhtmltopdf.stdin.write(some_markup);
wkhtmltopdf.stdin.end();
On the Cedar-14 stack at Heroku, I couldn't get wkhtmltopdf to write to stdout. The server always responded with Unable to write to destination. The trick there was to write to ./.tmp and then stream the written file back out to the user – easy enough:
var wkhtmltopdf = spawn('./path/to/wkhtmltopdf', ['-', './.tmp/some_file.pdf']);

wkhtmltopdf.on('close', function (code) {
  if (code === 0) {
    // Stream the file.
    fs.readFile('./.tmp/some_file.pdf', function(err, data) {
      res.header('Content-Type', 'application/octet-stream');
      res.header('Content-Disposition', 'attachment; filename=' + filename);
      res.header('Expires', '0');
      res.header('Cache-Control', 'must-revalidate, post-check=0, pre-check=0');
      res.send(data);
    });
  } else {
    res.status(500).send('Super helpful explanation.');
  }
});
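If the generated PDFs can get large, the same idea works without buffering the whole file in memory by streaming it instead of using readFile. A sketch under the same assumptions as above (the filename variable and ./.tmp path):
wkhtmltopdf.on('close', function (code) {
  if (code === 0) {
    res.header('Content-Type', 'application/octet-stream');
    res.header('Content-Disposition', 'attachment; filename=' + filename);
    // Pipe the file to the response instead of reading it into memory first.
    fs.createReadStream('./.tmp/some_file.pdf').pipe(res);
  } else {
    res.status(500).send('Super helpful explanation.');
  }
});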

Related

Node.js Streaming/Piping Error Handling (Change Response Status on Error)

I have millions of rows in my Cassandra db that I want to stream to the client in a zip file (I don't want a potentially huge zip file in memory). I am using the stream() function from the Cassandra-Node driver, piping to a Transformer which extracts the one field from each row that I care about and appends a newline, and piping to archive, which pipes to the Express Response object. This seems to work fine, but I can't figure out how to properly handle errors during streaming.

I have to set the appropriate headers/status before streaming for the client, but if there is an error during the streaming, on the dbStream for example, I want to clean up all of the pipes and reset the response status to something like 404. But if I try to reset the status after the headers are set and the streaming starts, I get Can't set headers after they are sent.

I've looked all over and can't find how to properly handle errors in Node when piping/streaming to the Response object. How can the client tell if valid data was actually streamed if I can't send a proper response code on error? Can anyone help?
function streamNamesToWriteStream(query, res, options) {
  return new Promise((resolve, reject) => {
    let success = true;
    const dbStream = db.client.stream(query);
    const rowTransformer = new Transform({
      objectMode: true,
      transform(row, encoding, callback) {
        try {
          const vote = row.name + '\n';
          callback(null, vote);
        } catch (err) {
          callback(null, err.message + '\n');
        }
      }
    });
    // Handle res events
    res.on('error', (err) => {
      logger.error(`res ${res} error`);
      return reject(err);
    });
    dbStream.on('error', function(err) {
      res.status(404).send(); // Can't set headers after they are sent.
      logger.debug(`dbStream error: ${err}`);
      success = false;
      //res.end();
      //return reject(err);
    });
    res.writeHead(200, {
      'Content-Type': 'application/zip',
      'Content-disposition': 'attachment; filename=myFile.zip'
    });
    const archive = archiver.create('zip');
    archive.on('error', function(err) { throw err; });
    archive.on('end', function(err) {
      logger.debug(`Archive done`);
      //res.status(404).end()
    });
    archive.pipe(res, {
      //end:false
    });
    archive.append(dbStream.pipe(rowTransformer), { name: 'file1.txt' });
    archive.append(dbStream.pipe(rowTransformer), { name: 'file1.txt' });
    archive.finalize();
  });
}
Obviously it's too late to change the headers, so there's going to have to be application logic to detect a problem. Here are a couple of ideas:
Write an unambiguous sentinel of some kind at the end of the stream when an error occurs (see the sketch below). The consumer of the zip file will then need to look for that value to check for a problem.
Perhaps more simply, have the consumer verify the integrity of the zip archive. Presumably if the stream fails, the zip will be corrupted.
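A minimal sketch of the sentinel idea, reworking the dbStream error handler from the question. The ERROR_SENTINEL value and the error.txt entry name are illustrative choices, not a fixed convention:
const ERROR_SENTINEL = 'STREAM_ERROR';

dbStream.on('error', function(err) {
  logger.debug(`dbStream error: ${err}`);
  // Headers are already sent, so instead of changing the status, append a
  // marker entry the consumer can check for after download.
  archive.append(ERROR_SENTINEL + ': ' + err.message, { name: 'error.txt' });
  archive.finalize();
});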

Correct way to remove temp file after response has been sent

I have a simple Express 4 based API that outputs an MP3 file that was generated based on some provided parameters. However, I seem to be unable to delete this temporary MP3 file after the response has been sent out.
What I have:
app.get('/endpoint', function(request, response) {
  // Distill parameters from request and create tempFileMp3
  var stat = Fs.statSync(tempFileMp3);
  response.writeHead(status, {
    'Content-Type': 'audio/mpeg',
    'Content-Length': stat.size
  });
  var stream = Fs.createReadStream(tempFileMp3);
  stream.pipe(response);
});
At first I was hoping that I would be able to delete the tempFileMp3 in a stream event, either the end or close events:
stream
  .on('end', function() {
    Fs.unlinkSync(tempFileMp3);
  })
  .on('close', function() {
    Fs.unlinkSync(tempFileMp3);
  });
But neither the end nor close events are fired.
How can I delete the temporary MP3 file after the response has been sent?
Try capturing the response's finish event:
res.on('finish', function() {
  // remove temp files
});
res.on('error', function() {
  // remove temp files
});
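Filled in for the handler in the question, that might look like the following sketch; it assumes tempFileMp3 is still in scope inside the handler:
app.get('/endpoint', function(request, response) {
  // ... build tempFileMp3 and stream it as before ...
  response.on('finish', function() {
    // The response has been fully flushed; safe to clean up the temp file.
    Fs.unlink(tempFileMp3, function(err) {
      if (err) console.error('Failed to remove temp file:', err);
    });
  });
});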

busboy - is there a way to send the response when all files have been uploaded?

I'm trying to upload files to a server using node.js as backend and angular.js as frontend. I'm using express 4 + busboy for this. I have a table in the frontend where I should display all the files I'm uploading. So if I have 3 files and click on upload, angular should post these files to node.js and after getting the response back, refresh the table with those three files.
This is the function I'm using in angular:
function uploadFiles(files) {
  var fd = new FormData();
  for (var i = 0; i < files.length; i++) {
    fd.append("file", files[i]);
  }
  $http.post('http://localhost:3000/upload', fd, {
    withCredentials: false,
    headers: { 'Content-Type': undefined },
    transformRequest: angular.identity
  }).success(refreshTable()).error(function() {
    console.log("error uploading");
  });
}
and this is from node.js:
app.post('/upload', function(req, res) {
  var busboy = new Busboy({ headers: req.headers });
  busboy.on('file', function (fieldname, file, filename) {
    console.log("Uploading: " + filename);
    var fstream = fs.createWriteStream('./files/' + filename);
    file.pipe(fstream);
  });
  busboy.on('finish', function() {
    res.writeHead(200, { 'Connection': 'close' });
    res.end("");
  });
  return req.pipe(busboy);
});
The problem is that if I upload three files, node.js sends the response as soon as the first file has been uploaded, so the table is updated only with the first file; if I refresh the page, the rest of the files appear.
I think the problem is with this line in node: return req.pipe(busboy); if I remove that line, the post response stays pending for a long time and nothing happens. I think this is an async problem. Does anybody know if there's a way to send the response back only when all files have been uploaded?
thanks
A simple and common solution to this particular problem is to use a counter variable and listen for the finish event on the fs Writable stream. For example:
app.post('/upload', function(req, res) {
  var busboy = new Busboy({ headers: req.headers });
  var files = 0, finished = false;
  busboy.on('file', function (fieldname, file, filename) {
    console.log("Uploading: " + filename);
    ++files;
    var fstream = fs.createWriteStream('./files/' + filename);
    fstream.on('finish', function() {
      if (--files === 0 && finished) {
        res.writeHead(200, { 'Connection': 'close' });
        res.end("");
      }
    });
    file.pipe(fstream);
  });
  busboy.on('finish', function() {
    finished = true;
  });
  return req.pipe(busboy);
});
The reason for this is that busboy's finish event is emitted once the entire request has been fully processed, including files. However, there is some delay between the point when there is no more data to write to a particular file and the point when the OS/node has flushed its internal buffers to disk (and closed the file descriptor). Listening for the finish event on an fs Writable stream lets you know that the file descriptor has been closed and no more writes are going to occur.

Writing PDF to a browser failing in Nodejs

I'm trying to read a PDF from a URL and display it to a user's browser (via the passed in 'response' object). I've tried to use the code below and it works sometimes, but generally fails:
function writePdfToBrowser(url, response) {
  http.get(url, function(res) {
    logger.verbose('about to start download...');
    var chunks = [];
    res.on('data', function(chunk) {
      chunks.push(chunk);
    });
    res.on("end", function() {
      logger.verbose('downloaded');
      var buffer = Buffer.concat(chunks);
      //write downloaded pdf to the original response
      response.write(buffer);
      //response.send(buffer);
      response.end();
    });
  }).on("error", function() {
    logger.error("error!");
  });
}
In the new page where I attempted to load the PDF, it would just say "Failed to load pdf".
I'm new to Node, so I'm not sure where the problem lies; any ideas? Does anyone have working code to do the same thing?
Thank you for any help!
Mark
Use piping:
function pipe(url, res) {
  var request = http.get(url, function(response) {
    res.writeHead(response.statusCode, response.headers);
    response.pipe(res);
  });
  request.on('error', function(error) {
    res.statusCode = 500;
    res.end(error.message);
  });
}
... and next time, please provide more information about what fails and how: some logs, what the response looks like when inspected in the browser, and so on.

Node.js: serving dynamic pdf, coming up empty

I have a node service that fetches a pdf from an API and serves that pdf.
When I curl or directly open the API, I do see the correct pdf.
But when I serve it from my Node app, I get an empty pdf.
Here's the section of my code that does the pdf render.
} else if (options.type === 'pdf') {
  res.writeHead(200, { 'content-type': 'application/pdf', 'content-disposition': 'attachment; filename=invoice.pdf' });
  res.end(data.invoice);
I've console.log'ed data.invoice to know it's the right stuff.
typeof(data.invoice) gives string; but I've also tried res.end(new Buffer(data.invoice)); which didn't work either.
Here's the section of my code that fetches the data
var http_options = {
  method    : options.method
  , host    : Config.API.host
  , path    : options.path
  , port    : Config.API.port
  , headers : options.headers
};

var req = http.request(http_options, function (response) {
  var raw_response = "";
  response.on('data', function (response_data) {
    raw_response += response_data.toString();
  });
  response.on('end', function () {
    if (response.statusCode !== 200) {
      cb(raw_response);
    } else {
      cb(false, raw_response);
    }
  });
});

req.setTimeout(timeout, function () {
  req.abort();
  cb("API connection timed out");
});

req.on('error', function (error) {
  cb("API error while requesting for " + options.path + '\n' + error + '\n' + "http options: " + JSON.stringify(http_options));
});

req.end();
It's quite likely that the toString() and concatenation when you're receiving the PDF are corrupting it. Try writing raw_response to a file (you can use writeFileSync() since this is just a one-time test) and doing a byte-for-byte comparison with the same PDF retrieved with curl.
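For example, a throwaway debugging version of the 'end' handler above (the /tmp output path is just an example):
var fs = require('fs');

response.on('end', function () {
  // One-time debugging aid: dump the accumulated body to disk so it can be
  // byte-compared (e.g. with `cmp`) against the same PDF fetched via curl.
  fs.writeFileSync('/tmp/node_fetched.pdf', raw_response);
});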
Note that if the process of string conversion has corrupted it, trying to convert it back to a buffer before sending it won't help. You'll have to keep the whole thing as a buffer from start to finish.
Since you don't intend to modify or read this data in transit, I suggest just using the pipe function to pipe all the data coming in from the API response straight out to the response you're serving to the browser. This question has a good sample, but here's an excerpt.
req.on('response', function (proxy_response) {
  proxy_response.pipe(response);
  response.writeHead(proxy_response.statusCode, proxy_response.headers);
});
Note that there's no reason to convert the chunks coming in from the response from Buffers to something else, just write them through as unmodified buffers, and stream them (this is what pipe will do for you) instead of accumulating them to get maximum efficiency (and node.js streaming hipster points).
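Applied to the fetching code above, that might look like the following sketch. It assumes the outgoing browser response (called browser_res here, a hypothetical name) is available where the request is made, instead of going through the cb/string accumulation:
var req = http.request(http_options, function (response) {
  // Forward the API's status and headers, then stream the raw buffers
  // straight through; no toString(), so the binary PDF can't be mangled.
  browser_res.writeHead(response.statusCode, response.headers);
  response.pipe(browser_res);
});

req.on('error', function (error) {
  browser_res.statusCode = 500;
  browser_res.end("API error while requesting " + options.path);
});

req.end();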
