node PDFkit blank pages - node.js

i'm creating a PDF with node.js and this package : https://github.com/devongovett/pdfkit
My problem is that when i download the pdf on the browser it is totaly blanck...
server-side code :
PDFDocument = require('pdfkit');
function creaEtichetta(req, res){
doc = new PDFDocument
size: 'a4'
bufferPages: true
doc.addPage().fontSize(25).text('Here is some vector graphics...', 100, 100);
doc.save()
.moveTo(100, 150)
.lineTo(100, 250)
.lineTo(200, 250)
.fill("#FF3300");
doc.addPage().fillColor("blue").text('Here is a link!', 100, 100).link(100, 100, 160, 27, 'http://google.com/')
doc.pipe(res);
doc.end();
}
exports.creaEtichetta = creaEtichetta;
client-side code :
var data = {};
data.azione = "getEtichettaProdotto";
//Scarico i dati anagrafica
$.ajax({
type: 'POST',
data: JSON.stringify(data),
contentType: 'application/json',
url: 'http://46.101.209.16/endpoint',
success: function(etichettas) {
var blob=new Blob([etichettas]);
var link=document.createElement('a');
link.href=window.URL.createObjectURL(blob);
link.download="Label"+".pdf";
link.click();
}//SUCCESS
});
sorry for bad english, i'm italian

It could be that the binary characters in your pdf aren't being correctly coded in the transfer, which would explain why locally its ok but not when transferred - pdfs are a mix of ascii and binary characters, and if the binary is corrupted it seems that you get a blank pdf.
That's likely to be a browser side issue, this approach worked for me:
https://stackoverflow.com/a/27442914/2900643
Coupled with this:
https://stackoverflow.com/a/33818646/2900643
EDIT: Better still use: https://github.com/eligrey/FileSaver.js/
Server:
var doc = new PDFDocument();
doc.pipe(res);
doc.circle(280, 200, 50).fill("#6600FF");
doc.end();
Browser:
angular.module('app')
.service('PdfService', function($http) {
var svc = this;
svc.getPdf = function() {
return $http.get('/getpdf/', { responseType : 'arraybuffer' });
};
});
angular.module('app')
.controller('PdfCtrl', function($scope, PdfService) {
$scope.getPdf = function() {
PdfService.getPdf().success(function(data) {
var fileName = 'hello.pdf';
var pdf = new Blob([data], {type : 'application/pdf'});
saveAs(pdf, fileName);
})
};
});

I was running in the same issue but without any client-side code involved. So this issue isn't simply client-side. Obviously, the server-side response PDFKit is piping into isn't encoding "binary" as requested by PDFKit, but applying "utf8".
Sadly, as of today there is no way of assigning some default encoding to the stream provided by ServerResponse. See https://github.com/nodejs/node/issues/14146
In my case I was working around this issue for now by collecting chunked output from PDFKit and writing at once. Instead of
var doc = new PDF();
res.type( "pdf" ).attachment( "test.pdf" );
doc.pipe( res );
I am using this:
var doc = new PDF();
res.type( "pdf" ).attachment( "test.pdf" );
var buffers = [];
doc.on( "data", function( chunk ) { buffers.push( chunk ); } );
doc.on( "end", function() {
res.end( Buffer.concat( buffers ), "binary" );
} );
This comes with a disadvantage: since all PDFs are cached in memory this code has an impact on server side memory consumption when it comes to high number of simultaneous requests or to generating huge PDF files.
Confusingly, trying to hand over PDFKit output chunk by chunk didn't work again:
var doc = new PDF();
res.type( "pdf" ).attachment( "test.pdf" );
doc.on( "data", function( chunk ) { res.write( chunk, "binary" ); } );
doc.on( "end", function() { res.end(); } );

Related

Questions regarding pdf to image conversion with Node JS

The plan is to create a pdf file (that only consists of a single page) then the user chooses whether to download as PDF or image. I have already written the code for generating the PDF and it is working fine as of now. The problem now is how to convert this to image. Is it possible to convert files without installing stuff like Ghostscript etc?
I am a complete noob, advice is greatly appreciated. (Recommendations on which libraries to use would also be helpful)
Code for generating the PDF
import PDFDocument from "pdfkit";
static async medicalPrescription(req, res) {
// Some code for generating the PDF contents...
filename = encodeURIComponent(filename) + '.pdf'
res.setHeader('Content-disposition', 'attachment; filename="' + filename + '"')
res.setHeader('Content-type', 'application/pdf')
const content = req.body.content
doc.y = 300
doc.text(content, 50, 50)
doc.pipe(res)
doc.end()
}
The client then receives the generated file and opens it in another tab.
React file that sends the request and opens the response
const handleSubmit = async (e) => {
// Some code for sending the pdf content from the user
fetch("http://localhost:5050/generate-rx", {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: parsed
})
.then(async res => {
if (res.status === 200) {
const blob = await res.blob();
const file = new Blob(
[blob],
{type: 'application/pdf'}
);
const fileURL = URL.createObjectURL(file);
window.open(fileURL);
}
})
}
You can use pdf2pic. It can convert pdf to image.
import { fromPath } from "pdf2pic";
const options = {
density: 100,
saveFilename: "untitled",
savePath: "./images",
format: "png",
width: 600,
height: 600
};
const storeAsImage = fromPath("/path/to/pdf/sample.pdf", options);
const pageToConvertAsImage = 1;
storeAsImage(pageToConvertAsImage).then((resolve) => {
console.log("Page 1 is now converted as image");
console.log(resolve); // send resolve to user
});

Stream audio to Azure speech api by node.js on browser

I'm making a demo of speech to text using Azure speech api on browser by node.js. According to API document here, it does specify that it need .wav or .ogg files. But the example down there does a api call through sending byte data to api.
So I've already get my data from microphone in byte array form. Is it the right path to convert it to byte and send it to api? Or is it better for me to save it as a .wav file then send to the api?
So below is my code.
This is stream from microphone part.
navigator.mediaDevices.getUserMedia({ audio: true })
.then(stream => { handlerFunction(stream) })
function handlerFunction(stream) {
rec = new MediaRecorder(stream);
rec.ondataavailable = e => {
audioChunks.push(e.data);
if (rec.state == "inactive") {
let blob = new Blob(audioChunks, { type: 'audio/wav; codec=audio/pcm; samplerate=16000' });
recordedAudio.src = URL.createObjectURL(blob);
recordedAudio.controls = true;
recordedAudio.autoplay = true;
console.log(blob);
let fileReader = new FileReader();
var arrayBuffer = new Uint8Array(1024);
var reader = new FileReader();
reader.readAsArrayBuffer(blob);
reader.onloadend = function () {
var byteArray = new Uint8Array(reader.result);
console.log("reader result" + reader.result)
etTimeout(() => getText(byteArray), 1000);
}
}
}
}
This is api call part
function getText(audio, callback) {
console.log("in function audio " + audio);
console.log("how many byte?: " + audio.byteLength)
const sendTime = Date.now();
fetch('https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-US', {
method: "POST",
headers: {
'Accept': 'application/json',
'Ocp-Apim-Subscription-Key': YOUR_API_KEY,
// 'Transfer-Encoding': 'chunked',
// 'Expect': '100-continue',
'Content-type': 'audio/wav; codec=audio/pcm; samplerate=16000'
},
body: audio
})
.then(function (r) {
return r.json();
})
.then(function (response) {
if (sendTime < time) {
return
}
time = sendTime
//callback(response)
}).catch(e => {
console.log("Error", e)
})
}
It returns with 400 (Bad Request) and says :
{Message: "Unsupported audio format"}
Reason:
Note you're not creating a MediaRecorder with a audio/wav mimeType by
new Blob(audioChunks,{type:'audio/wav; codec=audio/pcm; samplerate=16000'})
This statement is only a description for blob. I test my Chrome(v71) with isTypeSupported:
MediaRecorder.isTypeSupported("audio/wav") // return false
MediaRecorder.isTypeSupported("audio/ogg") // return false
MediaRecorder.isTypeSupported("audio/webm") // return true
It seems that the MediaRecorder will only record the audio in audio/webm. Also, when I run the following code on Chrome , the default rec.mimeType is audio/webm;codecs=opus
rec = new MediaRecorder(stream);
According to the Audio formats Requiremnts, the audio/webm is not supported yet.
Approach:
Before calling getText() we need convert the webm to wav firstly. There're quite a lot of libraries that can help us do that. I just copy Jam3's script before your code to convert webm to wav :
// add Jam3's script between Line 2 and Line 94 or import that module as you like
// create a audioContext that helps us decode the webm audio
var audioCtx = new (window.AudioContext || window.webkitAudioContext)();
rec = new MediaRecorder(stream,{
mimeType : 'audio/webm',
codecs : "opus",
});
// ...
rec.ondataavailable = e => {
audioChunks.push(e.data);
if (rec.state == "inactive") {
var blob = new Blob(audioChunks, { 'type': 'audio/webm; codecs=opus' });
var arrayBuffer;
var fileReader = new FileReader();
fileReader.onload = function(event) {
arrayBuffer = event.target.result;
};
fileReader.readAsArrayBuffer(blob);
fileReader.onloadend=function(d){
audioCtx.decodeAudioData(
fileReader.result,
function(buffer) {
var wav = audioBufferToWav(buffer);
setTimeout(() => getText(wav), 1000);
},
function(e){ console.log( e); }
);
};
}
}
And it works fine for me :
As a side note, I suggest you should use your backend to invoke the speech-to-text services. Never invoke azure stt service in a browser. That's because exposing your subscription key to front end is really dangerous. Anyone could inspect the network and steal your key.

How to handle client downloading gzip served from Node.js server app?

I have a question about handling a gzip response on my client side application. I would like the client's browser to pop up an alert "how do you want to handle?" download prompt.
My Node.js server is compressing my files into a gzip format then sending it with a HTTP write response. My client receives a HTTP 200 status although the size of the response is very small compared to my file and nothing doesn't populate my web app. I have anticipated the browser to handle this sort of response to a server sending gzip. similar to how gmail handles downloading files. Can you help me to see if I have missed anything?
server.js
var server = http.createServer(function(request, response) {
if (request.url === '/download'){
let data_zip = retrievedata()
const scopedata_zip = ('./scopedata.txt.gz')
response.writeHead(200, { 'Content-Encoding': 'gzip' });
response.writeHead(200, { 'Content-Type': 'application/javascript' });
response.write(scopedata_zip);
}
})
var retrievedata = () =>{
const gzip = zlib.createGzip();
const inp = fs.createReadStream('scopedata.txt');
const out = fs.createWriteStream('scopedata.txt.gz');
inp.pipe(gzip).pipe(out);
return out
}
Client.js
var downloadData=()=>{
var xhr = new XMLHttpRequest();
xhr.open('POST', 'download', true);
//xhr.setRequestHeader("Accept-Encoding", "gzip")
xhr.setRequestHeader("Encoding", "null")
xhr.onload = function (){
if(this.status == 200){
let form = document.createElement("form");
let element1 = document.createElement("input");
document.body.appendChild(form);
let response = this.responseText
console.log(response)
document.getElementById("state").innerHTML = 'download'
document.getElementById("index").innerHTML = response;
// document.getElementById("state").appendChild(form)
}
}
xhr.onerror = function(err){
console.log("request error...",err)
}
xhr.send()
}
The client is just populating my index div the response to, but nothing is received.
my gzip file is 327mb.
Chrome inspector network says this request is only 170B so I am not receiving my file.
Note xhr.setRequestHeader("Accept-Encoding", "gzip") is commented out becuase I get this error: Refused to set unsafe header "Accept-Encoding". I have set it to null to allow the browser to handle this.
Any input on what I am doing wrong?
There were three things I was doing wrong. I managed to get the browser window by creating a new element, checking if the element has a download attribute and appending the XHR.Response as the location from the href. The second portion of my issue was not receiving the zip file with the appropriate request headers. Because my zip file was a larger size the browser handles the binary buffer stream as a blob. Read more about XHR response types XHR.response. The other issue was on my server side which was using fs.readFile to read the zip as a buffer. Because my zip was made up of multiple files fs.readFile it would stop reading as it hit the end of the first file.
so my client code looks like
var xhr = new XMLHttpRequest();
document.getElementById("state").innerHTML = ' '
document.getElementById("index").innerHTML = ' ';
xhr.open('POST', 'download', true);
xhr.setRequestHeader('Content-disposition', 'attachment')
xhr.setRequestHeader("Content-type","application/zip"); //content-type must be set
xhr.setRequestHeader("Encoding", "null") //unsure of why I need this but it doesnt work with out it for me
xhr.responseType = "blob"; // This must be set otherwise the browser was interpretting the buffer stream as string instead of binary
xhr.onload = function (){
if(this.status == 200){
let form = document.createElement("form");
let element1 = document.createElement("input");
document.body.appendChild(form);
let response = this.response // defined as blob above
document.getElementById("state").innerHTML = 'download'
document.getElementById("index").innerHTML = response;
var blob = new Blob([response], {type: "application/zip"});
var file = URL.createObjectURL(blob);
filename = 'Data.zip'
var a = document.createElement("a");
if ("download" in a) { //check if element can download
a.href = file;
a.download = filename;
document.body.appendChild(a);
a.click(); //automatically browser download
document.body.removeChild(a);
}
}
Server side
else if (request.url === '/download'){
archiveZip((data)=>{ // using archivezip and adding a callback function to insert my routes XHR response
response.setHeader('Content-Type', 'application/zip')
response.setHeader('Content-Length', data.length) // this is important header because without it the browser might truncate the entire response especially if there are end of file characters zipped up in the buffer stream
response.setHeader('Content-disposition', 'attachment; filename="Data.zip"');
response.end(data);
})
}
var archiveZip = (callback) =>{
var output = fs.createWriteStream(__dirname + '/Data.zip'); //output
var archive = archiver('zip', {zlib: { level: 9 }});
output.on('close', function() {
console.log(archive.pointer() + ' total bytes');
console.log('archiver has been finalized and the output file descriptor has closed.');
fs.readFile('./Data.zip', function (err, content) {
if (err) {
response.writeHead(400, {'Content-type':'text/html'})
console.log(err);
response.end("No such file");
} else {
callback(content);
}
});
});
output.on('end', function() {
console.log('Data has been drained');
});
archive.on('error', function(err) {
throw err;
});
archive.pipe(output);
// append a file
archive.file(data_files + '/parsed_scope.json', { name: 'parsed_scope.json' });
archive.file(data_files + '/scopedata_index.json', { name: 'scopedata_index.json' });
archive.file(data_files + '/scopedata.txt', { name: 'scopedata.txt' });
archive.finalize();
There are many zip libraries I was looking at ones that can handle zipping a directory with multiple files and went with archiver. I would have like to use the built in zlib that comes with node but only supports single small files.

Azure Functions - NodeJS - Response Body as a Stream

I'd like to return a file from Blob Storage when you hit a given Azure Function end-point. This file is binary data.
Per the Azure Storage Blob docs, the most relevant call appears to be the following since its the only one that doesn't require writing the file to an interim file:
getBlobToStream
However this call gets the Blob and writes it to a stream.
Is there a way with Azure Functions to use a Stream as the value of res.body so that I can get the Blob Contents from storage and immediately write it to the response?
To add some code, trying to get something like this to work:
'use strict';
const azure = require('azure-storage'),
stream = require('stream');
const BLOB_CONTAINER = 'DeContainer';
module.exports = function(context){
var file = context.bindingData.file;
var blobService = azure.createBlobService();
var outputStream = new stream.Writable();
blobService.getBlobToStream(BLOB_CONTAINER, file, outputStream, function(error, serverBlob) {
if(error) {
FileNotFound(context);
} else {
context.res = {
status: 200,
headers: {
},
isRaw: true,
body : outputStream
};
context.done();
}
});
}
function FileNotFound(context){
context.res = {
status: 404,
headers: {
"Content-Type" : "application/json"
},
body : { "Message" : "No esta aqui!."}
};
context.done();
}
Unfortunately we don't have streaming support implemented in NodeJS just yet - it's on the backlog: https://github.com/Azure/azure-webjobs-sdk-script/issues/1361
If you're not tied to NodeJ open to using a C# function instead, you can use the storage sdk object directly in your input bindings and stream request output, instead of using the intermediate object approach.
While #Matt Manson's answer is definitely correct based on the way I asked my question, the following code snippet might be more useful for someone who stumbles across this question.
While I can't send the Stream to the response body directly, I can use a custom stream which captures the data into a Uint8Array, and then sends that to the response body.
NOTE: If the file is REALLY big, this will use a lot of memory.
'use strict';
const azure = require('azure-storage'),
stream = require('stream');
const BLOB_CONTAINER = 'deContainer';
module.exports = function(context){
var file = context.bindingData.file;
var blobService = azure.createBlobService();
var outputStream = new stream.Writable();
outputStream.contents = new Uint8Array(0);//Initialize contents.
//Override the write to store the value to our "contents"
outputStream._write = function (chunk, encoding, done) {
var curChunk = new Uint8Array(chunk);
var tmp = new Uint8Array(this.contents.byteLength + curChunk.byteLength);
tmp.set(this.contents, 0);
tmp.set(curChunk, this.contents.byteLength);
this.contents = tmp;
done();
};
blobService.getBlobToStream(BLOB_CONTAINER, file, outputStream, function(error, serverBlob) {
if(error) {
FileNotFound(context);
} else {
context.res = {
status: 200,
headers: {
},
isRaw: true,
body : outputStream.contents
};
context.done();
}
});//*/
}
function FileNotFound(context){
context.res = {
status: 404,
headers: {
"Content-Type" : "application/json"
},
body : { "Message" : "No esta aqui!"}
};
context.done();
}
I tried #Doug's solution from the last comment above, with a few minor mods in my azure function, and so far, after trying 20 different ideas, this is the only one that actually delivered the file to the browser! Thank you, #Doug...
const fs = require("fs");
const stream = require("stream");
...
const AzureBlob = require('#[my_private_artifact]/azure-blob-storage');
const azureStorage = new AzureBlob(params.connectionString);
//Override the write to store the value to our "contents" <-- Doug's solution
var outputStream = new stream.Writable();
outputStream.contents = new Uint8Array(0);//Initialize contents.
outputStream._write = function (chunk, encoding, done) {
var curChunk = new Uint8Array(chunk);
var tmp = new Uint8Array(this.contents.byteLength + curChunk.byteLength);
tmp.set(this.contents, 0);
tmp.set(curChunk, this.contents.byteLength);
this.contents = tmp;
done();
};
let azureSpeedResult = await azureStorage.downloadBlobToStream(params.containerName, params.objectId, outputStream);
let headers = {
"Content-Length": azureSpeedResult.size,
"Content-Type": mimeType
};
if (params.action == "download") {
headers["Content-Disposition"] = "attachment; filename=" + params.fileName;
}
context.res = {
status: 200,
headers: headers,
isRaw: true,
body: outputStream.contents
};
context.done();
...

How to serve a PDF file stored using CollectionFS in Meteor?

I am working on a Meteor application.
Currently, I have a few PDFs on my server. To serve these already existing PDFs directly to the client, I do it this way and it works very well:
Router.route("/file/:fileName", function() {
var fileName = this.params.fileName;
// console.log(process.env.PWD);
var filePath = process.env.PWD + '/' + fileName;
var fs = Meteor.npmRequire('fs');
var data = fs.readFileSync(filePath);
this.response.writeHead(200, {
"Content-Type": "application/pdf",
"Content-Length": data.length
});
this.response.write(data);
this.response.end();
}, {
where: "server"
});
I save these PDFs to Mongo using CollectionFS (Later, I shall generate PDFs and save them. For now, I am just directly saving these already existing PDFs to Mongo as I first want to get the Mongo part to work.).
testCollection = new FS.Collection("testCollection", {
stores: [new FS.Store.GridFS("testCollection")]
});
testCollection.allow({
'insert': function () {
return true;
}
});
var file = new FS.File(process.env.PWD + '/PDFKitExampleServerSide.pdf');
file.encoding = 'binary';
file.name('myPDF.pdf');
var document = testCollection.insert(file);
console.log(document._id);
My question is, after I save these PDFs to Mongo using CollectionFS (like I do above), how do I retrieve and serve these PDFs?
Router.route("/database/:pdfId", function() {
//need help here
}, { where: "server"});
After a lot of searching and trying, I've finally gotten it to work.
Router.route("/database/:pdfId", function() {
var pdfId = this.params.pdfId;
var file = testCollection.findOne({_id: pdfId});
var readable = file.createReadStream("tmp");
var buffer = new Buffer(0);
readable.on("data", function(b) {
buffer = Buffer.concat([buffer, b]);
});
var response = this.response;
readable.on("end", function() {
response.writeHead(200, {
"Content-Type": "application/pdf",
"Content-Length": buffer.length
});
response.write(buffer);
response.end();
});
}, {
where: "server"
});
I know that this question is old, but I found an easier way to store and retrieve PDFs. Apparently if you store your PDFs in the database and they are smaller than 16MB (which is likely in this type of files) the performance is way slower than if you store the files in your server file system.
For doing that, you can use FS.Store.FileSystem instead of FS.Store.GridFS. The following code works for me:
// Client
var pdfStore = new FS.Store.FileSystem("uploadedFiles");
UploadedFiles = new FS.Collection("uploadedFiles", {
stores: [pdfStore],
filter: {
allow: {
extensions: ['pdf','doc','docx','xls','xlsx','ppt','pptx''jpg','png','jpeg']
}
}
});
// Server
var pdfStore = new FS.Store.FileSystem("uploadedFiles", {path: uploadFilesPath});
UploadedFiles = new FS.Collection("uploadedFiles", {
stores: [pdfStore],
filter: {
maxSize: 5242880, // 5MB in bytes
allow: {
extensions: ['pdf','doc','docx','xls','xlsx','ppt','pptx''jpg','png','jpeg']
},
deny: {
extensions: ['exe']
},
onInvalid: function (message) {
if (Meteor.isClient) {
alert(message);
} else {
console.log(message);
}
}
}
});
And then just use this little helper to retrieve the url to the file:
get_uploaded_link: function(id){
console.log(id);
var file = UploadedFiles.findOne({_id: id});
return file.url();}

Resources