nodejs memory buffer synchronized .pipe() - node.js

I am trying to migrate pdfkit from png-js to pngjs2, because png-js doesn't support interlaced PNGs. I need to load a PNG file synchronously. I am trying to do it this way:
var fs = require('fs'),
    PNG = require('pngjs2').PNG;
var stream = require('stream');
var bufferStream = new stream.PassThrough();
var buf = fs.readFileSync('./logs/rid12.png');
bufferStream.end(buf);
var png = null;
bufferStream.pipe(new PNG())
  .on('parsed', function() {
    console.log("here");
    png = this;
  });
console.log("there", png);
"there" happens before "here", so png is null. Is it possible to pipe inmemory buffer to PNG parser so that I don't have to make callback architecture?

There is no way to make an async method synchronous.
There are libraries such as https://github.com/scriby/asyncblock which may be able to fake it.
asyncblock(function(flow) {
  var png = null;
  flow.sync(
    bufferStream.pipe(new PNG())
      .on('parsed', function() {
        console.log("here");
        png = this;
        flow.callback();
      })
  );
  // png is not null here
});
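Alternatively, a plain Promise wrapper around the 'parsed' event (using the same pngjs2 API shown in the question) keeps the call site compact without fibers. A minimal sketch, still asynchronous:
var fs = require('fs');
var stream = require('stream');
var PNG = require('pngjs2').PNG;

// resolves with the parsed PNG instance, rejects on a parse error
function parsePng(buffer) {
  return new Promise(function(resolve, reject) {
    var bufferStream = new stream.PassThrough();
    bufferStream.end(buffer);
    bufferStream.pipe(new PNG())
      .on('parsed', function() { resolve(this); })
      .on('error', reject);
  });
}

parsePng(fs.readFileSync('./logs/rid12.png')).then(function(png) {
  console.log('parsed', png.width, png.height);
});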

Related

fs.createReadStream('b.mp3') reduce latency

I'm playing around with events that trigger sounds (500 ms long), so I use the lame library.
var lame = require('lame');
var fs = require('fs');
var Speaker = require('speaker');
while (listening) {
  if (eventIsFired) {
    fs.createReadStream('b.mp3')
      .pipe(new lame.Decoder())
      .pipe(new Speaker());
  }
}
Is there any way to preload the stream/file, so I won't need to load it on every single event? It actually blocks my whole while loop, and making it async didn't work. How can I reduce latency and make it more efficient?
You can cache the mp3 file in a buffer, then convert that buffer to a readable stream whenever you need it.
var lame = require('lame');
var fs = require('fs');
var Speaker = require('speaker');
var Readable = require('stream').Readable;
var mp3Buffer = fs.readFileSync('b.mp3');
while (listening) {
  if (eventIsFired) {
    bufferToReadableStream(mp3Buffer)
      .pipe(new lame.Decoder())
      .pipe(new Speaker());
  }
}

function bufferToReadableStream(buffer) {
  let stream = new Readable();
  stream.push(buffer);
  stream.push(null);
  return stream;
}

Get PDFKit as base64 string

I'm searching for a way to get the base64 string representation of a PDFKit document. I can't find the right way to do it...
Something like this would be extremely convenient.
var doc = new PDFDocument();
doc.addPage();
doc.outputBase64(function (err, pdfAsText) {
  console.log('Base64 PDF representation', pdfAsText);
});
I already tried the blob-stream lib, but it doesn't work on a node server (it says that Blob doesn't exist).
Thanks for your help!
I was in a similar predicament, wanting to generate PDF on the fly without having temporary files lying around. My context is a NodeJS API layer (using Express) which is interacted with via a React frontend.
Ironically, a similar discussion for Meteor helped me get to where I needed. Based on that, my solution resembles:
const PDFDocument = require('pdfkit');
const { Base64Encode } = require('base64-stream');
// ...
var doc = new PDFDocument();
// write to PDF
var finalString = ''; // contains the base64 string
var stream = doc.pipe(new Base64Encode());

doc.end(); // will trigger the stream to end

stream.on('data', function(chunk) {
  finalString += chunk;
});

stream.on('end', function() {
  // the stream is at its end, so push the resulting base64 string to the response
  res.json(finalString);
});
A synchronous option, not (yet) present in the documentation:
const doc = new PDFDocument();
doc.text("Sample text", 100, 100);
doc.end();
const data = doc.read();
console.log(data.toString("base64"));
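One caveat with the snippet above: read() with no argument only returns what is currently buffered. A minimal variation that drains all buffered chunks, assuming the document is small enough to be fully buffered once end() has been called:
const doc = new PDFDocument();
doc.text("Sample text", 100, 100);
doc.end();

// drain every chunk the document has buffered
const chunks = [];
let chunk;
while ((chunk = doc.read()) !== null) {
  chunks.push(chunk);
}
console.log(Buffer.concat(chunks).toString("base64"));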
I just made a module for this that you could probably use: js-base64-file.
const Base64File = require('js-base64-file');
const b64PDF = new Base64File();
const file = 'yourPDF.pdf';
const path = `${__dirname}/path/to/pdf/`;
const doc = new PDFDocument();
doc.addPage();
// save your PDF using the filename and path

// this will load and convert
const data = b64PDF.loadSync(path, file);
console.log('Base64 PDF representation', data);

// you could also save a copy as base64 if you wanted, like so:
b64PDF.save(data, path, `copy-b64-${file}`);
It's a new module so my documentation isn't complete yet, but there is also an async method.
// this will load and convert asynchronously
b64PDF.load(
  path,
  file,
  function(err, base64) {
    if (err) {
      // handle error here
      process.exit(1);
    }
    console.log('ASYNC: you could send this PDF via ws or http to the browser now\n');
    // or, as above, save it here
    b64PDF.save(base64, path, `copy-async-${file}`);
  }
);
I suppose I could add a convert-from-memory method too. If this doesn't suit your needs, you could submit a request on the js-base64-file repo.
Following Grant's answer, here is an alternative that returns a promise instead of writing to the node response (to make it easier to call outside of a router):
const PDFDocument = require('pdfkit');
const {Base64Encode} = require('base64-stream');
const toBase64 = doc => {
  return new Promise((resolve, reject) => {
    try {
      const stream = doc.pipe(new Base64Encode());
      let base64Value = '';
      stream.on('data', chunk => {
        base64Value += chunk;
      });
      stream.on('end', () => {
        resolve(base64Value);
      });
    } catch (e) {
      reject(e);
    }
  });
};
The caller should call doc.end() before or after calling this async method.
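For illustration, a call could look like this (a minimal sketch; the text content is just a placeholder):
const doc = new PDFDocument();
doc.text('Hello, World!');

const base64Promise = toBase64(doc);
doc.end();

base64Promise.then(base64 => {
  console.log('Base64 PDF representation', base64);
});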

Node.js create object.stdin

I have an object in node that has .stdin, .stderr and .stdout objects attached to it. I have stdout and stderr working as Writable streams and I'm having some trouble getting stdin to work as a Readable stream.
In the use case I'm building for I could do the following:
var stream = require('stream');
var util = require('util');
var Readable = stream.Readable;

var My_Stdin = function() {
  Readable.call(this, {objectMode: true});
};

util.inherits(My_Stdin, Readable);

My_Stdin.prototype._read = function(data) {
  this.emit(data);
};

function myObject() {
  this.stdin = new My_Stdin;
  this.stdin.on('data', function(data) {
    process.stdout.write(data);
  });
}

var object = new myObject;
process.stdin.pipe(object.stdin);
Right now I'm getting an error that says:
_stream_readable.js:525
var ret = dest.write(chunk);
^
TypeError: dest.write is not a function
How do I properly implement a stdin on my object similar to child_process.spawn?
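The error itself points at the fix: pipe() needs a writable destination, and a plain Readable has no write() method. A minimal sketch of one way around it, using a PassThrough (which is both writable and readable) for stdin; this is an assumption on my part, not an answer from the original thread:
var stream = require('stream');

function MyObject() {
  // PassThrough is writable on the outside and readable on the inside
  this.stdin = new stream.PassThrough();
  this.stdout = new stream.PassThrough();
  this.stderr = new stream.PassThrough();

  this.stdin.on('data', function(data) {
    process.stdout.write(data);
  });
}

var object = new MyObject();
process.stdin.pipe(object.stdin); // works: PassThrough implements write()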

Node js, piping pdfkit to a memory stream

I am using pdfkit on my node server, typically creating pdf files, and then uploading them to s3.
The problem is that the pdfkit examples pipe the pdf doc into a node write stream, which writes the file to disk. I followed the example and it worked correctly; however, my requirement now is to pipe the pdf doc to a memory stream rather than save it on disk (I am uploading to s3 anyway).
I've followed some node memory stream procedures, but none of them seem to work with the pdf pipe for me; I could only write strings to memory streams.
So my question is: How to pipe the pdf kit output to a memory stream (or something alike) and then read it as an object to upload to s3?
var fsStream = fs.createWriteStream(outputPath + fileName);
doc.pipe(fsStream);
An updated answer for 2020. There is no need to introduce a new memory stream because "PDFDocument instances are readable Node streams".
You can use the get-stream package to make it easy to wait for the document to finish before passing the result back to your caller.
https://www.npmjs.com/package/get-stream
const PDFDocument = require('pdfkit')
const getStream = require('get-stream')
const pdf = async () => {
  const doc = new PDFDocument()
  doc.text('Hello, World!')
  doc.end()
  return await getStream.buffer(doc)
}
// Caller could do this:
const pdfBuffer = await pdf()
const pdfBase64string = pdfBuffer.toString('base64')
You don't have to return a buffer if your needs are different. The get-stream readme offers other examples.
There's no need to use an intermediate memory stream¹ – just pipe the pdfkit output stream directly into an HTTP upload stream.
In my experience, the AWS SDK is garbage when it comes to working with streams, so I usually use request.
var upload = request({
  method: 'PUT',
  url: 'https://bucket.s3.amazonaws.com/doc.pdf',
  aws: { bucket: 'bucket', key: ..., secret: ... }
});

doc.pipe(upload);
¹ In fact, it is usually undesirable to use a memory stream, because that means buffering the entire thing in RAM, which is exactly what streams are supposed to avoid!
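If you would rather avoid the deprecated request package, the v3 AWS SDK's @aws-sdk/lib-storage Upload helper also accepts a stream as Body, so the same no-buffering approach could look roughly like this (bucket name and key are placeholders):
const PDFDocument = require('pdfkit');
const { S3Client } = require('@aws-sdk/client-s3');
const { Upload } = require('@aws-sdk/lib-storage');

const doc = new PDFDocument();
doc.text('Hello, World!');

// Body may be a readable stream, so the PDF is streamed to S3 as it is generated
const upload = new Upload({
  client: new S3Client({}),
  params: {
    Bucket: 'bucket',
    Key: 'doc.pdf',
    Body: doc,
    ContentType: 'application/pdf',
  },
});

doc.end();
upload.done().then(() => console.log('uploaded'));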
You could try something like this, and upload it to S3 inside the end event.
var doc = new pdfkit();
var MemoryStream = require('memorystream');
var memStream = new MemoryStream(null, {
  readable: false
});

doc.pipe(memStream);

doc.on('end', function () {
  var buffer = Buffer.concat(memStream.queue);
  awsservice.putS3Object(buffer, fileName, fileType, folder).then(function () { }, reject);
});
A tweak of @bolav's answer worked for me when trying to work with pdfmake rather than pdfkit. First you need to add memorystream to your project using npm or yarn.
const MemoryStream = require('memorystream');
const PdfPrinter = require('pdfmake');
const pdfPrinter = new PdfPrinter();
const docDef = {};
const pdfDoc = pdfPrinter.createPdfKitDocument(docDef);
const memStream = new MemoryStream(null, {readable: false});
const pdfDocStream = pdfDoc.pipe(memStream);
pdfDoc.end();
pdfDocStream.on('finish', () => {
  console.log(Buffer.concat(memStream.queue));
});
My code to return a base64 for pdfkit:
import * as PDFDocument from 'pdfkit'
import getStream from 'get-stream'
const pdf = {
createPdf: async (text: string) => {
const doc = new PDFDocument()
doc.fontSize(10).text(text, 50, 50)
doc.end()
const data = await getStream.buffer(doc)
let b64 = Buffer.from(data).toString('base64')
return b64
}
}
export default pdf
Thanks to Troy's answer, mine worked with get-stream as well. The difference is that I did not convert it to a base64 string, but rather uploaded it to AWS S3 as a buffer.
Here is my code:
import PDFDocument from 'pdfkit'
import getStream from 'get-stream';
import { PutObjectCommand } from '@aws-sdk/client-s3';
import s3Client from 'your s3 config file';

const pdfGenerator = () => {
  const doc = new PDFDocument();
  doc.text('Hello, World!');
  doc.end();
  return doc;
}

const uploadFile = async () => {
  const pdf = pdfGenerator();
  const pdfBuffer = await getStream.buffer(pdf);
  await s3Client.send(
    new PutObjectCommand({
      Bucket: 'bucket-name',
      Key: 'filename.pdf',
      Body: pdfBuffer,
      ContentType: 'application/pdf',
    })
  );
}

uploadFile()
uploadFile()

One symbol at time stream with node

I'm trying to implement a stream which returns one symbol from a file on each 'data' event.
I ended up with the code below:
var util = require('util'),
    fs = require('fs'),
    Readable = require('stream').Readable;

var SymbolReadStream = function(filename, options) {
  Readable.call(this);
  this._readable = fs.createReadStream(filename, options).pause();
  self = this;
  this._readable.on('readable', function() {
    var chunk;
    chunk = self._readable.read(1);
    // I believe the problem is here
    self._readable.pause();
  });
};

util.inherits(SymbolReadStream, Readable); // inherit the prototype methods

SymbolReadStream.prototype._read = function() {
  this._readable.resume();
};

var r = new SymbolReadStream("test.txt", {
  encoding: 'utf8',
});

r.on('data', function(el) {
  console.log(el);
});
but this code doesn't work. Please help.
Is there an easier way to achieve the behavior?
This post gives a great clue on how to answer your question.
Also, you should take a look at pipe, which would be a cleaner way to accomplish what you're trying to do: piping an adapter to the file stream instead of wrapping it up.
That said, personally I wouldn't reinvent the wheel here, and would just search for modules that can accomplish that, especially "split" modules, making them split on every char instead of on new lines. As an example, event-stream has a split method that "takes the same arguments as string.split except it defaults to '\n' instead of ','". So the logic would be to try myStream.pipe(es.split('')), but the module treats this like myStream.pipe(es.split()), which breaks on lines. So here's my solution, using a regex to say "break on each char":
var es = require('event-stream');
var fs = require('fs');
var symbolStream = fs.createReadStream(filename, options).pipe(es.split(/(?!$)/));
EDIT: event-stream seems to use the split module internally, so you can even try
var split = require('split');
var fs = require('fs');
var symbolStream = fs.createReadStream(filename, options).pipe(split(/(?!$)/));
(this loose test is responsible for converting '' to \r\n)
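A quick way to try it out (test.txt here is just a placeholder file name):
var split = require('split');
var fs = require('fs');

fs.createReadStream('test.txt', { encoding: 'utf8' })
  .pipe(split(/(?!$)/))
  .on('data', function(symbol) {
    console.log(symbol); // one character per 'data' event
  });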
In your stream implementation, no 'data' events are emitted to the handler, which is why console.log is never called. After adding the events, the file will be streamed symbol by symbol. Example below:
var util = require('util'),
    fs = require('fs'),
    Readable = require('stream').Readable;

function SymbolReadStream(filename, options) {
  if (!(this instanceof SymbolReadStream)) {
    return new SymbolReadStream(filename, options);
  }
  Readable.call(this);
  this._readable = fs.createReadStream(filename, options);
}

util.inherits(SymbolReadStream, Readable); // inherit the prototype methods

SymbolReadStream.prototype._read = function() {
  var self = this;
  this._readable.on('readable', function() {
    var chunk;
    while (null !== (chunk = self._readable.read(1))) {
      self.emit('data', chunk);
    }
  });
  this._readable.on('end', function() {
    self.emit('end');
  });
};
var r = new SymbolReadStream("test.txt", {
  encoding: 'utf8',
});

r.on('data', function(el) {
  console.log(el);
});

r.on('end', function(el) {
  console.log('done');
});
