Converting streamed buffers back into numbers? - node.js

I have an implementation of a Readable stream that generates 200 random numbers between 1-200:
/*
Readable that produces a list of 200 random numbers.
Each _read() call pushes one chunk; once the counter passes 200 the stream
is ended by pushing null.
*/
var stream = require('stream');
/**
 * Constructor for the Random readable stream.
 * @param {Object} [options] - forwarded unchanged to stream.Readable
 */
function Random(options) {
// Inherits from stream.Readable
stream.Readable.call(this, options);
// Number of the chunk the next _read() call will produce (starts at 1)
this._counter = 1;
};
Random.prototype = Object.create(stream.Readable.prototype);
// NOTE(review): this points constructor at stream.Readable rather than Random — confirm that is intended
Random.prototype.constructor = stream.Readable;
// Called whenever data is required from the stream
Random.prototype._read = function() {
// Generate a random number between 1 and 200
var randomNumber = Math.floor((Math.random() * 200) + 1);
// NOTE: with a numeric first argument Buffer allocates that many bytes instead
// of encoding the number's digits; this is the cause of the garbled output
// described in the text that follows this listing
var buf = new Buffer(randomNumber, 'utf8');
this.push(buf);
this._counter++;
// Generate 200 random numbers, then stop by pushing null
if (this._counter > 200) {
this.push(null);
}
};
module.exports = Random;
In my main.js, all I'm trying to do is instantiate the stream and decode each of the chunks as they come in. However, I'm getting gibberish as my output -- what's the proper way to get it to print out all of my random numbers?
// main.js: consume the Random stream and print every chunk it emits
var Random = require('./random');
// Stream
var random = new Random();
// 'data' fires once per chunk pushed by the stream; decode the Buffer as UTF-8 text
random.on('data', function(chunk) {
console.log(chunk.toString('utf8'))
});

Ahhh -- got it. The Buffer constructor needs to take in a string, not an integer. Changing the buf instantiation line to:
// Encode the number's decimal digits as text; Buffer.from replaces the deprecated Buffer constructor
var buf = Buffer.from(randomNumber.toString());
did the trick.

Related

streaming audio from mic across websocket. I can see the data being sent but cannot hear it on the receiving client side

I'm trying to broadcast captured mic audio across a websocket. I can see the buffer array is being sent, and the array has actual valid data, but the receiving client side cannot hear it. I'm pretty sure my playback function is correct, because I can generate white noise by filling an array with random numbers and using the playback function to hear it. I'm thinking maybe the audio it's broadcasting is too quiet to hear, because the numbers generated in the array seem to mostly be in the .000### range. Any ideas? Capturing mic audio and broadcasting it seems to be overcomplicated... :/
//broadcasting side
// Captures microphone audio and sends every processed chunk over the socket as JSON.
navigator.mediaDevices.getUserMedia({audio: true,video: false}) // request microphone audio only (video is disabled)
.then(stream => {
vid.srcObject = stream;
context = new AudioContext();
var source = context.createMediaStreamSource(stream);
// Script processor: buffer size 1024, 2 input channels, 2 output channels
var processor = context.createScriptProcessor(1024, 2, 2);
source.connect(processor);
processor.connect(context.destination);
// Called for each captured audio chunk
processor.onaudioprocess = function(e) {
// Reads channel index 1 (the second channel) of the input buffer
audiodata = e.inputBuffer.getChannelData(1);
// NOTE: JSON.stringify turns the typed array into an object keyed by sample index,
// which is why the receiving side has to convert an object back to an array
socket.send(JSON.stringify({sound: audiodata, to: to, from: '$username', text:''}));
};
return vid.play(); // returns a Promise
});
//receiving side object to array
// Handles an incoming message that carries a `sound` payload and plays it.
if(typeof (message.sound) != "undefined"){
//$('#video_stream_btn').trigger('click');
var json_sound = message.sound;
var array_sound = [];
// NOTE(review): every pushed element is an [index, sample] pair rather than the
// bare sample value, so play_sound receives nested arrays — likely related to the
// silent playback; confirm
for(var i in json_sound){
array_sound.push([i, json_sound [i]]);
}
// Lazily create the shared AudioContext on first use
if(typeof(context) == 'undefined'){
context = new AudioContext();
}
play_sound(array_sound, context);
return;
}
/**
 * Receiving side: plays one chunk of samples as a single-channel buffer.
 *
 * @param {ArrayLike<number>} raw - samples copied into channel 0 of a new buffer
 * @param {AudioContext} context - context used to create, route and start the buffer
 */
function play_sound(raw, context) {
  const sampleCount = raw.length;
  const chunk = context.createBuffer(1, sampleCount, context.sampleRate);
  const channel = chunk.getChannelData(0);
  channel.set(raw);

  const player = context.createBufferSource();
  player.buffer = chunk;
  player.connect(context.destination);
  // Start playback immediately
  player.start(0);
}
For anyone out there trying to figure this out: I ended up encoding it to an Int16Array, then sent it across the socket, where the client decoded it back into a Float32Array and passed it to the play_sound function. I basically just stole a bunch of stuff off stackoverflow and faked it until I made it, cause I'm not that smart :)
capturing mic and converting to int16array, then sending it across the socket
// Captures the microphone, converts each chunk to 16-bit integers and sends it over the socket.
navigator.mediaDevices.getUserMedia({audio: {sampleSize: 16, channelCount: 2},video: true}) // request camera and microphone
.then(stream => {
vid.srcObject = stream; // don't use createObjectURL(MediaStream)
context = new AudioContext();
var source = context.createMediaStreamSource(stream);
// Script processor: buffer size 1024, 2 input channels, 2 output channels
var processor = context.createScriptProcessor(1024, 2, 2);
source.connect(processor);
processor.connect(context.destination);
// Called for each captured audio chunk
processor.onaudioprocess = function(e) {
// Convert channel 0 to 16-bit samples; convertFloat32ToInt16 returns the raw
// buffer, so it is wrapped in an Int16Array view again before serializing
audiodata = new Int16Array(convertFloat32ToInt16(e.inputBuffer.getChannelData(0)));
console.log(audiodata);
// JSON.stringify serializes the typed array as an object keyed by sample index
socket.send(JSON.stringify({sound: audiodata, to: to, from: '$username', text:''}));
};
return vid.play(); // returns a Promise
});
relevant function for converting captured mic to int16array:
/**
 * Converts float samples (nominally in [-1, 1]) to signed 16-bit samples.
 *
 * Samples outside [-1, 1] are clamped on both sides so that they saturate
 * instead of wrapping around when stored in the Int16Array.
 *
 * @param {Float32Array|number[]} buffer - input samples
 * @returns {ArrayBuffer} backing buffer of an Int16Array holding one entry per input sample
 */
function convertFloat32ToInt16(buffer) {
  const samples = new Int16Array(buffer.length);
  for (let i = 0; i < buffer.length; i++) {
    const clamped = Math.max(-1, Math.min(1, buffer[i]));
    // Scale to the 16-bit range; the typed array truncates the fraction
    samples[i] = clamped * 0x7FFF;
  }
  return samples.buffer;
}
receiving client side json object to int16array, then int16array back to float32array:
// Handles an incoming message that carries a `sound` payload: rebuilds the sample
// list from the JSON object, converts it to floats and plays it.
if(typeof (message.sound) != "undefined"){
//$('#video_stream_btn').trigger('click');
//var json_sound = message.sound;
// Lazily create the shared AudioContext on first use
if(typeof(context) == 'undefined'){
context = new AudioContext();
}
sound_array = [];
// Copy every keyed entry of the JSON object into an array at the same index
for (i in message.sound)
{
sound_array[i] = (message.sound [i]);
}
//sound_array16 = new Int16Array(sound_array);
sound_array32 = int16ToFloat32(sound_array);
play_sound(sound_array32, context);
return;
}
relevant receiving side int16array to float32array function:
/**
 * Converts signed 16-bit samples back to float samples in [-1, 1].
 *
 * @param {ArrayLike<number>|ArrayBuffer} inputArray - 16-bit samples; anything
 *   the Int16Array constructor accepts
 * @returns {Float32Array} one float per input sample
 */
function int16ToFloat32(inputArray) {
  const int16arr = new Int16Array(inputArray);
  const output = new Float32Array(int16arr.length);
  for (let i = 0; i < int16arr.length; i++) {
    // Int16Array elements are already signed (-32768..32767), so no manual
    // two's-complement unwrapping is needed. Scale by 0x7FFF and clamp,
    // because -32768 / 0x7FFF would land slightly below -1.
    output[i] = Math.max(-1, int16arr[i] / 0x7FFF);
  }
  return output;
}

How do I replace a string in a PDF file using NodeJS?

I have a template PDF file, and I want to replace some marker strings to generate new PDF files and save them. What's the best/simplest way to do this? I don't need to add graphics or anything fancy, just a simple text replacement, so I don't want anything too complicated.
Thanks!
Edit: Just found HummusJS, I'll see if I can make progress and post it here.
I found this question by searching, so I think it deserves the answer. I found the answer by BrighTide here: https://github.com/galkahana/HummusJS/issues/71#issuecomment-275956347
Basically, there is this very powerful Hummus package, which uses a library written in C++ (cross-platform, of course). I think the answer given in that GitHub comment can be turned into a function like this:
var hummus = require('hummus');
/**
 * Converts a string into an array of its UTF-8 byte values.
 *
 * @param {string} str - input string
 * @returns {number[]} one entry (0-255) per encoded byte
 */
function strToByteArray(str) {
  // Buffer.from replaces the deprecated `new Buffer(string)` constructor
  return Array.from(Buffer.from(str));
}
/**
 * Replaces text in one page's content stream and writes the modified PDF to a new file.
 *
 * @param {string} sourceFile - passed to hummus.createWriterToModify as the file to modify
 * @param {string} targetFile - passed as modifiedFilePath, i.e. where the result is written
 * @param {number} pageNumber - page index handed to parsePage (0 in the usage example)
 * @param {string|RegExp} findText - pattern passed to String.prototype.replace
 * @param {string} replaceText - replacement passed to String.prototype.replace
 */
function replaceText(sourceFile, targetFile, pageNumber, findText, replaceText) {
var writer = hummus.createWriterToModify(sourceFile, {
modifiedFilePath: targetFile
});
var sourceParser = writer.createPDFCopyingContextForModifiedFile().getSourceDocumentParser();
var pageObject = sourceParser.parsePage(pageNumber);
// NOTE(review): assumes the page's Contents entry is a single indirect stream object — confirm for pages with several content streams
var textObjectId = pageObject.getDictionary().toJSObject().Contents.getObjectID();
var textStream = sourceParser.queryDictionaryObject(pageObject.getDictionary(), 'Contents');
//read the original block of text data
var data = [];
var readStream = sourceParser.startReadingFromStream(textStream);
// Read in chunks of up to 10000 bytes until the stream is exhausted
while(readStream.notEnded()){
Array.prototype.push.apply(data, readStream.read(10000));
}
// Decode the collected bytes with Buffer's default encoding and apply the replacement
var string = new Buffer(data).toString().replace(findText, replaceText);
//Create and write our new text object
var objectsContext = writer.getObjectsContext();
// Re-open the same object id so the new stream takes the place of the old content
objectsContext.startModifiedIndirectObject(textObjectId);
var stream = objectsContext.startUnfilteredPDFStream();
stream.getWriteStream().write(strToByteArray(string));
objectsContext.endPDFStream(stream);
objectsContext.endIndirectObject();
writer.end();
}
// replaceText('source.pdf', 'output.pdf', 0, /REPLACEME/g, 'My New Custom Text');
UPDATE:
The version used at the time of writing this example was 1.0.83; things might have changed since then.
UPDATE 2:
Recently I got an issue with another PDF file which had a different font. For some reason the text got split into small chunks, i.e. string QWERTYUIOPASDFGHJKLZXCVBNM1234567890- got represented as -286(Q)9(WER)24(T)-8(YUIOP)116(ASDF)19(GHJKLZX)15(CVBNM1234567890-)
I had no idea what else to do rather than make up a regex.. So instead of this one line:
var string = new Buffer(data).toString().replace(findText, replaceText);
I have something like this now:
// Decode the page content, then replace a marker whose characters may be split
// into kerned chunks such as -286(Q)9(WER)24(T)...
var string = Buffer.from(data).toString();
var characters = REPLACE_ME;
var match = [];
for (var a = 0; a < characters.length; a++) {
  // Escape regex metacharacters so that markers containing e.g. "." or "*" match literally
  var literal = characters[a].replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Every character may be preceded by a kerning number and/or "(" and followed by ")"
  match.push('(-?[0-9]+)?(\\()?' + literal + '(\\))?');
}
string = string.replace(new RegExp(match.join('')), function(m, m1) {
  // m1 is the optional kerning number in front of the first character; it is
  // undefined when absent, so fall back to '' rather than emitting "undefined"
  return (m1 || '') + '( ' + REPLACE_WITH_THIS + ')';
});
Building on Alex's (and other's) solution, I noticed an issue where some non-text data were becoming corrupted. I tracked this down to encoding/decoding the PDF text as utf-8 instead of as a binary string. Anyways here's a modified solution that:
Avoids corrupting non-text data
Uses streams instead of files
Allows multiple patterns/replacements
Uses the MuhammaraJS package which is a maintained fork of HummusJS (should be able to swap in HummusJS just fine as well)
Is written in TypeScript (feel free to remove the types for JS)
import muhammara from "muhammara";
/**
 * One search/replace rule applied to a page's content.
 */
interface Pattern {
// Text or regular expression to look for
searchValue: RegExp | string;
// Text inserted in place of every match
replaceValue: string;
}
/**
 * Modify a PDF by replacing text in the content stream of every page.
 *
 * Each page's content is decoded as a "binary" (one character per byte)
 * string so that non-text bytes survive the round trip, every pattern is
 * applied in order, and the result is written back as an unfiltered stream.
 *
 * @param sourceStream - stream the original PDF is read from
 * @param targetStream - stream the modified PDF is written to
 * @param patterns - replacements applied, in order, to every page
 */
const modifyPdf = ({
  sourceStream,
  targetStream,
  patterns,
}: {
  sourceStream: muhammara.ReadStream;
  targetStream: muhammara.WriteStream;
  patterns: Pattern[];
}): void => {
  // String.prototype.replaceAll throws a TypeError for a RegExp without the
  // "g" flag, so upgrade such patterns to their global equivalent up front.
  const globalPatterns = patterns.map(({ searchValue, replaceValue }) => ({
    searchValue:
      typeof searchValue === "string" || searchValue.global
        ? searchValue
        : new RegExp(searchValue.source, `${searchValue.flags}g`),
    replaceValue,
  }));

  const modPdfWriter = muhammara.createWriterToModify(sourceStream, targetStream, { compress: false });
  const parser = modPdfWriter.createPDFCopyingContextForModifiedFile().getSourceDocumentParser();
  const objectsContext = modPdfWriter.getObjectsContext();
  const numPages = parser.getPagesCount();

  for (let page = 0; page < numPages; page++) {
    const pageDictionary = parser.parsePage(page).getDictionary();
    const textStream = parser.queryDictionaryObject(pageDictionary, "Contents");
    // NOTE(review): assumes Contents is a single indirect stream object — confirm for pages with several content streams
    const textObjectID = pageDictionary.toJSObject().Contents.getObjectID();

    // Collect the page content in chunks of up to 10000 bytes
    const data: number[] = [];
    const readStream = parser.startReadingFromStream(textStream);
    while (readStream.notEnded()) {
      for (const byte of readStream.read(10000)) {
        data.push(byte);
      }
    }

    const pdfPageAsString = Buffer.from(data).toString("binary"); // key change 1
    let modifiedPdfPageAsString = pdfPageAsString;
    for (const pattern of globalPatterns) {
      modifiedPdfPageAsString = modifiedPdfPageAsString.replaceAll(pattern.searchValue, pattern.replaceValue);
    }

    // Create what will become our new text object
    objectsContext.startModifiedIndirectObject(textObjectID);
    const stream = objectsContext.startUnfilteredPDFStream();
    stream.getWriteStream().write(strToByteArray(modifiedPdfPageAsString));
    objectsContext.endPDFStream(stream);
    objectsContext.endIndirectObject();
  }

  modPdfWriter.end();
};
/**
 * Turn a string into the array of byte values that muhammara expects.
 * The "binary" encoding yields exactly one byte per character.
 */
const strToByteArray = (str: string): number[] => {
  const bytes = Buffer.from(str, "binary"); // key change 2
  return Array.from(bytes);
};
And then to use it:
/**
 * Fill a PDF with template data.
 *
 * Wraps the input buffer in muhammara buffer streams, runs modifyPdf on them
 * and resolves with the buffer of the target stream.
 */
export const fillPdf = async (sourceBuffer: Buffer): Promise<Buffer> => {
  const sourceStream = new muhammara.PDFRStreamForBuffer(sourceBuffer);
  const targetStream = new muhammara.PDFWStreamForBuffer();
  const patterns = [{ searchValue: "home", replaceValue: "emoh" }]; // TODO use actual patterns

  modifyPdf({ sourceStream, targetStream, patterns });

  return targetStream.buffer;
};
There is another Node.js Package asposepdfcloud, Aspose.PDF Cloud SDK for Node.js. You can use it to replace text in your PDF document conveniently. Its free plan offers 150 credits monthly. Here is sample code to replace text in PDF document, don't forget to install asposepdfcloud first.
const { PdfApi } = require("asposepdfcloud");
const { TextReplaceListRequest }= require("asposepdfcloud/src/models/textReplaceListRequest");
const { TextReplace }= require("asposepdfcloud/src/models/textReplace");
// Replaces two words in a PDF stored in Aspose cloud storage.
// Get App key and App SID from https://aspose.cloud
// Declared with const: the bare assignment created an implicit global, which
// throws a ReferenceError in strict mode and ES modules.
const pdfApi = new PdfApi("xxxxx-xxxxx-xxxx-xxxxxxxxxxx", "xxxxxxxxxxxxxxxxxxxxxb");
var fs = require('fs');
const name = "02_pages.pdf";
const remoteTempFolder = "Temp";
//const localTestDataFolder = "C:\\Temp";
//const path = remoteTempFolder + "\\" + name;
//var data = fs.readFileSync(localTestDataFolder + "\\" + name);

// First replacement: plain-text match (regex disabled)
const textReplace = new TextReplace();
textReplace.oldValue = "origami";
textReplace.newValue = "aspose";
textReplace.regex = false;

// Second replacement: plain-text match (regex disabled)
const textReplace1 = new TextReplace();
textReplace1.oldValue = "candy";
textReplace1.newValue = "biscuit";
textReplace1.regex = false;

// Both replacements are sent in a single request
const trr = new TextReplaceListRequest();
trr.textReplaces = [textReplace, textReplace1];

// Upload File
//pdfApi.uploadFile(path, data).then((result) => {
// console.log("Uploaded File");
// }).catch(function(err) {
// Deal with an error
// console.log(err);
//});

// Replace text
pdfApi.postDocumentTextReplace(name, trr, null, remoteTempFolder).then((result) => {
  console.log(result.body.code);
}).catch(function(err) {
  // Deal with an error
  console.log(err);
});
P.S: I'm developer evangelist at aspose.

nodejs event stream setting a variable per stream

I have a code that creates a readable stream . I would like to set the name of the stream in the getStream method . I tried setting a property as shown below . I am able to access the property in the onceFunction but I am not able to access the property in the map Function . Let me know what I am doing wrong
// Handler for the first 'data' event; it is registered directly on the read
// stream, so `this` is that stream and nodeName is readable here
var onceFunction = function(str1,record) {
console.log("OnceFunction",this.nodeName);
}
/**
 * Opens a read stream for a file and tags it with a nodeName property.
 * @param {string} csvData - path of the file to read
 * @returns the read stream, with nodeName set to the second-to-last "/"-separated path segment
 */
var getStream = function(csvData) {
var dirNames = csvData.split("/");
var nodeName = dirNames[dirNames.length-2];
var fileName = csvData;
stream = fs.createReadStream(csvData);
stream.nodeName = dirNames[dirNames.length-2];
return stream;
};
// NOTE(review): inside a JS string literal "\m" is just "m" and "\f" is a form
// feed, and getStream splits on "/" — confirm the intended path format
var myFileList = ["D:\mypath\file"];
for ( var i = 0; i< myFileList.length; i++ ) {
getStream(myFileList[i])
.once('data',onceFunction)
.pipe(es.split())
.on('end',endFunction)
.pipe(es.map(function(data,cb) {
// Here `this` is not the read stream created by getStream, so nodeName is not available
console.log(this.nodeName);
}));
}
Because "es" has its own "this" and passes it to the es.map callback, where, of course, nodeName is not set. Refactor your code to use closures and avoid using "this".
For example in pseudocode:
/**
 * Builds the processing pipeline for one file.
 *
 * The stream and its nodeName are captured in local variables so the map
 * callback reaches them through the closure instead of through `this`.
 *
 * @param {string} file - path handed to getStream
 */
function processFile(file) {
  const stream = getStream(file);
  const nodeName = stream.nodeName;
  stream.once('data', onceFunction)
    .pipe(es.split())
    .on('end', endFunction)
    .pipe(es.map(function(data, cb) {
      console.log(nodeName);
      console.log(stream.nodeName);
      // Signal completion so the map stream keeps consuming lines
      cb(null, data);
    }));
}

// A function declaration is hoisted, and the name now matches the call site
// (the function was previously defined as `processfile`, after the loop).
for (const file of myFileList) {
  processFile(file);
}

Serialization-deserialization with Apache Thrift in nodejs

I am working on a Node.js application and I need to serialize and deserialize instances of the structs defined in a .thrift file, like the following:
// Notification record with two string fields: subject (field id 1) and message (field id 2)
struct Notification {
1: string subject,
2: string message
}
Now this is easily doable in Java, according to the tutorial at http://www.gettingcirrius.com/2011/03/rabbitmq-with-thrift-serialization.html :
// Create an empty Notification for the deserializer to fill in
Notification notification = new Notification();
TDeserializer deserializer = new TDeserializer();
// Populate `notification` from serializedNotification (declared elsewhere; presumably the received bytes — confirm)
deserializer.deserialize(notification, serializedNotification);
System.out.println("Received "+ notification.toString());
But I can't find how this is done using the nodejs library of Thrift. Can anyone help, please?
Ok, after wasting a lot of time on research and trying different solutions, I finally came to the answer to my own question:
//SERIALIZATION:
// NOTE(review): the Buffer constructor is deprecated and is given the thrift
// object itself here; it is unclear what initial content the transport needs — confirm
var buffer = new Buffer(notification);
var transport = new thrift.TFramedTransport(buffer);
var binaryProt = new thrift.TBinaryProtocol(transport);
// Writes the struct through the binary protocol into the transport
notification.write(binaryProt);
where notification is the object I wish to serialize. At this point, the byte array can be found in the transport.outBuffers field:
var byteArray = transport.outBuffers;
For deserialization:
// Wrap the serialized bytes in a transport and read them back through the binary protocol
var tTransport = new thrift.TFramedTransport(byteArray);
var tProtocol = new thrift.TBinaryProtocol(tTransport);
// Empty instance of the generated struct type, filled in by read()
var receivedNotif = new notification_type.Notification();
receivedNotif.read(tProtocol);
Assuming that the following lines have been added to the index.js file from the nodejs library for thrift:
exports.TFramedTransport = require('./transport').TFramedTransport;
exports.TBufferedTransport = require('./transport').TBufferedTransport;
exports.TBinaryProtocol = require('./protocol').TBinaryProtocol;
Here is my TypeScript version which runs in a browser. npm install buffer before use.
It should work on node if you remove import { Buffer }.
/*
Thrift serializer for browser and node.js
Author: Hirano Satoshi
Usage:
let byteArray = thriftSerialize(thriftObj);
let thriftObj2 = thriftDeserialize(byteArray, new ThriftClass())
let mayBeTrue = byteArrayCompare(byteArray, thriftSerialize(thriftObj2))
*/
import { TBufferedTransport, TFramedTransport, TJSONProtocol, TBinaryProtocol } from 'thrift';
import { Buffer } from 'buffer';
/**
 * Serializes a Thrift object with the binary protocol.
 *
 * @param thriftObj - generated Thrift struct instance exposing write(protocol)
 * @returns a single Buffer containing every chunk the protocol wrote
 */
export function thriftSerialize(thriftObj: any): Buffer {
  const transport = new TBufferedTransport(null);
  const protocol = new TBinaryProtocol(transport);
  thriftObj.write(protocol);
  // outBuffers holds the chunks written so far; Buffer.concat joins them and
  // avoids the deprecated, uninitialised `new Buffer(size)` plus manual copying
  return Buffer.concat(transport.outBuffers);
}
/**
 * Fills a Thrift object from bytes produced by thriftSerialize.
 *
 * @param byteArray - serialized bytes
 * @param thriftObj - empty instance of the generated struct, exposing read(protocol)
 * @returns the same thriftObj after read() has been applied
 */
export function thriftDeserialize(byteArray: Buffer, thriftObj: any): any {
  // receiver() hands the callback a transport that already contains the bytes.
  // Assumes the callback runs synchronously, since thriftObj is returned right away — TODO confirm.
  const onTransportReady = (transportWithData: any): void => {
    thriftObj.read(new TBinaryProtocol(transportWithData));
  };
  TBufferedTransport.receiver(onTransportReady)(byteArray);
  return thriftObj;
}
/**
 * Element-wise comparison of two byte arrays.
 *
 * @returns true only when both arguments are truthy, have the same length and
 *   hold strictly equal values at every index
 */
export function byteArrayCompare(array1, array2): boolean {
  if (array1 && array2) {
    if (array1.length !== array2.length) {
      return false;
    }
    return array1.every((value, index) => array2[index] === value);
  }
  return false;
}
Somehow I did not find the byte array at:
transport.outBuffers
i needed to do the following:
// Serialize `notification` with the compact protocol and receive the bytes through the transport callback
var transport = new Thrift.TFramedTransport(null, function(bytes){
// Store the serialized bytes on the wrapper and pass it on
dataWrapper.out = bytes;
cb(dataWrapper)
})
var binaryProt = new Thrift.TCompactProtocol(transport);
notification.write(binaryProt) ;
transport.flush() ; //important without the flush the transport callback will not be invoked

ActionScript 3: ByteArray to binary String

I've been asked to implement an MD5 hasher in ActionScript 3, and while I was in the middle of debugging how I formatted my input I came across a problem. When I try to output the ByteArray as a binary string using .toString(2), the toString(2) method takes some shortcuts that alter how the binary should look.
For Example
// Write the character "a" and a zero byte, then print each byte in base 2
var bytes:ByteArray = new ByteArray();
bytes.endian = Endian.LITTLE_ENDIAN;
bytes.writeUTFBytes("a");
bytes.writeByte(0x0);
// toString(2) omits leading zeros, so the results are shorter than 8 digits
var t1:String = bytes[0].toString(2); // is 1100001 when it should be 01100001
var t2:String = bytes[1].toString(2); // is 0 when it should be 00000000
So I guess my question is: might there be a way to output a binary String from a ByteArray that always shows each byte as an 8-bit block?
All you need is to pad the output of toString(2) with zeros on the left to make its length equal to 8. Use this function for padding
/**
 * Pads str with a single repeated character until it is len characters long.
 *
 * @param str      string to pad
 * @param len      desired total length
 * @param char     padding character; must be exactly one character, otherwise str is returned unchanged
 * @param padLeft  true to pad on the left (default), false to pad on the right
 * @return the padded string, or str itself when it is already long enough
 */
function padString(str:String, len:int, char:String, padLeft:Boolean = true):String {
    var missing:int = len - str.length;
    // Nothing to add, or the pad token is not exactly one character
    if (missing <= 0 || char.length != 1) {
        return str;
    }
    var fill:String = "";
    while (fill.length < missing) {
        fill += char;
    }
    return padLeft ? fill + str : str + fill;
}
With this function the code looks like this and gives the correct output
// Same example as in the question, with each byte padded to 8 binary digits
var bytes:ByteArray = new ByteArray();
bytes.endian = Endian.LITTLE_ENDIAN;
bytes.writeUTFBytes("a");
bytes.writeByte(0x0);
var t1:String = padString(bytes[0].toString(2), 8, "0"); // is now 01100001
var t2:String = padString(bytes[1].toString(2), 8, "0"); // is now 00000000
Update
If you want to get a string representation of the complete byteArray, you can use a function which iterates over the byteArray. I have written the following function and it seems to work correctly. Give it a try.
/**
 * String padding function.
 *
 * Repeats char once for every character that str is shorter than len and
 * attaches the result to the left or right of str.
 *
 * @param str      string to pad
 * @param len      desired total length
 * @param char     padding text appended once per missing character
 * @param padLeft  true to pad on the left (default), false to pad on the right
 * @return the padded string; str unchanged when it is already len or longer
 */
function padString(str:String, len:int, char:String, padLeft:Boolean = true):String {
    // how many padding repetitions are still needed
    var remaining:int = len - str.length;
    var fill:String = "";
    // does not run at all when no padding is needed
    while (remaining-- > 0) {
        fill += char;
    }
    if (padLeft) {
        return fill + str;
    }
    return str + fill;
}
/**
 * Returns a binary string representation of a byte array.
 *
 * @param bArray  bytes to convert
 * @return every byte rendered as exactly 8 binary digits, concatenated in order
 */
function byteArrayToBinaryString(bArray:ByteArray):String {
    var str:String = "";
    // store the length (now explicitly typed) so it is not re-read on every iteration
    var aLen:uint = bArray.length;
    for (var i:uint = 0; i < aLen; i++) {
        // toString(2) drops leading zeros, so pad each byte back to 8 digits
        str += padString(bArray[i].toString(2), 8, "0");
    }
    return str;
}
Now you can simply use the byteArrayToBinaryString() function like this:
// Example usage of byteArrayToBinaryString()
// init byte array and set Endianness
var bytes:ByteArray = new ByteArray();
bytes.endian = Endian.LITTLE_ENDIAN;
// write some data to byte array
bytes.writeUTFBytes("a");
bytes.writeByte(0x0);
// convert to binaryString
var byteStr:String = byteArrayToBinaryString(bytes); // returns 0110000100000000
Here is a function extended on the Hurlant library to handle hashing byteArray.
This class has a learning curve but once you get it you will love it.
As far as your ByteArray issue with toString: I know the toString method is not accurate for this very reason.
You might want to look into byteArray.readMultiByte, which will give you the 01 you are looking for. Although I can't seem to get it to work in my sample code either, lol.
I just always get an empty string.
// Attempt to read the two bytes back with readMultiByte
var bytes:ByteArray = new ByteArray();
bytes.endian = Endian.LITTLE_ENDIAN;
bytes.writeUTFBytes("a");
bytes.writeByte(0x0);
// Rewind so reading starts at the first byte
bytes.position = 0
var t1:String = bytes.readMultiByte(1,'us-ascii'); // NOTE(review): presumably decodes the byte as text ("a"), not as binary digits — confirm
trace(t1)
var t2:String = bytes.readMultiByte(1,'iso-8859-01'); // NOTE(review): reads the 0x00 byte; 'iso-8859-01' may not be a recognized charset name (usual spelling is 'iso-8859-1') — confirm
trace(t2)

Resources