MIME Binary Data to PDF - node.js

I'm receiving some data from an API call that returns some XML and associated files (PDFs):
var req = http.request(HTTPOPTIONS, function(resp){
var rawData = '';
//Build list from response data
resp.on('data', function (chunk) {
rawData+= chunk;
});
//Process List
resp.on('end', function () {
var breakStr = rawData.split('\n')[0];
var fileSections = rawData.split(breakStr);
for(var i in fileSections){
var content = fileSections[i].split((/Content-Length: [0-9]*/));
var fileName = content[0].split('filename=')[1].trim();
var file = {Bucket : 'MyBucket', Key: ROOT+'/'+FOLDER+'/'+SUBFOLDER+'/'+fileName, Body: content[1]};
console.log('Creating file: '+file.Key );
promises.push(S3.upload(file).promise());
}
Promise.all(promises).then(...);
});
});
req.write(XMLREQUEST);
req.end();
But I when I try to open the file created I get [
Any ideas on where I'm going wrong?
UPDATE:
In addition to the above error message I also get [
On these files I get the metadata (Page size/formatting and Font data) but no content.

It appears the problem was because I was storing the data in a string and trying to manipulate it from there. The incoming data chunk is a buffer and using it in this form mean that, once you figure out how to remove the headers, you can create the PDF files.
var req = http.request(HTTPOPTIONS, function(resp){
var respChunks =[];
var respChunksLength = 0;
resp.on('data', function (chunk) {
respChunks.push(chunk);
respChunksLength+=chunk.length;
});
resp.on('end', function () {
var confResp = Buffer.concat(respChunks, respChunksLength);
var breakStr = confResp.slice(0,confResp.indexOf('\n'));
var bufferArray = [];
var startBreak = confResp.indexOf(breakStr);
while(startBreak>-1){
bufferArray.push(confResp.slice(startBreak+breakStr.length+1,confResp.indexOf(breakStr,startBreak+1)));
startBreak = confResp.indexOf(breakStr,startBreak+1);
}
var trim=0;
for(var i = 1; i<bufferArray.length;i++){
trim = bufferArray[i].toString().indexOf('%');
fs.writeFile('testFile'+i+'.pdf',bufferArray[1].slice(trim));
});
});
req.write(generateProposalDetailXML());
req.end();
the bufferArray is concatenated into a single buffer, this is then divided based on the MIME delimiter (breakStr) and the headers (clumsily) removed and written to a file.

Related

Parse XML from site and get values from elements

I am writing a NodeJS script to get the XML from a website and get the values from a specific element.
However, when running it it says 'undefined'.
Code:
var http = require('http');
var xml2js = require('xml2js');
var parseString = require('xml2js').parseString;
var req = http.get("http://feeds.nos.nl/nosnieuwsvideo?format=xml", function(res) {
// save the data
var xml = '';
res.on('data', function(chunk) {
xml += chunk;
console.log(xml);
});
res.on('end', function() {
parseString(xml, function(err, result) {
console.log(result['title']);
});
});
});
req.on('error', function(err) {
// debug error
});
What's the problem?
Thanks
Instead of result['title'], try result.rss.channel[0].title
you're using the wrong path to access it.
alternatively, you can only parse what you need
const transform = require('camaro')
const result = transform(xml, { title: '//channel/title'})
console.log(result.title)
Thanks everyone for the answers.
I got it working by using
for (var i = 0; i < objects2.length; i++) {
var title = Object.values(objects2[i].title).toString();
title.replace("['", "").replace("']", "");
var description = Object.values(objects2[i].description).toString();
description.replace("['", "").replace("']", "");
var pubtime = Object.values(objects2[i].pubDate).toString();
pubtime.replace("['", "").replace("']", "");
var link = Object.values(objects2[i].link).toString();
link.replace("['", "").replace("']", "");
console.log("\n");
console.log(pubtime);
console.log(link);
console.log(title);
console.log(description);
}

How to download excel file from nodejs terminal

I am new to nodejs. Need your help. From the nodejs terminal, i want to download an excel file and convert it to csv (say, mocha online.js). Note: i don't want to do this via a browser.
Below is a script i am working on to download and convert to csv. There is no error nor the expected result:
online.js
if (typeof require !== 'undefined') XLSX = require('xlsx');
var XMLHttpRequest = require("xmlhttprequest").XMLHttpRequest;
/* set up XMLHttpRequest */
var url = "http://oss.sheetjs.com/js-xlsx/test_files/formula_stress_test_ajax.xlsx";
var xhr = new XMLHttpRequest();
xhr.open("GET", url, true);
xhr.responseType = "arraybuffer";
describe('suite', function () {
it('case', function () {
var arraybuffer = xhr.response;
/* convert data to binary string */
var data = new Uint8Array(arraybuffer);
var arr = new Array();
for (var i = 0; i != data.length; ++i) arr[i] = String.fromCharCode(data[i]);
var bstr = arr.join("");
/* Call XLSX */
var sheetName = 'Database';
var workbook = XLSX.read(bstr, { type: "binary" });
var worksheet = workbook.Sheets[sheetName];
var csv = XLSX.utils.sheet_to_csv(worksheet);
console.log(csv);
xhr.send();
//.... perform validations here using the csv data
});
})}
I tried myself with this code, and it seems it is working, the only thing is that I spent 15 minutes trying to understand why my open office would not open the file, I eventually understood that they were sending a zip file ... here is the full code, the doc of the http get function is here http.get
You could have used the request module, but it isn't native, request is easier though.
enjoy!
const url = 'http://oss.sheetjs.com/js-xlsx/test_files/formula_stress_test_ajax.xlsx'
const http = require('http')
const fs = require('fs')
http.get(url, (res) => {
debugger
const {
statusCode
} = res;
const contentType = res.headers['content-type'];
console.log(`The type of the file is : ${contentType}`)
let error;
if (statusCode !== 200) {
error = new Error(`Request Failed.\n` +
`Status Code: ${statusCode}`);
}
if (error) {
console.error(error.message);
// consume response data to free up memory
res.resume();
return;
}
res.setEncoding('binary');
let rawData = '';
res.on('data', (chunk) => {
rawData += chunk;
});
res.on('end', () => {
try {
const parsedData = xlsxToCSVFunction(rawData);
// And / Or just put it in a file
fs.writeFileSync('fileName.zip', rawData, 'binary')
// console.log(parsedData);
} catch (e) {
console.error(e.message);
}
});
}).on('error', (e) => {
console.error(`Got error: ${e.message}`);
});
function xlsxToCSVFunction(rawData) {
return rawData //you should return the csv file here whatever your tools are
}
I actually encountered the same problem 3 months ago : here is what I did!
I did not find any nodeJS module that was exactly as I wanted, so I used in2csv (a python shell program) to transform the data; the t option is to use tabulation as the delimiter
1) Step 1: transforming the xlsx file into csv using in2csv
This code takes all the xlsx files in the current directory, transform them into csv files and put them in another directory
var shelljs = require('shelljs/global')
var dir = pwd().stdout.split('/')
dir = dir[dir.length - 1].replace(/\s/g, '\\ ')
mkdir('../'+ dir + 'CSV')
ls('*.xlsx').forEach(function(file) {
// below are the two lines you need
let string = 'in2csv -t ' + file.replace(/\s/g, '\\ ') + ' > ../'+ dir + 'CSV/' + file.replace('xlsx','csv').replace(/\s/g, '\\ ')
exec(string, {silent:true}, function(code, stdout, stderr){
console.log('new file : ' + file.replace('xlsx','csv'))
if(stderr){
console.log(string)
console.log('Program stderr:', stderr)
}
})
});
Step 2: loading the data in a nodejs program:
my script is very long but the main two lines are :
const args = fileContent.split('\n')[0].split(',')
const content = fileContent.split('\n').slice(1).map(e => e.split(','))
And for the benefit of seekers like me...here is a solution using mocha, request and xlsx
var request = require('request');
var XLSX = require('xlsx');
describe('suite', function () {
it('case', function (done) {
var url = "http://oss.sheetjs.com/js-xlsx/test_files/formula_stress_test_ajax.xlsx";
var options = {
url: url,
headers: {
'Content-Type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
},
encoding: null
};
request.get(options, function (err, res, body){
var arraybuffer = body;
/* convert data to binary string */
var data = arraybuffer;
//var data = new Uint8Array(arraybuffer);
var arr = new Array();
for (var i = 0; i != data.length; ++i) arr[i] = String.fromCharCode(data[i]);
var bstr = arr.join("");
/* Call XLSX */
var sheetName = 'Database';
var workbook = XLSX.read(bstr, { type: "binary" });
var worksheet = workbook.Sheets[sheetName];
var csv = XLSX.utils.sheet_to_csv(worksheet);
console.log(csv);
done();
});
});
});

Node.js - MJPEG TCP stream to base64 images

Based on paparazzo.js lib, I'm trying to get base64 images from a MJPEG stream (streamed over TCP with GStreamer) in a Node.js server, and to send them to the clients via websockets.
I think I'm pretty close, but my images are corrupted. Here is the code I'm using :
var boundary = "----videoboundary";
var data = "";
var tcpServer = net.createServer(function (socket) {
socket.on('data', function(chunk) {
var boundaryIndex = chunk.toString().indexOf(boundary);
if (boundaryIndex !== -1) {
// Get the image's last piece of data :
data += chunk.toString().substring(0, boundaryIndex);
// Convert the data to a base64 image and broadcast it :
var image = new Buffer(data).toString('base64');
io.sockets.emit('image', image);
// Reset the data :
data = '';
// Get the remaining data (with new image's headers) :
var remaining = chunk.toString().substring(boundaryIndex);
// Remove the new image's headers and add the remaining data :
var contentTypeMatches = remaining.match(/Content-Type:\s+image\/jpeg\s+/);
var contentLengthMatches = remaining.match(/Content-Length:\s+(\d+)\s+/);
if(contentLengthMatches != null && contentLengthMatches.length > 1) {
var newImageBeginning = remaining.indexOf(contentLengthMatches[0]) + contentLengthMatches[0].length;
data += remaining.substring(newImageBeginning);
}
else if(contentTypeMatches != null) {
var newImageBeginning = remaining.indexOf(contentTypeMatches[0]) + contentTypeMatches[0].length;
data += remaining.substring(newImageBeginning);
}
else {
var newImageBeginning = boundaryIndex + boundary.length;
data += remaining.substring(newImageBeginning);
io.sockets.emit('error', { message: 'Could not find beginning of next image' });
}
}
else {
data += chunk;
}
});
});
Any idea ?
Thanks
chunk.toString() converts the binary Buffer to a utf8-encoded string (by default), so for binary image data that will probably cause you some problems.
Another option that might help simplify things for you is to use the dicer module. With that, your code may look like:
var Dicer = require('dicer');
var boundary = '----videoboundary';
var tcpServer = net.createServer(function(socket) {
var dice = new Dicer({ boundary: boundary });
dice.on('part', function(part) {
var frameEncoded = '';
part.setEncoding('base64');
part.on('header', function(header) {
// here you can verify content-type, content-length, or any other header
// values if you need to
}).on('data', function(data) {
frameEncoded += data;
}).on('end', function() {
io.sockets.emit('image', frameEncoded);
});
}).on('finish', function() {
console.log('End of parts');
});
socket.pipe(dice);
});

MongoDB Time conversion

I have a MongoDB collection that I am querying based on a time frame and address number. If the query is successful, then the server will return a CSV file with a number of attributes stored in each query entry. This part is working fine.
The problem is that one of the attributes in my CSV file is the timestamp. I'd like it to return the local date time (ie. "Time":"2014-02-09T06:00:02.000Z")... however the CSV file is returning the date object in milliseconds (ie. "Time":1392040717774). Is there an easy way to transform the query entry before it's written to the CSV file to a local date string? Here's my code snippet:
var JSONStream = require('JSONStream');
var httpserver = http.createServer(function(req, res) {
var pathname = url.parse(req.url).pathname;
if (pathname=="/DownloadUCData") {
var requestData = '';
req.on('data', function (data) {
requestData += data;
});
req.on('end', function () {
var json = JSON.parse(requestData);
var st = new Date(json.startDate);
var et = new Date(json.endDate);
st.setHours(st.getHours()-4); //compensate for GMT offset
et.setHours(et.getHours()-4);
st=st.getTime();
et=et.getTime();
var proj=JSON.parse('{"state":1, "temperature":1, "mode":1, "speed":1, "time":1}');
var cursor = userControlCollection.find({"addr": json.addr.toString(), "time": {$gte:st, $lte:et}}, proj);
var dbstream = cursor.stream();
var tempname = json.type+".csv";
var wstream = fs.createWriteStream(tempname);
wstream.on('error', function(e){console.error(e);});
dbstream.on("end", function() {
wstream.end();
console.log("write end");
res.writeHead(200, {"Content-Type": "application/json"});
res.write(JSON.stringify({fname:tempname}));
res.end();
return;
});
var jsonToStrings = JSONStream.stringify(false);
dbstream.pipe(jsonToStrings).pipe(wstream);
});
}
So, I figured out one way to solve this problem (although there may be others). Basically, I had to add a transformation into the piping system to convert the .getTime() data into a new Date() object. Here's the code snippet which seemed to resolve the issue:
var Transform = require('stream').Transform;
var parser = new Transform({objectMode: true});
parser._transform = function(data, encoding, done) {
if(data.time) data.time = new Date(data.time);
this.push(data);
done();
};
var jsonToStrings = JSONStream.stringify(false);
dbstream.pipe(parser).pipe(jsonToStrings).pipe(wstream);

Create Thumbnail Image using Windows Azure Blob Storage

I am trying to use the azure-sdk-for-node to save a streamed image to Windows Azure blob storage but without success. Below is the function that I call and pass the video object with the thumbnail property. Initially I fetch the image using the request object which fetches the image from another website and turn that into a base64 object which in turn gets converted into a stream object because Azure blob service uses createBlockBlobFromStream method as I couldn't use createBlockBlobFromFile or createBlockBlobFromText to upload the image to blob storage.
var azure = require('azure')
, uuid = require('node-uuid')
, http = require('http')
, url = require('url')
, mmm = require('mmmagic')
, Magic = mmm.Magic
, stream = require('stream');
function createVideoThumbnail(video, callback){
var bs = azure.createBlobService(config.storageAccount, config.storageAccessKey, config.blobHost);
var sURL = video.Thumbnail;
var oURL = url.parse(sURL);
var client = http.createClient(80, oURL.hostname);
var request = client.request('GET', oURL.pathname, {'host': oURL.hostname});
request.end();
request.on('response', function (response) {
var type = response.headers["content-type"];
var prefix = "data:" + type + ";base64,";
var body = "";
response.setEncoding('binary');
response.on('end', function () {
var base64 = new Buffer(body, 'binary').toString('base64');
var data = prefix + base64;
console.log('base64 image data ' + video.Thumbnail + ': ' + data + '\n');
var decodedImage = new Buffer(data, 'base64');
var magic = new Magic(mmm.MAGIC_MIME_TYPE);
magic.detect(decodedImage, function(err, result) {
if(err) {
throw err;
}
var bytes = 0;
var imageStream = new stream.Stream();
imageStream.writable = true;
imageStream.write = function(buf) {
bytes += buf.length;
imageStream.emit('data', buf);
};
imageStream.end = function(buf) {
//if(arguments.length) {
imageStream.write(buf);
//}
imageStream.writable = false;
imageStream.emit('end');
console.log(bytes + ' bytes written');
};
var options = {}
console.log('mmm = ' + result + '\n');
options.contentType = result;
options.contentTypeHeader = result;
console.log('\n');
bs.createBlockBlobFromStream(config.imageContainer, uuid().replace(/-/gi, "").toLowerCase() + '.jpg', imageStream, decodedImage.length, options, function(error, blobResult, response) {
if (error)
console.log('got error = ' + JSON.stringify(error) + '\n');
if (blobResult)
console.log('blobResult = ' + JSON.stringify(blobResult) + '\n');
if (response)
console.log('response = ' + JSON.stringify(response) + '\n');
// now store in Azure blob storage
callback();
});
imageStream.end(decodedImage);
});
});
response.on('data', function (chunk) {
if (response.statusCode == 200) body += chunk;
});
});
}
and this is how I call it:
createVideoThumbnail(video, function(){
console.log("returning from create thumbnails\n\n");
});
The function is not working it hangs and won't print out the the final log statement:
console.log("returning from create thumbnails\n\n");
However the base64 does seem to work as I am getting this for the encoding:

mmm = application/octet-stream
5283 bytes written
But I am not getting any of these log statements being printed:
if (error)
console.log('got error = ' + JSON.stringify(error) + '\n');
if (blobResult)
console.log('blobResult = ' + JSON.stringify(blobResult) + '\n');
if (response)
console.log('response = ' + JSON.stringify(response) + '\n');
So I am presuming it is hanging somewhere or I have not structured my code properly. Can anybody see what I am doing wrong ?
Cheers
Rob
My sources:
http://social.msdn.microsoft.com/Forums/en-US/wavirtualmachinesforlinux/thread/47bfe142-c459-4815-b09e-bd0a07ca18d5
Node.js base64 encode a downloaded image for use in data URI
Here's my simplified version. Since magic needs to operate on the whole file, there's no point in using the streaming blog API, instead this is written for the text api. body will be a buffer of the image, I suspect azure will be happy with it sans encoding, call toString('encoding') if it does need it.
var azure = require('azure')
, uuid = require('node-uuid')
, request = require('request')
, mmm = require('mmmagic')
, Magic = mmm.Magic
, stream = require('stream')
, bs = azure.createBlobService(config.storageAccount, config.storageAccessKey, config.blobHost);
function createVideoThumbnail(video, callback){
var sURL = video.Thumbnail;
request(sURL, {encoding:null}, function (err, res, body) {
// encoding:null makes request return a buffer, which is ideal to run magic.detect on
magic.detect(body, function (err, res) {
console.log(res);
var container = config.imageContainer;
var blob = uuid().replace(/-/gi, "").toLowerCase() + '.jpg';
var text = body; //might need to be converted into a string, I don't have azure setup to test
var options = {
contentType: res,
contentTypeHeader: res
};
bs.createBlockBlobFromText(container, blob, text, options, function(error, blobResult, response) {
if (error)
console.log('got error =', error);
// if you give console.log multiple arguments, it will format each of them,
// no need to manipulate objects into strings manually
if (blobResult)
console.log('blobResult =', blobResult);
if (response)
console.log('response =', response);
// now store in Azure blob storage
callback();
});
});
});
}
edit: temp file version
var azure = require('azure')
, uuid = require('node-uuid')
, request = require('request')
, mmm = require('mmmagic')
, Magic = mmm.Magic
, fs = require('fs')
, bs = azure.createBlobService(config.storageAccount, config.storageAccessKey, config.blobHost);
function createVideoThumbnail(video, callback){
var sURL = video.Thumbnail;
var name = uuid().replace(/-/gi, "").toLowerCase() + '.jpg';
var ws = fs.createWriteStream('./tmp/' + name);
request(sURL, {encoding:null})
.pipe(ws).on('close', function () {
console.log('downloaded');
magic.detectFile('./tmp/' + name, function (err, res) {
var container = config.imageContainer;
var blob = uuid().replace(/-/gi, "").toLowerCase() + '.jpg';
var options = {
contentType: res,
contentTypeHeader: res
};
bs.createBlockBlobFromFile(container, name, './tmp/' + name, function (err) {
callback();
});
});
});
}
Thanks to Valery Jacobs on msdn forums was able to come up with the answer. It's a nice clean solid solution using the stream, request and util object.
:)
http://social.msdn.microsoft.com/Forums/en-US/windowsazurepurchasing/thread/25c7705a-4ea0-4d9c-af09-cb48a031d06c

Resources