How to download excel file from nodejs terminal - node.js

I am new to nodejs. Need your help. From the nodejs terminal, i want to download an excel file and convert it to csv (say, mocha online.js). Note: i don't want to do this via a browser.
Below is a script i am working on to download and convert to csv. There is no error nor the expected result:
online.js
if (typeof require !== 'undefined') XLSX = require('xlsx');
var XMLHttpRequest = require("xmlhttprequest").XMLHttpRequest;
/* set up XMLHttpRequest */
var url = "http://oss.sheetjs.com/js-xlsx/test_files/formula_stress_test_ajax.xlsx";
var xhr = new XMLHttpRequest();
xhr.open("GET", url, true);
xhr.responseType = "arraybuffer";
describe('suite', function () {
it('case', function () {
var arraybuffer = xhr.response;
/* convert data to binary string */
var data = new Uint8Array(arraybuffer);
var arr = new Array();
for (var i = 0; i != data.length; ++i) arr[i] = String.fromCharCode(data[i]);
var bstr = arr.join("");
/* Call XLSX */
var sheetName = 'Database';
var workbook = XLSX.read(bstr, { type: "binary" });
var worksheet = workbook.Sheets[sheetName];
var csv = XLSX.utils.sheet_to_csv(worksheet);
console.log(csv);
xhr.send();
//.... perform validations here using the csv data
});
})}

I tried myself with this code, and it seems it is working, the only thing is that I spent 15 minutes trying to understand why my open office would not open the file, I eventually understood that they were sending a zip file ... here is the full code, the doc of the http get function is here http.get
You could have used the request module, but it isn't native, request is easier though.
enjoy!
const url = 'http://oss.sheetjs.com/js-xlsx/test_files/formula_stress_test_ajax.xlsx'
const http = require('http')
const fs = require('fs')
http.get(url, (res) => {
debugger
const {
statusCode
} = res;
const contentType = res.headers['content-type'];
console.log(`The type of the file is : ${contentType}`)
let error;
if (statusCode !== 200) {
error = new Error(`Request Failed.\n` +
`Status Code: ${statusCode}`);
}
if (error) {
console.error(error.message);
// consume response data to free up memory
res.resume();
return;
}
res.setEncoding('binary');
let rawData = '';
res.on('data', (chunk) => {
rawData += chunk;
});
res.on('end', () => {
try {
const parsedData = xlsxToCSVFunction(rawData);
// And / Or just put it in a file
fs.writeFileSync('fileName.zip', rawData, 'binary')
// console.log(parsedData);
} catch (e) {
console.error(e.message);
}
});
}).on('error', (e) => {
console.error(`Got error: ${e.message}`);
});
function xlsxToCSVFunction(rawData) {
return rawData //you should return the csv file here whatever your tools are
}

I actually encountered the same problem 3 months ago : here is what I did!
I did not find any nodeJS module that was exactly as I wanted, so I used in2csv (a python shell program) to transform the data; the t option is to use tabulation as the delimiter
1) Step 1: transforming the xlsx file into csv using in2csv
This code takes all the xlsx files in the current directory, transform them into csv files and put them in another directory
var shelljs = require('shelljs/global')
var dir = pwd().stdout.split('/')
dir = dir[dir.length - 1].replace(/\s/g, '\\ ')
mkdir('../'+ dir + 'CSV')
ls('*.xlsx').forEach(function(file) {
// below are the two lines you need
let string = 'in2csv -t ' + file.replace(/\s/g, '\\ ') + ' > ../'+ dir + 'CSV/' + file.replace('xlsx','csv').replace(/\s/g, '\\ ')
exec(string, {silent:true}, function(code, stdout, stderr){
console.log('new file : ' + file.replace('xlsx','csv'))
if(stderr){
console.log(string)
console.log('Program stderr:', stderr)
}
})
});
Step 2: loading the data in a nodejs program:
my script is very long but the main two lines are :
const args = fileContent.split('\n')[0].split(',')
const content = fileContent.split('\n').slice(1).map(e => e.split(','))

And for the benefit of seekers like me...here is a solution using mocha, request and xlsx
var request = require('request');
var XLSX = require('xlsx');
describe('suite', function () {
it('case', function (done) {
var url = "http://oss.sheetjs.com/js-xlsx/test_files/formula_stress_test_ajax.xlsx";
var options = {
url: url,
headers: {
'Content-Type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
},
encoding: null
};
request.get(options, function (err, res, body){
var arraybuffer = body;
/* convert data to binary string */
var data = arraybuffer;
//var data = new Uint8Array(arraybuffer);
var arr = new Array();
for (var i = 0; i != data.length; ++i) arr[i] = String.fromCharCode(data[i]);
var bstr = arr.join("");
/* Call XLSX */
var sheetName = 'Database';
var workbook = XLSX.read(bstr, { type: "binary" });
var worksheet = workbook.Sheets[sheetName];
var csv = XLSX.utils.sheet_to_csv(worksheet);
console.log(csv);
done();
});
});
});

Related

Parse XML from site and get values from elements

I am writing a NodeJS script to get the XML from a website and get the values from a specific element.
However, when running it it says 'undefined'.
Code:
var http = require('http');
var xml2js = require('xml2js');
var parseString = require('xml2js').parseString;
var req = http.get("http://feeds.nos.nl/nosnieuwsvideo?format=xml", function(res) {
// save the data
var xml = '';
res.on('data', function(chunk) {
xml += chunk;
console.log(xml);
});
res.on('end', function() {
parseString(xml, function(err, result) {
console.log(result['title']);
});
});
});
req.on('error', function(err) {
// debug error
});
What's the problem?
Thanks
Instead of result['title'], try result.rss.channel[0].title
you're using the wrong path to access it.
alternatively, you can only parse what you need
const transform = require('camaro')
const result = transform(xml, { title: '//channel/title'})
console.log(result.title)
Thanks everyone for the answers.
I got it working by using
for (var i = 0; i < objects2.length; i++) {
var title = Object.values(objects2[i].title).toString();
title.replace("['", "").replace("']", "");
var description = Object.values(objects2[i].description).toString();
description.replace("['", "").replace("']", "");
var pubtime = Object.values(objects2[i].pubDate).toString();
pubtime.replace("['", "").replace("']", "");
var link = Object.values(objects2[i].link).toString();
link.replace("['", "").replace("']", "");
console.log("\n");
console.log(pubtime);
console.log(link);
console.log(title);
console.log(description);
}

MIME Binary Data to PDF

I'm receiving some data from an API call that returns some XML and associated files (PDFs):
var req = http.request(HTTPOPTIONS, function(resp){
var rawData = '';
//Build list from response data
resp.on('data', function (chunk) {
rawData+= chunk;
});
//Process List
resp.on('end', function () {
var breakStr = rawData.split('\n')[0];
var fileSections = rawData.split(breakStr);
for(var i in fileSections){
var content = fileSections[i].split((/Content-Length: [0-9]*/));
var fileName = content[0].split('filename=')[1].trim();
var file = {Bucket : 'MyBucket', Key: ROOT+'/'+FOLDER+'/'+SUBFOLDER+'/'+fileName, Body: content[1]};
console.log('Creating file: '+file.Key );
promises.push(S3.upload(file).promise());
}
Promise.all(promises).then(...);
});
});
req.write(XMLREQUEST);
req.end();
But I when I try to open the file created I get [
Any ideas on where I'm going wrong?
UPDATE:
In addition to the above error message I also get [
On these files I get the metadata (Page size/formatting and Font data) but no content.
It appears the problem was because I was storing the data in a string and trying to manipulate it from there. The incoming data chunk is a buffer and using it in this form mean that, once you figure out how to remove the headers, you can create the PDF files.
var req = http.request(HTTPOPTIONS, function(resp){
var respChunks =[];
var respChunksLength = 0;
resp.on('data', function (chunk) {
respChunks.push(chunk);
respChunksLength+=chunk.length;
});
resp.on('end', function () {
var confResp = Buffer.concat(respChunks, respChunksLength);
var breakStr = confResp.slice(0,confResp.indexOf('\n'));
var bufferArray = [];
var startBreak = confResp.indexOf(breakStr);
while(startBreak>-1){
bufferArray.push(confResp.slice(startBreak+breakStr.length+1,confResp.indexOf(breakStr,startBreak+1)));
startBreak = confResp.indexOf(breakStr,startBreak+1);
}
var trim=0;
for(var i = 1; i<bufferArray.length;i++){
trim = bufferArray[i].toString().indexOf('%');
fs.writeFile('testFile'+i+'.pdf',bufferArray[1].slice(trim));
});
});
req.write(generateProposalDetailXML());
req.end();
the bufferArray is concatenated into a single buffer, this is then divided based on the MIME delimiter (breakStr) and the headers (clumsily) removed and written to a file.

XLSX File corrupted when sent from Node.js via Restify to Client

I am working on a project where I am creating an excel file using XLSX node.js library, sending it to a client via Restify where I then use the FileSaver.js library to save it on the local computer. When I write the xlsx workbook to file on the backend, it opens fine, however, when I open it on the client, it is corrupted. I get the error: "Excel cannot open this file. The file format or file extension is not valid. Verify that the file has not been corrupted and that the file extension matches the format of the file".
Here is my code for writing and sending the file on the backend:
var wopts = { bookType:'xlsx', bookSST:false, type:'binary' };
var workbook = xlsx.write(wb, wopts);
res.send(200, workbook);
On the front end, I am using code from the XLSX documentation:
function s2ab(s) {
var buf = new ArrayBuffer(s.length);
var view = new Uint8Array(buf);
for (var i=0; i!=s.length; ++i)
view[i] = s.charCodeAt(i) & 0xFF;
return buf;
}
saveAs(new Blob([s2ab(response.data)],{type:""}), "test.xlsx");
Any thoughts on why this would not work? Any help would be much appreciated. Thanks.
As Luke mentioned in the comments, you have to do a base64 encoding before sending the buffer. Here's a snippet that used the NPM module node-xlsx.
var xlsx = require('node-xlsx');
router.get('/history', function (req, res) {
var user = new User();
user.getHistory(req.user.userId, req.query.offset, req.query.limit)
.then(function (history) {
if (req.headers.contenttype && req.headers.contenttype.indexOf('excel') > -1) {
var data = [['Data', 'amount'], ['19/12/2016', '10']];
var xlsxBuffer = xlsx.build([{ name: 'History', data: data }]);
res.end(xlsxBuffer.toString('base64'));
} else {
res.send(history);
}
})
.catch(function (err) {
res.status(500).send(err);
});
});
And this is the frontend code using Angular:
$scope.getXlsFile = function() {
var config = {
params: {
offset: $scope.offset,
limit: $scope.limit
},
headers: {
'contentType': 'application/vnd.ms-excel',
'responseType': 'arraybuffer'
}
};
$http.get('/api/history', config)
.then(function(res) {
var blob = new Blob([convert.base64ToArrayBuffer(res.data)]);
FileSaver.saveAs(blob, 'historial.xlsx');
})
}
where convert is the following factory:
.factory('convert', function () {
return {
base64ToArrayBuffer: function (base64) {
var binary_string = window.atob(base64);
var len = binary_string.length;
var bytes = new Uint8Array(len);
for (var i = 0; i < len; i++) {
bytes[i] = binary_string.charCodeAt(i);
}
return bytes.buffer;
}
}
})

Can't upload file from Angularjs to Expressjs

I'm trying to upload a image from a AngularJS interface to a nodejs server (expressjs).
(I'm using mean.io)
Every time I upload someting, req.body logs "{}" and req.files logs "undefined"
I'm using angular-file-upload directive in AngularJS
Client-side code:
$scope.onFileSelect = function() {
console.log($files);
for (var i = 0; i < $files.length; i++) {
var file = $files[i];
$scope.upload = $upload.upload({
url: 'map/set',
method: 'POST',
headers: {'enctype': 'multipart/form-data'},
data: {myObj: $scope.myModelObj},
file: file,
}).progress(function(evt) {
console.log('percent: ' + parseInt(100.0 * evt.loaded / evt.total));
}).success(function(data, status, headers, config) {
// file is uploaded successfully
console.log(data);
});
}
};
Server-side code
var app = express();
require(appPath + '/server/config/express')(app, passport, db);
app.use(bodyParser({uploadDir:'./uploads'}));
app.post('/map/set', function(req, res) {
console.log(req.body);
console.log(req.files);
res.end('Success');
});
*****Edit*****
HTML Code
<div class="row">
<input id="file" type="file" ng-file-select="onFileSelect()" >
</div>
Hand built request
$scope.onFileSelect = function() {
//$files: an array of files selected, each file has name, size, and type.
//console.log($files);
var xhr = new XMLHttpRequest();
// not yet supported in most browsers, some examples use
// this but it's not safe.
// var fd = document.getElementById('upload').getFormData();
var fd = new FormData();
var files = document.getElementById('myfileinput').files;
console.log(files);
for(var i = 0;i<files.length; i++) {
fd.append("file", files[i]);
}
/* event listeners */
xhr.upload.addEventListener("progress", uploadProgress, false);
xhr.addEventListener("error", uploadFailed, false);
xhr.addEventListener("load", uploadComplete, false);
xhr.addEventListener("abort", uploadCanceled, false);
function uploadComplete(){
console.log("complete");
}
function uploadProgress(){
console.log("progress");
}
function uploadFailed(){
console.log("failed");
}
function uploadCanceled(){
console.log("canceled");
}
xhr.open("POST", "map/set");
xhr.send(fd);
};
The latest version of mean.io uncluding express 4.x as dependency. In the documentation for migration express 3 to 4 you can read, express will no longer user the connect middlewares. Read more about here: https://github.com/visionmedia/express/wiki/Migrating-from-3.x-to-4.x
The new body-parser module only handles urlencoded and json bodies. That means for multipart bodies (file uploads) you need an additional module like busboy or formadible.
Here is an example how I use angular-file-upload with busboy:
The AngularJS Stuff:
$upload.upload({
url: '/api/office/imageUpload',
data: {},
file: $scope.$files
}) …
I write a little helper module to handle uploads with busboy easier. It’s not very clean coded, but do the work:
var env = process.env.NODE_ENV || 'development';
var Busboy = require('busboy'),
os = require('os'),
path = require('path'),
config = require('../config/config')[env],
fs = require('fs');
// TODO: implement file size limit
exports.processFileUpload = function(req, allowedExtensions, callback){
var busboy = new Busboy({ headers: req.headers });
var tempFile = '';
var fileExtenstion = '';
var formPayload = {};
busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
fileExtenstion = path.extname(filename).toLowerCase();
tempFile = path.join(os.tmpDir(), path.basename(fieldname)+fileExtenstion);
file.pipe(fs.createWriteStream(tempFile));
});
busboy.on('field', function(fieldname, val, fieldnameTruncated, valTruncated) {
var jsonValue = '';
try {
jsonValue = JSON.parse(val);
} catch (e) {
jsonValue = val;
}
formPayload[fieldname] = jsonValue;
});
busboy.on('finish', function() {
if(allowedExtensions.length > 0){
if(allowedExtensions.indexOf(fileExtenstion) == -1) {
callback({message: 'extension_not_allowed'}, tempFile, formPayload);
} else {
callback(null, tempFile, formPayload)
}
} else {
callback(null, tempFile, formPayload)
}
});
return req.pipe(busboy);
}
In my controller i can use the module that way:
var uploader = require('../helper/uploader'),
path = require('path');
exports.uploadEmployeeImage = function(req,res){
uploader.processFileUpload(req, ['.jpg', '.jpeg', '.png'], function(uploadError, tempPath, formPayload){
var fileExtenstion = path.extname(tempPath).toLowerCase();
var targetPath = "/exampleUploadDir/testFile" + fileExtenstion;
fs.rename(tempPath, targetPath, function(error) {
if(error){
return callback("cant upload employee image");
}
callback(null, newFileName);
});
});
}
I'm going to take a guess here that the header settings are incorrect.
headers: {'enctype': 'multipart/form-data'},
Should be changed to:
headers: {'Content-Type': 'multipart/form-data'},
Ensure you have an 'id' AND 'name' attribute on the file input - not having an id attribute can cause problems on some browsers. Also, try building the request like this:
var xhr = new XMLHttpRequest();
// not yet supported in most browsers, some examples use
// this but it's not safe.
// var fd = document.getElementById('upload').getFormData();
var fd = new FormData();
var files = document.getElementById('myfileinput').files;
for(var i = 0;i<files.length; i++) {
fd.append("file", files[i]);
}
/* event listeners */
xhr.upload.addEventListener("progress", uploadProgress, false);
xhr.addEventListener("error", uploadFailed, false);
xhr.addEventListener("load", uploadComplete, false);
xhr.addEventListener("abort", uploadCanceled, false);
xhr.open("POST", "your/url");
xhr.send(fd);
angular isn't great with file uploads so doing it by hand might help.

Create Thumbnail Image using Windows Azure Blob Storage

I am trying to use the azure-sdk-for-node to save a streamed image to Windows Azure blob storage but without success. Below is the function that I call and pass the video object with the thumbnail property. Initially I fetch the image using the request object which fetches the image from another website and turn that into a base64 object which in turn gets converted into a stream object because Azure blob service uses createBlockBlobFromStream method as I couldn't use createBlockBlobFromFile or createBlockBlobFromText to upload the image to blob storage.
var azure = require('azure')
, uuid = require('node-uuid')
, http = require('http')
, url = require('url')
, mmm = require('mmmagic')
, Magic = mmm.Magic
, stream = require('stream');
function createVideoThumbnail(video, callback){
var bs = azure.createBlobService(config.storageAccount, config.storageAccessKey, config.blobHost);
var sURL = video.Thumbnail;
var oURL = url.parse(sURL);
var client = http.createClient(80, oURL.hostname);
var request = client.request('GET', oURL.pathname, {'host': oURL.hostname});
request.end();
request.on('response', function (response) {
var type = response.headers["content-type"];
var prefix = "data:" + type + ";base64,";
var body = "";
response.setEncoding('binary');
response.on('end', function () {
var base64 = new Buffer(body, 'binary').toString('base64');
var data = prefix + base64;
console.log('base64 image data ' + video.Thumbnail + ': ' + data + '\n');
var decodedImage = new Buffer(data, 'base64');
var magic = new Magic(mmm.MAGIC_MIME_TYPE);
magic.detect(decodedImage, function(err, result) {
if(err) {
throw err;
}
var bytes = 0;
var imageStream = new stream.Stream();
imageStream.writable = true;
imageStream.write = function(buf) {
bytes += buf.length;
imageStream.emit('data', buf);
};
imageStream.end = function(buf) {
//if(arguments.length) {
imageStream.write(buf);
//}
imageStream.writable = false;
imageStream.emit('end');
console.log(bytes + ' bytes written');
};
var options = {}
console.log('mmm = ' + result + '\n');
options.contentType = result;
options.contentTypeHeader = result;
console.log('\n');
bs.createBlockBlobFromStream(config.imageContainer, uuid().replace(/-/gi, "").toLowerCase() + '.jpg', imageStream, decodedImage.length, options, function(error, blobResult, response) {
if (error)
console.log('got error = ' + JSON.stringify(error) + '\n');
if (blobResult)
console.log('blobResult = ' + JSON.stringify(blobResult) + '\n');
if (response)
console.log('response = ' + JSON.stringify(response) + '\n');
// now store in Azure blob storage
callback();
});
imageStream.end(decodedImage);
});
});
response.on('data', function (chunk) {
if (response.statusCode == 200) body += chunk;
});
});
}
and this is how I call it:
createVideoThumbnail(video, function(){
console.log("returning from create thumbnails\n\n");
});
The function is not working it hangs and won't print out the the final log statement:
console.log("returning from create thumbnails\n\n");
However the base64 does seem to work as I am getting this for the encoding:

mmm = application/octet-stream
5283 bytes written
But I am not getting any of these log statements being printed:
if (error)
console.log('got error = ' + JSON.stringify(error) + '\n');
if (blobResult)
console.log('blobResult = ' + JSON.stringify(blobResult) + '\n');
if (response)
console.log('response = ' + JSON.stringify(response) + '\n');
So I am presuming it is hanging somewhere or I have not structured my code properly. Can anybody see what I am doing wrong ?
Cheers
Rob
My sources:
http://social.msdn.microsoft.com/Forums/en-US/wavirtualmachinesforlinux/thread/47bfe142-c459-4815-b09e-bd0a07ca18d5
Node.js base64 encode a downloaded image for use in data URI
Here's my simplified version. Since magic needs to operate on the whole file, there's no point in using the streaming blog API, instead this is written for the text api. body will be a buffer of the image, I suspect azure will be happy with it sans encoding, call toString('encoding') if it does need it.
var azure = require('azure')
, uuid = require('node-uuid')
, request = require('request')
, mmm = require('mmmagic')
, Magic = mmm.Magic
, stream = require('stream')
, bs = azure.createBlobService(config.storageAccount, config.storageAccessKey, config.blobHost);
function createVideoThumbnail(video, callback){
var sURL = video.Thumbnail;
request(sURL, {encoding:null}, function (err, res, body) {
// encoding:null makes request return a buffer, which is ideal to run magic.detect on
magic.detect(body, function (err, res) {
console.log(res);
var container = config.imageContainer;
var blob = uuid().replace(/-/gi, "").toLowerCase() + '.jpg';
var text = body; //might need to be converted into a string, I don't have azure setup to test
var options = {
contentType: res,
contentTypeHeader: res
};
bs.createBlockBlobFromText(container, blob, text, options, function(error, blobResult, response) {
if (error)
console.log('got error =', error);
// if you give console.log multiple arguments, it will format each of them,
// no need to manipulate objects into strings manually
if (blobResult)
console.log('blobResult =', blobResult);
if (response)
console.log('response =', response);
// now store in Azure blob storage
callback();
});
});
});
}
edit: temp file version
var azure = require('azure')
, uuid = require('node-uuid')
, request = require('request')
, mmm = require('mmmagic')
, Magic = mmm.Magic
, fs = require('fs')
, bs = azure.createBlobService(config.storageAccount, config.storageAccessKey, config.blobHost);
function createVideoThumbnail(video, callback){
var sURL = video.Thumbnail;
var name = uuid().replace(/-/gi, "").toLowerCase() + '.jpg';
var ws = fs.createWriteStream('./tmp/' + name);
request(sURL, {encoding:null})
.pipe(ws).on('close', function () {
console.log('downloaded');
magic.detectFile('./tmp/' + name, function (err, res) {
var container = config.imageContainer;
var blob = uuid().replace(/-/gi, "").toLowerCase() + '.jpg';
var options = {
contentType: res,
contentTypeHeader: res
};
bs.createBlockBlobFromFile(container, name, './tmp/' + name, function (err) {
callback();
});
});
});
}
Thanks to Valery Jacobs on msdn forums was able to come up with the answer. It's a nice clean solid solution using the stream, request and util object.
:)
http://social.msdn.microsoft.com/Forums/en-US/windowsazurepurchasing/thread/25c7705a-4ea0-4d9c-af09-cb48a031d06c

Resources