Parse XML from site and get values from elements - node.js

I am writing a NodeJS script to get the XML from a website and get the values from a specific element.
However, when running it it says 'undefined'.
Code:
var http = require('http');
var xml2js = require('xml2js');
var parseString = require('xml2js').parseString;
var req = http.get("http://feeds.nos.nl/nosnieuwsvideo?format=xml", function(res) {
// save the data
var xml = '';
res.on('data', function(chunk) {
xml += chunk;
console.log(xml);
});
res.on('end', function() {
parseString(xml, function(err, result) {
console.log(result['title']);
});
});
});
req.on('error', function(err) {
// debug error
});
What's the problem?
Thanks

Instead of result['title'], try result.rss.channel[0].title

you're using the wrong path to access it.
alternatively, you can only parse what you need
const transform = require('camaro')
const result = transform(xml, { title: '//channel/title'})
console.log(result.title)

Thanks everyone for the answers.
I got it working by using
for (var i = 0; i < objects2.length; i++) {
var title = Object.values(objects2[i].title).toString();
title.replace("['", "").replace("']", "");
var description = Object.values(objects2[i].description).toString();
description.replace("['", "").replace("']", "");
var pubtime = Object.values(objects2[i].pubDate).toString();
pubtime.replace("['", "").replace("']", "");
var link = Object.values(objects2[i].link).toString();
link.replace("['", "").replace("']", "");
console.log("\n");
console.log(pubtime);
console.log(link);
console.log(title);
console.log(description);
}

Related

Encapsulate node.js code in a function

So far I have the below code which I need to get the id from a JSON object:
var http = require("http");
var fs = require('fs');
//var bodyParser = require("body-parser");
//var moment = require("moment");
var options = {
"method" : "GET",
"hostname" : "xxx.xxx.xxx.xxx",
"port" : "18080",
"path" : "/api/v1/applications/"
};
var req = http.request(options, function (res) {
var chunks = [];
res.on("data", function (chunk) {
chunks.push(chunk);
});
res.on("end", function () {
var body = JSON.parse(Buffer.concat(chunks));
var arrFound = Object.keys(body).filter(function(key) {
if (body[key].name.indexOf("TestName") > -1) {
return body[key].name;
}
}).reduce(function(obj, key){
obj = body[key].id;
return obj;
}, {});;
//console.log("Name: ", arrFound);
console.log("ID: ", arrFound);
});
});
req.end();
I know it's reading the id as I currently write it out to the console(console.log("ID: ", arrFound);).
What I would like to do is have this available for use in other parts of my program. I assume I need to do something like encapsulate it in a function that I can call but node.js is new to me and I'm not entirely sure how/what to do.
Can anybody advise how to do this or point me to some resource that might help?
You should research the module pattern and exports. You are right, the basic idea is to export your code as a function. Based on how you've approached this, it should take a callback function. I've made a naive attempt with what you have.
Notice I pass a null as the first argument to the callback function. It is conventional when using callbacks in node to use the first argument for errors. You probably need some error handling.
Having said that, you night look into some libraries like requests, or the ES6 fetch function. These will allow you to organize your code more neatly. You'll probably wind up using promises instead of callbacks.
var http = require("http");
var fs = require('fs');
//var bodyParser = require("body-parser");
//var moment = require("moment");
var options = {
"method" : "GET",
"hostname" : "xxx.xxx.xxx.xxx",
"port" : "18080",
"path" : "/api/v1/applications/"
};
exports.getId = function(callback) {
var req = http.request(options, function (res) {
var chunks = [];
res.on("data", function (chunk) {
chunks.push(chunk);
});
res.on("end", function () {
var body = JSON.parse(Buffer.concat(chunks));
var arrFound = Object.keys(body).filter(function(key) {
if (body[key].name.indexOf("TestName") > -1) {
return body[key].name;
}
}).reduce(function(obj, key){
obj = body[key].id;
return obj;
}, {});;
//console.log("Name: ", arrFound);
callback(null, arrFound));
});
});
req.end();
}
You call it like this:
exports.getId(function(err, id){
console.log(id)
})

How to download excel file from nodejs terminal

I am new to nodejs. Need your help. From the nodejs terminal, i want to download an excel file and convert it to csv (say, mocha online.js). Note: i don't want to do this via a browser.
Below is a script i am working on to download and convert to csv. There is no error nor the expected result:
online.js
if (typeof require !== 'undefined') XLSX = require('xlsx');
var XMLHttpRequest = require("xmlhttprequest").XMLHttpRequest;
/* set up XMLHttpRequest */
var url = "http://oss.sheetjs.com/js-xlsx/test_files/formula_stress_test_ajax.xlsx";
var xhr = new XMLHttpRequest();
xhr.open("GET", url, true);
xhr.responseType = "arraybuffer";
describe('suite', function () {
it('case', function () {
var arraybuffer = xhr.response;
/* convert data to binary string */
var data = new Uint8Array(arraybuffer);
var arr = new Array();
for (var i = 0; i != data.length; ++i) arr[i] = String.fromCharCode(data[i]);
var bstr = arr.join("");
/* Call XLSX */
var sheetName = 'Database';
var workbook = XLSX.read(bstr, { type: "binary" });
var worksheet = workbook.Sheets[sheetName];
var csv = XLSX.utils.sheet_to_csv(worksheet);
console.log(csv);
xhr.send();
//.... perform validations here using the csv data
});
})}
I tried myself with this code, and it seems it is working, the only thing is that I spent 15 minutes trying to understand why my open office would not open the file, I eventually understood that they were sending a zip file ... here is the full code, the doc of the http get function is here http.get
You could have used the request module, but it isn't native, request is easier though.
enjoy!
const url = 'http://oss.sheetjs.com/js-xlsx/test_files/formula_stress_test_ajax.xlsx'
const http = require('http')
const fs = require('fs')
http.get(url, (res) => {
debugger
const {
statusCode
} = res;
const contentType = res.headers['content-type'];
console.log(`The type of the file is : ${contentType}`)
let error;
if (statusCode !== 200) {
error = new Error(`Request Failed.\n` +
`Status Code: ${statusCode}`);
}
if (error) {
console.error(error.message);
// consume response data to free up memory
res.resume();
return;
}
res.setEncoding('binary');
let rawData = '';
res.on('data', (chunk) => {
rawData += chunk;
});
res.on('end', () => {
try {
const parsedData = xlsxToCSVFunction(rawData);
// And / Or just put it in a file
fs.writeFileSync('fileName.zip', rawData, 'binary')
// console.log(parsedData);
} catch (e) {
console.error(e.message);
}
});
}).on('error', (e) => {
console.error(`Got error: ${e.message}`);
});
function xlsxToCSVFunction(rawData) {
return rawData //you should return the csv file here whatever your tools are
}
I actually encountered the same problem 3 months ago : here is what I did!
I did not find any nodeJS module that was exactly as I wanted, so I used in2csv (a python shell program) to transform the data; the t option is to use tabulation as the delimiter
1) Step 1: transforming the xlsx file into csv using in2csv
This code takes all the xlsx files in the current directory, transform them into csv files and put them in another directory
var shelljs = require('shelljs/global')
var dir = pwd().stdout.split('/')
dir = dir[dir.length - 1].replace(/\s/g, '\\ ')
mkdir('../'+ dir + 'CSV')
ls('*.xlsx').forEach(function(file) {
// below are the two lines you need
let string = 'in2csv -t ' + file.replace(/\s/g, '\\ ') + ' > ../'+ dir + 'CSV/' + file.replace('xlsx','csv').replace(/\s/g, '\\ ')
exec(string, {silent:true}, function(code, stdout, stderr){
console.log('new file : ' + file.replace('xlsx','csv'))
if(stderr){
console.log(string)
console.log('Program stderr:', stderr)
}
})
});
Step 2: loading the data in a nodejs program:
my script is very long but the main two lines are :
const args = fileContent.split('\n')[0].split(',')
const content = fileContent.split('\n').slice(1).map(e => e.split(','))
And for the benefit of seekers like me...here is a solution using mocha, request and xlsx
var request = require('request');
var XLSX = require('xlsx');
describe('suite', function () {
it('case', function (done) {
var url = "http://oss.sheetjs.com/js-xlsx/test_files/formula_stress_test_ajax.xlsx";
var options = {
url: url,
headers: {
'Content-Type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
},
encoding: null
};
request.get(options, function (err, res, body){
var arraybuffer = body;
/* convert data to binary string */
var data = arraybuffer;
//var data = new Uint8Array(arraybuffer);
var arr = new Array();
for (var i = 0; i != data.length; ++i) arr[i] = String.fromCharCode(data[i]);
var bstr = arr.join("");
/* Call XLSX */
var sheetName = 'Database';
var workbook = XLSX.read(bstr, { type: "binary" });
var worksheet = workbook.Sheets[sheetName];
var csv = XLSX.utils.sheet_to_csv(worksheet);
console.log(csv);
done();
});
});
});

MIME Binary Data to PDF

I'm receiving some data from an API call that returns some XML and associated files (PDFs):
var req = http.request(HTTPOPTIONS, function(resp){
var rawData = '';
//Build list from response data
resp.on('data', function (chunk) {
rawData+= chunk;
});
//Process List
resp.on('end', function () {
var breakStr = rawData.split('\n')[0];
var fileSections = rawData.split(breakStr);
for(var i in fileSections){
var content = fileSections[i].split((/Content-Length: [0-9]*/));
var fileName = content[0].split('filename=')[1].trim();
var file = {Bucket : 'MyBucket', Key: ROOT+'/'+FOLDER+'/'+SUBFOLDER+'/'+fileName, Body: content[1]};
console.log('Creating file: '+file.Key );
promises.push(S3.upload(file).promise());
}
Promise.all(promises).then(...);
});
});
req.write(XMLREQUEST);
req.end();
But I when I try to open the file created I get [
Any ideas on where I'm going wrong?
UPDATE:
In addition to the above error message I also get [
On these files I get the metadata (Page size/formatting and Font data) but no content.
It appears the problem was because I was storing the data in a string and trying to manipulate it from there. The incoming data chunk is a buffer and using it in this form mean that, once you figure out how to remove the headers, you can create the PDF files.
var req = http.request(HTTPOPTIONS, function(resp){
var respChunks =[];
var respChunksLength = 0;
resp.on('data', function (chunk) {
respChunks.push(chunk);
respChunksLength+=chunk.length;
});
resp.on('end', function () {
var confResp = Buffer.concat(respChunks, respChunksLength);
var breakStr = confResp.slice(0,confResp.indexOf('\n'));
var bufferArray = [];
var startBreak = confResp.indexOf(breakStr);
while(startBreak>-1){
bufferArray.push(confResp.slice(startBreak+breakStr.length+1,confResp.indexOf(breakStr,startBreak+1)));
startBreak = confResp.indexOf(breakStr,startBreak+1);
}
var trim=0;
for(var i = 1; i<bufferArray.length;i++){
trim = bufferArray[i].toString().indexOf('%');
fs.writeFile('testFile'+i+'.pdf',bufferArray[1].slice(trim));
});
});
req.write(generateProposalDetailXML());
req.end();
the bufferArray is concatenated into a single buffer, this is then divided based on the MIME delimiter (breakStr) and the headers (clumsily) removed and written to a file.

Node appendFile not appending data chunk to file on filesystem

I have a program that is trying to get the values from the request using curl and store them in a file and serve the stored content back. The decision to store or append the contents in file are based on a query parameter appendFlag
Now when i run this program what i am getting in console is "true" and "appending" This suggests that it indeed reads the flag goes to the if part but somehow the appendFile function is not working.
var http = require('http');
var url = require('url');
var querystring = require('querystring');
var fs = require('fs');
http.createServer(function(request,response){
var str = request.url.split('?')[1];
var query = querystring.parse(str);
var writeStream = fs.createWriteStream(query['fileName']);
console.log("query - ");
console.log(query["appendFlag"]);
request.on('data',function(chunk){
if(query["appendFlag"]=="true"){
console.log("appending");
fs.appendFile(query['fileName'],chunk.toString(),function(err){
if(err) throw err;
});
}else{
var bufferGood = writeStream.write(chunk);
if(!bufferGood) request.pause();
}
});
request.on('end',function(){
response.writeHead(200);
response.write("\n Content with this url is - \n");
var readStream = fs.createReadStream(query['fileName'],{bufferSize:64*1024});
readStream.on('data',function(chunk){
response.write(chunk.toString());
});
readStream.on('end',function(){
response.write("\n");
response.end();
});
});
writeStream.on('drain',function(){
request.resume();
});
}).listen(8080);
Then after reading an answer from SO( How to create appending writeStream in Node.js ) i tried -
// Program to extract url from the request and writing in that particular file
var http = require('http');
var url = require('url');
var querystring = require('querystring');
var fs = require('fs');
http.createServer(function(request,response){
var str = request.url.split('?')[1];
var query = querystring.parse(str);
var writeStream = fs.createWriteStream(query['fileName']);
options = {
'flags': 'a' ,
'encoding' : null,
'mode' : 0666
}
var appendStream = fs.createWriteStream(query['fileName'],[options]);
console.log("query - ");
console.log(query["appendFlag"]);
request.on('data',function(chunk){
if(query["appendFlag"]=="true"){
console.log("appending");
var bufferGood = appendStream.write(chunk.toString());
if(!bufferGood) request.pause();
}else{
var bufferGood = writeStream.write(chunk.toString());
if(!bufferGood) request.pause();
}
});
request.on('end',function(){
response.writeHead(200);
response.write("\n Content with this url is - \n");
var readStream = fs.createReadStream(query['fileName'],{bufferSize:64*1024});
readStream.on('data',function(chunk){
response.write(chunk.toString());
});
readStream.on('end',function(){
response.write("\n");
response.end();
});
});
writeStream.on('drain',function(){
request.resume();
});
}).listen(8080);
That is changed the flag to the 'a' and it also did not append the data?
Your can use your first variant. But before appendFile() you've opened writeStream for the same query["filename"]. The stream is already opened.
var writeStream = fs.createWriteStream(query['fileName']);
options = {
'flags': 'a' ,
'encoding' : null,
'mode' : 0666
}
var appendStream = fs.createWriteStream(query['fileName'],[options]);
May be it's better to do something like:
var options = {
flags: query.appendFile ? 'w' : 'a'
...
Next: why [options]? You should remove the brackets.
Next: there is no guarantee you'll have filename param in querystring. Please handle this situation.

MongoDB Time conversion

I have a MongoDB collection that I am querying based on a time frame and address number. If the query is successful, then the server will return a CSV file with a number of attributes stored in each query entry. This part is working fine.
The problem is that one of the attributes in my CSV file is the timestamp. I'd like it to return the local date time (ie. "Time":"2014-02-09T06:00:02.000Z")... however the CSV file is returning the date object in milliseconds (ie. "Time":1392040717774). Is there an easy way to transform the query entry before it's written to the CSV file to a local date string? Here's my code snippet:
var JSONStream = require('JSONStream');
var httpserver = http.createServer(function(req, res) {
var pathname = url.parse(req.url).pathname;
if (pathname=="/DownloadUCData") {
var requestData = '';
req.on('data', function (data) {
requestData += data;
});
req.on('end', function () {
var json = JSON.parse(requestData);
var st = new Date(json.startDate);
var et = new Date(json.endDate);
st.setHours(st.getHours()-4); //compensate for GMT offset
et.setHours(et.getHours()-4);
st=st.getTime();
et=et.getTime();
var proj=JSON.parse('{"state":1, "temperature":1, "mode":1, "speed":1, "time":1}');
var cursor = userControlCollection.find({"addr": json.addr.toString(), "time": {$gte:st, $lte:et}}, proj);
var dbstream = cursor.stream();
var tempname = json.type+".csv";
var wstream = fs.createWriteStream(tempname);
wstream.on('error', function(e){console.error(e);});
dbstream.on("end", function() {
wstream.end();
console.log("write end");
res.writeHead(200, {"Content-Type": "application/json"});
res.write(JSON.stringify({fname:tempname}));
res.end();
return;
});
var jsonToStrings = JSONStream.stringify(false);
dbstream.pipe(jsonToStrings).pipe(wstream);
});
}
So, I figured out one way to solve this problem (although there may be others). Basically, I had to add a transformation into the piping system to convert the .getTime() data into a new Date() object. Here's the code snippet which seemed to resolve the issue:
var Transform = require('stream').Transform;
var parser = new Transform({objectMode: true});
parser._transform = function(data, encoding, done) {
if(data.time) data.time = new Date(data.time);
this.push(data);
done();
};
var jsonToStrings = JSONStream.stringify(false);
dbstream.pipe(parser).pipe(jsonToStrings).pipe(wstream);

Resources