how to avoid nodejs Parse Error when http.get xlsx files? - node.js

I am downloading xlsx file
var options = urlObj;
options.method = 'GET';
options.headers = {'Content-Type':'text/plain'};
function reqCallback(res) {
// default: 'utf8'
res.setEncoding('utf8'); //utf16le'); //binary'); //hex'); //utf8
res.pipe (
simplexlsx( function (err, table) {
if (err) {console.log('simple-xlsx: '+err)};
console.log('simplexlsx for '+ table);
})
)
}
var req = http.request(options);
req.on('response', reqCallback);
req.end();
from:
server: 'Microsoft-IIS/7.5'
'x-aspnet-version': '4.0.30319',
'x-powered-by': 'ASP.NET'
which I don't own also in the header:
'content-type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
and getting:
events.js:72
throw er; // Unhandled 'error' event
^
Error: Parse Error
at Socket.socketOnData (http.js:1588:20)
at TCP.onread (net.js:528:27)
I pipe the response to simple-xlsx parser to json, which works fine when the error is handled.
When I set: setEncoding('hex')
simple-xlsx: Error: invalid signature: 0x62343035
or setEncoding('binary')
simple-xlsx: Error: invalid distance too far back
and setEncoding('utf16le') or setEncoding('utf8') getting the parse error.
How to work around the parse error?

Related

Unable to gracefully close http2 client stream in nodejs

I have the following code which has been heavily inspired from nodejs official documentation of a client-side example
import http2 from 'http2';
// The `http2.connect` method creates a new session with example.com
const session = http2.connect('https://somedomain.com');
// Handle errors
session.on('error', (err) => console.error(err))
const req = session.request({
':authority': 'somedomain.com',
':path': '/some-path',
':method': 'GET',
':scheme': 'https',
'accept': 'text/html',
});
// To fetch the response body, we set the encoding
// we want and initialize an empty data string
req.setEncoding('utf8')
let data = ''
// append response data to the data string every time
// we receive new data chunks in the response
req.on('data', (chunk) => { data += chunk })
// Once the response is finished, log the entire data
// that we received
req.on('end', () => {
console.log(`\n${data}`)
session.close();
});
req.on('error', (error) => {
console.log(error);
});
req.end();
Please note that the actual hostname has been replaced with somedomain.com. Running this, results in data getting logged, as expected, however, the process doesn't shut down gracefully. I get the following unhandled error in the terminal.
node:events:504
throw er; // Unhandled 'error' event
^
Error [ERR_HTTP2_STREAM_ERROR]: Stream closed with error code NGHTTP2_FLOW_CONTROL_ERROR
at new NodeError (node:internal/errors:371:5)
at ClientHttp2Stream._destroy (node:internal/http2/core:2330:13)
at _destroy (node:internal/streams/destroy:102:25)
at ClientHttp2Stream.destroy (node:internal/streams/destroy:64:5)
at Http2Stream.onStreamClose (node:internal/http2/core:544:12)
Emitted 'error' event on ClientHttp2Stream instance at:
at emitErrorNT (node:internal/streams/destroy:157:8)
at emitErrorCloseNT (node:internal/streams/destroy:122:3)
at processTicksAndRejections (node:internal/process/task_queues:83:21) {
code: 'ERR_HTTP2_STREAM_ERROR'
}
I understand it is possible that the server is behaving incorrectly. However, there should be a way on the nodejs client to close the session gracefully. Regardless, what would be the ideal way to handle such errors? I've already tried listening to session.on('error') and req.on('error') but that doesn't work.

Get HTML page by URL

Here is my code:
utilitesRouter.route('/url')
.post(function(request, response) {
console.log(request.body.uri);
var urlOpts = { host: request.body.uri, path: '/', port: '80', method: 'GET' };
var re = /(<\s*title[^>]*>(.+?)<\s*\/\s*title)>/gi;
http.get(urlOpts, function (response) {
response.on('data', function (chunk) {
var str=chunk.toString();
console.log(str);
var match = re.exec(str);
if (match && match[2]) {
console.log(match[2]);
}
});
});
response.json({ url: request.body.uri });
});
If I use POST request with this JSON {"uri":"google.ru" } I get:
302 Moved
google.ru
<HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">
<TITLE>301 Moved</TITLE></HEAD><BODY>
<H1>301 Moved</H1>
The document has moved
here.
</BODY></HTML>
If I use POST requiet with JSON {"uri":"http://google.ru" } I get the error message:
events.js:85
throw er; // Unhandled 'error' event
^
Error: getaddrinfo ENOTFOUND http://google.ru
at errnoException (dns.js:44:10)
at GetAddrInfoReqWrap.onlookup [as oncomplete] (dns.js:94:26)
I can open http://google.ru in my browser.
How can I get the HTML using node.js ?
You may want to request to do that. It just pretty easy.
var request = require("request");
router.get('/proxy', function(req, res, next){
request.get( req.body.uri, function(error, response, body){
if( error )
return next(error);
res.send(body);
});
});
request also support streaming and other cool features too.
You get the error because in your urlOpts the attribute host has to be a domain name, like google.ru or www.google.ru. As you are putting a URL into it, it can't be resolved to an IP via DNS, that's why you get the error at GetAddrInfoReqWrap.onlookup [as oncomplete] (dns.js:94:26).
If you want to use http.get() like the way you do, you would always have to extract the domain part out of your passed uri, i.e. getting google.ru out of http://google.ru to use it as host.

Not save image in PDF getting error in node/expressJs

I have expressJs application i am going to save image in PDF file Here is my code to save image in pdf file
exports.getPDF = function (req, res) {
doc = new PDF();
doc.pipe(fs.createWriteStream('./public/Competition.pdf'));
db.competitions.find({where: {id: req.params.competitionId}}).success(function(competition){
//MY_URL_IS = http://d16dgegkincj5i.cloudfront.net/1_2_brandLogo_new_update_code-thumbnail.png
var options = {
host: 'http://d16dgegkincj5i.cloudfront.net',
port: 80,
path: '/1_2_brandLogo_new_update_code-thumbnail.png'
}
var request = http.get(options, function(res){
var imagedata = ''
res.setEncoding('binary')
res.on('data', function(chunk){
imagedata += chunk
})
res.on('end', function(){
fs.writeFile('./public/Competition.pdf', imagedata, 'binary', function(err){
if (err) throw err
console.log('File saved.')
})
})
})
}).error(function(error){
});
};
I am getting error
events.js:85
throw er; // Unhandled 'error' event
^
Error: getaddrinfo ENOTFOUND
when i get image by http.get and after write to pdf file it will give error and not writing image
The error:
Error: getaddrinfo ENOTFOUND
means that the url was not found.
Just removing the http:// from your host should do the job, like:
var options = {
host: 'd16dgegkincj5i.cloudfront.net',
port: 80,
path: '/1_2_brandLogo_new_update_code-thumbnail.png'
}

How to catch getaddrinfo ENOTFOUND

I have a list of links that I need to check before processing some data. Checking headers with http.get returns error:
events.js:72
throw er; // Unhandled 'error' event
^
Error: getaddrinfo ENOTFOUND
at errnoException (dns.js:37:11)
I cannot handle this error, and exits the process. I tried res.on("error") and try..catch on http.get but nothing works.
Below is the code snippet, and here is live example at runnable.com
//This is OK
getHeaders('http://google.com/404pag-that-does-not-exit');
//Here is the error.
//Uncoughtable error!
getHeaders('http://doesnotexistooooo.com');
function getHeaders(link){
var _http = require("http");
var myUrl = require("url");
var qs=(myUrl.parse(link).search==null) ? "" : myUrl.parse(link).search ;
var path=myUrl.parse(link).pathname;
var options = {
hostname: myUrl.parse(link).hostname,
path: path+qs,
method: 'HEAD'
};
_http.get(options, function(res) {
res.on('error',function(e){
console.log("Error: " + myUrl.parse(link).hostname + "\n" + e.message);
console.log( e.stack );
});
console.log('STATUS: ' + res.statusCode);
console.log('HEADERS: ' + JSON.stringify(res.headers));
});
}
You just need to handle the error event, as stated in the error message. According to the documentation:
If any error is encountered during the request (be that with DNS resolution, TCP level errors, or actual HTTP parse errors) an 'error' event is emitted on the returned request object.
Here is a usage example:
var getRequest = _http.get(options, function(res) {
// …
});
getRequest.on('error', function (err) {
console.log(err);
});
which yields:
$ node test.js
{ [Error: getaddrinfo ENOTFOUND] code: 'ENOTFOUND', errno: 'ENOTFOUND', syscall: 'getaddrinfo' }
At the very top level, you can do
process.on('uncaughtException', function(err) {
console.log('### BIG ONE (%s)', err);
});
if you using request npm
request
.get('http://example.com/doodle.png')
.on('response', function(response) {
console.log(response.statusCode) // 200
console.log(response.headers['content-type']) // 'image/png'
})
.on('error', function(err) { // <------- add this
console.log(err)
});

Getting ETIMEDOUT error when I try to do a simple Get request?

Hi I am trying to call a simple web API which returns a string as response. I want to use node for this. Since I am new to node so I tried reffering to many blog post and got a code snippet which I used but I am getting same error for all urls whether its google.com or anything else.
My Node code is as follows
var http = require('http');
//The url we want is: 'www.random.org/integers/?num=1&min=1&max=10&col=1&base=10&format=plain&rnd=new'
var options = {
host: 'www.random.org',
path: '/integers/?num=1&min=1&max=10&col=1&base=10&format=plain&rnd=new'
};
callback = function(response) {
var str = '';
//another chunk of data has been recieved, so append it to `str`
response.on('data', function (chunk) {
str += chunk;
});
//the whole response has been recieved, so we just print it out here
response.on('end', function () {
console.log(str);
});
}
http.request(options, callback).end();
Error:
F:\nodejs>node ..\NodeLearning\TestServer1\test.js
events.js:72
throw er; // Unhandled 'error' event
^
Error: connect ETIMEDOUT
at errnoException (net.js:901:11)
at Object.afterConnect [as oncomplete] (net.js:892:19)
Can Any one tell me what has gone wrong here?
Can you try one more time by setting a proxy like mentioned below
var options = {
host: 'www.random.org',
path: '/integers/?num=1&min=1&max=10&col=1&base=10&format=plain&rnd=new',
proxy:'add your proxy setting'
};

Resources