I am trying to implement the ._read function of a readable stream, a problem happens when ._read is called and there isn't data, the documentation says that I can push('') until more data comes, and I should only return false when the stream will never have more data.
https://nodejs.org/api/stream.html#stream_readable_read_size_1
But it also says that if I need to do that then something is terribly wrong with my design.
https://nodejs.org/api/stream.html#stream_stream_push
But I can't find an alternative to that.
code:
var http = require('http');
var https = require('https');
var Readable = require('stream').Readable;
var router = require('express').Router();
var buffer = [];
router.post('/', function(clientRequest, clientResponse) {
var delayedMSStream = new Readable;
delayedMSStream._read = function() {
var a=buffer.shift();
if(typeof a === 'undefined'){
this.push('');
return true;
}
else {
this.push(a);
if(a===null) {
return false;
}
return true;
}
};
//I need to get a url from example.com
https.request({hostname:'example.com'}, function(exampleResponse){
data='';
exampleResponse.on('data',function(chunk){data+=chunk});
exampleResponse.on('end',function(){
var MSRequestOptions = {hostname: data, method: 'POST'};
var MSRequest = https.request(MSRequestOptions, function(MSResponse){
MSResponse.on('end', function () {
console.log("MSResponse.on(end)");//>>>
});//end MSResponse.on(end)
}); //end MSRequest
delayedMSStream.pipe(MSRequest);
});
});
clientRequest.on('data', function (chunk) {
buffer.push(chunk);
});
clientRequest.on('end', function () {//when done streaming audio
buffer.push(null);
});
});//end router.post('/')
explanation:
client sends a POST request streaming audio to my server, my server requests a url from example.com, when example.com responds with the url, my server streams the audio to it.
What's a smarter way to do it?
So if I undertstand the code correctly, you:
receive a request,
make your own request to a remote endpoint and fetch a URL
make a new request to that URL and pipe that to original response.
There are ways to do this other then yours, and even your way would look cleaner to me if you just improve the naming a bit. Also, splitting the huge request into a few functions with smaller responsibility scopes might help.
I would make the endpoint this way:
let http = require('http');
let https = require('https');
let Readable = require('stream').Readable;
let router = require('express').Router();
let buffer = [];
/**
* Gets some data from a remote host. Calls back when done.
* We cannot pipe this directly into your stream chain as we need the complete data to get the end result.
*/
function getHostname(cb) {
https.request({
hostname: 'example.com'
}, function(response) {
let data = '';
response.on('error', err => cb(err)); // shortened for brewity
response.on('data', function(chunk) {
data = data + chunk;
});
response.on('end', function() {
// we're done here.
cb(null, data.toString());
});
});
}
router.post('/', function(request, response) {
// first let's get that url.
getHostname(function(err, hostname) {
if (err) { return response.status(500).end(); }
// now make that other request which we can stream.
https.request({
hostname: hostname,
method: 'POST'
}, function(dataStream) {
dataStream.pipe(response);
});
});
});
Now, as said in the comments, with streams2, you don't have to manage your streams. With node versions pre 0.10 you have had to listen to 'read', 'data' etc events, with newer node versions, it's handled. Furthermore, you don't even need it here, streams are smart enough to handle backpressure on their own.
Related
I want to get the html of this page for parsing(click the link to understand what content i want to get).
750-bond list
Here's my code to request this page content
var https = require("https");
var fs = require("fs");
var options = {
hostname: "www.prizebond.net",
port: 443,
path: "/dlist.php?num=455",
method: "GET"
};
var response = "";
var req = https.request(options, function (res) {
res.setEncoding("UTF-8");
console.log(res.statusCode);
res.on("data", function (chunk) {
response += chunk;
});
res.on("end", function () {
fs.writeFile("750-bond.html", response, function (err) {
if (err) {
console.log(err.message);
}
console.log("File downloaded");
});
console.log("end");
});
});
req.end();
Now the problem is that in my 750-bont.html file, I am getting the weird the
result of "Checking your browser before accessing the prizebond.net" not the
original content. Here's the screenshot what I got when I open the 750-
bond.html file in browser.
What I am doing wrong? And how can I get the original content of this webpage?
You can't, unless you write something more sophisticated, but you probably shouldn't.
The purpose of Cloudflare-protection is to prevent what you are trying to realize unfortunately.
You could look into a possibility to access whatever you want to access by a public API or something that prizebond.net provides for example.
I'm trying to POST a raw body with restify. I have the receive side correct, when using POSTman I can send a raw zip file, and the file is correctly created on the server's file system. However, I'm struggling to write my test in mocha. Here is the code I have, any help would be greatly appreciated.
I've tried this approach.
const should = require('should');
const restify = require('restify');
const fs = require('fs');
const port = 8080;
const url = 'http://localhost:' + port;
const client = restify.createJsonClient({
url: url,
version: '~1.0'
});
const testPath = 'test/assets/test.zip';
fs.existsSync(testPath).should.equal(true);
const readStream = fs.createReadStream(testPath);
client.post('/v1/deploy', readStream, function(err, req, res, data) {
if (err) {
throw new Error(err);
}
should(res).not.null();
should(res.statusCode).not.null();
should(res.statusCode).not.undefined();
res.statusCode.should.equal(200);
should(data).not.null();
should(data.endpoint).not.undefined();
data.endpoint.should.equal('http://endpointyouhit:8080');
done();
});
Yet the file size on the file system is always 0. I'm not using my readStream correctly, but I'm not sure how to correct it. Any help would be greatly appreciated.
Note that I want to stream the file, not load it in memory on transmit and receive, the file can potentially be too large for an in memory operation.
Thanks,
Todd
One thing is that you would need to specify a content-type of multi-part/form-data. However, it looks like restify doesn't support that content type, so you're probably out of luck using the restify client to post a file.
To answer my own question, it doesn't appear to be possible to do this with the restify client. I also tried the request module, which claims to have this capability. However, when using their streaming examples, I always had a file size of 0 on the server. Below is a functional mocha integration test.
const testPath = 'test/assets/test.zip';
fs.existsSync(testPath).should.equal(true);
const readStream = fs.createReadStream(testPath);
var options = {
host: 'localhost'
, port: port
, path: '/v1/deploy/testvalue'
, method: 'PUT'
};
var req = http.request(options, function (res) {
//this feels a bit backwards, but these are evaluated AFTER the read stream has closed
var buffer = '';
//pipe body to a buffer
res.on('data', function(data){
buffer+= data;
});
res.on('end', function () {
should(res).not.null();
should(res.statusCode).not.null();
should(res.statusCode).not.undefined();
res.statusCode.should.equal(200);
const json = JSON.parse(buffer);
should(json).not.null();
should(json.endpoint).not.undefined();
json.endpoint.should.equal('http://endpointyouhit:8080');
done();
});
});
req.on('error', function (err) {
if (err) {
throw new Error(err);
}
});
//pipe the readstream into the request
readStream.pipe(req);
/**
* Close the request on the close of the read stream
*/
readStream.on('close', function () {
req.end();
console.log('I finished.');
});
//note that if we end up with larger files, we may want to support the continue, much as S3 does
//https://nodejs.org/api/http.html#http_event_continue
Here is the thing :
I have a client which sends data to a server. This server has to contact an external A.P.I. and send back its response to the client. I just can't figure out how and where I can contact the external A.P.I once the server has got the client data.
I route client data like this :
app.post('/getAutoComplete', routes.read);
routes.read retrieves the data within req.body. With my nodejs version (without express framework), I then request the api this way :
var http = require('http'), options = {
host : "192.168.1.38",
port : 8080,
path : "/myURL",
method : 'POST'
};
var webservice_data = "";
var webservice_request = http.request(options, function(webservice_response)
{
webservice_response.on('error', function(e){ console.log(e.message); });
webservice_response.on('data', function(chunk){ webservice_data += chunk;});
webservice_response.on('end', function(){res.send(webservice_data);});
});
webservice_request.write(req.body);
webservice_request.end();
The problem is that i'd like to use native expressJS method like app.post but I don't know how because :
Express (app) object is not available here (declared in app.js but not in the route file)
I don't know how to send POST data with app.post
Any suggestion ?
app.post('/getAutoComplete', routes.read);
// assuming routes.read lookes something like this
routes.read = function read(req, res) {
var http = require('http'), options = {
host : "192.168.1.38",
port : 8080,
path : "/myURL",
method : 'POST'
};
var webservice_data = "";
var webservice_request = http.request(options, function(webservice_response)
{
webservice_response.on('error', function(e){ console.log(e.message); });
webservice_response.on('data', function(chunk){ webservice_data += chunk;});
webservice_response.on('end', function(){res.send(webservice_data);});
});
webservice_request.write(req.body);
webservice_request.end();
};
Also check out https://github.com/mikeal/request It's the de-facto module for doing web requests in node.
routes.read is a function. You can call it with extra parameters, so for example
app.post('/getAutoComplete', function(req,res) {
var q = req.query.q; // or whatever data you need
routes.read(q, function(err, response) {
if (err) throw err;
return res.json(response);
});
});
Now make the routes.read function use the first parameter as the query and when it's gathered the response from the remote API, call the second parameter with any error as the first parameter and the response as the second one.
Update This answer has already been picked as an answer, but it'd be more helpful if I showed an example of routes.read, too:
routes.read = function(q, cb) {
// pretend we calculate the result
var result = q * 10;
if (result > 100) {
// call the callback with error set
return cb("q value too high");
}
// all is well, use setTimeout to demonstrate
// an asynchronous return
setTimeout(function() { cb(null, result) }, 2000);
};
app.get('/', function(req, res){
var options = {
host: 'www.google.com'
};
http.get(options, function(http_res) {
http_res.on('data', function (chunk) {
res.send('BODY: ' + chunk);
});
res.end("");
});
});
I am trying to download google.com homepage, and reprint it, but I get an "Can't use mutable header APIs after sent." error
Anyone know why? or how to make http call?
Check out the example here on the node.js doc.
The method http.get is a convenience method, it handles a lot of basic stuff for a GET request, which usually has no body to it. Below is a sample of how to make a simple HTTP GET request.
var http = require("http");
var options = {
host: 'www.google.com'
};
http.get(options, function (http_res) {
// initialize the container for our data
var data = "";
// this event fires many times, each time collecting another piece of the response
http_res.on("data", function (chunk) {
// append this chunk to our growing `data` var
data += chunk;
});
// this event fires *one* time, after all the `data` events/chunks have been gathered
http_res.on("end", function () {
// you can use res.send instead of console.log to output via express
console.log(data);
});
});
I have a node.js script that continuously requests a page, sort of like a cron job.
However, after a few minutes Node starts to use a lot of CPU (up to 70%) and memory (up to 200mb).
What is wrong with my script?
function cron(path)
{
var http = require('http');
var site = http.createClient(443, 'www.website.com', true);
var request = site.request('GET', path, {'host': 'www.website.com'});
request.end();
request.on('response', function (response) {
setTimeout(function(){cron(path)},15000);
});
}
cron('/path/to/page');
request.on('response', function (response) {
setTimeout(function(){cron(path)},15000);
});
For every response you create a new cron job. Log your responses. If your getting more then 1 from your request then your exponantially creating more cron jobs.
Your creating a function() {} with a reference to path. So the entire scope state is kept. you want to free memory by adding this:
var site = null;
var request = null;
Your calling require("http") inside a function rather then outside in module scope. You only need to get http once so place at the top of your file in module scope.
var http = require('http');
var site = http.createClient(443, 'www.website.com', true);
function cron(path)
{
var request = site.request('GET', path, {'host': 'www.website.com'});
request.end();
var once = true;
request.on('response', doIt);
function doIt(response) {
if (!once) {
once = null;
doIt = function() {};
setTimeout(function(){cron(path)},15000);
}
});
site = null;
request = null;
}
cron('/path/to/page');
In addition to the tips from #Raynos, here's another. I find that recursive calls like this in long running processes make me a bit nervous so I'd err on the side of using setInterval instead. I'd maybe split the cron and the http behaviour apart in case you want to try and re-use that logic, although that'll depend on your context:
e.g. in node 0.4.7:
var https = require('https');
function poll(path)
{
https.get({
host: 'www.website.com',
port: 443,
path: path
}, function(res) {
console.log("Got response: " + res.statusCode);
}).on('error', function(e) {
console.log("Got error: " + e.message);
});
}
function cron(path)
{
return setInterval(function(){
poll(path);
},15000);
}
var intervalId = cron('/path/to/page'); // keep in case you need to use clearInterval