I'm writing an upload script. If there's an error while writing to disk I want to abort the request and return a 500 status code.
My code (below) sends a 500 as expected, but the upload doesn't stop. I keep getting "data" events until the upload is complete (even though my write pipe is already broken), and then the req.on('end') handler fires, which tries to send a 204 even though I've already sent a 500.
How can I abort the request so that I stop getting event notifications?
var filename = req.headers['wx-filename'];
var docType = req.headers['wx-doc-type'];
var clientId = req.headers['wx-client-id'];
var dir = Path.join(config.storage_path, docType, clientId);
var filePath = Path.join(dir, filename);

mkdirp.sync(dir);

var destFile = FileSystem.createWriteStream(filePath, {
    flags: 'wx' // don't overwrite files
});

if (config.env === 'dev') {
    (function () {
        var fileSize = req.headers['content-length'];
        var uploadedBytes = 0;
        req.on('data', function (data) {
            uploadedBytes += data.length;
            var p = uploadedBytes / fileSize * 100;
            var fn = Path.relative(config.storage_path, filePath);
            util.log(fn + ': ' + p.toFixed(1) + '%');
        });
    })();
}

req.on('end', function () {
    util.log("END");
    resp.writeHead(204);
    resp.end();
});

destFile.on('error', function (err) {
    util.log("ERROR", err);
    resp.writeHead(500, err.code);
    resp.end();
    // HOW TO STOP REQUEST?
});

req.pipe(destFile);
You need to remove the listeners for data and end, send a Connection: close header, and then send the 500 error.
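A minimal sketch of that approach, reusing the req, resp and destFile names from the question (the unpipe() and resume() calls are my additions, to stop feeding the broken stream and let the remaining request data drain so the socket can close):

destFile.on('error', function (err) {
    util.log("ERROR", err);
    // Stop feeding the broken write stream and ignore any further upload events.
    req.unpipe(destFile);
    req.removeAllListeners('data');
    req.removeAllListeners('end');
    // Answer with the error and ask the client to drop the connection.
    resp.writeHead(500, err.code, { 'Connection': 'close' });
    resp.end();
    // Let the rest of the request drain so the socket can close cleanly.
    req.resume();
});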
I need to download ~26k images. The image list and URLs are stored in a CSV file. I'm reading the CSV file and trying to download the images while looping through the list.
If I use a small set (~1-2k) it works fine, but when I switch to the full set I get an EMFILE error.
Error: EMFILE, open 'S:\images_download\Images\189900008.jpg'
I've noticed that Node tries to create all the files at once, and this might be the issue, but I'm unable to force it to create them one by one. My understanding is that the code below should work like that, but obviously it does not.
(Just to mention that this code is executed on Windows)
Code:
var csv = require("fast-csv");
var fs = require('fs');
var request = require('request');
var async = require('async');

fs.writeFile('errors.txt', '', function(){});

var downloaded = 0;
var totalImages = 0;
var files = [];

csv
    .fromPath("Device_Images_List.csv")
    .on("data", function(data){
        files.push({device: data[0], url: data[1]});
    })
    .on("end", function(){
        totalImages = files.length;
        async.each(files, function(file, callback) {
            var deviceId = file.device;
            var deviceUrl = file.url;
            if ( deviceId != 'DEVICE_TYPE_KEY' ) {
                try {
                    writeStream = fs.createWriteStream('./Images/' + deviceId + '.jpg');
                    proxiedRequest = request.defaults({proxy: "http://proxy:8080"});
                    proxiedRequest(deviceUrl).pipe(writeStream);
                    writeStream.on('open', function(fd) {
                        var rem = proxiedRequest.get(deviceUrl);
                        rem.on('data', function(chunk) {
                            writeStream.write(chunk);
                        });
                        rem.on('end', function() {
                            downloaded++;
                            console.log('Downloaded: ' + deviceId + '; ' + (downloaded + 1) + ' of ' + totalImages);
                            writeStream.end();
                        });
                    });
                    writeStream.on('close', function(){
                        callback();
                    });
                } catch (ex) {
                    fs.appendFile('errors.txt', deviceId + ' failed to download', function (err) {
                        callback();
                    });
                }
            }
        }, function(err){
            if( err ) {
                console.log(err);
            } else {
            }
        });
    });
As #slebetman commented, the issue can be solved by using async.eachSeries to process the files one by one, or async.eachLimit to limit the number of parallel downloads:
async.eachLimit(files, 5, function(file, callback) {
    // ... process 5 files at the same time
}, function(err){
});
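For example, here is a hedged sketch of the per-file task under that approach, reusing the names from the question and assuming proxiedRequest is created once with request.defaults({proxy: "http://proxy:8080"}); callback is only called once the file has been fully written or the download has failed:

var proxiedRequest = request.defaults({ proxy: "http://proxy:8080" });

async.eachLimit(files, 5, function (file, callback) {
    if (file.device === 'DEVICE_TYPE_KEY') return callback(); // skip the CSV header row
    var finished = false;
    function done() {
        // Guard so callback fires exactly once per file.
        if (!finished) { finished = true; callback(); }
    }
    var writeStream = fs.createWriteStream('./Images/' + file.device + '.jpg');
    writeStream.on('close', function () {
        downloaded++;
        console.log('Downloaded: ' + file.device + '; ' + downloaded + ' of ' + totalImages);
        done();
    });
    proxiedRequest(file.url)
        .on('error', function () {
            // Record the failure and move on to the next file.
            fs.appendFile('errors.txt', file.device + ' failed to download\n', done);
        })
        .pipe(writeStream);
}, function (err) {
    if (err) console.log(err);
});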
I wrote a simple script to download video files from a CDN, where the direct URLs are simple to generate, for example http://something.com/N.mp4, where N is a number.
The problem is, when downloading files larger than ~300MB, the file appears perfectly on the hard drive, but before the request(...) callback fires, a memory allocation failure happens:
FATAL ERROR: CALL_AND_RETRY_0 Allocation failed - process out of memory
Does this happen because of some serious bad practice? Can request download media files of this size?
Environment: Win7, 4GB+ free RAM, Node v0.10.31
var request = require('request');
var async = require('async');
var fs = require('fs');

var start = +process.argv[2] || 1;
var end = +process.argv[3] || 50;
var url = 'http://something.com/';

try {
    fs.mkdirSync(__dirname + '/videos/');
} catch (e) {}

var index = start;

async.whilst(
    function () { return index <= end; },
    function (callback) {
        var fileName = index + '.mp4';
        console.log('Started: ' + fileName);
        console.time('Done (' + fileName + ')');
        request(url + fileName, function() {
            console.timeEnd('Done (' + fileName + ')');
            index++;
            callback(null);
        }).pipe(fs.createWriteStream(__dirname + '/videos/' + fileName));
    },
    function (err) {
        if (err) {
            return console.error(err);
        }
        console.log('Script finished.');
    }
);
Example console output:
> node index.js 3
Started: 3.mp4
Done (3.mp4): 296592ms
Started: 4.mp4
Done (4.mp4): 369718ms
Started: 5.mp4
FATAL ERROR: CALL_AND_RETRY_0 Allocation failed - process out of memory
If you use the request module with a callback, it buffers the whole response body in memory. Try omitting the callback and using the finish event of the fs stream instead.
var writer = fs.createWriteStream(__dirname + '/videos/' + fileName);
writer.on('finish', function() {
    // ...
    index++;
    callback(null);
});
request(url + fileName).pipe(writer);
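As a side note (not part of the original answer), pipe() does not forward errors from the request stream, so a variant that also reports network errors instead of hanging on a file that never finishes might look like this:

var writer = fs.createWriteStream(__dirname + '/videos/' + fileName);
writer.on('finish', function () {
    console.timeEnd('Done (' + fileName + ')');
    index++;
    callback(null);
});
request(url + fileName)
    .on('error', function (err) {
        // Without this handler a failed download would never advance the loop.
        console.error('Failed: ' + fileName, err);
        callback(err);
    })
    .pipe(writer);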
It looks like you're trying to download videos 3 to 50 all in parallel, so that might be what's causing you to run out of memory. You could try doing them in series and see if that fixes the problem. With async.waterfall your code might look something like this:
var tasks = [];
for (; index <= end; index++) {
    // Capture the current index so each task downloads its own file
    // (otherwise every task would see the final value of index).
    (function (i) {
        tasks.push(function (callback) {
            var fileName = i + '.mp4';
            console.log('Started: ' + fileName);
            console.time('Done (' + fileName + ')');
            request(url + fileName, function () {
                console.timeEnd('Done (' + fileName + ')');
                callback(null);
            }).pipe(fs.createWriteStream(__dirname + '/videos/' + fileName));
        });
    })(index);
}

async.waterfall(tasks, function (err) {
    if (err) {
        return console.error(err);
    }
    console.log('Script finished.');
});
I am developing an API using Node.js. When I hit a URL through the browser I get the JSON response perfectly. But when I get the response through my Node.js code, it comes back as junk.
Consider I am hitting the following URL in the browser:
localhost:2000/xxxxx/rrrrr/ggggg
I am receiving perfect output.
The following is the node.js code:
proxyReq.on("response", function(proxyRes) {
var body = ''
try{
proxyRes.on("data", function(chunk) { //Capture API response here---revisit
body += new Buffer(chunk, 'binary').toString();
//console.log("cccc=" +chunk)
/*zlib.unzip(chunk.toString(), function(err, chunk){
console.log("Inside zliib");
if (!err){
console.log('Response'+chunk.toString())
} else {
console.log("Inside zlib error");
}
});*/
//body = chunk.toString();
//console.log(chunk.toString('utf-8'));
console.log('cccccccc=' +body);
});
}catch(err){
console.log("errrr=" +err.stack);
}
}
Here the 'body' is printed as junked data. I tried "utf-8" and "binary"; nothing works. Help me solve this. Thanks in advance.
My junked data:
Ys��ǿJ����}/c�؎}�%�ld� a�T��mIL�Z� ]�T��I�r����ߕ�h4��ϕ�7t{�Q4��8���\�L�N؛T��VWM�r
?W&Q��N&/Q����|�W9??W��t�b�n:t>:��(t��G��K��w��=��r\���_��W�N�c��{���u�ۺU���m�^���z�'�ǫ��LFQ�uc���s�>��f
I didn't install this using npm. Here is the third-party code regarding proxyReq:
var ended, mod, proxyReq, req_options;
if (err) {
    return _this.error(err, req, res);
}
mod = req.api.data.protocol === "https" ? https : http;
req_options = _this.getHttpProxyOptions(req);
req_options.agent || (req_options.agent = new mod.Agent({
    maxSockets: 100,
    rejectUnauthorized: req.api.data.strictSSL
}));
_this.logger.debug(("Backend: " + req_options.method + " to ") + ("'" + req.api.data.protocol + "://") + ("" + req_options.host + ":" + req_options.port + req_options.path));
proxyReq = mod.request(req_options);
proxyReq.setTimeout(req.api.data.endPointTimeout * 1000, function() {
    var e;
    e = new Error("ETIMEDOUT");
    e.code = "ETIMEDOUT";
    proxyReq.emit("error", e);
    return proxyReq.abort();
});
ended = false;
I found your problem. This is the documentation of the 'response' event for http/https:
Emitted when a response is received to this request. This event is emitted only once. The response argument will be an instance of http.IncomingMessage.
And http.IncomingMessage is a ReadableStream.
After that, you should use these two events of the stream to convert it into a String:
Event: 'data'
Event: 'end'
For stream.on('data', ...) you should collect your data into either a Buffer (if it is binary) or a string.
For on('end', ...) you should call a callback with your completed buffer, or, if you can inline it, return it using a Promises library.
For example, you can change the content of your proxyReq.on("response") callback to this:
var Buffer = require('buffer').Buffer;

proxyReq.on("response", function(proxyRes) {
    //proxyRes.setEncoding('utf8');
    var body = '';
    proxyRes.on('data', function(data) {
        body += new Buffer(data, 'binary').toString();
    });
    proxyRes.on('end', function () {
        // callback
    });
    proxyRes.on('error', function (err) {
        // catchable error
    });
});
To simplify your request handling, I recommend you use the node-request package: https://github.com/mikeal/request
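For instance, a hedged sketch of fetching the same endpoint with request (the URL is taken from the question; the gzip option is a guess based on the commented-out zlib.unzip attempt, in case the upstream response turns out to be compressed):

var request = require('request');

request({
    url: 'http://localhost:2000/xxxxx/rrrrr/ggggg',
    gzip: true // ask request to decompress a gzip/deflate encoded body
}, function (err, res, body) {
    if (err) return console.log('error: ' + err);
    console.log('body: ' + body); // body arrives as a complete string
});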
I am trying to download many (around 2,000) images from a JSON feed using Node (specifically the request module). When I try to do this (looping through the JSON) I get
throw er; // Unhandled stream error in pipe.
I checked ulimit -n and it was set at 256, so I increased that to 4,000, and I still get the same error (although only after a much larger number of images have been downloaded).
I have two questions:
Why am I still getting an error if I raised the open-file limit well in excess of the number of simultaneous downloads I actually have?
What is the best way to "queue" or pause the downloads so as not to overwhelm my system? Here is my code.
var fs = require('fs')
  , http = require('http')
  , request = require('request')
  , url = 'http://www.urlOfJsonFeed'

function get() {
    http.get(url, function(res) {
        var body = '';
        res.on('data', function(chunk) {
            body += chunk;
        });
        res.on('end', function() {
            jParse(body);
        });
    }).on('error', function(e) {
        console.log("Got error: ", e);
    });
}

function jParse(info) {
    data = JSON.parse(info)
    entries = data.entries
    numToDownload = entries.length;
    for (var i = numToDownload - 1; i >= 0; i -= 1) {
        link = entries[i]['imgUrl']
        download(link, 'images/' + mov + '.mp4', function() {
            console.log('Downloaded ' + dMov + ' movies')
            dMov++
        })
    }
}

var download = function(uri, filename, callback) {
    request.head(uri, function(err, res, body) {
        if (err) {
            console.log('header error');
        }
        if (!err && res.statusCode == 200) {
            // Download the image
            request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
        }
    });
};

get()
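In the spirit of the async.eachLimit approach shown earlier in this document, a hedged sketch of queueing the downloads might look like this (it reuses the download helper above; the concurrency of 10 and the filename derived from the URL are assumptions, not from the original post):

var async = require('async');

function jParse(info) {
    var entries = JSON.parse(info).entries;
    // Only 10 downloads are in flight at once; the rest wait in async's queue.
    async.eachLimit(entries, 10, function (entry, done) {
        var link = entry['imgUrl'];
        var filename = 'images/' + link.split('/').pop(); // assumed naming scheme
        download(link, filename, function () {
            console.log('Downloaded ' + link);
            done();
        });
    }, function (err) {
        if (err) console.log(err);
        console.log('All downloads finished');
    });
}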
I have the following PHP script on the server that will wait 10 seconds and say Hello:
<?php sleep(10); ?>Hello
On the client side (node), I have the following:
var http = require('http');
http.get ('http://example.com/sleep.php', function (resp) {
    resp.on('data', function (d) {
        console.log ('data!', d.toString());
    });
    resp.on('end', function (d) {
        console.log ('Finished!');
    });
}).on('error', function (e) {
    console.log ('error:', e);
});
The problem is, if the internet connection drops during the request, neither the error nor the end event is triggered.
To re-produce the problem:
Place the PHP script somewhere on the Internet
Execute the node script
Disconnect the Internet
The script does nothing
I've also found that if the connection is back within 10 seconds, it can still receive the message.
So I made a simple interval loop to check the status. However, it can't tell whether the connection has stopped working or is still waiting for a response:
var http = require('http');

var lastRespond = 0, intervalCheck;
var respFinished = false;

http.get ('http://jixun.no-ip.org/sleep.php', function (resp) {
    resp.on('data', function (d) {
        lastRespond = new Date;
        console.log ('data!', d.toString());
    });
    resp.on('end', function (d) {
        respFinished = true;
        console.log ('Finished!');
    });
}).on('error', function (e) {
    console.log ('error:', e);
});

intervalCheck = setInterval(function () {
    if (respFinished) {
        clearInterval(intervalCheck);
    } else if (new Date - lastRespond >= 120000) {
        console.log ('Timeout :(');
        clearInterval(intervalCheck);
    }
}, 120000); // 2 mins.
So my question is: Is there any way to check if the socket closed / connection stopped after sending the request?
Thanks in advance.
Using setTimeout on the actual request could solve your problem. It's not an actual event like 'close', 'end' and 'error', but the sample below reproduces and solves the issue; I haven't tried it in another context, though.
var http = require('http');
http.get ('http://fake-response.appspot.com?sleep=5', function (resp) {
    resp.on('data', function (d) {
        console.log ('data!', d.toString());
    });
    resp.on('end', function (d) {
        console.log ('Finished!');
    });
}).on('error', function (e) {
    console.log ('error:', e);
}).setTimeout(12000, function ( ) {
    console.log('Timeout reached...');
    process.exit(1);
});
More information can be found in the documentation. Either use that or also listen for the 'close' event, which works well with the net module.
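A minimal sketch of that 'close' variant, using the same structure as the question (on older Node versions the response emits 'close' when the underlying connection is closed, which can happen without 'end' if the transfer was cut short):

var http = require('http');

http.get ('http://example.com/sleep.php', function (resp) {
    resp.on('data', function (d) {
        console.log ('data!', d.toString());
    });
    resp.on('end', function () {
        console.log ('Finished!');
    });
    resp.on('close', function () {
        // Connection went away before 'end' fired.
        console.log ('Connection closed before the response finished');
    });
}).on('error', function (e) {
    console.log ('error:', e);
});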
Maybe this would be useful for you:
Create a bash script which checks the connection (grabbed from https://stackoverflow.com/a/14939373/1779015 with small modifications):
#!/bin/bash
# Test for network connection
for interface in $(ls /sys/class/net/ | grep -v lo);
do
    if [[ $(cat /sys/class/net/$interface/carrier 2> /dev/null) = 1 ]]; then OnLine=1; fi
done
if ! [ $OnLine ]; then echo "0";
else
    echo "1";
fi
Then call it from the Node script and read its stdout, for example with child_process (http://nodejs.org/api/child_process.html):
var spawn = require('child_process').spawn,
hasInet = spawn('./test-inet.sh');
hasInet.stdout.pipe(process.stdout);
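If you want to act on the result rather than just print it, here is a small sketch (assuming the test-inet.sh script shown above is in the working directory and is executable):

var spawn = require('child_process').spawn;

var hasInet = spawn('./test-inet.sh');
var output = '';

hasInet.stdout.on('data', function (d) {
    output += d;
});

hasInet.on('close', function () {
    // The script prints "1" when any interface has carrier, "0" otherwise.
    if (output.trim() === '1') {
        console.log('Network connection looks up');
    } else {
        console.log('No network connection');
    }
});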
Here is my solution... instead of using setInterval, I check whether the "data" event keeps firing until the end event.
var http = require('http');
var url = require('url');
var fs = require('fs');

// 'file_url' and 'filename' are assumed to be supplied by the surrounding code
// (they were not defined in the original snippet).
var options = {};
var parsed_url = url.parse(file_url, true);
var req_options = {
    path: parsed_url.pathname,
    host: parsed_url.hostname
};

var file = fs.createWriteStream(filename, options);

try {
    const check_data_timeout = 10000;
    var check_data_timer = 0;
    var current_size = 0;
    var request = http.get(req_options, function(response) {
        var len = parseInt(response.headers['content-length'], 10);
        response.on("data", function(chunk) {
            current_size += chunk.length;
            var percent = (100.0 * current_size / len).toFixed(2);
            console.log("Download percent : " + percent + "%");
            // New data arrived, so push the stall timer forward.
            clearTimeout(check_data_timer);
            check_data_timer = setTimeout(function(){
                console.log("UPS !! No new data ... connection must be stalled");
            }, check_data_timeout);
        });
        response.on("end", function() {
            console.log("Response ENDED !!!");
            clearTimeout(check_data_timer);
        });
        response.pipe(file);
        check_data_timer = setTimeout(function(){
            console.log("UPS !! No new data ... connection must be stalled");
        }, check_data_timeout);
    }).once('error', function(error) {
        console.log("Response ERROR !!!");
    });
} catch(e) {
    console.log(e);
}
Hope it helps ...