How to include output from ExpressJS res.render in a ZIP file? - node.js

I have a built a method in ExpressJS that exports a document as an HTML page:
html: function (req, res) {
Project.findOne( { id: req.params.project },
function (err, project) {
res.contentType('text/html');
res.render('exporting/html', { name: project.name });
}
);
},
Additionally, I'd like to create a method that includes that generated HTML page, together with some static assets, in a ZIP archive.
Here's my current code:
zip: function (req, res) {
Project.findOne( { id: req.params.project },
function (err, project) {
res.contentType('application/zip');
res.setHeader('content-disposition', 'attachment; filename=' + project.name + '.zip');
var zip = new AdmZip();
zip.addFile("readme.txt", new Buffer("This was inside the ZIP!"));
//------ Here I'd like to use zip.addFile to include the HTML output from the html method above ------
res.send(zip.toBuffer());
}
);
}
How can I make the zip method include the output from the html method?

You have two options: one is relatively simple and the other is a bit more complicated. You'll have to decide which you believe is which. ;)
First method
Since you are relying on express' Response.render to create your HTML from a view, you'll need to call that route on your server to retrieve the content of the page so you can include it in your zip response.
Assuming you have var http=require('http'); somewhere in this file, you can:
zip: function (req, res) {
var projectId=req.params.project||'';
if(!projectId){ // make sure we have what we need!
return res.status(404).send('requires a projectId');
}
// Ok, we start by requesting our project...
Project.findOne({id:projectId},function(err, project) {
if(err) { // ALWAYS handle errors!
return res.status(500).send(err);
}
if('object'!==typeof project){ // did we get what we expected?
return res.status(404).send('failed to find project for id: '+projectId);
}
var projectName=project.name || ''; // Make no assumptions!
if(!projectName){
return res.status(500).send('whoops! project has no name!');
}
// For clarity, let's write a function to create our
// zip archive which will take:
// 1. a name for the zip file as it is sent to the requester
// 2. an array of {name:'foo.txt',content:''} objs, and
// 3. a callback which will send the result back to our
// original requester.
var createZipArchive=function(name, files, cb){
// create our zip...
var zip=new AdmZip();
// add the files to the zip
if(Array.isArray(files)){
files.forEach(function(file){
zip.addFile(file.name,new Buffer(file.content));
});
}
// pass the filename and the new zip to the callback
return cb(name, zip);
};
// And the callback that will send our response...
//
// Note that `res` as used here is the original response
// object that was handed to our `zip` route handler.
var sendResult=function(name, zip){
res.contentType('application/zip');
res.setHeader('content-disposition','attachment; filename=' + name);
return res.send(zip.toBuffer());
};
// Ok, before we formulate our response, we'll need to get the
// html content from ourselves which we can do by making
// a get request with the proper url.
//
// Assuming this server is running on localhost:80, we can
// use this url. If this is not the case, modify it as needed.
var url='http://localhost:80/html';
var httpGetRequest = http.get(url,function(getRes){
var body=''; // we'll build up the result from our request here.
// The 'data' event is fired each time the "remote" server
// returns a part of its response. Remember that these data
// can come in multiple chunks, and you do not know how many,
// so let's collect them all into our body var.
getRes.on('data',function(chunk){
body+=chunk.toString(); // make sure it's not a Buffer!
});
// The 'end' event will be fired when there are no more data
// to be read from the response so it's here we can respond
// to our original request.
getRes.on('end',function(){
var filename=projectName+'.zip',
files=[
{
name:'readme.txt',
content:'This was inside the ZIP!'
},{
name:'result.html',
content:body
}
];
// Finally, call our zip creator passing our result sender...
//
// Note that we could have both built the zip and sent the
// result here, but using the handlers we defined above
// makes the code a little cleaner and easier to understand.
//
// It might have been nicer to create handlers for all the
// functions herein defined in-line...
return createZipArchive(filename,files,sendResult);
});
}).on('error',function(err){
// This handler will be called if the http.get request fails
// in which case, we simply respond with a server error.
return res.status(500).send('could not retrieve html: '+err);
});
});
}
This is really the best way to solve your problem, even though it might seem complex. Some of the complexity can be reduced by using a better HTTP client library like superagent, which reduces all the event-handling rigmarole to a simple:
var request = require('superagent');
request.get(url, function(err, res){
...
var zip=new AdmZip();
zip.addFile('filename',new Buffer(res.text));
...
});
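To make that concrete, here's a rough, untested sketch of the whole zip route using superagent; it assumes superagent is installed, reuses the http://localhost:80/html URL from above, and otherwise keeps the same Project, AdmZip and zip: route-handler shape:
var request = require('superagent'); // npm install superagent
var AdmZip = require('adm-zip');

zip: function (req, res) {
  var projectId = req.params.project || '';
  if (!projectId) {
    return res.status(404).send('requires a projectId');
  }
  Project.findOne({id: projectId}, function (err, project) {
    if (err) {
      return res.status(500).send(err);
    }
    request.get('http://localhost:80/html', function (err, getRes) {
      if (err) {
        return res.status(500).send('could not retrieve html: ' + err);
      }
      var zip = new AdmZip();
      zip.addFile('readme.txt', new Buffer('This was inside the ZIP!'));
      zip.addFile('result.html', new Buffer(getRes.text)); // the rendered page body
      res.contentType('application/zip');
      res.setHeader('content-disposition', 'attachment; filename=' + project.name + '.zip');
      return res.send(zip.toBuffer());
    });
  });
}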
Second method
The second method utilizes the render() method of express' app object, which is exactly what res.render() uses to convert views into HTML.
See Express app.render() for how this function operates.
Note that this solution is the same except for the portion annotated starting at // - NEW CODE HERE -.
zip: function (req, res) {
var projectId=req.params.project||'';
if(!projectId){ // make sure we have what we need!
return res.status(404).send('requires a projectId');
}
// Ok, we start by requesting our project...
Project.findOne({id:projectId},function(err, project) {
if(err) { // ALWAYS handle errors!
return res.status(500).send(err);
}
if('object'!==typeof project){ // did we get what we expected?
return res.status(404).send('failed to find project for id: '+projectId);
}
var projectName=project.name || ''; // Make no assumptions!
if(!projectName){
return res.status(500).send('whoops! project has no name!');
}
// For clarity, let's write a function to create our
// zip archive which will take:
// 1. a name for the zip file as it is sent to the requester
// 2. an array of {name:'foo.txt',content:''} objs, and
// 3. a callback which will send the result back to our
// original requester.
var createZipArchive=function(name, files, cb){
// create our zip...
var zip=new AdmZip();
// add the files to the zip
if(Array.isArray(files)){
files.forEach(function(file){
zip.addFile(file.name,new Buffer(file.content));
});
}
// pass the filename and the new zip to the callback
return cb(name, zip);
};
// And the callback that will send our response...
//
// Note that `res` as used here is the original response
// object that was handed to our `zip` route handler.
var sendResult=function(name, zip){
res.contentType('application/zip');
res.setHeader('content-disposition','attachment; filename=' + name);
return res.send(zip.toBuffer());
};
// - NEW CODE HERE -
// Render our view, build our zip and send our response...
app.render('exporting/html', { name:projectName }, function(err,html){
if(err){
return res.status(500).send('failed to render view: '+err);
}
var filename=projectName+'.zip',
files=[
{
name:'readme.txt',
content:'This was inside the ZIP!'
},{
name:'result.html',
content:html
}
];
// Finally, call our zip creator passing our result sender...
//
// Note that we could have both built the zip and sent the
// result here, but using the handlers we defined above
// makes the code a little cleaner and easier to understand.
//
// It might have been nicer to create handlers for all the
// functions herein defined in-line...
return createZipArchive(filename,files,sendResult);
});
});
}
While this method is somewhat shorter, by utilizing the underlying mechanism that Express uses to render views, it "couples" your zip route to the Express engine in such a way that, should the Express API change in the future, you'll need to make two changes to your server code (to properly handle the html route and the zip routes), rather than only one using the previous solution.
Personally, I favor the first solution as it is cleaner (in my mind) and more independent of unexpected change. But as they say YMMV ;).

Related

Creating promises in loop

I have to create promises in a loop according to a given config file and return the response when all are resolved. Here is the code:
{for(let type in spotlight){
switch (type){
case "outliers":{
let ops= spotlight[type];
for(let i=0;i<ops.length;i++){
(function(op){
let p= new Promise(function(resolve,reject){
let reqUrl= urlCreator(op.uri,op.query);
//console.log("--------------------"+reqUrl);
apiService.get(reqUrl,function(isSuccess,data){
if(!isSuccess){
return reject(data);
}
// console.log(isSuccess);
// console.log(data);
// console.log("trend is ------"+JSON.stringify(op));
// create objects array
// let temp= [];
// let overallScore= data.overall.score;
// for(let day in overallScore){
// temp.push({"key": day,"value": parseFloat(overallScore[day])});
// }
//let outliers= stats.outliers(temp,"key","value");
resolve({"type":type,"name": op.name,"data": outliers});
})
});
promiseArray.push(p);
}(ops[i]))
}
break;
}
case "filters":{
let ops= spotlight[type];
for(let i=0;i<ops.length;i++){
(function(op){
let p= new Promise(function(resolve,reject){
let reqUrl= urlCreator(op.uri,op.query);
apiService.get(reqUrl,function(isSuccess,data){
if(!isSuccess){
return reject(data);
}
// console.log(isSuccess);
// console.log(data);
// console.log("coc is ------"+JSON.stringify(op));
resolve({"type": type,"name": op.name,"data": data});
})
})
promiseArray.push(p);
}(ops[i]))
}
break;
}
}
}
Promise.all(promiseArray).then(values=>{
return res.json(values);
},
reason=>{
return res.json(reason);
}).catch(reason=>{
return res.json(reason);
})}
The problem is that the promises never return, neither resolved nor rejected. According to the config file, it has to hit two URLs, say U1 and U2. I tried to log the output to see which requests are returning. When the server is started and the very first request is made, U1 returns and the request hangs. On refresh I get a response from U2, U2 and the request hangs; then on refresh again U1, U1, and this continues. It seems to me that for some reason only one request is returned and the other sits in a buffer or something and comes back when the next request is made. Both requests are being made to the local server only; I am routing them externally just to make use of the cache, as the URL is used as the cache key.
I tried using dummy URLs like facebook.com and google.com, and it works perfectly fine. Using one local URL and another like facebook.com also works, but when both URLs are of the local server, it gets stuck.
Does it have anything to do with the single-threaded nature of Node, or with using the same socket for making both requests?
PS: I am using the npm request module to make the URL calls.
Perhaps hesitating before making the second request would solve your problem.
I've made some tools that could help with that. See the MacroQTools.js file at
https://github.com/J-Adrian-Zimmer/JavascriptPromisesClarified.git
You're defining the request callback as function(success , data), while request consumes error-first callbacks, defined like function(error , response).
You're calling request like:
apiService.get(reqUrl,function(isSuccess,data){
if(!isSuccess){
return reject(data);
}
// console.log(isSuccess);
// console.log(data);
// console.log("coc is ------"+JSON.stringify(op));
resolve({"type": type,"name": op.name,"data": data});
});
This pretends that, if the first parameter is missing, you have to reject with the second parameter, data. Really, it should be something like:
apiService.get(reqUrl,function(err,data){
if(err){
reject(err);
}
else{
// console.log(isSuccess);
// console.log(data);
// console.log("coc is ------"+JSON.stringify(op));
resolve({"type": type,"name": op.name,"data": data});
}
});
This is because request expects error-first callbacks (like almost anything in Node that takes a callback).
So, when the requests actually work as expected, your code is rejecting the promises with the successful response value, since on success the first parameter (your isSuccess) is null and data holds the real response value.
This is surely breaking something and is not good, although fixing it alone may not solve your issue completely: I believe your requests are acting weird because of some configuration problem in your API, not just because you're rejecting promises when requests are successful (that would just send the data as the rejection reason).
Also you're handling the rejection of Promise.all() twice, passing a second handler to then and calling catch again. Only one is needed, and the .catch(handler) is probably better.
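In other words, a minimal sketch of the preferred shape, reusing your promiseArray and res, would be:
Promise.all(promiseArray)
  .then(function (values) {
    return res.json(values);
  })
  .catch(function (reason) {
    // catches both a rejected promise and an error thrown in the handler above
    return res.json(reason);
  });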
I made a small working example of how you can use Promise.all to collect async requests. I used imdb as the apiService, but any async http service would work too. I didn't reproduce your code exactly, but I'm sure you can adapt this to make it work, at least the part that is just consuming http services.
var express = require('express');
var app = express();
var Promise = require('bluebird');
var imdb = require('imdb-api');
app.get('/', controllerHandler );
app.listen(3000, function () {
console.log('Example app listening on port 3000!')
});
var apiService = {}
apiService.get = imdb.getReq;
function controllerHandler(request , response){
//like iterating through spotlight.type and returning an array of promises from it.
//in this case the array is from films and Airbag is obviously the best of them
var promises = [{name : 'The Matrix'} , { name : 'Avatar'} , {name : 'Airbag'}].map( createPromise );
//use either .catch(errorHandler) or then( successHandler , errorHandler ). The former is the better:
Promise.all(promises).then( successHandler ).catch( errorHandler );
function successHandler(result){
return response.json(result);
}
function errorHandler(reason){
console.log('There was an error calling to the service:');
console.log(reason);
return response.send('there was an error');
}
}
function createPromise(film){
return new Promise( function(resolve , reject){
apiService.get(film , function(err , data){
if(err)
reject( new Error(err));
else
resolve( {title : data.title , year : data.year} );
});
});
};

Write file and usage nodejs express

For a school project I'm creating a portal for KVM using NodeJS and Express.
I need to adjust an XML file and then use that XML file to create a VM.
So I created two functions:
CreateXML:
function createXML(req, res, next) {
var parser = new xml2js.Parser();
fs.readFile('Debian7.xml', function(err, data){
parser.parseString(data, function (err, result){
result.domain.name = req.body.name;
result.domain.memory[0]['$'].unit = "GB";
result.domain.memory[0]['_'] = req.body.ram;
result.domain.currentMemory[0]['$'].unit = "GB";
result.domain.currentMemory[0]['_'] = req.body.ram;
result.domain.vcpu = req.body.cpus;
var builder = new xml2js.Builder({headless: true});
var xml = builder.buildObject(result);
fs.writeFile('./xmlfiles/' + req.body.name + '.xml', xml, function(err, data){
if(err) console.log(err);
});
});
});
};
CreateDomain:
function createDomain(req, res){
var domainXML = fs.readFileSync('./xmlfiles/' + req.body.name + '.xml', 'utf8');
hypervisor.connect(function(){
hypervisor.createDomainAsync(domainXML).then(function (domain){
console.log('Domain Created');
res.json({success: true, msg: 'succesfully created domain'})
});
});
}
Then I call these functions as middleware in my POST request:
apiRoutes.post('/domainCreate', createXML, createDomain);
But then when I use Postman on the api route I get the following error:
Error: ENOENT: no such file or directory, open './xmlfiles/rickyderouter23.xml'
After the error it still creates the XML file, and when I create the XML file before I use Postman it works fine. It's as if it needs to execute both functions before the XML file is created. How do I create the XML file in the first function and then use it in the second function?
The answer is "it's asynchronous" (just like many, many problems in node.js/javascript).
The fs.readFile function is asynchronous: when you call it, you give it a callback function which it will call when it finishes loading the file.
The parser.parseString is asynchronous - it will call your callback function when it finishes parsing the XML.
The fs.writeFile is the same - it will call your callback function when it finishes writing the file.
The hypervisor.connect function is the same - it will call your callback function when it finishes connecting.
The middleware functions are called in order, but they both contain code that may not have completed before they return. So when your code calls createDomain and tries to read the XML file created in createXML, the XML file probably doesn't exist yet. The fs.readFile might not be finished yet; even if it is, the parser.parseString function might not be finished yet; even if that one is finished, the fs.writeFile might not be finished yet.
One way to solve this would be to put the functionality of the createXML and createDomain functions together into one middleware function. That would allow you to rewrite it so that all the function calls that depend on previous asynchronous function calls could actually wait for those calls to complete before executing. A simple way to do it would be this:
function createXML(req, res, next) {
var parser = new xml2js.Parser();
fs.readFile('Debian7.xml', function(err, data){
parser.parseString(data, function (err, result){
result.domain.name = req.body.name;
result.domain.memory[0]['$'].unit = "GB";
result.domain.memory[0]['_'] = req.body.ram;
result.domain.currentMemory[0]['$'].unit = "GB";
result.domain.currentMemory[0]['_'] = req.body.ram;
result.domain.vcpu = req.body.cpus;
var builder = new xml2js.Builder({headless: true});
var xml = builder.buildObject(result);
fs.writeFile('./xmlfiles/' + req.body.name + '.xml', xml, function(err, data){
if(err) console.log(err);
// notice the call to createDomain here - this ensure
// that the connection to the hypervisor is not started
// until the file is written
createDomain(req, res);
});
});
});
};
And change your route to:
apiRoutes.post('/domainCreate', createXML);
Now, that's pretty ugly. I don't like the idea of lumping those two middleware functions into one, and I'd prefer to rewrite it to use a promise-based approach, but that's the basic idea.
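Just to illustrate that direction, here's a rough, untested sketch of a promise-based version; it assumes Node 8+ (for util.promisify) and keeps your xml2js and hypervisor calls, but the function name createDomainFromXML and the error handling are mine:
var util = require('util');
var fs = require('fs');
var xml2js = require('xml2js');

var readFile = util.promisify(fs.readFile);
var writeFile = util.promisify(fs.writeFile);

function createDomainFromXML(req, res) {
  var parser = new xml2js.Parser();
  var parseString = util.promisify(parser.parseString.bind(parser));
  var xmlPath = './xmlfiles/' + req.body.name + '.xml';
  readFile('Debian7.xml')
    .then(parseString)
    .then(function (result) {
      result.domain.name = req.body.name;
      result.domain.memory[0]['$'].unit = "GB";
      result.domain.memory[0]['_'] = req.body.ram;
      result.domain.currentMemory[0]['$'].unit = "GB";
      result.domain.currentMemory[0]['_'] = req.body.ram;
      result.domain.vcpu = req.body.cpus;
      var xml = new xml2js.Builder({headless: true}).buildObject(result);
      return writeFile(xmlPath, xml);
    })
    .then(function () {
      // by the time we get here the XML file is guaranteed to exist
      hypervisor.connect(function () {
        hypervisor.createDomainAsync(fs.readFileSync(xmlPath, 'utf8')).then(function (domain) {
          res.json({success: true, msg: 'succesfully created domain'});
        });
      });
    })
    .catch(function (err) {
      console.log(err);
      res.status(500).json({success: false, msg: 'failed to create domain'});
    });
}
The route would then become apiRoutes.post('/domainCreate', createDomainFromXML);.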

Stop function from being invoked multiple times

I'm in the process of building a file upload component that allows you to pause/resume file uploads.
The standard way to achieve this seems to be to break the file into chunks on the client machine, then send the chunks along with book-keeping information up to the server which can store the chunks into a staging directory, then merge them together when it has received all of the chunks. So, this is what I am doing.
I am using node/express and I'm able to get the files fine, but I'm running into an issue because my merge_chunks function is being invoked multiple times.
Here's my call stack:
router.post('/api/videos',
upload.single('file'),
validate_params,
rename_uploaded_chunk,
check_completion_status,
merge_chunks,
record_upload_date,
videos.update,
send_completion_notice
);
the check_completion_status function is implemented as follows:
/* Recursively check to see if we have every chunk of a file */
var check_completion_status = function (req, res, next) {
var current_chunk = 1;
var see_if_chunks_exist = function () {
fs.exists(get_chunk_file_name(current_chunk, req.file_id), function (exists) {
if (current_chunk > req.total_chunks) {
next();
} else if (exists) {
current_chunk ++;
see_if_chunks_exist();
} else {
res.sendStatus(202);
}
});
};
see_if_chunks_exist();
};
The file names in the staging directory have the chunk numbers embedded in them, so the idea is to see if we have a file for every chunk number. The function should only next() one time for a given (complete) file.
However, my merge_chunks function is being invoked multiple times. (usually between 1 and 4) Logging does reveal that it's only invoked after I've received all of the chunks.
With this in mind, my assumption here is that it's the async nature of the fs.exists function that's causing the issue.
Even though the n'th invocation of check_completion_status may occur before I have all of the chunks, by the time we get to the nth call to fs.exists(), x more chunks may have arrived and been processed concurrently, so the function can keep going and in some cases get to the end and next(). However those chunks that arrived concurrently are also going to correspond to invocations of check_completion_status, which are also going to next() because we obviously have all of the files at this point.
This is causing issues because I didn't account for this when I wrote merge_chunks.
For completeness, here's the merge_chunks function:
var merge_chunks = (function () {
var pipe_chunks = function (args) {
args.chunk_number = args.chunk_number || 1;
if (args.chunk_number > args.total_chunks) {
args.write_stream.end();
args.next();
} else {
var file_name = get_chunk_file_name(args.chunk_number, args.file_id)
var read_stream = fs.createReadStream(file_name);
read_stream.pipe(args.write_stream, {end: false});
read_stream.on('end', function () {
//once we're done with the chunk we can delete it and move on to the next one.
fs.unlink(file_name);
args.chunk_number += 1;
pipe_chunks(args);
});
}
};
return function (req, res, next) {
var out = path.resolve('videos', req.video_id);
var write_stream = fs.createWriteStream(out);
pipe_chunks({
write_stream: write_stream,
file_id: req.file_id,
total_chunks: req.total_chunks,
next: next
});
};
}());
Currently, I'm receiving an error because the second invocation of the function is trying to read the chunks that have already been deleted by the first invocation.
What is the typical pattern for handling this type of situation? I'd like to avoid a stateful architecture if possible. Is it possible to cancel pending handlers right before calling next() in check_completion_status?
If you just want to make it work ASAP, I would use a lock (much like a db lock) to lock the resource so that only one of the requests processes the chunks. Simply create a unique id on the client, and send it along with the chunks. Then just store that unique id in some sort of data structure, and look that id up prior to processing. The example below is far from optimal (in fact this map will keep growing, which is bad), but it should demonstrate the concept:
// Create a map (an array would work too) and keep track of the video ids that were processed. This map will persist through each request.
var processedVideos = {};
var check_completion_status = function (req, res, next) {
var current_chunk = 1;
var see_if_chunks_exist = function () {
fs.exists(get_chunk_file_name(current_chunk, req.file_id), function (exists) {
if (processedVideos[req.query.uniqueVideoId]){
res.sendStatus(202);
} else if (current_chunk > req.total_chunks) {
processedVideos[req.query.uniqueVideoId] = true;
next();
} else if (exists) {
current_chunk ++;
see_if_chunks_exist();
} else {
res.sendStatus(202);
}
});
};
see_if_chunks_exist();
};
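To keep that map from growing forever, one option (just a sketch; the timing caveats of this in-memory lock still apply) is to drop the entry once the upload has been fully processed, for example with a small extra middleware of your own:
// hypothetical cleanup middleware, placed anywhere after merge_chunks in the route chain
var cleanup_processed_flag = function (req, res, next) {
  delete processedVideos[req.query.uniqueVideoId];
  next();
};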

EventEmitter memory leak detected: Proper way to pass CSV data to multiple modules?

I am dipping my toe into using different npm modules my own way whereas before I just executed already created gulpfiles. The npm module penthouse loads a webpage and determines the above the fold CSS for that page. I am trying to take that module and use it with a site crawler so I can get the above the fold css for all pages, and store that CSS in a table.
So essentially I am:
Crawling a site to get all the urls
capturing the page id from each url
storing pages & their id's in a CSV
load the CSV and pass each URL to penthouse
take penthouse output and store it in a table
So I am fine up until the last two steps. When I am reading the CSV, I get the error possible EventEmitter memory leak detected. 11 exit listeners added. Use emitter.setMaxListeners() to increase limit.
The stack trace points here at line 134. After reading about the error, it makes sense because I see a bunch of event listeners being added, but I don't see penthouse ever really executing and closing the event listeners.
It works just fine standalone as expected (Running penthouse against a single page then exiting). But when I execute the code below to try and loop through all URLs in a csv, it spits out the memory leak error twice, and just hangs. None of my console.log statements in the following script are executed.
However, I added console.log to the end of the penthouse index.js file, and it is executed multiple times (where it adds event listeners), but it never times out or exits.
So it's clear I am not integrating this properly, but not sure how to proceed. What would be the best way to force it to read one line in the CSV at a time, process the URL, then take the output and store it in the DB before moving onto the next line?
const fs = require('fs');
var csv = require('fast-csv');
var penthouse = require('penthouse'),
path = require('path');
var readUrlCsv = function() {
var stream = fs.createReadStream("/home/vagrant/urls.csv");
var csvStream = csv()
//returns single line from CSV
.on("data", function(data) {
// data[0]: table id, data[1]: page type, data[2]: url
penthouse({
url : data[2],
css : './dist/styles/main.css'
}, function(err, criticalCss) {
if (err) {
console.log(err);
}
console.log('do we ever get here?'); //answer is no
if (data[1] === 'post') {
wp.posts().id( data[0] ).post({
inline_css: criticalCss
}).then(function( response ) {
console.log('saved to db');
});
} else {
wp.pages().id( data[0] ).page({
inline_css: criticalCss
}).then(function( response ) {
console.log('saved to db');
});
}
});
})
.on("end", function(){
console.log("done");
});
return stream.pipe(csvStream);
};
UPDATE
Changed my method to look like below so it processes all rows first, but still throws the same error. Writes "done" to the console, and immediately spits out the memory warning twice.
var readUrlCsv = function() {
var stream = fs.createReadStream("/home/vagrant/urls.csv");
var urls = [];
var csvStream = csv()
.on("data", function(data) {
// data[0]: table id, data[1]: page type, data[2]: url
urls.push(data);
})
.on("end", function(){
console.log("done");
buildCriticalCss(urls);
});
return stream.pipe(csvStream);
};
var buildCriticalCss = function(urls) {
//console.log(urls);
urls.forEach(function(data, idx) {
//console.log(data);
penthouse({
url : data[2],
css : './dist/styles/main.css',
// OPTIONAL params
width : 1300, // viewport width
height : 900, // viewport height
timeout: 30000, // ms; abort critical css generation after this timeout
strict: false, // set to true to throw on css errors (will run faster if no errors)
maxEmbeddedBase64Length: 1000 // characters; strip out inline base64 encoded resources larger than this
}, function(err, criticalCss) {
if (err) {
console.log(err);
}
console.log('do we ever finish one?');
if (data[1] === 'post') {
console.log('saving post ' + data[0]);
wp.posts().id( data[0] ).post({
inline_css: criticalCss
}).then(function( response ) {
console.log('saved post to db');
});
} else {
console.log('saving page ' + data[0]);
wp.pages().id( data[0] ).page({
inline_css: criticalCss
}).then(function( response ) {
console.log('saved page to db');
});
}
});
});
};
Update 2
I took a simple approach to controlling the number of concurrent processes spawned.
var readUrlCsv = function() {
var stream = fs.createReadStream("/home/vagrant/urls.csv");
var urls = [];
var csvStream = csv()
.on("data", function(data) {
// data[0]: table id, data[1]: page type, data[2]: url
urls.push(data);
})
.on("end", function(){
console.log("done");
//console.log(urls);
buildCriticalCss(urls);
});
return stream.pipe(csvStream);
};
function buildCriticalCss(data) {
var row = data.shift();
console.log(row);
penthouse({
url : row[2],
css : './dist/styles/main.css',
// OPTIONAL params
width : 1300, // viewport width
height : 900, // viewport height
timeout: 30000, // ms; abort critical css generation after this timeout
strict: false, // set to true to throw on css errors (will run faster if no errors)
maxEmbeddedBase64Length: 1000 // characters; strip out inline base64 encoded resources larger than this
}, function(err, criticalCss) {
if (err) {
console.log('err');
}
// handle your criticalCSS
console.log('finished');
console.log(row[2]);
// now start next job, if we have more urls
if (data.length !== 0) {
buildCriticalCss(data);
}
});
}
The error message you're seeing is a warning printed to the console by default by Node's events library when more than the allowed number of event listeners are defined for an instance of EventEmitter. It does not indicate an actual memory leak. Rather it is displayed to make sure you're aware of the possibility of a leak.
You can see this by checking the events.EventEmitter source code at lines 20 and 244.
To stop EventEmitter from displaying this message, and since penthouse does not expose its specific EventEmitter, you'll need to set the default number of allowed event emitter listeners to something larger than its default value of 10 using:
var EventEmitter=require('events').EventEmitter;
EventEmitter.defaultMaxListeners=20;
Note that according to Node's documentation for EventEmitter.defaultMaxListeners, this will change the maximum number of listeners for all instances of EventEmitter, including those that have already been defined previous to the change.
Or you could simply ignore the message.
Further to the hanging of your code, I'd advise gathering all the results from the parsing of your CSV into an array, and then processing the array contents separately from the parsing process.
This would accomplish two things: it would allow you to be assured the entire CSV file was valid before you started processing, and to instrument debugging messages while processing each element, which would give you deeper insight into how each element of the array was processed.
UPDATE
As noted below, depending on how many URLs you're processing, you're probably overwhelming Node's ability to handle all of your requests in parallel.
One easy way to proceed would be to use eventing to marshal your processing so your URLs are processed sequentially, as in:
var assert=require('assert'),
events=require('events'),
fs=require('fs'),
csv=require('fast-csv'),
penthouse=require('penthouse');
var emitter=new events.EventEmitter();
/** Container for URL records read from CSV file.
*
* @type {Array}
*/
var urls=[];
/** Reads urls from file and triggers processing
*
* @emits processUrl
*/
var readUrlCsv = function() {
var stream = fs.createReadStream("/home/vagrant/urls.csv");
stream.on('error',function(e){ // always handle errors!!
console.error('failed to createReadStream: %s',e);
process.exit(-1);
});
var csvStream = csv()
.on("data", function(data) {
// data[0]: table id, data[1]: page type, data[2]: url
urls.push(data);
})
.on("end", function(){
console.log("done reading csv");
//console.log(urls);
emitter.emit('processUrl'); // start processing URLs
})
.on('error',function(e){
console.error('failed to parse CSV: %s',e);
process.exit(-1);
});
// no return required since we don't do anything with the result
stream.pipe(csvStream);
};
/** Event handler to process a single URL
*
* @emits processUrl
*/
var onProcessUrl=function(){
// always check your assumptions
assert(Array.isArray(urls),'urls must be an array');
var urlRecord=urls.shift();
if(urlRecord){
assert(Array.isArray(urlRecord),'urlRecord must be an array');
assert(urlRecord.length>2,'urlRecord must have at least three elements');
penthouse(
{
// ...
},
function(e,criticalCss){
if(e){
console.error('failed to process record %s: %s',urlRecord,e);
return; // IMPORTANT! do not drop through to rest of func!
}
// do what you need with the result here
if(urls.length===0){ // ok, we're done
console.log('completed processing URLs');
return;
}
emitter.emit('processUrl');
}
);
}
}
/**
* processUrl event - triggers processing of next URL
*
* @event processUrl
*/
emitter.on('processUrl',onProcessUrl); // assign handler
// start everything going...
readUrlCsv();
The benefit of using events here rather than your solution is the lack of recursion which can easily overwhelm your stack.
Hint: You can use events to handle all program flow issues normally addressed by Promises or modules like async.
And since events are at the very heart of Node (the "event loop"), it's really the best, most efficient way to solve such problems.
It's both elegant and "The Node Way"!
Here is a gist that illustrates the technique, without relying on streams or penthouse, the output of which is:
url: url1
RESULT: RESULT FOR url1
url: url2
RESULT: RESULT FOR url2
url: url3
RESULT: RESULT FOR url3
completed processing URLs
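The gist isn't reproduced here, but a bare-bones sketch of the same event-driven, sequential pattern, with a made-up fakeService standing in for penthouse, would look something like:
var events = require('events');
var emitter = new events.EventEmitter();
var urls = ['url1', 'url2', 'url3'];

// stand-in for any async service (penthouse, an http request, ...)
var fakeService = function (url, cb) {
  setTimeout(function () { cb(null, 'RESULT FOR ' + url); }, 10);
};

emitter.on('processUrl', function () {
  var url = urls.shift();
  if (!url) {
    return console.log('completed processing URLs');
  }
  console.log('url: ' + url);
  fakeService(url, function (err, result) {
    if (err) {
      return console.error(err);
    }
    console.log('RESULT: ' + result);
    emitter.emit('processUrl'); // only now do we move on to the next url
  });
});

emitter.emit('processUrl'); // kick things off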
Besides using console.log, which is usually enough, you can also use the built-in debugger: https://nodejs.org/api/debugger.html
Another thing you can do is go into the node_modules/penthouse directory and add your console.logs or debugger statement into the code for that module. That way you can debug your program there rather than the module just being a black box.
Also make sure there isn't some kind of race condition where, for example, the CSV isn't always fully written before your code tries to read it in.
I think that the memory leak issue is probably a red herring as far as making your code function.
From your comment it sounds like you want to do something like the following with async.mapSeries: http://promise-nuggets.github.io/articles/15-map-in-series.html. You could also use promises as that article shows, or, once promises are set up, use async/await with a regular for loop after compiling with Babel. In the long run I recommend doing that sort of thing with async/await and Babel, but that might be overkill just to get this working.
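For what it's worth, a rough, untested sketch of that async.mapSeries approach, reusing the urls array and the penthouse call from the code above (names illustrative):
var async = require('async'); // npm install async

// urls is the array of CSV rows collected in the "end" handler above
var buildCriticalCss = function (urls, done) {
  async.mapSeries(urls, function (row, cb) {
    // row[0]: table id, row[1]: page type, row[2]: url
    penthouse({
      url: row[2],
      css: './dist/styles/main.css'
    }, function (err, criticalCss) {
      if (err) {
        return cb(err);
      }
      cb(null, {id: row[0], type: row[1], css: criticalCss});
    });
  }, function (err, results) {
    // called once, after every row has been processed in order
    done(err, results);
  });
};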

How to use filesystem's createReadStream with Meteor router(NodeJS)

I need to allow the user of my app to download a file with Meteor. Currently, when the user requests to download a file, I insert into a "fileRequests" collection in Mongo a document with the file location and a timestamp of the request, and return the ID of the newly created request. When the client gets the new ID it immediately goes to mydomain.com/uploads/:id. I then use something like this to intercept the request before Meteor does:
var connect = Npm.require("connect");
var Fiber = Npm.require("fibers");
var path = Npm.require('path');
var fs = Npm.require("fs");
var mime = Npm.require("mime");
__meteor_bootstrap__.app
.use(connect.query())
.use(connect.bodyParser()) //I add this for file-uploading
.use(function (req, res, next) {
Fiber(function() {
if(req.method == "GET") {
// get the id here, and stream the file using fs.createReadStream();
}
next();
}).run();
});
I check to make sure the file request was made less than 5 seconds ago, and I immediately delete the request document after I've queried it.
This works, and is secure (enough) I think. No one can make a request without being logged in, and 5 seconds is a pretty small window for someone to hijack the created request URL, but I just don't feel right with my solution. It feels dirty!
So I attempted to use Meteor-Router to accomplish the same thing. That way I can check if they're logged in correctly without doing the 5 second open to the world trickery.
So here's the code I wrote for that:
Meteor.Router.add('/uploads/:id', function(id) {
var path = Npm.require('path');
var fs = Npm.require("fs");
var mime = Npm.require("mime");
var res = this.response;
var file = FileSystem.findOne({ _id: id });
if(typeof file !== "undefined") {
var filename = path.basename(file.filePath);
var filePath = '/var/MeteorDMS/uploads/' + filename;
var stat = fs.statSync(filePath);
res.setHeader('Content-Disposition', 'attachment; filename=' + filename);
res.setHeader('Content-Type', mime.lookup(filePath));
res.setHeader('Content-Length', stat.size);
var filestream = fs.createReadStream(filePath);
filestream.pipe(res);
return;
}
});
This looks great, fits right in with the rest of the code and is easy to read, no hacking involved, BUT! It doesn't work! The browser spins and spins and never quite knows what to do. I have ZERO error messages coming up. I can keep using the app on other tabs. I don't know what it's doing, it never stops "loading". If I restart the server, I get a 0 byte file with all the correct headers, but I don't get the data.
Any help is greatly appreciated!!
EDIT:
After digging around a bit more, I noticed that trying to turn the response object into a JSON object results in a circular structure error.
Now the interesting thing about this is that when I listen to the filestream for the "data" event, and attempt to stringify the response object I don't get that error. But if I attempt to do the same thing in my first solution(listen to "data" and stringify the response) I get the error again.
So using the Meteor-Router solution something is happening to the response object. I also noticed that on the "data" event response.finished is flagged as true.
filestream.on('data', function(data) {
fs.writeFile('/var/MeteorDMS/afterData', JSON.stringify(res));
});
The Meteor router installs a middleware to do the routing. All Connect middleware either MUST call next() (exactly once) to indicate that the response is not yet settled or MUST settle the response by calling res.end() or by piping to the response. It is not allowed to do both.
I studied the source code of the middleware (see below). We see that we can return false to tell the middleware to call next(). This means we declare that this route did not settle the response and we would like to let other middleware do their work.
Or we can return a template name, a text, an array [status, text] or an array [status, headers, text], and the middleware will settle the response on our behalf by calling res.end() using the data we returned.
However, by piping to the response, we already settled the response. The Meteor router should not call next() nor res.end().
We solved the problem by forking the Meteor router and making a small change. We replaced the else in line 87 (after if (output === false)) by:
else if (typeof(output)!="undefined") {
See the commit with sha 8d8fc23d9c in my fork.
This way return; in the route method will tell the router to do nothing. Of course you already settled the response by piping to it.
Source code of the middleware as in the commit with sha f910a090ae:
// hook up the serving
__meteor_bootstrap__.app
.use(connect.query()) // <- XXX: we can probably assume accounts did this
.use(this._config.requestParser(this._config.bodyParser))
.use(function(req, res, next) {
// need to wrap in a fiber in case they do something async
// (e.g. in the database)
if(typeof(Fiber)=="undefined") Fiber = Npm.require('fibers');
Fiber(function() {
var output = Meteor.Router.match(req, res);
if (output === false) {
return next();
} else {
// parse out the various type of response we can have
// array can be
// [content], [status, content], [status, headers, content]
if (_.isArray(output)) {
// copy the array so we aren't actually modifying it!
output = output.slice(0);
if (output.length === 3) {
var headers = output.splice(1, 1)[0];
_.each(headers, function(value, key) {
res.setHeader(key, value);
});
}
if (output.length === 2) {
res.statusCode = output.shift();
}
output = output[0];
}
if (_.isNumber(output)) {
res.statusCode = output;
output = '';
}
return res.end(output);
}
}).run();
});
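As an aside, if you'd rather not fork the router, another option that follows from the middleware above (only reasonable for small files, and sketched from the behaviour described rather than tested) is to read the file into memory and return the [status, headers, content] array, letting the router settle the response itself:
Meteor.Router.add('/uploads/:id', function (id) {
  var path = Npm.require('path');
  var fs = Npm.require('fs');
  var mime = Npm.require('mime');
  var file = FileSystem.findOne({ _id: id });
  if (typeof file === "undefined") {
    return 404; // the router turns a bare number into a status code with an empty body
  }
  var filename = path.basename(file.filePath);
  var filePath = '/var/MeteorDMS/uploads/' + filename;
  var content = fs.readFileSync(filePath);
  return [200, {
    'Content-Disposition': 'attachment; filename=' + filename,
    'Content-Type': mime.lookup(filePath)
  }, content];
});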
