I want to pipe a readable css file to the http response - node.js

I have an issue with outputting the readable stream to the http response.
Behind the scenes there are regular request and response streams coming from the generic http createServer. I check whether req.url ends in .css, and if so I create a readable stream for that file. I can see the CSS contents in console.log, with exactly the CSS code I expect. Then I try to pipe the readable CSS file stream to the response, but in Chrome the response body is blank when I inspect it, even though it comes back as a 200. Any thoughts at first glance? I've tried different variations of which lines are commented out.
router.addRoute("[a-aA-z0-9]{1,50}.css$", function(matches){
var cssFile = matches[0];
var pathToCss = process.cwd() + "/" + cssFile;
// takes care of os diffs regarding path delimiters and such
pathToCss = path.normalize(pathToCss);
console.log(matches);
console.log("PATH TO CSS");
console.log(pathToCss)
var readable = fs.createReadStream(pathToCss);
var write = function(chunk){
this.queue(chunk.toString());
console.log(chunk.toString());
}
var end = function(){
this.queue(null);
}
var thru = through(write,end);
//req.on("end",function(){
res.pipe(readable.pipe(thru)).pipe(res);
//res.end();
//});
});

You need to pipe your readable stream into your through-stream, and then pipe that into the response. res is the writable end of the pipeline here, so piping from it with res.pipe(...) doesn't do what you want:
readable.pipe(thru).pipe(res);
Edit: for building your CSS path, just use path.join instead of concatenating and then normalizing it:
var pathToCss = path.join(process.cwd(), cssFile);
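Putting the two suggestions together, a minimal sketch of the whole handler might look like this (assuming the same router, through, fs, path, req, and res as in the question; the character class is tidied to [a-zA-Z0-9], the dot is escaped, and a Content-Type header is added so the browser renders the body as CSS):
router.addRoute("[a-zA-Z0-9]{1,50}\\.css$", function(matches){
    var cssFile = matches[0];
    var pathToCss = path.join(process.cwd(), cssFile);

    var thru = through(function(chunk){ this.queue(chunk); },   // pass-through write
                       function(){ this.queue(null); });        // signal end of data

    res.writeHead(200, {"Content-Type": "text/css"});
    fs.createReadStream(pathToCss).pipe(thru).pipe(res);
});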

I separated this CSS route out from my normal HTML-producing routes. The problem I had was that my normal routes in my router object returned strings, like res.end(compiled_html_str), and the CSS file's readable stream was going through that same routing function. I fixed it by handling CSS outside the router:
var cssMatch = [];
if (cssMatch = req.url.match(/.+\/(.+\.css$)/)) {
    res.writeHead(200, {"Content-Type": "text/css"});
    var cssFile = cssMatch[1];
    var pathToCss = process.cwd() + "/" + cssFile;
    // takes care of os diffs regarding path delimiters and such
    pathToCss = path.normalize(pathToCss);
    console.log(cssMatch);
    console.log("PATH TO CSS");
    console.log(pathToCss);

    var readable = fs.createReadStream(pathToCss);
    var cssStr = "";
    readable.on("data", function(chunk){
        cssStr += chunk.toString();
    });
    readable.on("end", function(){
        res.end(cssStr);
    });
}
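Since the response is itself a writable stream, buffering into cssStr isn't strictly required; the read stream could also be piped straight to the response (a minimal sketch, reusing pathToCss from above):
res.writeHead(200, {"Content-Type": "text/css"});
fs.createReadStream(pathToCss).pipe(res); // the pipe ends the response when the file ends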

Related

Cheerio scraping returning only two rows

So I tested my scraping on a static HTML file before adding it to my Node app.
The problem is that it's not returning all the rows.
On the site:
$('#sport tr').length
//Returns 13
In Cheerio:
$('#sport tr').length
//Returns 2
I'm stumped. Here is the code I'm using; I've included the URL so you can visit the page yourself if you wish.
I suspect it's something to do with var $ = cheerio.load(html);, but I'm not experienced enough with Cheerio to say outright that that's the problem.
var express = require('express');
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var app = express();

app.get('/scrape', function(req, res){
    var url = 'http://www.olbg.com/football.php';
    var json = [];

    request(url, function(error, response, html){
        if (!error) {
            var $ = cheerio.load(html);
            console.log($('#sport tr').length);

            var headers = [];
            $('#sport tr th').each(function(i, th) {
                var text = $(th).text();
                if (text.trim() !== "") {
                    headers[i] = text.replace(/[\t\n\r\s]/mgi, '');
                }
            });

            $('#sport tr').each(function(i, tr) {
                // skip if header
                if (!$(tr).is('th')) {
                    var temp = {};
                    temp["Event"] = $(tr).find('td').eq(0).text().trim();
                    temp["TopSelection"] = $(tr).find('td').eq(1).text().trim();
                    temp["BookieOdds"] = $(tr).find('td').eq(2).text().trim();
                    temp["OLBGRating"] = $(tr).find('td').eq(3).find('img').length;
                    if (temp["Event"] !== "" || temp["TopSelection"] !== "") {
                        json.push(temp);
                    }
                }
            });
        }

        // To write to the system we will use the built-in 'fs' library.
        // In this example we pass 3 parameters to the writeFile function
        // Parameter 1 : 'output.json' - the filename that will be created
        // Parameter 2 : JSON.stringify(json) - the data to write
        // Parameter 3 : callback function - lets us know the status of the write
        fs.writeFile('output.json', JSON.stringify(json), function(err){
            console.log('File successfully written!');
        });

        // Finally, we'll just send out a message to the browser reminding you that this app does not have a UI.
        res.send(json);
    });
});

app.listen("8081");
console.log("Magic happens on port 8081");
exports = module.exports = app;
The reason that you're not getting the expected result is because the (table) html on that page is mangled. If you look at the second <td> in the second <tr> of the table#sport, you'll see an "extra" </td>. This causes the <td> that the table#sport is inside to close (and an implicit closing of table#sport) on some parsers because that is the closest open <td>. So that is why the parser reports only 2 <tr>s instead of 13. The other <tr>s you're expecting are now outside of table#sport.
Probably your best bet is to pass the html through an HTML tidying program/script (e.g. this one with the clean option enabled) before passing it to cheerio. After that, your selector should return the elements you're expecting.
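One way to approximate that in code (a rough sketch, not tested against that page, assuming the same request and url from the question and a parse5 version that exposes top-level parse/serialize) is to re-serialize the HTML through a spec-compliant parser before handing it to cheerio:
var cheerio = require('cheerio');
var parse5 = require('parse5');   // implements the WHATWG HTML parsing algorithm, like browsers do

request(url, function(error, response, html){
    if (error) return;

    // Parse and re-serialize so the stray </td> is recovered roughly the way Chrome recovers it,
    // then hand the cleaned-up markup to cheerio as before.
    var cleaned = parse5.serialize(parse5.parse(html));
    var $ = cheerio.load(cleaned);

    console.log($('#sport tr').length); // should now be much closer to what the browser console reports
});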

How can I create a txt file that holds the contents of an array in JavaScript?

I have several arrays of data that I would like to export, each array to its own txt file, so the data can be analyzed using MATLAB.
Let's say my array is:
var xPosition = [];
// some algorithm that adds content to xPosition
// TODO: export array into a txt file let's call it x_n.txt
It would be great to store one array element per line.
I found a guide to the solution in this post. The following code is what I ended up using:
var fs = require('fs');
var xPosition = [];
// some algorithm that adds content to xPosition
var file = fs.createWriteStream('./positions/x_n.txt');
file.on('error', function(err) { /* error handling */ });
xPosition.forEach(function(v) { file.write(v + '\n'); });
file.end();
The solution you found works, but here's how I'd have done it:
var fs = require('fs');
var xPosition = [1,2,3]; // Generate this
var fileName = './positions/x_n.txt';
fs.writeFileSync(fileName, xPosition.join('\n'));
This uses node's synchronous file writing capability, which is ideal for your purposes. You don't have to open or close file handles, etc. I'd use streams only if I had gigabytes of data to write out.
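Since the question mentions several arrays, one file each, the same approach extends naturally; here's a rough sketch (the series names are made up for illustration, and the positions directory is the one used above):
var fs = require('fs');
var path = require('path');

// hypothetical data sets to export, one txt file per array
var series = {
    x_n: [1.0, 2.5, 3.7],
    y_n: [0.1, 0.2, 0.3]
};

Object.keys(series).forEach(function(name) {
    var fileName = path.join('positions', name + '.txt');
    fs.writeFileSync(fileName, series[name].join('\n')); // one element per line
});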

nodejs: each line in separate file

I want to split a file: each line into a separate file. The initial file is really big. I ended up with the code below:
var fileCounter = -1;

function getWritable() {
    fileCounter++;
    var writable = fs.createWriteStream('data/part' + fileCounter + '.txt', {flags: 'w'});
    return writable;
}

var readable = fs.createReadStream(file).pipe(split());
readable.on('data', function (line) {
    var flag = getWritable().write(line, function() {
        readable.resume();
    });
    if (!flag) {
        readable.pause();
    }
});
It works, but it is ugly. Is there a more node-ish way to do it, maybe with piping and without pause/resume?
NB: this is not really a question about lines/files/etc. The question is about streams; I'm just using this problem to illustrate it.
You can use Node's built-in readline module.
var fs = require('fs');
var readline = require('readline');
var fileCounter = -1;
var file = "foo.txt";

readline.createInterface({
    input: fs.createReadStream(file),
    terminal: false
}).on('line', function(line) {
    fileCounter++;
    var writable = fs.createWriteStream('data/part' + fileCounter + '.txt', {flags: 'w'});
    writable.write(line);
    writable.end(); // close each file so descriptors don't pile up
});
Note that this will lose the last line of the file if there is no newline at the end, so make sure your last line of data is followed by a newline.
Also note that the docs indicate that it is Stability index 2, meaning:
Stability: 2 - Unstable
The API is in the process of settling, but has not yet had sufficient real-world testing to be considered stable. Backwards-compatibility will be maintained if reasonable.
How about the following? Did you try it? Pause/resume logic isn't really needed here.
var split = require('split');
var fs = require('fs');

var fileCounter = -1;
var readable = fs.createReadStream(file).pipe(split());
readable.on('data', function (line) {
    fileCounter++;
    var writable = fs.createWriteStream('data/part' + fileCounter + '.txt', {flags: 'w'});
    writable.write(line);
    writable.end();
});
Piping dynamically would be hard...
EDIT: You could create a writable (and therefore pipe()able) object that would, on each 'data' event, create the file, open it, write the data, and close it, but it:
wouldn't be reusable
wouldn't follow the KISS principle
would require special, specific logic for file naming (it would accept a string pattern with a placeholder for the number as a constructor argument, etc.)
I really don't recommend that path, or you're going to spend ages implementing a not-really-reusable module. Though it would make a good writable implementation exercise — something like the sketch below.
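For what it's worth, a minimal sketch of that exercise might look like the following (untested, using the stream.Writable base class; LineToFiles and its %d pattern convention are made-up names for illustration):
var stream = require('stream');
var util = require('util');
var fs = require('fs');
var split = require('split');

// Writable that dumps every chunk it receives into its own numbered file.
// `pattern` is a made-up convention: e.g. 'data/part%d.txt', with %d replaced by a counter.
function LineToFiles(pattern) {
    stream.Writable.call(this);
    this.pattern = pattern;
    this.counter = 0;
}
util.inherits(LineToFiles, stream.Writable);

LineToFiles.prototype._write = function (chunk, encoding, callback) {
    var fileName = this.pattern.replace('%d', this.counter++);
    fs.writeFile(fileName, chunk, callback); // the callback signals readiness for the next chunk
};

// usage
fs.createReadStream(file)
    .pipe(split())
    .pipe(new LineToFiles('data/part%d.txt'));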

Downloading a Gzip'd file

I'm attempting to download a file using the http module in node. While the file seems to download successfully, the resulting file cannot be opened using gzip. I've tried downloading the file through other methods, and that works; I've also tried multiple ways of opening the resulting gzip'd file, but all of them produce the same error.
I did attempt to use the request module, but there seemed to be no way of accessing the returned HTTP headers before the file was finished downloading, which I need because I'd like to offer some sort of visual indicator as to how long this file is going to take to download.
This is (roughly) the code that I've got so far.
var http = require('http');
var fs = require('fs');
var progress = 0;

downloadFile = function() {
    http.get(FILE_URL, function(response) {
        var maxBytes = parseInt(response.headers['content-length'], 10);
        var dumpFile = fs.createWriteStream(FILENAME + '.dl');

        response.pipe(dumpFile);
        response
            .on('data', function(chunk) {
                progress += chunk.length;
                // progressbar-type code here
            })
            .on('end', function() {
                // pass
            });

        dumpFile.on('finish', function() {
            dumpFile.close();
            fs.rename(FILENAME + '.dl', FILENAME);
        });
    });
};
So my question: How would you advise I download a file, bearing in mind it's a large file and I need some sort of visual indicator for download progress? Should I give up on http? Or am I doing something monumentally stupid?
Thanks!
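As a side note on the request point: the request module does emit a 'response' event before the body arrives, so the content-length header can be read up front for a progress indicator. A sketch, assuming the same FILE_URL and FILENAME placeholders as above:
var request = require('request');
var fs = require('fs');

var req = request(FILE_URL);
req.on('response', function(response) {
    // headers are available here, before the body has finished downloading
    var maxBytes = parseInt(response.headers['content-length'], 10);
    var progress = 0;

    response.on('data', function(chunk) {
        progress += chunk.length;
        // progressbar-type code here, e.g. progress / maxBytes
    });
});
req.pipe(fs.createWriteStream(FILENAME)); // write the (still gzip'd) body to disk as it streams in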

Why append rather than write when using knox / node.js to grab file from Amazon s3

I'm experimenting with the knox module for node.js as a way of managing some small files in an Amazon S3 bucket. Everything works fine stand-alone: I can upload a file, download a file, etc. However, I want to be able to download a file on a recurring schedule. When I modify the code to run on an interval, the downloaded file is appended to the previous download instead of overwriting it.
I'm not sure if I've made a mistake in the file write code or in the knox handling code. I've tried several different write approaches (writeFile, writeStream, etc.) and I've looked at the knox source code. Nothing obvious to me stands out as a problem. Here's the code I'm using:
knox = require('knox');
fs = require('fs');

var downFile = DOWNFILE;
var downTxt = '';
var timer = INTERVAL;
var path = S3PATH + downFile;

setInterval(function()
{
    var s3client = knox.createClient(
    {
        key: '********************',
        secret: '**********************************',
        bucket: '********'
    });

    s3client.get(path).on('response', function(response)
    {
        response.setEncoding('ascii');
        response.on('data', function(chunk)
        {
            downTxt += chunk;
        });
        response.on('end', function()
        {
            fs.writeFileSync(downFile, downTxt, 'ascii');
        });
    }).end();
},
timer);
The problem is with your placement of var downTxt = '';. That is the only place you set downTxt to blank, so every time you retrieve more data you add it to the data from the previous request, because downTxt is never cleared. The simplest fix is to move that line to just before the setEncoding line.
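In other words, a sketch of just that minimal change inside the 'response' handler (everything else as in your code):
s3client.get(path).on('response', function(response)
{
    var downTxt = '';   // declared here, so it starts blank for every request
    response.setEncoding('ascii');
    response.on('data', function(chunk) { downTxt += chunk; });
    response.on('end', function() { fs.writeFileSync(downFile, downTxt, 'ascii'); });
}).end();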
However, the way you are processing the data is unnecessarily complicated. Try something like the following instead. You don't need to recreate the client every time, and setting the encoding will just break things if you are downloading non-text files (it makes no difference for text files). Next, you shouldn't collect the data manually; you can start writing it to the file as soon as you receive it. Lastly, since the request is a standard stream, you don't need to monitor the 'data' event because you can just use pipe.
var knox = require('knox'),
    fs = require('fs'),
    downFile = DOWNFILE,
    timer = INTERVAL,
    path = S3PATH + downFile,
    s3client = knox.createClient({
        key: '********************',
        secret: '**********************************',
        bucket: '********'
    });

(function downloadFile() {
    var str = fs.createWriteStream(downFile);
    s3client.get(path).pipe(str);
    str.on('close', function() {
        setTimeout(downloadFile, timer);
    });
})();
