This is the simplified version of what I have, but basically the same.
var url_parts = url.parse(req.url, true);
var pathname = url_parts.pathname;
var query = url_parts.query;
var datapath = "data_";
if(query.sort !== undefined)
datapath += query.sort + ".json";
var file = fs.readFile(datapath, function(err,data) {
if(err) throw err;
jsondata = data.toString();
});
This works fine when the file does not exist in the directory. For example, calling http://localhost:12035/sort=date works fine if there is no "data_date.json". However, if that file does exist in the directory, "data_undefined.json" is used instead. Why is this? I have tried all kinds of workarounds to try and narrow it down (ie take a substring of just querystring.stringify(query) ) to no avail...
You should really be checking if query.sort exists, because if I get http://localhost:12035/ query.sort will return "undefined".
Related
So I tested my scraping on a static HTML file before adding it to my Node app.
The problem is that it's not returning all the rows.
On the site:
$('#sport tr').length
//Returns 13
In Cheerio:
$('#sport tr').length
//Returns 2
I'm stumped, here is the code I'm using. I've contained the URL as proof, so you can visit it yourself if you wish.
I'm suspecting it's something to do with var $ = cheerio.load(html); however I'm not experienced in Cheerio to say outright that's the problem.
var express = require('express');
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var app = express();
app.get('/scrape', function(req, res){
var url = 'http://www.olbg.com/football.php';
var json = [];
request(url, function(error, response, html){
if(!error){
var $ = cheerio.load(html);
console.log($('#sport tr').length);
var headers = [];
$('#sport tr th').each(function(i, th) {
var text = $(th).text();
if (text.trim() !== "") {
headers[i] = text.replace(/[\t\n\r\s]/mgi, '');
}
});
$('#sport tr').each(function(i, tr) {
// skip if header
if (!$(tr).is('th')) {
var temp = {};
temp["Event"] = $(tr).find('td').eq(0).text().trim();
temp["TopSelection"] = $(tr).find('td').eq(1).text().trim();
temp["BookieOdds"] = $(tr).find('td').eq(2).text().trim();
temp["OLBGRating"] = $(tr).find('td').eq(3).find('img').length;
if (temp["Event"] !== "" || temp["TopSelection"] !== ""){
json.push(temp);
}
}
});
}
// To write to the system we will use the built in 'fs' library.
// In this example we will pass 3 parameters to the writeFile function
// Parameter 1 : output.json - this is what the created filename will be called
// Parameter 2 : JSON.stringify(json, null, 4) - the data to write, here we do an extra step by calling JSON.stringify to make our JSON easier to read
// Parameter 3 : callback function - a callback function to let us know the status of our function
fs.writeFile('output.json', JSON.stringify(json), function(err){
console.log('File successfully written!');
})
// Finally, we'll just send out a message to the browser reminding you that this app does not have a UI.
res.send(json);
});
});
app.listen("8081");
console.log("Magic happens on port 8081");
exports = module.exports = app;
The reason that you're not getting the expected result is because the (table) html on that page is mangled. If you look at the second <td> in the second <tr> of the table#sport, you'll see an "extra" </td>. This causes the <td> that the table#sport is inside to close (and an implicit closing of table#sport) on some parsers because that is the closest open <td>. So that is why the parser reports only 2 <tr>s instead of 13. The other <tr>s you're expecting are now outside of table#sport.
Probably your best bet is to pass the html through an HTML tidying program/script (e.g. this one with the clean option enabled) first before passing it to cheerio. After that, your selector should return the elements you're probably expecting.
I have a problem reading the stats of a file. I have this code:
var fs = require('fs');
process.stdin.setEncoding('utf8');
process.stdin.on('readable', function() {
var chunk = process.stdin.read();
if (chunk !== null) {
var stats =fs.statSync(chunk);
length=stats.size;
console.log(length);
}
});
When I exec this code I get this error:
return binding.stat(pathModule._makeLong(path));
^
Error: ENOENT, no such file or directory 'hello.txt
But the problem is that "hello.txt" actually exists at the same directory¡
I have tried with other files and I always get the same error.
Any ideas?
Thanks¡
The chunk read from the standard input contains a new line in the end, which was conflicting with your call to fs.statSync. Try this:
process.stdin.on('readable', function() {
var chunk = process.stdin.read();
if (chunk !== null && chunk !== '') {
var stats = fs.statSync(chunk.trim()); // trim the input
length=stats.size;
console.log(length);
}
});
Also note that the function will be constantly executed for as long as 'readable' events are triggered. You may wish to terminate the program at some point or anything like that.
i have a small problem, when i try to copy one file from my tmp dir to my ftp dir the writen file is empty. I have no error, i don't understand what i'm doing wrong
var ftpPath = "/var/www/ftp/",
zipPath = "/var/www/tmp/",
file = "test";
fs.createReadStream(zipPath + file).pipe(fs.createWriteStream(ftpPath + file));
My test file contain loremipsum sample.
If you have any solution, i take it, this is the only line that bug in my app :(
First, make sure that the file /var/www/tmp/test exists, is a file, and has the right permissions for the user you start the script with.
Second, make sure that /var/www/ftp/ has writing permissions.
Then the following code should work :
var readerStream = fs.createReadStream('/var/www/tmp/test');
var writerStream = fs.createWriteStream('/var/www/ftp/test');
readerStream.pipe(writerStream);
Edit :
try debugging using this snippet :
var data;
var readerStream = fs.createReadStream('/var/www/tmp/test');
readerStream.on('data', function(data) {
data += data;
});
readerStream.on('end', function() {
console.log(data);
});
I am fairly new to NodeJS, I am trying to read all files in a given dir and then print out the results line by line using the code below
var fs=require('fs'),fsf = require('fs'),lazy = require('lazy');
var fr;
var dir = '/path/to/dir';
fs.readdir(dir,function(err,files){
if (err) throw err;
files.forEach(function(file){
console.log(file);
fr = fsf.createReadStream(file);
//console.log(fr);
new lazy(fr).lines.forEach(function(line){
console.log(line.toString());
});
});
I am getting the following error
Cannot call method 'toString' of undefined
Any pointers will be really appreciated!
Update: - There were actually two issues
(main) The blank lines in the individual files were causing this
exception.
The hidden files were getting picked up by the program.
Corrected both and here is the refactored code
var fs=require('fs'),lazy = require('lazy');
var fr;
var dir = '/path/to/dir';
fs.readdir(dir,function(err,files){ //Get a listing of all the files in the dir
if (err) throw err;
files.forEach(function(file){
if(file.search('\\.md') != -1) { //Only read markdown files
console.log(file);
fr = fs.createReadStream(file);
new lazy(fr).lines.forEach(function(line){
if (typeof line != 'undefined'){ // Skip blank lines within the files
if ((line.toString().search('\\+') != -1)||(line.toString().search('#') != -1)){
console.log(line.toString());
}
}
});
}
});
});
The code seems fine and is working with other directories and on other machines. After some investigation it seems to be an issue with the .DS_Store hidden files in the directory. I was trying this on a Mac with OSX 10.9.4. I am not 100% sure, but for now that seems to be the likely cause of the error.
Thanks!
I'm using post requests to search for binaries but can't get the following code to work correctly. fs.exists() won't accept the variable b_path but will work correctly if given a hard coded string. b_path prints to the console as expected, correctly building the path to the binary.
app.post('*', function(req, res) {
// generate the name of the binary
var request = require('url').parse(req.url, true);
var len = request.pathname.toString().length;
var binary = request.pathname.slice(1,len);
binary = binary.concat(' ');
var b_path = app.get('binaries_path')+binary;
fs.exists(b_path, function (exists) {
if(exists) {
console.log('exists');
}
}
}
Why does this occur?