Web Scrape Meteor Pages - node.js

I'm trying to write an application that scrapes a meteor webpage. This is rather difficult as meteor webpages render initially entirely as Javascript. Is there some way perhaps to render the page with some sort of scraper?
Probably going to do it with node, if that helps.
Thanks

You could use phantomjs to render the webpage. This is an example, specifically designed for meteor webpages, (from spiderable) to capture their HTML:
// PhantomJS script (run with `phantomjs script.js`): opens a Meteor page,
// polls until the client reports its subscriptions ready, then prints the
// rendered HTML to stdout and exits.
var fs = require('fs');
var child_process = require('child_process');
console.log('Loading a web page');
var page = require('webpage').create();
page.open("http://localhost:3000", function(status) {
  // Without this check a failed load would leave the poller below running forever.
  if (status !== 'success') {
    console.log('Failed to load the page: ' + status);
    phantom.exit(1);
  }
});
setInterval(function() {
  // Runs inside the page: true once Meteor is connected and all
  // subscriptions report ready.
  var ready = page.evaluate(function () {
    if (typeof Meteor !== 'undefined'
        && typeof(Meteor.status) !== 'undefined'
        && Meteor.status().connected) {
      Deps.flush();
      return DDP._allSubscriptionsReady();
    }
    return false;
  });
  console.log("Ready", ready);
  if (ready) {
    var out = page.content;
    console.log(out);
    phantom.exit();
  }
}, 100);
It is this way but you could wrap the output and capture it using require('child_process').exec and stdin.
You can run the code with phantomjs script.js and it would give you back the HTML of a meteor page.

If they have the spiderable package enabled, then you can pretend to be a web crawler to get the server to render the page.
If you don't control the server or it isn't enabled, you will probably have to use Selenium - but the crawling will be CPU intensive and slow.

Related

Custom Computed Etag for Express.js

I'm working on a simple local image server that provides images to a web application with some JSON. The web application has pagination that will do a GET request "/images?page=X&limit=200" to an express.js server that returns the JSON files in a single array. I want to take advantage of the browser's internal caching such that if a user goes to a previous page the express.js server returns an ETag. I was wondering how this could be achieved with express.js? For this application, I really just want the computation of the ETag to take in three parameters: the page, the directory, and the limit (it doesn't need to consider the whole JSON body). Also, this application is for local use only, so I want the server to do the heavy lifting since I figured it would be faster than the browser. I did see https://www.npmjs.com/package/etag which seems promising, but I'm not sure how to use it with express.js.
Here's a boilerplate of the express.js code I have below:
var express = require('express');
var app = express();
var fs = require('fs');

// Reads one file and resolves with its parsed JSON; rejects on a read or
// parse failure instead of throwing inside the fs callback.
function readJsonFile(filePath) {
  return new Promise((resolve, reject) => {
    fs.readFile(filePath, (err, data) => {
      if (err) {
        reject(err);
        return;
      }
      try {
        // format the data so easy to use for UI
        resolve(JSON.parse(data));
      } catch (parseErr) {
        reject(parseErr);
      }
    });
  });
}

// GET /?page=P&limit=L -> JSON array with the parsed contents of the
// files P*L .. P*L+L-1 found in "mypath".
app.get('/', async (req, res) => {
  let page = parseInt(req.query.page, 10);
  let limit = parseInt(req.query.limit, 10);
  if (isNaN(page) || isNaN(limit) || page < 0 || limit < 0) {
    res.status(400).send("page and limit must be non-negative integers");
    return;
  }
  try {
    // readdirSync returns file names; slice() also clamps the last page
    // to the number of files actually present.
    let directory = fs.readdirSync("mypath");
    let start = page * limit;
    let files = directory
      .slice(start, start + limit)
      .map((name) => readJsonFile("mypath/" + name));
    let results = await Promise.all(files);
    // compute an etag here and attach it the results.
    res.send(results);
  } catch (err) {
    res.status(500).send(String(err));
  }
});
app.listen(3000);
When your server sends an ETag to the client, it must also be prepared to check the ETag that the client sends back to the server in the If-None-Match header in a subsequent "conditional" request.
If it matches, the server shall respond with status 304; otherwise there is no benefit in using ETags.
// Conditional-request handling: compare the ETag the client sent back in
// If-None-Match with the one computed for this request.
var serverEtag = "<compute from page, directory and limit>";
var clientEtag = req.get("If-None-Match");
var isUnchanged = (clientEtag === serverEtag);
if (isUnchanged) {
  // The client's cached copy is still valid: send no body.
  res.status(304).end();
} else {
  // Your code from above
  res.set("ETag", serverEtag);
  res.send(results);
}
The computation of the serverEtag could be based on the time of the last modification in the directory, so that it changes whenever any of the images in that directory changes. Importantly, this could be done without carrying out the fs.readFile statements from your code.

Socket.IO socket protection without authentication

I've created a simple Node.js app using Express.js and socket.io (available here), where the user clicks a button, and it increments a number on the page. This number is also incremented live among all clients connected to the page. I am using web sockets and socket.io to get the client-server communication and live number updating system.
I am using the flood-protection module to limit socket emits to 5 per second, but this really doesn't make the game very fun because of the low amount of clicks per second you can have, and hackers could just use a setInterval and still make considerable progress automatically, even at such a low rate.
My issue:
I don't want the user to have to authenticate themselves - anybody should be able to play!
I want to keep the click rate around 15 clicks per second, if possible.
I don't want people to be able to send socket messages and automatically click the button from the browser console.
Here's the program:
index.js
var express = require("express");
var http = require("http");
var socketIO = require("socket.io");
var path = require("path");
var fs = require("fs");
var FloodProtection = require("flood-protection").default;
__dirname = path.resolve();
/**
 * Encodes a latin1 (binary) string as base64.
 * @param {string} str - text whose characters are all in the latin1 range
 * @returns {string} the base64 encoding of str
 */
function btoa(str) {
  // Buffer.from replaces the deprecated `new Buffer(...)` constructor.
  return Buffer.from(String(str), 'latin1').toString('base64');
};
/**
 * Decodes base64 text into a latin1 (binary) string.
 * @param {string|Buffer} b64Encoded - base64 text; a Buffer holding base64
 *   text (e.g. the result of fs.readFileSync) is accepted as well
 * @returns {string} the decoded latin1 string
 */
function atob(b64Encoded) {
  // Coerce to a string first: when given a Buffer, the Buffer constructor
  // copies the bytes and ignores the 'base64' encoding argument, so the
  // input would come back undecoded.
  return Buffer.from(String(b64Encoded), 'base64').toString('latin1');
};
var app = express();
app.get("/", function(req, res){
  res.sendFile(__dirname + "/index.html");
});

// Shared click counter, restored from num.txt (a base64-encoded number).
// The file is read as a string so atob() receives base64 text, not a Buffer.
var num = 0;
try {
  var stored = parseInt(atob(fs.readFileSync("num.txt", "latin1")), 10);
  if (!isNaN(stored)) num = stored;
} catch (err) {
  // A missing file just means nothing has been saved yet.
  if (err.code !== "ENOENT") throw err;
}

var server = http.createServer(app);
var io = socketIO.listen(server, {log: true});
io.sockets.on("connection", (socket) => {
  // One limiter per connection, so clients do not share (or overwrite)
  // each other's click budget.
  var protector = new FloodProtection({rate: 5, per: 1});
  io.sockets.emit("update", num);
  socket.on("push", () => {
    if (protector.check()) {
      num++;
      io.sockets.emit("update", num);
    } else {
      // Tell only the offending client, then drop its connection.
      socket.emit("update", "You can only click the button five times per second.");
      socket.disconnect(true);
    }
  });
  socket.on("disconnect", () => {
    // Persist the current counter; log a failed save instead of crashing.
    fs.writeFile("num.txt", btoa(String(num)), (err) => {
      if (err) {
        console.error("could not save num: " + err);
        return;
      }
      console.log("saved | new num: " + num);
    });
  });
});
server.listen(5000);
index.html
<html>
<head>
<title>A Button</title>
</head>
<body>
<button onclick='push();'>Click me!</button>
<p id="out"></p>
<script type="text/javascript" src="/socket.io/socket.io.js"></script>
<script type="text/javascript">
var socket = io.connect("/");
// Register the handler once. Adding it inside the "connect" callback would
// attach another copy after every reconnect.
socket.on("update", function(val) {
  // textContent: the value is plain text and must not be parsed as HTML.
  document.getElementById("out").textContent = val;
});
socket.on("disconnect", function() {
  // Try to reconnect one second after the connection is dropped.
  setTimeout(function() { socket.connect(); }, 1000);
});
// Click handler for the button: sends one "push" while connected.
function push() {
  if (socket.connected) {
    socket.emit("push");
  }
}
</script>
</body>
</html>
num.txt is a base-64 encoded number.
So, is there a way to be able to do this without significant rate limiting or authentication? Or am I just going to have to use rate limiting?
There's a lot of different ways for users to cheat, and just as many ways to prevent them. The general rule is that you can only "make things harder" (if you're lucky, hard enough that the potential cheater loses interest before succeeding).
For a browser-based game, I would make sure that you are at least ensuring your game gets totally minified/tersed (so your javascript code is as unreadable as possible, and it's more difficult to just call a "click" function directly), and build in checksums in your messages (so the user can't just make socket calls directly to the server).
Once you've done that, you still have to deal with users who generate click events on the element directly with code or a plugin, or users who use a program outside the browser to generate click events above the button repeatedly. Your best defense against this is not to prevent it, but instead to detect it -- probably on the server side, by watching for users that have a sustained click rate that is not humanly possible, and then blowing up their game / temporarily banning their IP / etc.
See a related question Ways to prevent or reduce cheating for more related ideas (this question is about general client-server games, not browser games, but some of the discussion is still useful).

scraping data from a website node js

i am new to scraping data from a website, i would like to scrape the level number from: https://fortnitetracker.com/profile/pc/Twitch.BadGuyBen, i have tried using cheerio and request for this task and im not sure if im using the right selector maybe some tips on what i should do. this is my code:
var request = require('request');
var cheerio = require('cheerio');
var options = {
  url: `https://fortnitetracker.com/profile/pc/Twitch.BadGuyBen`,
  method: 'GET'
}
// Fetches the profile page and logs the text of the `.value` element found
// under the first child of every `.top-stats` element.
request(options, function (error, response, body) {
  if (error) {
    // On a failed request there is no body to parse.
    console.error(error);
    return;
  }
  var $ = cheerio.load(body);
  // each(), not filter(): the callback only has side effects.
  $('.top-stats').each(function () {
    var level = $(this).children().first().find('.value').text();
    console.log(level);
  });
});
again i am not sure if i have even selected the right class much appreciated.
EDIT:
also '.top-stats' is present further on
website open in chrome dev tools
other .top-stats class
You can't use request to get the body since the stats are displayed using javascript. You will have to use something like puppeteer to request the page and execute the javascript and then scrape the stats.

Is it possible to have a one way socket.io connection?

My application only needs socket.io to send data from the server to the client. To prevent Denial of Service attacks, I want to disconnect the client if it tries to emit data. Is this possible?
I've looked at some stackoverflow questions:
force client disconnect from server with socket.io and nodejs
How to protect against distributed denial-of-service attacks in Node.js with Socket.io?
But I've not been able to find a working solution.
There is an option to catch all events (from here: Socket.io Client: respond to all events with one handler?).
Then on any event you will just disconnect client on server side.
var socket = io.connect();
var globalEvent = "*";
// Replacement for socket.$emit that, besides the handlers registered for
// `name`, also invokes the handlers registered for the catch-all event "*".
// Pass 0 dispatches to the named handlers (arguments without the name),
// pass 1 to the catch-all handlers (event name as the first argument).
// Returns false when there is no handler table or a handler entry is
// neither a function nor an array; true otherwise.
socket.$emit = function (name) {
  if (!this.$events) return false;
  for (var pass = 0; pass < 2; ++pass) {
    if (pass === 0 && name === globalEvent) continue;
    var args = Array.prototype.slice.call(arguments, 1 - pass);
    var handler = this.$events[pass === 0 ? name : globalEvent];
    if (!handler) handler = [];
    if ('function' === typeof handler) handler.apply(this, args);
    else if (io.util.isArray(handler)) {
      var listeners = handler.slice();
      // Separate index variable: `var` is function-scoped, so reusing the
      // outer counter here would cut the outer loop short and skip the
      // catch-all pass.
      for (var j = 0, l = listeners.length; j < l; j++)
        listeners[j].apply(this, args);
    } else return false;
  }
  return true;
};
socket.on(globalEvent, function (event) {
  //Force disconnect
  socket.disconnect();
});
This probably isn't very helpful, but the best I've heard of doing is Comet streams. It's an older method, and a lot of people don't like it (myself included), but it's an option for one-way server to client updates.
Essentially, on the client side you have an iframe that connects to the server, and the server sends back a response in the form of a multipart response, occasionally sending back script tags with bits of stuff to execute. So, a trivial (and probably broken) example would be this:
<!--index.html-->
<html>
<body>
<iframe src="/comet/status"></iframe>
</body>
</html>
And then the server code...
// server code (I like Node.JS)
app.get('/comet/status', function (req, res) {
  // A function that does a lot of work,
  // and occasionally calls a callback with progress
  doWork(function (progress) {
    // Embed the current value in the script. Left inside the string literal,
    // `progress` would be looked up in the browser, where it does not exist.
    // "<" is escaped so the value cannot close the <script> element.
    var literal = String(JSON.stringify(progress)).replace(/</g, '\\u003c');
    res.write('<script>console.log("Progress: " + ' + literal + ');</script>');
  });
  // NOTE(review): if doWork reports progress asynchronously, this ends the
  // response before any update is written; end it when doWork signals
  // completion instead (that signal is not shown in this example).
  res.end();
});
Like I said, this is a pretty incomplete example, but it's a way to accomplish what you're looking for, even if in an older way. Instead of console logging, you'd probably update an element that displays progress.

Using Node.js as a simple web server

I want to run a very simple HTTP server. Every GET request to example.com should get index.html served to it but as a regular HTML page (i.e., same experience as when you read normal web pages).
Using the code below, I can read the content of index.html. How do I serve index.html as a regular web page?
var http = require('http');
var fs = require('fs');
// index.html is read once at startup and reused for every request.
var index = fs.readFileSync('index.html');
// Answers every request with the cached file, labelled as text/plain.
function serveIndex(req, res) {
  res.writeHead(200, {'Content-Type': 'text/plain'});
  res.end(index);
}
http.createServer(serveIndex).listen(9615);
One suggestion below is complicated and requires me to write a get line for each resource (CSS, JavaScript, images) file I want to use.
How can I serve a single HTML page with some images, CSS and JavaScript?
Simplest Node.js server is just:
$ npm install http-server -g
Now you can run a server via the following commands:
$ cd MyApp
$ http-server
If you're using NPM 5.2.0 or newer, you can use http-server without installing it with npx. This isn't recommended for use in production but is a great way to quickly get a server running on localhost.
$ npx http-server
Or, you can try this, which opens your web browser and enables CORS requests:
$ http-server -o --cors
For more options, check out the documentation for http-server on GitHub, or run:
$ http-server --help
Lots of other nice features and brain-dead-simple deployment to NodeJitsu.
Feature Forks
Of course, you can easily top up the features with your own fork. You might find it's already been done in one of the existing 800+ forks of this project:
https://github.com/nodeapps/http-server/network
Light Server: An Auto Refreshing Alternative
A nice alternative to http-server is light-server. It supports file watching and auto-refreshing and many other features.
$ npm install -g light-server
$ light-server
Add to your directory context menu in Windows Explorer
reg.exe add HKCR\Directory\shell\LightServer\command /ve /t REG_EXPAND_SZ /f /d "\"C:\nodejs\light-server.cmd\" \"-o\" \"-s\" \"%V\""
Simple JSON REST server
If you need to create a simple REST server for a prototype project then json-server might be what you're looking for.
Auto Refreshing Editors
Most web page editors and IDE tools now include a web server that will watch your source files and auto refresh your web page when they change.
I use Live Server with Visual Studio Code.
The open source text editor Brackets also includes a NodeJS static web server. Just open any HTML file in Brackets, press "Live Preview" and it starts a static server and opens your browser at the page. The browser will auto refresh whenever you edit and save the HTML file. This especially useful when testing adaptive web sites. Open your HTML page on multiple browsers/window sizes/devices. Save your HTML page and instantly see if your adaptive stuff is working as they all auto refresh.
Web / SPA / PWA / Mobile / Desktop / Browser Ext Web Developers
Some SPA frameworks include a built in version of the Webpack DevServer that can detect source file changes and trigger an incremental rebuild and patch (called hot reloading) of your SPA or PWA web app. Here's a few popular SPA frameworks that can do this.
VueJS Developers
For VueJS developers, a favorite is Quasar Framework that includes the Webpack DevServer out of the box with switches to support server-side rendering (SSR) and proxy rules to cure your CORS issues. It includes a large number of optimized components designed to adapt for both Mobile and Desktop. These allows you to build one app for ALL platforms (SPA, SPA+SSR, PWA, PWA+SSR, Cordova and Capacitor Mobile AppStore apps, Electron Desktop Node+VueJS apps and even Browser extensions).
Another popular one is NuxtJS that also supports static HTML/CSS code generation as well as SSR or no-SSR build modes with plugins for other UI component suites.
React Framework Developers
ReactJS developers can also setup hot reloading.
Cordova/Capacitor + Ionic Framework Developers
Ionic is a mobile-only hybrid component framework that now supports VueJS, React and Angular development. A local server with auto refresh features is baked into the ionic tool. Just run ionic serve from your app folder. Even better ... ionic serve --lab to view auto-refreshing side by side views of both iOS and Android.
Note: This answer is from 2011. However, it is still valid.
You can use Connect and ServeStatic with Node.js for this:
Install connect and serve-static with NPM
$ npm install connect serve-static
Create server.js file with this content:
var connect = require('connect');
var serveStatic = require('serve-static');
// Serve the files in this script's directory on port 8080.
var app = connect();
app.use(serveStatic(__dirname));
app.listen(8080, function () {
  console.log('Server running on 8080...');
});
Run with Node.js
$ node server.js
You can now go to http://localhost:8080/yourfile.html
Check out this gist. I'm reproducing it here for reference, but the gist has been regularly updated.
Node.JS static file web server. Put it in your path to fire up servers in any directory, takes an optional port argument.
var http = require("http"),
    url = require("url"),
    path = require("path"),
    fs = require("fs"),
    port = process.argv[2] || 8888;

// Sends a complete plain-text response with the given status code.
function sendText(response, status, body) {
  response.writeHead(status, {"Content-Type": "text/plain"});
  response.write(body);
  response.end();
}

// Static file server rooted at the current working directory; a request
// for a directory is answered with that directory's index.html.
http.createServer(function(request, response) {
  var uri = url.parse(request.url).pathname;
  var filename = path.join(process.cwd(), uri);

  fs.exists(filename, function(exists) {
    if (!exists) {
      sendText(response, 404, "404 Not Found\n");
      return;
    }

    if (fs.statSync(filename).isDirectory()) {
      filename += '/index.html';
    }

    fs.readFile(filename, "binary", function(err, file) {
      if (err) {
        sendText(response, 500, err + "\n");
        return;
      }

      // No Content-Type header is sent for the file itself.
      response.writeHead(200);
      response.write(file, "binary");
      response.end();
    });
  });
}).listen(parseInt(port, 10));
console.log("Static file server running at\n => http://localhost:" + port + "/\nCTRL + C to shutdown");
Update
The gist does handle css and js files. I've used it myself. Using read/write in "binary" mode isn't a problem. That just means that the file isn't interpreted as text by the file library and is unrelated to content-type returned in the response.
The problem with your code is you're always returning a content-type of "text/plain". The above code does not return any content-type, but if you're just using it for HTML, CSS, and JS, a browser can infer those just fine. No content-type is better than a wrong one.
Normally the content-type is a configuration of your web server. So I'm sorry if this doesn't solve your problem, but it worked for me as a simple development server and thought it might help some other people. If you do need correct content-types in the response, you either need to explicitly define them as joeytwiddle has or use a library like Connect that has sensible defaults. The nice thing about this is that it's simple and self-contained (no dependencies).
But I do feel your issue. So here is the combined solution.
var http = require("http"),
    url = require("url"),
    path = require("path"),
    fs = require("fs"),
    port = process.argv[2] || 8888;

// Content types for the extensions this server knows; any other file is
// sent without a Content-Type header.
var contentTypesByExtension = {
  '.html': "text/html",
  '.css': "text/css",
  '.js': "text/javascript"
};

// Sends a complete plain-text response with the given status code.
function sendText(response, status, body) {
  response.writeHead(status, {"Content-Type": "text/plain"});
  response.end(body);
}

// Static file server rooted at the current working directory. Optional
// first CLI argument: the port (default 8888).
http.createServer(function(request, response) {
  var root = process.cwd();
  var uri = url.parse(request.url).pathname;
  var filename = path.join(root, uri);

  // Refuse paths that resolve outside the served directory (e.g. "/../x").
  var relative = path.relative(root, filename);
  if (relative.split(path.sep)[0] === '..' || path.isAbsolute(relative)) {
    sendText(response, 403, "403 Forbidden\n");
    return;
  }

  // fs.stat replaces the deprecated fs.exists plus a blocking statSync.
  fs.stat(filename, function(statErr, stats) {
    if (statErr) {
      sendText(response, 404, "404 Not Found\n");
      return;
    }

    if (stats.isDirectory()) filename = path.join(filename, 'index.html');

    fs.readFile(filename, function(err, file) {
      if (err) {
        sendText(response, 500, err + "\n");
        return;
      }

      var headers = {};
      var contentType = contentTypesByExtension[path.extname(filename)];
      if (contentType) headers["Content-Type"] = contentType;
      response.writeHead(200, headers);
      response.end(file);
    });
  });
}).listen(parseInt(port, 10));
console.log("Static file server running at\n => http://localhost:" + port + "/\nCTRL + C to shutdown");
You don't need express. You don't need connect. Node.js does http NATIVELY. All you need to do is return a file dependent on the request:
var http = require('http')
var url = require('url')
var fs = require('fs')
// Streams the file named by the request path into the response.
function handleRequest(request, response) {
  var pathname = url.parse(request.url).pathname
  response.writeHead(200)
  var fileStream = fs.createReadStream(pathname)
  fileStream.pipe(response) // do NOT use fs's sync methods ANYWHERE on production (e.g readFileSync)
}
http.createServer(handleRequest).listen(9615)
A fuller example that ensures requests can't access files outside a base directory, and does proper error handling:
var http = require('http')
var url = require('url')
var fs = require('fs')
var path = require('path')
var baseDirectory = __dirname // or whatever base directory you want
var port = 9615
// Streams files from baseDirectory: 404 when the file cannot be opened,
// 500 on an unexpected exception.
http.createServer(function (request, response) {
  try {
    var requestUrl = url.parse(request.url)
    // Force a leading "/" before normalizing so ".." segments are clamped at
    // the root; the joined path therefore stays inside baseDirectory
    var fsPath = path.join(baseDirectory, path.normalize('/' + requestUrl.pathname))
    var fileStream = fs.createReadStream(fsPath)
    fileStream.on('open', function() {
      response.writeHead(200)
      fileStream.pipe(response)
    })
    fileStream.on('error', function(e) {
      // assume the file doesn't exist; headers can only be written once
      if (!response.headersSent) response.writeHead(404)
      response.end()
    })
  } catch(e) {
    if (!response.headersSent) response.writeHead(500)
    response.end() // end the response so browsers don't hang
    console.log(e.stack)
  }
}).listen(port)
console.log("listening on port "+port)
I think the part you're missing right now is that you're sending:
Content-Type: text/plain
If you want a web browser to render the HTML, you should change this to:
Content-Type: text/html
Step1 (inside command prompt [I hope you cd TO YOUR FOLDER]) : npm install express
Step 2: Create a file server.js
var fs = require("fs");
var host = "127.0.0.1";
var port = 1337;
var express = require("express");
var app = express();
// Static files are served from ROOT/public.
var publicDir = __dirname + "/public";
app.use(express.static(publicDir)); //use static files in ROOT/public folder
// Handler for the root URL.
function sayHello(request, response) {
  response.send("Hello!!");
}
app.get("/", sayHello); //root dir
app.listen(port, host);
Please note, you should add WATCHFILE (or use nodemon) too. Above code is only for a simple connection server.
STEP 3: node server.js or nodemon server.js
There is now an easier method if you just want to host a simple HTTP server.
npm install -g http-server
and open our directory and type http-server
https://www.npmjs.org/package/http-server
The fast way:
var express = require('express');
var app = express();
// Directory whose files are served at the site root.
var publicDir = __dirname + '/../public'; // ← adjust
app.use('/', express.static(publicDir));
function onListening() {
  console.log('listening');
}
app.listen(3000, onListening);
Your way:
var http = require('http');
var fs = require('fs');
// Skeleton handler: logs the requested URL and answers every request
// with a fixed 'ok' body.
function onRequest(req, res) {
  console.dir(req.url);
  // will get you '/' or 'index.html' or 'css/styles.css' ...
  // • you need to isolate extension
  // • have a small mimetype lookup array/object
  // • only there and then reading the file
  // • delivering it after setting the right content type
  var headers = {'Content-Type': 'text/html'};
  res.writeHead(200, headers);
  res.end('ok');
}
http.createServer(onRequest).listen(3001);
Rather than dealing with a switch statement, I think it's neater to lookup the content type from a dictionary:
var contentTypesByExtension = {
'html': "text/html",
'js': "text/javascript"
};
...
var contentType = contentTypesByExtension[fileExtension] || 'text/plain';
You can just type those in your shell
npx serve
Repo: https://github.com/zeit/serve.
You don't need to use any npm modules to run a simple server, there's a very tiny library called "npm Free Server" for Node:
50 lines of code
Outputs if you are requesting a file or a folder
Gives it a red or green color if it failed or worked
Less than 1KB in size (minified)
Fully commented so you can tweak it as needed
npm-free-server (on GitHub)
This is basically an updated version of the accepted answer for connect version 3:
var connect = require('connect');
var serveStatic = require('serve-static');
// index.html is configured as the index file for directory requests.
var staticOptions = {'index': ['index.html']};
var app = connect();
var staticMiddleware = serveStatic(__dirname, staticOptions);
app.use(staticMiddleware);
app.listen(3000);
I also added a default option so that index.html is served as a default.
If you have Node installed on your PC, you probably have NPM too. If you don't need any NodeJS-specific features, you can use the serve package for this:
1 - Install the package on your PC:
npm install -g serve
2 - Serve your static folder:
serve <path>
d:> serve d:\StaticSite
It will show you which port your static folder is being served, just navigate to the host like:
http://localhost:3000
I found an interesting library on npm that might be of some use to you. It's called mime (npm install mime or https://github.com/broofa/node-mime) and it can determine the MIME type of a file. Here's an example of a web server I wrote using it:
var mime = require("mime"),http = require("http"),fs = require("fs");
// Static file server rooted at this script's directory: 404 for a missing
// path, 403 for a directory without index.html, otherwise the file with a
// Content-Type looked up from its name.
http.createServer(function (req, resp) {
  // unescape() allows filenames with spaces and encoded characters.
  var filePath = unescape(__dirname + req.url)
  var code = 200
  if (fs.existsSync(filePath)) {
    if (fs.lstatSync(filePath).isDirectory()) {
      // Insert the separator when the URL has no trailing slash.
      var indexPath = filePath + (filePath.slice(-1) === "/" ? "" : "/") + "index.html"
      if (fs.existsSync(indexPath)) {
        filePath = indexPath
      } else {
        code = 403
      }
    }
  } else {
    code = 404
  }
  if (code === 200) {
    // mime v1 exposes lookup(); v2 and later expose getType() instead.
    var lookup = mime.getType || mime.lookup
    var type = lookup.call(mime, filePath) || "application/octet-stream"
    resp.writeHead(code, {"Content-Type": type})
    fs.readFile(filePath, function (e, r) {
      resp.end(r);
    })
  } else {
    // Exactly one response per request: the error branches no longer fall
    // through to a second writeHead.
    resp.writeHead(code, {"Content-Type": "text/plain"});
    resp.end(code+" "+http.STATUS_CODES[code]+" "+req.url);
  }
  console.log("GET "+code+" "+http.STATUS_CODES[code]+" "+req.url)
}).listen(9000,"localhost");
console.log("Listening at http://localhost:9000")
This will serve any regular text or image file (.html, .css, .js, .pdf, .jpg, .png, .m4a and .mp3 are the extensions I've tested, but in theory it should work for everything).
Developer Notes
Here is an example of output that I got with it:
Listening at http://localhost:9000
GET 200 OK /cloud
GET 404 Not Found /cloud/favicon.ico
GET 200 OK /cloud/icon.png
GET 200 OK /
GET 200 OK /501.png
GET 200 OK /cloud/manifest.json
GET 200 OK /config.log
GET 200 OK /export1.png
GET 200 OK /Chrome3DGlasses.pdf
GET 200 OK /cloud
GET 200 OK /-1
GET 200 OK /Delta-Vs_for_inner_Solar_System.svg
Notice the unescape function in the path construction. This is to allow for filenames with spaces and encoded characters.
Edit:
Node.js sample app Node Chat has the functionality you want.
In it's README.textfile
3. Step is what you are looking for.
step1
create a server that responds with hello world on port 8002
step2
create an index.html and serve it
step3
introduce util.js
change the logic so that any static file is served
show 404 in case no file is found
step4
add jquery-1.4.2.js
add client.js
change index.html to prompt user for nickname
Here is the server.js
Here is the util.js
var http = require('http');
var fs = require('fs');
// index.html is read once at startup and reused for every request.
var index = fs.readFileSync('index.html');
// Answers every request with the cached page.
function serveIndex(req, res) {
  // change the to 'text/plain' to 'text/html' it will work as your index page
  var headers = {'Content-Type': 'text/html'};
  res.writeHead(200, headers);
  res.end(index);
}
http.createServer(serveIndex).listen(9615);
I think you were searching for this. In your index.html, simply fill it with normal HTML code - whatever you want to render on it, like:
<html>
<h1>Hello world</h1>
</html>
The way I do it is to first of all install node static server globally via
npm install node-static -g
then navigate to the directory that contains your html files and start the static server with static.
Go to the browser and type localhost:8080/"yourHtmlFile".
Basically copying the accepted answer, but avoiding creating a js file.
$ node
> var connect = require('connect'); connect().use(static('.')).listen(8000);
Found it very convenient.
Update
As of latest version of Express, serve-static has become a separate middleware. Use this to serve:
require('http').createServer(require('serve-static')('.')).listen(3000)
Install serve-static first.
I use below code to start a simple web server which render default html file if no file mentioned in Url.
var http = require('http'),
    fs = require('fs'),
    url = require('url'),
    rootFolder = '/views/',
    defaultFileName = '/views/5 Tips on improving Programming Logic Geek Files.htm';
// Serves files from <script dir>/views/; a request for "/" gets the
// default file.
http.createServer(function(req, res){
  var fileName = url.parse(req.url).pathname;
  // If no file name in Url, use default file name
  fileName = (fileName == "/") ? defaultFileName : rootFolder + fileName;
  var decoded;
  try {
    decoded = decodeURIComponent(fileName);
  } catch (e) {
    // A malformed %-escape must not crash the server.
    res.writeHead(400);
    res.end();
    return;
  }
  // NOTE(review): the decoded path is not checked for ".." segments;
  // confine it to the views folder before exposing this server.
  // Read as a Buffer: a 'binary' string written back with the default
  // encoding is re-encoded as UTF-8, which corrupts bytes >= 0x80 and
  // makes Content-Length wrong.
  fs.readFile(__dirname + decoded, function(err, content){
    if (err) {
      res.writeHead(404);
      res.end();
      return;
    }
    res.writeHead(200, {'Content-Length': content.length});
    res.end(content);
  });
}).listen(8800);
It will render all js, css and image file, along with all html content.
Agree on statement "No content-type is better than a wrong one"
from w3schools
it is pretty easy to create a node server to serve any file that is requested, and you dont need to install any packages for it
var http = require('http');
var url = require('url');
var fs = require('fs');
// Serves the file named by the request path, relative to the current
// directory; every response is labelled text/html.
http.createServer(function (req, res) {
  var parsed = url.parse(req.url, true);
  var filename = "." + parsed.pathname;
  fs.readFile(filename, function (err, data) {
    var status = err ? 404 : 200;
    res.writeHead(status, {'Content-Type': 'text/html'});
    if (err) {
      return res.end("404 Not Found");
    }
    res.write(data);
    return res.end();
  });
}).listen(8080);
http://localhost:8080/file.html
will serve file.html from disk
var http = require('http');
var fs = require('fs');
// index.html is read once at startup and reused for every request.
var index = fs.readFileSync('index.html');
http.createServer(function (req, res) {
  // The media type must be the full 'text/html'; a bare 'html' is not a
  // valid Content-Type value.
  res.writeHead(200, {'Content-Type': 'text/html'});
  res.end(index);
}).listen(9615);
//Just Change The CONTENT TYPE to 'text/html'
I'm not sure if this is exactly what you wanted, however, you can try changing:
{'Content-Type': 'text/plain'}
to this:
{'Content-Type': 'text/html'}
This will have the browser client display the file as html instead of plain text.
Express function sendFile does exactly what you need, and since you want web server functionality from node, express comes as natural choice and then serving static files becomes as easy as :
res.sendFile('/path_to_your/index.html')
read more here : https://expressjs.com/en/api.html#res.sendFile
A small example with express web server for node:
var express = require('express');
var app = express();
var path = require('path');
// Location of the page sent for the root URL.
var indexFile = path.join(__dirname + '/index.html');
function sendIndex(req, res) {
  res.sendFile(indexFile);
}
app.get('/', sendIndex);
app.listen(8080);
run this, and navigate to http://localhost:8080
To expand on this to allow you to serve static files like css and images, here's another example :
var express = require('express');
var app = express();
var path = require('path');
// Mount the css folder under the /css URL prefix so that a link such as
// /css/style.css resolves to <script dir>/css/style.css. Mounted at the
// root, the same file would only be reachable as /style.css.
app.use('/css', express.static(path.join(__dirname, 'css')));
// The root URL returns index.html from this script's directory.
app.get('/', function(req, res) {
  res.sendFile(path.join(__dirname, 'index.html'));
});
app.listen(8080);
so create a subfolder called css, put your static content in it, and it will be available to your index.html for easy reference like :
<link type="text/css" rel="stylesheet" href="/css/style.css" />
Notice relative path in href!
voila!
A slightly more verbose express 4.x version but that provides directory listing, compression, caching and requests logging in a minimal number of lines
var express = require('express');
var compress = require('compression');
var directory = require('serve-index');
var morgan = require('morgan'); //logging for express
var app = express();
var oneDay = 86400000;
var servedDir = 'filesdir';
var listenPort = process.env.PORT || 8000;
// Middleware is registered in this order: compression, request logging,
// static files (maxAge of one day), then the directory index.
app.use(compress());
app.use(morgan());
app.use(express.static(servedDir, { maxAge: oneDay }));
app.use(directory(servedDir, {'icons': true}));
app.listen(listenPort);
console.log("Ready To serve files !")
Crazy amount of complicated answers here. If you don't intend to process nodeJS files/database but just want to serve static html/css/js/images as your question suggest then simply install the pushstate-server module or similar;
Here's a "one liner" that will create and launch a mini site. Simply paste that entire block in your terminal in the appropriate directory.
mkdir mysite; \
cd mysite; \
npm install pushstate-server --save; \
mkdir app; \
touch app/index.html; \
echo '<h1>Hello World</h1>' > app/index.html; \
touch server.js; \
echo "var server = require('pushstate-server');server.start({ port: 3000, directory: './app' });" > server.js; \
node server.js
Open browser and go to http://localhost:3000. Done.
The server will use the app dir as the root to serve files from. To add additional assets just place them inside that directory.
There are already some great solutions for a simple nodejs server.
There is a one more solution if you need live-reloading as you made changes to your files.
npm install lite-server -g
navigate your directory and do
lite-server
it will open browser for you with live-reloading.
The simpler version which I've came across is as following. For education purposes, it is best, because it does not use any abstract libraries.
var http = require('http'),
url = require('url'),
path = require('path'),
fs = require('fs');
// Lookup table from a lower-case file extension (without the leading dot)
// to the Content-Type value sent for files with that extension.
var mimeTypes = {
  "html": "text/html",
  "txt": "text/plain",
  "css": "text/css",
  "js": "text/javascript",
  "json": "application/json",
  "jpeg": "image/jpeg",
  "jpg": "image/jpeg",
  "png": "image/png",
  "gif": "image/gif",
  "svg": "image/svg+xml",
  "ico": "image/x-icon",
  "mp3": "audio/mpeg",
  "mp4": "video/mp4"
};
// Static file server: maps the request path onto a file below the current
// working directory and streams it back with a Content-Type looked up in
// mimeTypes. Listens on port 1337.
http.createServer(function(req, res) {
  // Send a short plain-text response with the given status code.
  function sendText(status, body) {
    res.writeHead(status, {'Content-Type': 'text/plain'});
    res.end(body);
  }

  var root = process.cwd();
  var uri;
  try {
    // Decode percent-escapes so names such as "my%20file.png" resolve on disk.
    uri = decodeURIComponent(url.parse(req.url).pathname || '/');
  } catch (e) {
    // decodeURIComponent throws a URIError on malformed escape sequences.
    sendText(400, '400 Bad Request\n');
    return;
  }

  var filename = path.join(root, uri);
  var relative = path.relative(root, filename);
  // Refuse NUL bytes (fs calls throw on them) and any path that path.join()
  // normalized to a location outside the served directory (e.g. "/../x").
  if (uri.indexOf('\0') !== -1 ||
      relative === '..' ||
      relative.indexOf('..' + path.sep) === 0 ||
      path.isAbsolute(relative)) {
    sendText(403, '403 Forbidden\n');
    return;
  }

  fs.stat(filename, function(err, stats) {
    // Missing paths and non-files (e.g. directories) are both "not found".
    if (err || !stats.isFile()) {
      console.log("not exists: " + filename);
      sendText(404, '404 Not Found\n');
      return;
    }

    // Unknown extensions fall back to a generic binary type instead of an
    // undefined header value; hasOwnProperty guards against inherited keys.
    var extension = path.extname(filename).slice(1).toLowerCase();
    var mimeType = Object.prototype.hasOwnProperty.call(mimeTypes, extension)
      ? mimeTypes[extension]
      : 'application/octet-stream';

    var fileStream = fs.createReadStream(filename);
    // Only commit to a 200 once the file has actually been opened.
    fileStream.on('open', function() {
      res.writeHead(200, {'Content-Type': mimeType});
      fileStream.pipe(res);
    });
    // Without this handler a read failure would be an uncaught 'error' event.
    fileStream.on('error', function(streamErr) {
      console.log("read error: " + filename + ": " + streamErr.message);
      if (res.headersSent) {
        res.destroy();
      } else {
        sendText(500, '500 Internal Server Error\n');
      }
    });
  }); // end fs.stat
}).listen(1337);
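Now open the following in a browser (the server listens on port 1337):
http://127.0.0.1:1337/image.jpg
Here image.jpg should be in the directory from which you started the server, since files are resolved against the process's current working directory.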
Hope this helps someone :)
local-web-server is definitely worth a look! Here's an excerpt from the readme:
local-web-server
A lean, modular web server for rapid full-stack development.
Supports HTTP, HTTPS and HTTP2.
Small and 100% personalisable. Load and use only the behaviour required by your project.
Attach a custom view to personalise how activity is visualised.
Programmatic and command-line interfaces.
Use this tool to:
Build any type of front-end web application (static, dynamic, Single Page App, Progressive Web App, React etc).
Prototype a back-end service (REST API, microservice, websocket, Server Sent Events service etc).
Monitor activity, analyse performance, experiment with caching strategy etc.
Local-web-server is a distribution of lws bundled with a "starter pack" of useful middleware.
Synopsis
This package installs the ws command-line tool (take a look at the usage guide).
Static web site
Running ws without any arguments will host the current directory as a static web site. Navigating to the server will render a directory listing or your index.html, if that file exists.
$ ws
Listening on http://mbp.local:8000, http://127.0.0.1:8000, http://192.168.0.100:8000
Static files tutorial.
This clip demonstrates static hosting plus a couple of log output formats - dev and stats.
Single Page Application
Serving a Single Page Application (an app with client-side routing, e.g. a React or Angular app) is as trivial as specifying the name of your single page:
$ ws --spa index.html
With a static site, requests for typical SPA paths (e.g. /user/1, /login) would return 404 Not Found as a file at that location does not exist. However, by marking index.html as the SPA you create this rule:
If a static file is requested (e.g. /css/style.css) then serve it, if not (e.g. /login) then serve the specified SPA and handle the route client-side.
SPA tutorial.
URL rewriting and proxied requests
Another common use case is to forward certain requests to a remote server.
The following command proxies blog post requests from any path beginning with /posts/ to https://jsonplaceholder.typicode.com/posts/. For example, a request for /posts/1 would be proxied to https://jsonplaceholder.typicode.com/posts/1.
$ ws --rewrite '/posts/(.*) -> https://jsonplaceholder.typicode.com/posts/$1'
Rewrite tutorial.
This clip demonstrates the above plus use of --static.extensions to specify a default file extension and --verbose to monitor activity.
HTTPS and HTTP2
For HTTPS or HTTP2, pass the --https or --http2 flags respectively. See the wiki for further configuration options and a guide on how to get the "green padlock" in your browser.
$ ws --http2
Listening at https://mba4.local:8000, https://127.0.0.1:8000, https://192.168.0.200:8000
Most of the answers above describe very nicely how content is served. What I was additionally looking for was a listing of the directory, so that its other contents can be browsed. Here is my solution for future readers:
'use strict';
// Serves the files under reports/ and falls back to a directory listing
// (with icons) for requests that the static handler does not answer.
var finalhandler = require('finalhandler');
var http = require('http');
var serveIndex = require('serve-index');
var serveStatic = require('serve-static');
var appRootDir = require('app-root-dir').get();
var log = require(appRootDir + '/log/bunyan.js');
// Accept the conventional upper-case PORT as well as the original lower-case
// "port" variable; environment variable names are case-sensitive on POSIX.
var PORT = process.env.PORT || process.env.port || 8097;
// Serve directory indexes for reports folder (with icons)
var index = serveIndex('reports/', {'icons': true});
// Serve up files under the folder
var serve = serveStatic('reports/');
// Create server
var server = http.createServer(function onRequest(req, res){
    // finalhandler produces the final response (e.g. 404 or error) when
    // neither middleware handled the request.
    var done = finalhandler(req, res);
    serve(req, res, function onNext(err) {
        if (err)
            return done(err);
        index(req, res, done);
    });
});
// Pass a function as the callback so the message is logged once the server
// is listening, rather than calling log.info immediately.
server.listen(PORT, function onListening() {
    log.info('Server listening on: ', PORT);
});
This is one of the fastest solutions I use to quickly view web pages:
sudo npm install ripple-emulator -g
From then on just enter the directory of your html files and run
ripple emulate
then change the device to Nexus 7 landscape.
Node.js webserver from scratch
No 3rd-party frameworks; Allows query string; Adds trailing slash; Handles 404
Create a public_html subfolder and place all of your content in it.
Gist: https://gist.github.com/veganaize/fc3b9aa393ca688a284c54caf43a3fc3
var fs = require('fs');

// Minimal static web server: serves files from ./public_html, ignores the
// query string when locating files, redirects directory requests to their
// trailing-slash form, serves index.html for directories and answers 404
// for anything that cannot be found or read.
require('http').createServer(function(request, response) {
    // Split the request URL into its path and (optional) query string.
    var queryIndex = request.url.indexOf('?');
    var urlPath = queryIndex === -1 ? request.url : request.url.slice(0, queryIndex);
    var query = queryIndex === -1 ? '' : request.url.slice(queryIndex);

    // Refuse ".." segments so a request cannot climb out of public_html.
    if (urlPath.split(/[\\/]/).indexOf('..') !== -1) return respond(403);

    var path = 'public_html' + urlPath;

    fs.stat(path, function(bad_path, path_stat) {
        if (bad_path) respond(404);
        else if (path_stat.isDirectory() && path.slice(-1) !== '/') {
            // Add the trailing slash and keep the query string on the redirect.
            response.setHeader('Location', urlPath + '/' + query);
            respond(301);
        } else fs.readFile(path.slice(-1) === '/' ? path + 'index.html' : path,
            function(bad_file, file_content) {
                if (bad_file) respond(404);
                else respond(200, file_content);
            });
    });

    // Finish the response with the given status code and optional body.
    function respond(status, content) {
        response.statusCode = status;
        response.end(content);
    }
}).listen(80, function(){console.log('Server running on port 80...')});

Resources