I want to make a script that search for a variable in a external page.
For example, i want the script to visit this page: Here,
Check whether a server is available and notify me somehow.
Can someone help me with this?
My solution is to recuperate the text of the source of a page. Then search for any strings that we want in that source. Just add another search call to extend the search. Works in Firefox.
<script>
function print(text) {
alert(text);
}
function search(where, what) {
var position = where.indexOf(what)
if ( position !== -1) {
print("Found \"" + what + "\" at " + position);
return;
} //else
print("\"" + what + "\" not found");
}
var xhr = new XMLHttpRequest();
xhr.open("GET", "https://billing.dacentec.com/hostbill/index.php?/cart/dedicated-servers/", true);
xhr.onload = function (e) {
if (xhr.readyState === 4) {
if (xhr.status === 200) {
var externalText = xhr.responseText;
search(externalText, "technology");
search(externalText, "secure");
search(externalText, "we");
} else {
print(xhr.statusText);
}
}
};
xhr.onerror = function (e) {
print(xhr.statusText);
};
xhr.send(null);
</script>
Related
I need to crawl all the pages on a site (the crwling part works fine.) and so i need to run THIS script on my server using node.js. I tried implementing the following logic:
var request = require('request');
var cheerio = require('cheerio');
var URL = require('url-parse');
var jsdom = require("jsdom");
var { JSDOM } = jsdom;
var START_URL = "http://balneol.com/";
var SEARCH_FONT = "helvetica";
var MAX_PAGES_TO_VISIT = 100000;
var pagesVisited = {};
var numPagesVisited = 0;
var pagesToVisit = [];
var url = new URL(START_URL);
var baseUrl = url.protocol + "//" + url.hostname;
pagesToVisit.push(START_URL);
crawl();
function crawl() {
if(numPagesVisited >= MAX_PAGES_TO_VISIT) {
console.log("Reached max limit of number of pages to visit.");
return;
}
var nextPage = pagesToVisit.pop();
if (nextPage in pagesVisited) {
// We've already visited this page, so repeat the crawl
crawl();
} else {
// New page we haven't visited
visitPage(nextPage, crawl);
}
}
function visitPage(url, callback) {
// Add page to our set
pagesVisited[url] = true;
numPagesVisited++;
// Make the request
console.log("Visiting page " + url);
request(url, function(error, response, body) {
// Check status code (200 is HTTP OK)
console.log("Status code: " + response.statusCode);
if(response.statusCode !== 200) {
callback();
return;
}
// Parse the window.document body
// var window = jsdom.jsdom(body).defaultView();
var { window } = new JSDOM(body);
//var $ = cheerio.load(body);
var helveticaFound = searchForHelvetica(window, 'font-family');
if(helveticaFound) {
console.log('Word ' + SEARCH_FONT + ' found at page ' + url);
} else {
collectInternalLinks($);
// In this short program, our callback is just calling crawl()
// callback();
}
});
}
function searchForHelvetica( window , css) {
if(typeof getComputedStyle == "undefined")
getComputedStyle= function(elem){
return elem.currentStyle;
}
var who, hoo, values= [], val,
nodes= window.document.body.getElementsByTagName('*'),
L= nodes.length;
for(var i= 0; i<L; i++){
who= nodes[i];
console.log(nodes[i]);
if(who.style){
hoo= '#'+(who.id || who.nodeName+'('+i+')');
console.log(who.style._values);
// return false;
val= who.style.fontFamily || getComputedStyle(who, '')[css];
if(val){
if(verbose) values.push([hoo, val]);
else if(values.indexOf(val)== -1) values.push(val);
// before IE9 you need to shim Array.indexOf (shown below)
}
}
}
// console.log(values);
// return values;
}
function collectInternalLinks($) {
var relativeLinks = $("a[href^='/']");
console.log("Found " + relativeLinks.length + " relative links on page");
relativeLinks.each(function() {
pagesToVisit.push(baseUrl + $(this).attr('href'));
});
}
If you see my visit page function you will see the below two lines of code:
var { window } = new JSDOM(body);
var helveticaFound = searchForHelvetica(window, 'font-family');
as you can see on the 2nd line i am passing the window object to the searchForHelvetic function.
In my searchForHelvetic function , if i console.log(nodes[i]); , i don't get the html element and hence the rest of the script does't quite run as expected. does the jsdom window differ from the window object in the browser ? how do i get the script working ? I.E. basically use the window object to run through all the pages on the page and spit out all the fonts used on the page ?
EDIT::-
To break the problem down to a micro level, if i console.log(who); inside searchForHelvetica function , i get the following result:
HTMLElement {}
HTMLDivElement {}
HTMLDivElement {}
HTMLDivElement {}
HTMLAnchorElement {}
HTMLImageElement {}
HTMLDivElement {}
HTMLFormElement {}
HTMLDivElement {}
HTMLLabelElement {}
HTMLInputElement {}
HTMLButtonElement {}
HTMLButtonElement {}
HTMLSpanElement {}
etc..
But if i were to do the same in a web browser the result world be different Eg.
nodes = window.document.body.getElementsByTagName('*');
console.log(node[1]) // <div id="mobile-menu-box" class="hide">...</div>
How do i get a similar result in node.js ?
I am desperately trying to process something like 200 screenshots in a single shot,
my first attempt was to follow the guidelines with a simple script invoked 200 times,
phantom.create()
.then(function(instance) {
console.log("1 - instance")
phInstance = instance;
return instance.createPage();
})
.then(function(page) {
console.log("2 - page")
sitepage = page;
return page.open(url);
})
.then(function(status) {
console.log("3 - render")
sitepage.property('clipRect', {top: 0, left: 0, width:3000,height:890}).then(function() {
sitepage.render(fname).then(function(finished) {
console.log("\t\t\t---> finished");
sitepage.close();
phInstance.exit();
callback({msg: 'ok'})
phantom.exit();
return;
});
});
})
this approach kinda works, but it's really overwhelming for the cpu,
the problem is related to the fact that this way of doings things leads to 200 phantom processes that quickly eats up all the memory.
A more profitable way of doing so, would be to create a single phantom instance and then drive it to open
one page at the time and render it, something that could be done with a phantom script, like so:
var content, counter, f, fs, grab_screen, img, lines, next_screen, page, system, url;
page = require('webpage').create();
system = require('system');
fs = require('fs');
content = '';
lines = [];
url = '';
img = '';
counter = 0;
page.viewportSize = {
width: 1200,
height: 800
};
page.settings.userAgent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36';
f = fs.open("sites.txt", "r");
content = f.read();
lines = content.split("\n");
grab_screen = function() {
var site;
site = lines[counter];
url = 'http://' + site + '/';
img = 'screens/' + site + '.png';
console.log("Grabbing screen for: " + url);
return page.open(url, function(status) {
return window.setTimeout(function() {
page.render(img);
counter++;
return next_screen();
}, 200);
});
};
next_screen = function() {
console.log("On to " + counter + " ...");
if (counter < lines.length) {
return grab_screen();
} else {
return phantom.exit();
}
};
next_screen();
so I was wondering how to achieve that with phantomjs-node.
I finally solved my problem with two things:
realizing that node.js is NOT multithreading.
Using a single instance of phantom, to render multiple urls.
here's how it came out:
var webshot = function(id) {
console.log('makeshot ', shots[id].url);
requestSync("POST", "http://localhost:4041/options/set", { json:{ opts:JSON.stringify(shots[id].options) } });
phInstance.createPage().then(function(_page) {
console.log("2 - page")
sitepage = _page;
return _page.open(shots[id].url);
})
.then(function(status) {
console.log("3 - render %s / %s", id, shots.length);
sitepage.property('clipRect', {top: 0, left: 0, width:1500,height:220}).then(function() {
sitepage.render(shots[id].fname).then(function(finished) {
console.log("\t\t\t---> finished");
sitepage.close();
fnames[Math.ceil(parseInt(shots[id].options.pack_id)/mt_per_snap)-1] = "localhost_" + shots[id].options.pack_id + ".png";
if(id<shots.length-1) {
id += 1;
webshot(id);
} else {
console.log("all done: %s files has been written", shots.length);
// invoke pdf generation for the pdf page
cb("files_written", { });
generatePDF();
}
return;
});
});
})
}
so, long story short: I have put the page I wanted to render in a separate script, which I feed with variables before making the shot, and this solves the "multithreading problem", afterwards I have a single variable named phInstance, that is declared as follows:
var initPhantom = function() {
phantom.create()
.then(function(instance) {
console.log("1 - instance")
phInstance = instance;
})
}
remember to kill the phantom instance once you're done, otherwise it
will stay there and suck your resources for good.
You could try something like webshot. I'm using it with async.js,
however I sometimes get Error: PhantomJS exited with return value 1.
Have not yet found out why.
async.map(
links,
function(link, cb) {
var config = {...}; // your webshot options
var folder = link; // make unique folder name from link?
var file = path.join('./', 'screenshots', folder, 'screenshot.png');
webshot(link, file, config, function(err) {
cb(err, link);
});
},
function(e, links) {
// done
}
);
Resources:
https://www.npmjs.com/package/webshot
https://www.npmjs.com/package/asyncjs
like i menthioned in the topic i'm looking for a plugin that save my marked text and could restore it after loose of focus. Like a JS Library called Rangy i used in the past. Is there such a plugin or does anyone has an idea how i could deal with this kind of problem?
Regardings Adrian
function gEBI(id) {
return document.getElementById(id);
}
var savedSel;
var savedSelActiveElement;
function saveSelection() {
if (savedSel) {
// rangy.removeMarkers(savedSel);
}
savedSel = rangy.saveSelection();
savedSelActiveElement = document.activeElement;
}
function restoreSelection() {
if (savedSel) {
rangy.restoreSelection(savedSel, true);
window.setTimeout(function() {
if (savedSelActiveElement && typeof savedSelActiveElement.focus != "undefined") {
savedSelActiveElement.focus();
}
}, 1);
}
}
$(document).ready(function()
{
try {
document.execCommand("MultipleSelection", null, true);
} catch(ex) {}
rangy.init();
// Enable buttons
var saveRestoreModule = rangy.modules.SaveRestore;
if (rangy.supported && saveRestoreModule && saveRestoreModule.supported) {
var saveButton = gEBI("saveButton");
//saveButton.disabled = false;
saveButton.ontouchstart = saveButton.onmousedown = function() {
saveSelection();
return false;
};
$('.EditorTab').mousedown(function(){
saveSelection();
return false;
});
});
The following solution would allow you to store and restore any number of ranges, but it assumes that you have not destroyed the nodes that the ranges are attached to when it comes time to restore those ranges.
var SelectionStore = (function() {
var savedRanges = {};
return {
store: function(saveIdentifier) {
var ranges = rangy.getSelection().getAllRanges();
savedRanges[saveIdentifier] = ranges;
return ranges;
},
restore: function(saveIdentifier) {
var i, selection;
selection = rangy.getSelection();
if (!savedRanges[saveIdentifier]) throw new Error('Invalid saved selection identifier used. Selection not found for ID: ' + saveIdentifier);
selection.removeAllRanges();
for (i in savedRanges[saveIdentifier]) selection.addRange(savedRanges[saveIdentifier][i]);
}
}
})();
Usage examples:
//store the current selection
SelectionStore.store('Tab1');
//restore a selection
SelectionStore.restore('Tab1');
jsfiddle link: http://jsfiddle.net/T8ee7/
When I call Knockout's subscribe method is there a way I can get both the previous and new value? Right now, I can only call get these values separately.
I want to trigger some code if the old and new value are different.
I suppose I could do the following, but it can get messy...
(http://jsfiddle.net/MV3fN/)
var sv = sv || {};
sv.PagedRequest = function (pageNumber, pageSize) {
this.pageNumber = ko.observable(pageNumber || 1);
this.numberOfPages = ko.observable(1);
this.pageSize = ko.observable(pageSize || sv.DefaultPageSize);
};
var _pagedRequest = new sv.PagedRequest();
var oldValue;
_pagedRequest.pageNumber.subscribe(function (previousValue) {
console.log("old: " + previousValue);
oldValue = previousValue;
}, _pagedRequest, "beforeChange");
_pagedRequest.pageNumber.subscribe(function (newValue) {
console.log("new: " + newValue);
if (oldValue != newValue) {
console.log("value changed!");
}
});
_pagedRequest.pageNumber(10);
_pagedRequest.pageNumber(20);
I prefer using an observable extender.
http://jsfiddle.net/neonms92/xybGG/
Extender:
ko.extenders.withPrevious = function (target) {
// Define new properties for previous value and whether it's changed
target.previous = ko.observable();
target.changed = ko.computed(function () { return target() !== target.previous(); });
// Subscribe to observable to update previous, before change.
target.subscribe(function (v) {
target.previous(v);
}, null, 'beforeChange');
// Return modified observable
return target;
}
Example Usage:
// Define observable using 'withPrevious' extension
self.hours = ko.observable().extend({ withPrevious: 1 });
// Subscribe to observable like normal
self.hours.subscribe(function () {
if (!self.hours.changed()) return; // Cancel if value hasn't changed
print('Hours changed from ' + self.hours.previous() + ' to ' + self.hours());
});
This seems to work for me
ko.observable.fn.beforeAndAfterSubscribe = function (callback, target) {
var _oldValue;
this.subscribe(function (oldValue) {
_oldValue = oldValue;
}, null, 'beforeChange');
this.subscribe(function (newValue) {
callback.call(target, _oldValue, newValue);
});
};
See more at: http://ideone.com/NPpNcB#sthash.wJn57567.dpuf
http://jsfiddle.net/MV3fN/3/
var sv = sv || {};
sv.PagedRequest = function (pageNumber, pageSize) {
var self = this;
self.pageNumber = ko.observable(pageNumber || 1);
self.numberOfPages = ko.observable(1);
self.pageSize = ko.observable(pageSize || sv.DefaultPageSize);
self.pageNumber.subscribe(function (previousValue) {
console.log(previousValue);
console.log(self.pageNumber.arguments[0]);
if (previousValue != _pagedRequest.pageNumber.arguments[0]) {
console.log('value changed');
}
else {
//This won't get executed because KO doesn't
//call the function if the value doesn't change
console.log('not changed');
}
}, _pagedRequest, "beforeChange");
};
var _pagedRequest = new sv.PagedRequest();
_pagedRequest.pageNumber(10);
_pagedRequest.pageNumber(20);
_pagedRequest.pageNumber(20);
_pagedRequest.pageNumber(5);
I don't know if you're really supposed to use arguments[0], but it seems to work.
You could also set up your own method to accomplish this in a much cleaner way:
http://jsfiddle.net/PXKgr/2/
...
self.setPageNumber = function(page) {
console.log(self.pageNumber());
console.log(page);
if (self.pageNumber() != page) {
console.log('value changed');
}
else {
console.log('not changed');
}
self.pageNumber(page);
};
...
_pagedRequest.setPageNumber(10);
_pagedRequest.setPageNumber(20);
_pagedRequest.setPageNumber(20);
_pagedRequest.setPageNumber(5);
I'm building my chrome extension and I've got weird problem. This is script I'm running in background page:
function getOpenedTabs() {
var openedTabs = [];
chrome.windows.getAll({}, function(wins) {
for (var w in wins) {
if (wins[w].id !== undefined) {
chrome.tabs.getAllInWindow(wins[w].id, function(tabs) {
for (var t in tabs) {
if (tabs[t].id !== undefined) {
openedTabs.push(tabs[t]);
}
}
});
}
}
});
return openedTabs;
}
chrome.tabs.onCreated.addListener(function(tab){
var openedTabs = getOpenedTabs();
var length = openedTabs.length;
console.log("Quantity of tabs: " + length );
if (length > 20) {
openedTabs.sort(function(a,b){return a.visitCount - b.visitCount});
var t = openedTabs.shift();
chrome.tabs.remove(t.id);
console.log("The extension closed the " + t.title + " tab");
}
});
In debugging mode openedTabs.length returns correct value. But when I removed all breakpoints then openedTabs.length returns zero all time.
What kind of problem it might be?
Thanks.
Chrome API calls are asynchronous (think ajax calls), so they are not executed in order. You can't return from such methods, you need to use callbacks.
function getOpenedTabs(callback) {
chrome.windows.getAll({populate: true}, function(wins) {
var openedTabs = [];
for (var w=0; w<wins.length;w++) {
for (var t=0;t<wins[w].tabs.length;t++) {
if (wins[w].tabs[t].id !== undefined) { //I don't think this check is needed
openedTabs.push(wins[w].tabs[t]);
}
}
}
if(callback) {
callback(openedTabs);
}
});
}
chrome.tabs.onCreated.addListener(function(tab){
getOpenedTabs(function(openedTabs) {
var length = openedTabs.length;
console.log("Quantity of tabs: " + length );
if (length > 20) {
openedTabs.sort(function(a,b){return a.visitCount - b.visitCount});
var t = openedTabs.shift();
chrome.tabs.remove(t.id);
console.log("The extension closed the " + t.title + " tab");
}
});
});
You don't need to use getAllInWindow(), you can get all tabs with getAll(). Also using in to iterate over an array isn't a good practice.