I have to make a large number of sequential HTTP requests to load product images into the app, as it has to function in an offline mode.
Around 2000 calls.
The HTTP client seems to have a memory leak which causes the persistent MB in Instruments to rise to around 200 without being garbage collected.
After use, the HTTP client is set to null.
I have tried setting the file property of the HTTP client, without any success.
I have set the onload function to only call the callback function, which in turn calls the HTTP send function again (thus looping through the 2000 products to get the respective pictures).
I changed from SDK 7.5.0.v20180824022007 to SDK 8.1.0.v20190423134840 and even SDK 9.0.0.v20181031080737, but the problem remains.
The code of my HTTP common module:
function HttpClient(options = {}) {
this.root = options.root || "ROOT_OF_API";
this.endpoint = options.endpoint || false;
this.needsChecksum = options.needsChecksum || false;
this.data = {};
this.method = options.method || "POST"; // uppercase, so the "POST" check in send() matches
this.timeout = options.timeout || 5000;
this.calculateChecksum = function () {
var moment = require('alloy/moment');
if (!Alloy.Models.user.authenticated()) {
return false;
}
var sp = (moment().unix() - Alloy.Models.meta.get("timeDiff"))
var hash = Ti.Utils.md5HexDigest("nX" + sp + "FossilSFAapp" + Alloy.Models.user.get('token').substring(10, 14) + "CS")
var checksum = sp + "-" + hash.substring(4, 8)
this.data.checksum = checksum
}
};
HttpClient.prototype.setData = function (data) {
this.data = data
};
HttpClient.prototype.send = function (callback) {
// set new checksum for request if is needed
if (this.needsChecksum) {
this.calculateChecksum()
}
// add app version
if (this.method === "POST") {
this.data.appversion = Ti.App.version;
}
// send
var client = Ti.Network.createHTTPClient({
onload: function () {
callback({
success: true
})
},
onerror: function(e) {
callback({
message: e.message,
success: false
})
},
timeout: this.timeout
});
client.open(this.method, this.root + this.endpoint);
if (this.setFile) { // note: the usage below only ever sets fileName, never setFile
client.file = Ti.Filesystem.getFile(Ti.Filesystem.applicationDataDirectory, this.fileName);
}
client.setRequestHeader('Content-Type', 'application/json');
client.send(JSON.stringify(this.data));
client = null;
};
module.exports = HttpClient;
and then the module is used in the product model like so:
var HttpClient = require('./HttpClient');
var httpClient = new HttpClient();
function getImage (i) {
if (collection.at(i) && collection.at(i).get('iimage0') && collection.at(i).needsImageUpdate()) {
httpClient.endpoint = collection.at(i).get('acarticlenumber') +".jpg";
httpClient.fileName = 'productImages/' + collection.at(i).get('acarticlenumber') + '.jpg'
httpClient.send(function(e){
if (i < collection.length) {
i++
getImage(i)
} else {
finished()
}
});
} else {
if (i < collection.length) {
i++
getImage(i)
} else {
finished()
}
}
}
// start getting images at index 0
getImage(0)
Anyone have an idea why these memory leaks appear?
It only ever occurs when actually sending the HTTP request.
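One observation about the code above: client = null at the end of send() runs synchronously, right after client.send(), i.e. before the asynchronous request has completed, so it only clears the local variable while the native client is still working. Below is a minimal sketch of a send() variant that drops the reference in the completion handlers instead; it reuses the names from the module above, keys the file assignment off fileName (the product model only ever sets fileName, never setFile), and is an illustration of the idea rather than a confirmed fix for the leak:

HttpClient.prototype.send = function (callback) {
    if (this.needsChecksum) {
        this.calculateChecksum();
    }
    if (this.method === "POST") {
        this.data.appversion = Ti.App.version;
    }
    var client = Ti.Network.createHTTPClient({
        onload: function () {
            callback({ success: true });
            client = null; // release the reference only after the request has finished
        },
        onerror: function (e) {
            callback({ message: e.error, success: false }); // Titanium error events carry e.error
            client = null;
        },
        timeout: this.timeout
    });
    client.open(this.method, this.root + this.endpoint);
    // stream the response straight to disk instead of buffering the image in memory
    if (this.fileName) {
        client.file = Ti.Filesystem.getFile(Ti.Filesystem.applicationDataDirectory, this.fileName);
    }
    client.setRequestHeader('Content-Type', 'application/json');
    client.send(JSON.stringify(this.data));
};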
Related
I need to crawl all the pages on a site (the crawling part works fine) and so I need to run THIS script on my server using Node.js. I tried implementing the following logic:
var request = require('request');
var cheerio = require('cheerio');
var URL = require('url-parse');
var jsdom = require("jsdom");
var { JSDOM } = jsdom;
var START_URL = "http://balneol.com/";
var SEARCH_FONT = "helvetica";
var MAX_PAGES_TO_VISIT = 100000;
var pagesVisited = {};
var numPagesVisited = 0;
var pagesToVisit = [];
var url = new URL(START_URL);
var baseUrl = url.protocol + "//" + url.hostname;
pagesToVisit.push(START_URL);
crawl();
function crawl() {
if(numPagesVisited >= MAX_PAGES_TO_VISIT) {
console.log("Reached max limit of number of pages to visit.");
return;
}
var nextPage = pagesToVisit.pop();
if (nextPage in pagesVisited) {
// We've already visited this page, so repeat the crawl
crawl();
} else {
// New page we haven't visited
visitPage(nextPage, crawl);
}
}
function visitPage(url, callback) {
// Add page to our set
pagesVisited[url] = true;
numPagesVisited++;
// Make the request
console.log("Visiting page " + url);
request(url, function(error, response, body) {
// Check status code (200 is HTTP OK)
console.log("Status code: " + response.statusCode);
if(response.statusCode !== 200) {
callback();
return;
}
// Parse the window.document body
// var window = jsdom.jsdom(body).defaultView();
var { window } = new JSDOM(body);
//var $ = cheerio.load(body);
var helveticaFound = searchForHelvetica(window, 'font-family');
if(helveticaFound) {
console.log('Word ' + SEARCH_FONT + ' found at page ' + url);
} else {
collectInternalLinks($); // note: $ is never defined here, since the cheerio.load line above is commented out
// In this short program, our callback is just calling crawl()
// callback();
}
});
}
function searchForHelvetica( window , css) {
if(typeof getComputedStyle == "undefined")
getComputedStyle= function(elem){
return elem.currentStyle;
}
var who, hoo, values= [], val,
nodes= window.document.body.getElementsByTagName('*'),
L= nodes.length;
for(var i= 0; i<L; i++){
who= nodes[i];
console.log(nodes[i]);
if(who.style){
hoo= '#'+(who.id || who.nodeName+'('+i+')');
console.log(who.style._values);
// return false;
val= who.style.fontFamily || getComputedStyle(who, '')[css];
if(val){
if(verbose) values.push([hoo, val]); // note: 'verbose' is not defined anywhere in this script
else if(values.indexOf(val)== -1) values.push(val);
// before IE9 you need to shim Array.indexOf (shown below)
}
}
}
// console.log(values);
// return values;
}
function collectInternalLinks($) {
var relativeLinks = $("a[href^='/']");
console.log("Found " + relativeLinks.length + " relative links on page");
relativeLinks.each(function() {
pagesToVisit.push(baseUrl + $(this).attr('href'));
});
}
If you look at my visitPage function, you will see the below two lines of code:
var { window } = new JSDOM(body);
var helveticaFound = searchForHelvetica(window, 'font-family');
As you can see, on the 2nd line I am passing the window object to the searchForHelvetica function.
In my searchForHelvetica function, if I console.log(nodes[i]); I don't get the HTML element, and hence the rest of the script doesn't quite run as expected. Does the jsdom window differ from the window object in the browser? How do I get the script working, i.e. basically use the window object to run through all the elements on the page and spit out all the fonts used on the page?
EDIT:
To break the problem down to a micro level, if I console.log(who); inside the searchForHelvetica function, I get the following result:
HTMLElement {}
HTMLDivElement {}
HTMLDivElement {}
HTMLDivElement {}
HTMLAnchorElement {}
HTMLImageElement {}
HTMLDivElement {}
HTMLFormElement {}
HTMLDivElement {}
HTMLLabelElement {}
HTMLInputElement {}
HTMLButtonElement {}
HTMLButtonElement {}
HTMLSpanElement {}
etc..
But if I were to do the same in a web browser, the result would be different, e.g.:
nodes = window.document.body.getElementsByTagName('*');
console.log(node[1]) // <div id="mobile-menu-box" class="hide">...</div>
How do I get a similar result in Node.js?
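The elements are there; Node's console.log just doesn't render jsdom elements as markup the way a browser console does. It prints them through util.inspect, and since jsdom elements have no enumerable own properties you get HTMLDivElement {}. To see the markup, log element.outerHTML. jsdom also exposes window.getComputedStyle, though its CSS support is more limited than a real browser's. A minimal standalone sketch, assuming jsdom is installed (the sample markup is made up for illustration):

var { JSDOM } = require("jsdom");

var { window } = new JSDOM(
    '<body><div id="mobile-menu-box" class="hide" style="font-family: helvetica">menu</div></body>'
);
var nodes = window.document.body.getElementsByTagName('*');

for (var i = 0; i < nodes.length; i++) {
    // outerHTML prints the same kind of output a browser console would show
    console.log(nodes[i].outerHTML);
    // check the inline style first, then fall back to jsdom's computed style
    var val = nodes[i].style.fontFamily || window.getComputedStyle(nodes[i]).fontFamily;
    if (val) {
        console.log('font-family: ' + val);
    }
}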
Dear helpful people on the internet,
I started to write an HTTP queue. I have a Request class which works if I use it on its own, but with my queue active my HTTP requests won't work. I can't figure out why.
var t = new Request('s','s', function(){});
t.perform();
When I perform the request like this in any file, it works. But when I use it with my queue (index.js, L19 to L22), no request is performed. The function Request.perform() is executed, but the HTTP request isn't there.
Sorry for my English, I'm not a native speaker ^^
index.js
const http = require('http');
const https = require('https');
const {Request} = require('./classes/Request');
const queue = require('./queue.js');
queue.setCallbackFunction(performRequest);
function performRequest(request){
console.log("2");
request.perform();
}
var req = new Request('','', function(response,body){
console.log(JSON.stringify(response) + " :: " + body);
});
queue.add(req);
queue.js
var queue = [];
var ratelimits = [];
var call; // the callback set via setCallbackFunction below, invoked by run_request
module.exports.add = function(request){
queue.push(request);
run_queue();
}
module.exports.setCallbackFunction = function(cb){
call = cb;
}
module.exports.setRateLimits = function(ratelimitings){
ratelimits = [];
for(var z in ratelimitings){
var x = ratelimitings[z];
var data = {};
data.max = x[0];
data.time = x[1];
data.count = 0;
ratelimits[x[1]] = data;
}
}
function run_queue(){
var q;
if(queue.length > 0){
q = run_request(queue[0]);
while (q == true) {
queue.shift();
if(queue.length > 0)
q = run_request(queue[0]);
}
}
}
function run_request(request){
for(var z in ratelimits){
var x = ratelimits[z];
if(x.max <= x.count){
return false;
}
}
for(var z in ratelimits){
var x = ratelimits[z];
if(x.count === 0){
setTimeout(function(z){
console.log(JSON.stringify(x));
ratelimits[z].count = 0;
run_queue();
},z,z);
}
x.count++;
//console.log(JSON.stringify(x));
}
//DO REQUEST
console.log("1")
call(request);
return true;
}
Request.js
exports.Request = class{
constructor(host,path,cb){
this.path = path;
this.cb = cb;
this.host = host
}
perform(){
console.log("3");
var https = require('https');
var options = {
host: 'www.example.com',
path: '/'
};
var callback = function(response) {
//THIS NEVER GETS CALLED (FOR WHATEVER REASON) WHILE IT'S GOING THROUGH THE QUEUE
var str = '';
//another chunk of data has been received, so append it to `str`
response.on('data', function (chunk) {
str += chunk;
});
//the whole response has been received, so we just print it out here
response.on('end', function () {
console.log(str);
});
}
https.request(options, callback).end();
}
}
All 3 console.logs get printed, but the request callback never gets called.
The problem is caused by function run_queue:
function run_queue(){
var q;
if(queue.length > 0){
q = run_request(queue[0]);
while (q == true) {
queue.shift();
if(queue.length > 0)
q = run_request(queue[0]);
}
}
}
After run_request() is executed successfully (the HTTP request is sent), queue.shift() is called immediately, which means the req object you just added to the queue is removed from the queue array and becomes eligible for garbage collection. As one HTTP request/response will normally take a few milliseconds, it is very likely that before the response is retrieved, the req object is GCed (destroyed). Thus, no callback will be invoked, as the HTTP connection does not exist any more.
To change the queue but keep the req object from being GCed, you need to save it somewhere else, such as a temporary array (the infinite-loop bug is also fixed in the following code):
var tmp = [];
function run_queue(){
var q;
if(queue.length > 0){
q = run_request(queue[0]);
while (q == true) {
tmp.push(queue.shift());
if(queue.length > 0) {
q = run_request(queue[0]);
} else {
q = false;
}
}
}
}
Please note the above code is only a demo. In production code, you need to manage the tmp array: when one request is done, it needs to be removed from tmp. Otherwise, the tmp array will keep growing...
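A minimal sketch of that cleanup, with two assumptions flagged: it relies on each queued request exposing its user callback as request.cb (the Request class above stores one), and on perform() actually invoking this.cb when the response ends (as posted, perform() ignores this.cb and uses a hard-coded callback). The wrapper removes the entry from tmp once its response has arrived:

function run_request(request){
    // ... rate-limit checks as before ...
    var done = request.cb;
    request.cb = function(){
        // the response has arrived, so drop this request from tmp and let it be GCed
        var idx = tmp.indexOf(request);
        if (idx !== -1) {
            tmp.splice(idx, 1);
        }
        done.apply(null, arguments);
    };
    call(request);
    return true;
}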
Hello, everyone!
I am making a scraper which uses node-simplecrawler. Everything runs fine, but what I can't figure out is how to stop one instance when creating a new one (I want to have only one running at a time). I am using Express for this, and all the scraping logic is in one route. In order to cancel the crawling right now, I need to stop the Node process and run the app again.
Here is the part of the code that concerns running the crawler (note: I've simplified the code a little bit, so it's shorter):
module.exports = function(socket) {
var express = require('express');
var router = express.Router();
[... requires continue...]
/* GET scaning page. */
router.post('/', function(req, res, next) {
res.render('scanning'); // Load the socket.io host page
var render = {};
var pages = [];
var timer = new Date();
// Helper func to log the requests.
function log(message) {
var now = new Date();
console.log(now - timer + 'ms', message);
timer = now;
}
// Ensure URL format, parse URL
// Check if URL exist
request(url.href, function (error, response, body) {
if (!error && response.statusCode == 200) {
// URL exists, so let's scan it
// Exclude links to the following extensions:
var exclude = ['gif', 'jpg', 'jpeg', 'png', 'ico', 'bmp', 'ogg', 'webp',
'mp4', 'webm', 'mp3', 'ttf', 'woff', 'json', 'rss', 'atom', 'gz', 'zip',
'rar', '7z', 'css', 'js', 'gzip', 'exe', 'xml', 'svg'];
var exts = exclude.join('|');
var regexReject = new RegExp('\\.(' + exts + ')', 'i'); // double backslash so the dot is actually escaped in the regex
var rootURL = url.protocol + '//' + url.host + '/';
// Crawler configuration
var crawler = new Crawler(url.host);
crawler.initialPort = 80;
crawler.initialPath = url.pathname;
crawler.maxConcurrency = 1;
crawler.ignoreWWWDomain = false; // This is a little suspicious...
crawler.filterByDomain = true; // Only URLs from the current domain
crawler.scanSubdomains = true;
crawler.downloadUnsupported = false;
crawler.parseHTMLComments = false;
crawler.parseScriptTags = false;
crawler.acceptCookies = false;
// crawler.maxDepth = 1 // Debug only!
/*
* Fetch Conditions
*/
// Get only URLs, ignore feeds, only from this host
crawler.addFetchCondition(function (parsedURL) {
return (
!parsedURL.path.match(regexReject) && // Only links
(parsedURL.path.search('/feed') === -1) && // Ignore feeds
(parsedURL.host === url.host) // Page is from this domain
);
});
// Should we only include subpages?
if(onlySubpages) {
crawler.addFetchCondition(function(parsedURL) {
// return parsedURL.path.search(url.pathname) > -1;
return parsedURL.path.startsWith(url.pathname);
// console.log(url, parsedURL);
});
}
// Exclude urls with fragments?
if(excludeUrls.length >= 1 ) {
crawler.addFetchCondition(function(parsedURL) {
var urlFragmentsOk = true;
excludeUrlFragments.forEach(function(fragment) {
if(parsedURL.path.search('/'+fragment) > -1) {
urlFragmentsOk = false;
}
});
return urlFragmentsOk;
});
}
// Include only URLs with fragments
if(includeOnlyUrls.length >= 1) {
crawler.addFetchCondition(function(parsedURL) {
var urlFragmentsOk = false;
var includeUrlFragments = includeOnlyUrls.replace(/\s/, '').split(',');
includeUrlFragments.forEach(function(fragment) {
if(parsedURL.path.search('/'+fragment) !== -1) {
urlFragmentsOk = true;
}
});
return urlFragmentsOk;
});
}
// Run the crawler
crawler.start();
// Execute for each URL, on fetchcomplete
crawler.on('fetchcomplete', function(item, responseBuffer, response) {
[Do stuff with the scraped page]
});
// Completed crawling. Now let's get to work!
crawler.on('complete', function() {
[Get all scraped pages and do something with them]
});
// Error handling
crawler.on('queueerror', function(errorData, URLData) {
console.log('Queue error:', errorData, URLData);
});
crawler.on('fetchdataerror', function(queueitem, response) {
console.log('Fetch error:', queueitem, response);
});
crawler.on('fetchtimeout', function(queueItem, crawlerTimeoutValue) {
console.log('Fetch timeout:', queueItem, crawlerTimeoutValue);
});
crawler.on('fetchclienterror', function(queueItem, errorData) {
console.log('Fetch local error:', queueItem, errorData);
});
crawler.on('fetchtimeout', function(queueItem, crawlerTimeoutValue) {
console.log('Crawler timeout:', queueItem, crawlerTimeoutValue);
});
} else if(error) {
console.log(error);
}
});
});
return router;
}
Every simplecrawler instance has a stop method that can be called to prevent the crawler from making any further requests (requests won't be stopped in flight, however).
I would probably store the crawler instance in a scope outside of the route handler, check if it's defined first thing in the route handler, call the stop method if it is, and then construct a new scraper.
I stripped out a lot of the meat of your code, but something like this is what I had in mind:
module.exports = function(socket) {
var express = require('express');
var router = express.Router();
var Crawler = require('simplecrawler');
var crawler;
router.post('/', function(req, res, next) {
// Check if URL exist
request(url.href, function (error, response, body) {
if (!error && response.statusCode == 200) {
// Stop any crawler that's already running
if (crawler instanceof Crawler) {
crawler.stop();
}
// Crawler configuration
crawler = new Crawler(url.host);
crawler.initialPort = 80;
crawler.initialPath = url.pathname;
// Run the crawler
crawler.start();
// Execute for each URL, on fetchcomplete
crawler.on('fetchcomplete', function(item, responseBuffer, response) {
// [Do stuff with the scraped page]
});
// Completed crawling. Now let's get to work!
crawler.on('complete', function() {
// [Get all scraped pages and do something with them]
});
} else if(error) {
console.log(error);
}
});
});
return router;
}
I am using 'child_process' (the fork method) to handle the task of saving some records to the server. For this I was using a jQuery AJAX call in the child process to save the records, but somehow that code doesn't get executed.
I have already included jquery.min.js in the HTML page that also includes the file which forks the child process.
The file forking child process:
var childProcess = require('child_process');
var syncProcess;
function launchMindwaveDataSync(){
//alert("launchMindwaveDataSync fired");
var iconSync = document.getElementById("iconSync");
iconSync.src = "images/findDevice.gif";
iconDevice.title = "Synchronizing...";
//Launch the device reader script in a new V8 process.
syncProcess = childProcess.fork('./js/mindwaveDataSync.js');
syncProcess.on('message', function(message){
console.log(message);
switch(message.msg)
{
case "connected":
global.HEADSET_CONNECTED = true;
iconDevice.src = "images/icon-power.png";
iconDevice.title = "Connected to Mindwave Mobile device";
startSynchronizing();
break;
case "disconnected":
case "error":
case "close":
case "timeout":
global.HEADSET_CONNECTED = false;
iconDevice.src = "images/error.png";
iconDevice.title = "Mindwave Mobile device is disconnected";
break;
}
});
syncProcess.on('error', function(e){
console.log(e);
});
setTimeout(function(){
console.log('sending command initialize');
syncProcess.send({cmd:'initialize'});
},1000);
};
function startSynchronizing(){
syncProcess.send({cmd: 'synchronize'});
}
The child process, which is supposed to make the AJAX call:
var recursive = require('recursive-readdir');
var SecurConf = require('../js/secureConf');
var sconf = new SecurConf();
var filesToSync = [];
var crypto = require('crypto');
var options = {
//prompt : 'File Password : ',
algo : 'aes-128-ecb',
file : {
encoding : 'utf8',
out_text : 'hex'
}
};
process.on('message', function (command){
console.log(command);
switch(command.cmd)
{
case "initialize": initializeConnection();
break;
case "synchronize": checkForFiles();
break;
case "record":
client.resume();
break;
case "pause":
client.pause();
break;
case "stop":
client.destroy();
break;
}
//process.send({msg:"Sync Process: " + command.cmd});
});
function checkForFiles(){
recursive('C:/MindWaveData/Data/', function (err, files) {
// Files is an array of filename
filesToSync = files;
decryptFiles();
//process.send({msg:files});
});
}
function decryptFiles(){
var ajaxSuccess = function(res){
process.send({msg:res});
}
for(var i = 0; i < filesToSync.length; i++){
var ef = ""+filesToSync[i];
sconf.decryptFile(ef, function(err, file, content){
if(err){
process.send({msg:"some error occurred while decrypting..."});
} else {
var parsedContent = JSON.parse(content);
var decryptedContent = JSON.stringify(parsedContent, null,'\t');
for(var j = 0; j<parsedContent.length; j++){
$.ajax({
//async: false,
type: "POST",
url: "http://192.168.14.27:8001/admin/webservice",
contentType: "application/json; charset=utf-8",
dataType: "json",
data: JSON.stringify({
'ncFunction': 'login',
'ncParams': {
'ncUserEmail': "clarity_admin#yopmail.com",
'ncUserPassword': "cl123!##"
}
}),
success: function (res) {
ajaxSuccess(res);
},
error: function (xhr, type, err) {
ajaxSuccess(res);
}
});
}
});
}
}
function initializeConnection(){
//console.log('initializeConnection::function');
//process.send({msg:"initializeConnection called"});
checkConnection()
//process.send({msg:"connected"});
//call function to send ajax request
}
function checkConnection(){
//console.log('checkConnection::function');
//call ajax request 3 times to check the connection.. once in third try we get the response OK, we can send the process message as connected
var ajaxCallCount = 0;
var makeAjaxCall = function(){
//console.log('function makeAjaxCall');
//process.send({msg:"connected"});
if(ajaxCallCount < 2){
ajaxCallCount++;
//console.log('ajaxCallCount:'+ajaxCallCount);
//process.send({msg:'value of ajaxCallCount:'+ajaxCallCount});
connectionSuccess();
}
else{
process.send({msg:"connected"});
}
};
var connectionSuccess = function(){
//console.log('function connectionSuccess');
makeAjaxCall();
};
makeAjaxCall();
}
Can we use a jQuery AJAX call in the child process like this? Also, I have included the file that forks the child process in one HTML file, and on load of its body I am calling launchMindwaveDataSync() from the first file shown above.
Thanks in advance
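For context: a forked child is a plain Node.js process, with no window, no DOM, and no XMLHttpRequest, so jQuery's $.ajax has nothing to run on there, regardless of what the parent HTML page includes. A sketch of the same login POST using Node's built-in http module instead; the host, path, and payload (including the # in the email) are copied verbatim from the question's code, the postLogin name is made up here, and the snippet is untested against that server:

var http = require('http');

function postLogin(onDone) {
    var payload = JSON.stringify({
        ncFunction: 'login',
        ncParams: {
            ncUserEmail: 'clarity_admin#yopmail.com',
            ncUserPassword: 'cl123!##'
        }
    });
    var req = http.request({
        host: '192.168.14.27',
        port: 8001,
        path: '/admin/webservice',
        method: 'POST',
        headers: {
            'Content-Type': 'application/json; charset=utf-8',
            'Content-Length': Buffer.byteLength(payload)
        }
    }, function (res) {
        var body = '';
        res.on('data', function (chunk) { body += chunk; });
        res.on('end', function () { onDone(null, body); });
    });
    req.on('error', function (err) { onDone(err); });
    req.write(payload);
    req.end();
}

// e.g. inside decryptFiles(), replacing the $.ajax call:
postLogin(function (err, body) {
    process.send({ msg: err ? 'request failed: ' + err.message : body });
});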
jsfiddle link: http://jsfiddle.net/T8ee7/
When I call Knockout's subscribe method, is there a way I can get both the previous and the new value? Right now, I can only get these values separately.
I want to trigger some code if the old and new value are different.
I suppose I could do the following, but it can get messy...
(http://jsfiddle.net/MV3fN/)
var sv = sv || {};
sv.PagedRequest = function (pageNumber, pageSize) {
this.pageNumber = ko.observable(pageNumber || 1);
this.numberOfPages = ko.observable(1);
this.pageSize = ko.observable(pageSize || sv.DefaultPageSize);
};
var _pagedRequest = new sv.PagedRequest();
var oldValue;
_pagedRequest.pageNumber.subscribe(function (previousValue) {
console.log("old: " + previousValue);
oldValue = previousValue;
}, _pagedRequest, "beforeChange");
_pagedRequest.pageNumber.subscribe(function (newValue) {
console.log("new: " + newValue);
if (oldValue != newValue) {
console.log("value changed!");
}
});
_pagedRequest.pageNumber(10);
_pagedRequest.pageNumber(20);
I prefer using an observable extender.
http://jsfiddle.net/neonms92/xybGG/
Extender:
ko.extenders.withPrevious = function (target) {
// Define new properties for previous value and whether it's changed
target.previous = ko.observable();
target.changed = ko.computed(function () { return target() !== target.previous(); });
// Subscribe to observable to update previous, before change.
target.subscribe(function (v) {
target.previous(v);
}, null, 'beforeChange');
// Return modified observable
return target;
}
Example Usage:
// Define observable using 'withPrevious' extension
self.hours = ko.observable().extend({ withPrevious: 1 });
// Subscribe to observable like normal
self.hours.subscribe(function () {
if (!self.hours.changed()) return; // Cancel if value hasn't changed
print('Hours changed from ' + self.hours.previous() + ' to ' + self.hours());
});
This seems to work for me
ko.observable.fn.beforeAndAfterSubscribe = function (callback, target) {
var _oldValue;
this.subscribe(function (oldValue) {
_oldValue = oldValue;
}, null, 'beforeChange');
this.subscribe(function (newValue) {
callback.call(target, _oldValue, newValue);
});
};
See more at: http://ideone.com/NPpNcB
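A hypothetical usage of the helper above (the observable and the values are made up for illustration):

var pageNumber = ko.observable(1);

pageNumber.beforeAndAfterSubscribe(function (oldValue, newValue) {
    // old and new value arrive together in a single callback
    console.log('changed from ' + oldValue + ' to ' + newValue);
});

pageNumber(10); // logs: changed from 1 to 10
pageNumber(10); // same value, so no change notification fires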
http://jsfiddle.net/MV3fN/3/
var sv = sv || {};
sv.PagedRequest = function (pageNumber, pageSize) {
var self = this;
self.pageNumber = ko.observable(pageNumber || 1);
self.numberOfPages = ko.observable(1);
self.pageSize = ko.observable(pageSize || sv.DefaultPageSize);
self.pageNumber.subscribe(function (previousValue) {
console.log(previousValue);
console.log(self.pageNumber.arguments[0]);
if (previousValue != _pagedRequest.pageNumber.arguments[0]) {
console.log('value changed');
}
else {
//This won't get executed because KO doesn't
//call the function if the value doesn't change
console.log('not changed');
}
}, _pagedRequest, "beforeChange");
};
var _pagedRequest = new sv.PagedRequest();
_pagedRequest.pageNumber(10);
_pagedRequest.pageNumber(20);
_pagedRequest.pageNumber(20);
_pagedRequest.pageNumber(5);
I don't know if you're really supposed to use arguments[0], but it seems to work.
You could also set up your own method to accomplish this in a much cleaner way:
http://jsfiddle.net/PXKgr/2/
...
self.setPageNumber = function(page) {
console.log(self.pageNumber());
console.log(page);
if (self.pageNumber() != page) {
console.log('value changed');
}
else {
console.log('not changed');
}
self.pageNumber(page);
};
...
_pagedRequest.setPageNumber(10);
_pagedRequest.setPageNumber(20);
_pagedRequest.setPageNumber(20);
_pagedRequest.setPageNumber(5);