I have to make a large number of consecutive HTTP requests to load product images into the app, because it has to work in offline mode.
Around 2000 calls.
The HTTP client seems to have a memory leak that causes the persistent memory shown in Instruments to rise to around 200 MB without ever being garbage collected.
After use, the HTTP client is set to null.
I have tried setting the file property of the HTTP client, without any success.
I have set the onload function to only call the callback, which in turn calls the HTTP send function again (thus looping through the 2000 products to fetch their respective pictures).
I changed from SDK 7.5.0.v20180824022007 to SDK 8.1.0.v20190423134840 and even SDK 9.0.0.v20181031080737, but the problem remains.
The code of my HTTP common module:
function HttpClient(options = {}) {
    this.root = options.root || "ROOT_OF_API";
    this.endpoint = options.endpoint || false;
    this.needsChecksum = options.needsChecksum || false;
    this.data = {};
    this.method = options.method || "POST";
    this.timeout = options.timeout || 5000;

    this.calculateChecksum = function () {
        var moment = require('alloy/moment');
        if (!Alloy.Models.user.authenticated()) {
            return false;
        }
        var sp = (moment().unix() - Alloy.Models.meta.get("timeDiff"));
        var hash = Ti.Utils.md5HexDigest("nX" + sp + "FossilSFAapp" + Alloy.Models.user.get('token').substring(10, 14) + "CS");
        var checksum = sp + "-" + hash.substring(4, 8);
        this.data.checksum = checksum;
    };
}

HttpClient.prototype.setData = function (data) {
    this.data = data;
};

HttpClient.prototype.send = function (callback) {
    // set a new checksum for the request if one is needed
    if (this.needsChecksum) {
        this.calculateChecksum();
    }
    // add app version
    if (this.method === "POST") {
        this.data.appversion = Ti.App.version;
    }
    // send
    var client = Ti.Network.createHTTPClient({
        onload: function () {
            callback({
                success: true
            });
        },
        onerror: function (e) {
            callback({
                message: e.message,
                success: false
            });
        },
        timeout: this.timeout
    });
    client.open(this.method, this.root + this.endpoint);
    if (this.setFile) {
        client.file = Ti.Filesystem.getFile(Ti.Filesystem.applicationDataDirectory, this.fileName);
    }
    client.setRequestHeader('Content-Type', 'application/json');
    client.send(JSON.stringify(this.data));
    client = null;
};
module.exports = HttpClient;
and then the module is used in the product model like so:
var HttpClient = require('./HttpClient');
var httpClient = new HttpClient();

function getImage(i) {
    if (collection.at(i) && collection.at(i).get('iimage0') && collection.at(i).needsImageUpdate()) {
        httpClient.endpoint = collection.at(i).get('acarticlenumber') + ".jpg";
        httpClient.fileName = 'productImages/' + collection.at(i).get('acarticlenumber') + '.jpg';
        httpClient.send(function (e) {
            if (i < collection.length) {
                i++;
                getImage(i);
            } else {
                finished();
            }
        });
    } else {
        if (i < collection.length) {
            i++;
            getImage(i);
        } else {
            finished();
        }
    }
}

// start getting images at index 0
getImage(0);
Does anyone have an idea why these memory leaks appear?
They only ever occur when actually sending the HTTP request.
I'm having trouble with error handling in a function in my Discord bot. What I've got right now is a command that scrapes information from a website, and I want to make it so that if there is an error (404), the user gets some feedback. How would I go about doing this? I currently have something, but I'm not sure what I'm doing wrong. Here is a snippet of the code:
//modules used
const rp = require('request-promise-native');
const errors = require('request-promise/errors');
const cheerio = require('cheerio');

if (message.content.startsWith(prefix + 'latest')) {
    //website url variables
    const website_domain = "https://hypebeast.com/";
    let website_path = args[0];
    let website_url = website_domain + website_path;

    //extra arguments variable
    let extra_arg = args.slice(1).join(" ");

    if (extra_arg.length > 0) {
        message.reply('too many arguments! Please refer to `h.help` for correct usage.');
    } else {
        //opening url and scraping elements
        function scrapData(website_url) {
            return rp(website_url)
                .then(body => {
                    let items = [],
                        $ = cheerio.load(body).catch(errors.StatusCodeError, function (reason) {
                            console.log(reason);
                        });

                    //web scraping here
                    $('.post-box').each(function () {
                        let title = $(this).find($('.title h2 span')).first().text(),
                            caption = $(this).find($('.post-box-excerpt p')).first().text(),
                            article_url = $(this).find($('.col-hb-post-image a')).first().attr('href'),
                            thumbnail_long = $(this).find($('.thumbnail img')).first().attr('src');
                        //adding title, caption, etc. to the list
                        items.push({ title, caption, article_url, thumbnail_long });
                        //check items in console
                        console.log(items);
                    })

                    return items;
                })
        }
I have just modified your code a little; try the code below.
//modules used
const rp = require('request-promise-native');
const errors = require('request-promise/errors');
const cheerio = require('cheerio');

if (message.content.startsWith(prefix + 'latest')) {
    //website url variables
    const website_domain = "https://hypebeast.com/";
    let website_path = args[0];
    let website_url = website_domain + website_path;

    //extra arguments variable
    let extra_arg = args.slice(1).join(" ");

    if (extra_arg.length > 0) {
        message.reply('too many arguments! Please refer to `h.help` for correct usage.');
    } else {
        var options = {
            uri: website_url,
            transform: function (body) {
                return cheerio.load(body);
            }
        };

        rp(options)
            .then(function ($) {
                // Process the html like you would with jQuery...
                let items = [];
                $('.post-box').each(function () {
                    let title = $(this).find($('.title h2 span')).first().text(),
                        caption = $(this).find($('.post-box-excerpt p')).first().text(),
                        article_url = $(this).find($('.col-hb-post-image a')).first().attr('href'),
                        thumbnail_long = $(this).find($('.thumbnail img')).first().attr('src');
                    //adding title, caption, etc. to the list
                    items.push({ title, caption, article_url, thumbnail_long });
                    //check items in console
                    console.log(items);
                });
            })
            .catch(function (err) {
                console.log(err);
            });
    }
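If you also want the user to get feedback on a 404 specifically, one option (a minimal sketch along the same lines, assuming `message` is still in scope at this point) is to inspect the statusCode on the rejection inside the catch, since request-promise rejects non-2xx responses with a StatusCodeError that carries the status:

rp(options)
    .then(function ($) {
        // ... same scraping as above ...
    })
    .catch(function (err) {
        if (err.statusCode === 404) {
            // the site answered, but the page does not exist
            message.reply('that page could not be found (404).');
        } else {
            // other HTTP errors, network failures, bad URLs, ...
            console.log(err);
            message.reply('something went wrong while fetching that page.');
        }
    });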
When I use the request module in a Node.js server, I have a problem with waiting for the response and returning it.
I would like to receive the "responseObject" value in requestController.
I have searched for the best way to solve this, but I still have not found it.
How can I solve this problem?
Thanks in advance! :)
=========================================================================
var requestToServer = require('request');

function getRequest(requestObject) {
    var urlInformation = requestObject['urlInformation'];
    var headerInformation = requestObject['headerInformation'];
    var jsonObject = new Object();

    // Creating the dynamic body set
    for (var i = 0; i < headerInformation.length; i++)
        jsonObject[headerInformation[i]['headerName']] = headerInformation[i]['headerValue'];

    requestToServer({
        url: urlInformation,
        method: 'GET',
        headers: jsonObject
    }, function (error, response, body) {
        // todo response controlling
        var responseObject = response.headers;
        responseObject.body = body;
    });
}

// Controlling the submitted request
exports.requestController = function (requestObject) {
    var method = requestObject['methodInformation'];
    var resultObject = null;

    // Selecting the method
    if (method == "GET")
        resultObject = getRequest(requestObject);
    else if (method == "POST")
        resultObject = postRequest(requestObject);
    else if (method == "PUT")
        resultObject = putRequest(requestObject);
    else if (method == "DELETE")
        resultObject = deleteRequest(requestObject);

    console.log(JSON.stringify(resultObject));
}
You can use callbacks in the following way.
function getRequest(requestObject, callback) {
    // some code
    requestToServer({
        url: urlInformation,
        method: 'GET',
        headers: jsonObject
    }, function (error, response, body) {
        // todo response controlling
        var responseObject = response.headers;
        responseObject.body = body;
        callback(responseObject);
    });
}
And
// Controlling the submitted request
exports.requestController = function (requestObject) {
    var method = requestObject['methodInformation'];

    // Selecting the method
    if (method == "GET")
        getRequest(requestObject, function (resultObject) {
            console.log(JSON.stringify(resultObject));
        });
    // some code
}
Hope it helps.
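If you prefer promises over callbacks, the same getRequest can be wrapped. A minimal sketch built on top of the callback version above (note that getRequest as written never reports errors, so this promise never rejects):

// Sketch: promisified wrapper around the callback-based getRequest above
function getRequestAsync(requestObject) {
    return new Promise(function (resolve) {
        getRequest(requestObject, function (responseObject) {
            resolve(responseObject);
        });
    });
}

// Usage inside requestController:
getRequestAsync(requestObject).then(function (resultObject) {
    console.log(JSON.stringify(resultObject));
});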
Hello, everyone!
I am making a scraper that uses node-simplecrawler. Everything runs fine, but what I can't figure out is how to stop one instance when creating a new one (I want to have only one running at a time). I am using Express for this, and all the scraping logic is in one route. Right now, in order to cancel the crawling, I need to stop the Node process and run the app again.
Here is the part of the code that concerns running the crawler (note: I've simplified the code a little bit, so it's shorter):
module.exports = function(socket) {
    var express = require('express');
    var router = express.Router();
    // [... requires continue...]

    /* GET scanning page. */
    router.post('/', function(req, res, next) {
        res.render('scanning'); // Load the socket.io host page

        var render = {};
        var pages = [];
        var timer = new Date();

        // Helper func to log the requests.
        function log(message) {
            var now = new Date();
            console.log(now - timer + 'ms', message);
            timer = now;
        }

        // Ensure URL format, parse URL
        // Check if the URL exists
        request(url.href, function (error, response, body) {
            if (!error && response.statusCode == 200) {
                // URL exists, so let's scan it
                // Exclude links to the following extensions:
                var exclude = ['gif', 'jpg', 'jpeg', 'png', 'ico', 'bmp', 'ogg', 'webp',
                    'mp4', 'webm', 'mp3', 'ttf', 'woff', 'json', 'rss', 'atom', 'gz', 'zip',
                    'rar', '7z', 'css', 'js', 'gzip', 'exe', 'xml', 'svg'];
                var exts = exclude.join('|');
                var regexReject = new RegExp('\\.(' + exts + ')', 'i');
                var rootURL = url.protocol + '//' + url.host + '/';

                // Crawler configuration
                var crawler = new Crawler(url.host);
                crawler.initialPort = 80;
                crawler.initialPath = url.pathname;
                crawler.maxConcurrency = 1;
                crawler.ignoreWWWDomain = false; // This is a little suspicious...
                crawler.filterByDomain = true;   // Only URLs from the current domain
                crawler.scanSubdomains = true;
                crawler.downloadUnsupported = false;
                crawler.parseHTMLComments = false;
                crawler.parseScriptTags = false;
                crawler.acceptCookies = false;
                // crawler.maxDepth = 1 // Debug only!

                /*
                 * Fetch Conditions
                 */

                // Get only URLs, ignore feeds, only from this host
                crawler.addFetchCondition(function (parsedURL) {
                    return (
                        !parsedURL.path.match(regexReject) &&      // Only links
                        (parsedURL.path.search('/feed') === -1) && // Ignore feeds
                        (parsedURL.host === url.host)              // Page is from this domain
                    );
                });

                // Should we only include subpages?
                if (onlySubpages) {
                    crawler.addFetchCondition(function(parsedURL) {
                        // return parsedURL.path.search(url.pathname) > -1;
                        return parsedURL.path.startsWith(url.pathname);
                        // console.log(url, parsedURL);
                    });
                }

                // Exclude URLs with fragments?
                if (excludeUrls.length >= 1) {
                    crawler.addFetchCondition(function(parsedURL) {
                        var urlFragmentsOk = true;
                        excludeUrlFragments.forEach(function(fragment) {
                            if (parsedURL.path.search('/' + fragment) > -1) {
                                urlFragmentsOk = false;
                            }
                        });
                        return urlFragmentsOk;
                    });
                }

                // Include only URLs with fragments
                if (includeOnlyUrls.length >= 1) {
                    crawler.addFetchCondition(function(parsedURL) {
                        var urlFragmentsOk = false;
                        var includeUrlFragments = includeOnlyUrls.replace(/\s/, '').split(',');
                        includeUrlFragments.forEach(function(fragment) {
                            if (parsedURL.path.search('/' + fragment) !== -1) {
                                urlFragmentsOk = true;
                            }
                        });
                        return urlFragmentsOk;
                    });
                }

                // Run the crawler
                crawler.start();

                // Execute for each URL, on fetchcomplete
                crawler.on('fetchcomplete', function(item, responseBuffer, response) {
                    // [Do stuff with the scraped page]
                });

                // Completed crawling. Now let's get to work!
                crawler.on('complete', function() {
                    // [Get all scraped pages and do something with them]
                });

                // Error handling
                crawler.on('queueerror', function(errorData, URLData) {
                    console.log('Queue error:', errorData, URLData);
                });
                crawler.on('fetchdataerror', function(queueitem, response) {
                    console.log('Fetch error:', queueitem, response);
                });
                crawler.on('fetchtimeout', function(queueItem, crawlerTimeoutValue) {
                    console.log('Fetch timeout:', queueItem, crawlerTimeoutValue);
                });
                crawler.on('fetchclienterror', function(queueItem, errorData) {
                    console.log('Fetch local error:', queueItem, errorData);
                });
                crawler.on('fetchtimeout', function(queueItem, crawlerTimeoutValue) {
                    console.log('Crawler timeout:', queueItem, crawlerTimeoutValue);
                });
            } else if (error) {
                console.log(error);
            }
        });
    });

    return router;
}
Every simplecrawler instance has a stop method that can be called to prevent the crawler from making any further requests (requests won't be stopped in flight, however).
I would probably store the crawler instance in a scope outside of the route handler, check whether it's defined as the first thing in the route handler, call the stop method if it is, and then construct a new crawler.
I stripped out a lot of the meat of your code, but something like this is what I had in mind:
module.exports = function(socket) {
    var express = require('express');
    var router = express.Router();
    var Crawler = require('simplecrawler');
    var crawler;

    router.post('/', function(req, res, next) {
        // Check if the URL exists
        request(url.href, function (error, response, body) {
            if (!error && response.statusCode == 200) {
                // Stop any crawler that's already running
                if (crawler instanceof Crawler) {
                    crawler.stop();
                }

                // Crawler configuration
                crawler = new Crawler(url.host);
                crawler.initialPort = 80;
                crawler.initialPath = url.pathname;

                // Run the crawler
                crawler.start();

                // Execute for each URL, on fetchcomplete
                crawler.on('fetchcomplete', function(item, responseBuffer, response) {
                    // [Do stuff with the scraped page]
                });

                // Completed crawling. Now let's get to work!
                crawler.on('complete', function() {
                    // [Get all scraped pages and do something with them]
                });
            } else if (error) {
                console.log(error);
            }
        });
    });

    return router;
}
I want to make a script that searches for a variable in an external page.
For example, I want the script to visit this page: Here,
check whether a server is available, and notify me somehow.
Can someone help me with this?
My solution is to retrieve the text of the page's source, then search that source for any strings we want. Just add another search call to extend the search. Works in Firefox.
<script>
    function print(text) {
        alert(text);
    }

    function search(where, what) {
        var position = where.indexOf(what);
        if (position !== -1) {
            print("Found \"" + what + "\" at " + position);
            return;
        } // else
        print("\"" + what + "\" not found");
    }

    var xhr = new XMLHttpRequest();
    xhr.open("GET", "https://billing.dacentec.com/hostbill/index.php?/cart/dedicated-servers/", true);
    xhr.onload = function (e) {
        if (xhr.readyState === 4) {
            if (xhr.status === 200) {
                var externalText = xhr.responseText;
                search(externalText, "technology");
                search(externalText, "secure");
                search(externalText, "we");
            } else {
                print(xhr.statusText);
            }
        }
    };
    xhr.onerror = function (e) {
        print(xhr.statusText);
    };
    xhr.send(null);
</script>
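If you want to be notified when availability changes rather than checking the page once, you could run the same kind of check on a timer. A minimal sketch (the "Out of Stock" string is only an assumption about the page's text; replace it with whatever marker actually indicates availability on that page):

<script>
    // Minimal polling sketch: re-fetch the page every 5 minutes and alert
    // when the marker string is no longer present.
    // Assumption: "Out of Stock" appears while no server is available.
    function checkAvailability() {
        var xhr = new XMLHttpRequest();
        xhr.open("GET", "https://billing.dacentec.com/hostbill/index.php?/cart/dedicated-servers/", true);
        xhr.onload = function () {
            if (xhr.status === 200 && xhr.responseText.indexOf("Out of Stock") === -1) {
                alert("A server seems to be available!");
            }
        };
        xhr.send(null);
    }
    setInterval(checkAvailability, 5 * 60 * 1000);
</script>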