PhantomJS memory leak and process exit failure - memory-leaks

I am currently working on a project with PhantomJS that evaluates a list of web pages specified by a CSV file. I installed NPM and node.js to use in my program.
Here is the program:
var async = require("async");
var webpage = require('webpage'),
fs = require('fs');
var file_h = fs.open('C:\\Users\\morgan\\Documents\\FantasyApp\\URLPlayerListActive.txt', 'r');
var urls = [];
while (!file_h.atEnd()) {
urls.push(file_h.readLine());
}
async.eachSeries(urls, function (url, done) {
console.log(url)
var page = webpage.create();
page.open("http://"+url, function (status) {
if (status !== 'success') {
console.log('Unable to access network');
console.log(status)
var closeresults = page.close();
} else {
var evalresults = page.evaluate(function() {
try {
table2csv('pgl_basic');
try {
ga('send','event','Tool','Action','CSV');
}
catch (e) {}
var list = document.querySelectorAll('#csv_pgl_basic');
var stats = [];
for (var i = 0; i < list.length; i++) {
stats.push(list[i].innerText);
}
return stats;
var closeresults = page.close();
} catch (e) {
console.log(e);
}
});
try {
fs.write("C:\\Users\\morgan\\Documents\\FantasyApp\\Data\\"+url+".txt", evalresults.join('\n'), 'w');
var closeresults = page.close();
} catch(e) {
console.log(e);
var closeresults = page.close();
}
}
done();
});
});
phantom.exit();
My symptoms are either the process memory increases until it reaches my Windows maximum and crashes, OR it finishes my list and the process hangs around forever.
I can implement a work around for either of these problems, but because they both happen, I am unable to put this script to work.
I am looking for assistance preventing the memory leak or simply closing my process when the script is finished. It is possible that these symptoms are from the same root cause.

If the page is not correctly garbage collected, you can try to use the same instance over and over again. The other thing is that you should call phantom.exit when the script actually finished e.g. in the callback of eachSeries.
var page = webpage.create();
async.eachSeries(urls, function (url, done) {
console.log(url)
page.open("http://"+url, function (status) {
if (status !== 'success') {
console.log('Unable to access network');
console.log(status)
} else {
var evalresults = page.evaluate(function() {
try {
table2csv('pgl_basic');
try {
ga('send','event','Tool','Action','CSV');
}
catch (e) {}
var list = document.querySelectorAll('#csv_pgl_basic');
var stats = [];
for (var i = 0; i < list.length; i++) {
stats.push(list[i].innerText);
}
return stats;
} catch (e) {
console.log(e);
}
});
try {
fs.write("C:\\Users\\morgan\\Documents\\FantasyApp\\Data\\"+url+".txt", evalresults.join('\n'), 'w');
} catch(e) {
console.log(e);
}
}
done();
});
}, function(err){
phantom.exit();
});
Some other issues:
page.close doesn't return anything, so closeresults will be undefined.
Any statement that comes after return cannot be executed.
page is not defined in the page context (inside page.evaluate) and therefore page.close(); produces an error which may break your code.
Please register to the onConsoleMessage and onError events to see if there are errors.

Related

NodeJS Async/Await or Promise for net-snmp module

please someone help, I just cant get it.
Can you please help on how to make async/await or promise (doneCb), so script waits for first vlc_snmp(..) to finish and then to call next?
Example:
function doneCb(error) {
console.log(final_result);
final_result = [];
if (error)
console.error(error.toString());
}
function feedCb(varbinds) {
for (var i = 0; i < varbinds.length; i++) {
if (snmp.isVarbindError(varbinds[i]))
console.error(snmp.varbindError(varbinds[i]));
else {
var snmp_rez = {
oid: (varbinds[i].oid).toString()
value: (varbinds[i].value).toString()
};
final_result.push(snmp_rez);
}
}
}
var session = snmp.createSession(VLC_IP, "public", options);
var maxRepetitions = 20;
function vlc_snmp(OID) {
session.subtree(OID_, maxRepetitions, feedCb, doneCb);
}
vlc_snmp(OID_SERIAL_NUMBER);
//wait OID_SERIAL_NUMBER to finish and then call next
vlc_snmp(OID_DEVICE_NAME);
You should be able to use the async / await statements to wait for your done callback to be called. We wrap this callback in the vlc_snmp function, and return a Promise. This allows us to use the await statement.
I've mocked out some of the code that I don't have access to, it should behave somewhat similarly to the real code.
The key point here is, when a function returns a Promise we can await the result in an async function, that will give you the behaviour you wish.
final_result = [];
const VLC_IP = "";
const options = {};
const OID_ = "OID_";
const OID_SERIAL_NUMBER = "OID_SERIAL_NUMBER"
const OID_DEVICE_NAME = "OID_DEVICE_NAME"
// Mock out snmp to call feedcb and donecb
const snmp = {
createSession(...args) {
return {
subtree(oid, maxRepetitions, feedCb, doneCb) {
setTimeout(feedCb, 500, [{ oid, value: 42}])
setTimeout(doneCb, 1000);
}
}
},
isVarbindError(input) {
return false;
}
}
function doneCb(error) {
console.log("doneCb: final_result:", final_result);
final_result = [];
if (error)
console.error("doneCb: Error:", error.toString());
}
function feedCb(varbinds) {
for (var i = 0; i < varbinds.length; i++) {
if (snmp.isVarbindError(varbinds[i]))
console.error(snmp.varbindError(varbinds[i]));
else {
var snmp_rez = {
oid: (varbinds[i].oid).toString(),
value: (varbinds[i].value).toString()
};
final_result.push(snmp_rez);
}
}
}
var session = snmp.createSession(VLC_IP, "public", options);
var maxRepetitions = 20;
function vlc_snmp(OID) {
return new Promise((resolve,reject) => {
session.subtree(OID, maxRepetitions, feedCb, (error) => {
// This is a wrapper callback on doneCb
// Always call the doneCb
doneCb(error);
if (error) {
reject(error);
} else {
resolve();
}
});
});
}
async function run_vls_snmp() {
console.log("run_vls_snmp: Calling vlc_snmp(OID_SERIAL_NUMBER)...");
await vlc_snmp(OID_SERIAL_NUMBER);
console.log("run_vls_snmp: Calling vlc_snmp(OID_DEVICE_NAME)...");
//wait OID_SERIAL_NUMBER to finish and then call next
await vlc_snmp(OID_DEVICE_NAME);
}
run_vls_snmp();

How to get code to execute in order in node.js

I am trying to finish my script, but for some reason i don't know, it refuses to execute in the order i put it in.
I've tried placing a 'wait' function between the JoinRequest update function and the following code, but when run, it acts as if the function call and wait function were the other way round, countering the point of the wait().
const Roblox = require('noblox.js')
var fs = require('fs');
var joinRequests = []
...
function wait(ms) {
var d = new Date();
var d2 = null;
do { d2 = new Date(); }
while(d2-d < ms*1000);
};
...
function updateJReqs() {
Roblox.getJoinRequests(4745601).then((array) => {
var i;
var final = [];
for(i = 0; i < array.length; i++) {
final.push(array[i].username);
};
if(final === '') {
final = '-None';
};
joinRequests = final
console.log('Updated join requests.')
});
}
function check() {
setTimeout(() => {
fs.readFile('Request.txt',encoding = 'utf-8', function(err, data) {
if (err) {
check();
} else {
updateJReqs(); //for some reason this function is executed alongside the below, not before it.
// Tried putting wait(x) in here.
console.log('Request received: ' + data)
var solution = joinRequests
console.log('Fuffiling request with ' + solution)
fufillRequest(solution)
fs.unlink('Request.txt', function(err) {
if(err) throw err;
});
check();
}
});
}, 400)
}
check();
The script is supposed to wait until a file is created (accomplished), update the list of join requests (accomplished) and then create a new file with the list of join requests in(not accomplished).
if I understand your code you work with async code, you need to return a promise in updateJReqs and add a condition of leaving from the function because you have an infinite recursion
function updateJReqs() {
return new Promise(resolve => {
Roblox.getJoinRequests(4745601).then((array) => {
var i;
var final = [];
for(i = 0; i < array.length; i++) {
final.push(array[i].username);
};
if(final === '') {
final = '-None';
};
joinRequests = final
console.log('Updated join requests.')
resolve();
});
}
}
async function check() {
setTimeout(() => {
fs.readFile('Request.txt',encoding = 'utf-8', function(err, data) {
if (err) {
await check();
} else {
await updateJReqs();
// Tried putting wait(x) in here.
console.log('Request received: ' + data)
var solution = joinRequests
console.log('Fuffiling request with ' + solution)
fufillRequest(solution)
fs.unlink('Request.txt', function(err) {
if(err) throw err;
});
// you dont have an exit from your function check();
return 'Success';
}
});
}, 400)
}
check().then(res => console.log(res));

Nightmare Looping

Hello, I am writing an application where I need to be able to loop through an array of urls. I know there is an example of how to do this but my issue is a little different, I will explain with some code.
nightmare
.goto('some url')
.evaluate(() => {
//evaluate code
})
.then(dataArray => {
var innerRun = function* () {
var returnData = [];
for (var i = 0; i < dataArray.length; i++) {
var item = dataArray[i];
yield nightmare
.goto(item.url)
.evaluate(function () {
return false;
})
.screenshot(item.imgPath)
returnData.push(item);
}
return returnData;
}
vo(innerRun)(function (err, ads) {
if (err) {
console.log("Error running", err)
return;
}
});
});
I would like to be able to loop that code by using an array of urls. I had issues implementing this I believe because I am already doing it inside the then. It would stop running once it hit the yield nightmare inside the then
var mainLoop = function* () {
for (var j = 0; j < urlArray.length; j++) {
var url = urlArray[j];
yield nightmare.goto(url)//same code as in example above
}
}
vo(mainLoop)(function (err, d) {
if (err) {
console.log("Error running", err)
return;
}
});
The above code is what I attempted to do. If anyone has any ideas it would be a huge help thank you!
Maybe try this:
var urls = ['http://example.com', 'http://example2.com', 'http://example3.com'];
var results = [];
urls.forEach(function(url) {
nightmare.goto(url)
.wait('body')
.title()
.then(function(result) {
results.push(result);
});
});
console.dir(results)
Source: https://github.com/rosshinkley/nightmare-examples/blob/master/docs/common-pitfalls/async-operations-loops.md

web scrapy by nightmare about loop ,output is not same every time

var Nightmare = require('nightmare');
var nightmare = Nightmare({ show: true });
var fs = require('fs');
vo = require('vo');
var result;
nightmare
.goto('http://jufa-kyusyu.jp/teams/')
.wait(1000)
.evaluate(function () {
var options = document.querySelectorAll('option'),i;
var values =[]
for (i = 0; i < options.length; ++i) {
values.push(options[i].value)
}
return values;
})
.then(function (values) {
for (var i = 0; i < values.length; i++) {
if(values[i] == "#") values[i] = "/teams/181.html";
nightmare
.goto("http://www.jufa-kyusyu.jp"+values[i])
.evaluate(function () {
var abc = document.querySelector('iframe[class="autoHeight"]').src.toString()
return abc;
})
.then(function (result) {
console.log(result)
})
.catch(function (error) {
console.error('Search failed:', error);
});}
})
.catch(function (error) {
console.error('Search failed:', error);
});
I want to scrapy the web information by nightmarejs looply.I dont know why have two result link is same and the result is changed in running every time.thank you.
You have to be careful when working with async calls inside a loop with Nightmare
Check this answer and this detailed explanation about the concept.
The main idea can be sumarized by this sentence:
Executing the operations in series requires arranging them to execute
in sequential order
The documentation shows how to achieve that using plain, vanilla js and also with vo
Here is a sneak peek on how to solve this loop issue with plain Javascript:
var urls = ['http://example1.com', 'http://example2.com', 'http://example3.com'];
urls.reduce(function(accumulator, url) {
return accumulator.then(function(results) {
return nightmare.goto(url)
.wait('body')
.title()
.then(function(result){
results.push(result);
return results;
});
});
}, Promise.resolve([])).then(function(results){
console.dir(results);
});
Basically what you need to do is queue all your calls in a list and trigger them using Promise.resolve

AngularJs $resource save throwing ERR_CONTENT_LENGTH_MISMATCH

Using AngularJs $resource for updating the existing data. calling save on resource.
This is my service.
app.factory('SubscriptionsService', function ($resource, $q) {
// $resource(url, [paramDefaults], [actions], options);
var resource = $resource('/api/subscriptions');
var factory = {};
factory.updateSubscriptions = function (updatedSubscriptions) {
console.log("SubscriptionsService: In updateSubscriptions. "+JSON.stringify( updatedSubscriptions));
var deferred = $q.defer();
resource.save(updatedSubscriptions,
function(response){
deferred.resolve(response);
},
function(response){
deferred.reject(response);
}
);
return deferred.promise;
}
return factory;
});
And API looks like the following.
exports.update = function (req, res) {
console.log("In Subscriptions API: Update invoked.");
if (req.body == null || req.body.items == null || req.body.items == 'undefined') {
return res.json({"status":0,"message":"no items present."});
}
var items = req.body.items;
console.log("About to call update on db."+items);
db.update(items,function(error,result,failedItems){
if(error)
return res.json({"status":-1,"message":error.message,"failedItem":failedItems});
return res.json({"status":1,"message":result,"failedItem":failedItems});
})
}
And controller as follows
$scope.confirmUpdates = function () {
if ($scope.updatedItems.length > 0) {
var updatedSubscriptionLevels = { "items": $scope.updatedItems };
var promise = SubscriptionsService.updateSubscriptions(updatedSubscriptionLevels);
promise.then(
function(response){
console.log("Response received from API.");
var statusCode = response.status;
console.log("statusCode: "+statusCode);
},
function(message){
console.log("Error message: "+message);
}
);
}
}
Strange part is data is getting updated. But promise receiving an error.
POST xxxxxx/testapi/subscriptions net::ERR_CONTENT_LENGTH_MISMATCH
I doubt I am not implementing/consuming resource in a right way. Can anybody advise?
Thanks.
I think I found the mistake.
There were couple of things I was not handling in a right way.
I didn't post my db code in mt question where the root cause was present.
if(itemsToUpdate!=null && itemsToUpdate.length>0){
var failedItems = []; // Holds failed items while updating.
console.log("Items to update: "+itemsToUpdate);
for(var index=0;index<itemsToUpdate.length;index++){
var item = itemsToUpdate[index];
console.log(item);
subscriptionLevels.update(
{_id:item._id},
{$set:{title:item.title,daysValid:item.daysValid,subscriptionFee:item.subscriptionFee}},
function(error,rowsEffected){
if(error){
console.log(error);
// Error occurred. Push it to failed items array.
failedItems.push({"title":item.title,"message":error.message});
}
else{
console.log("Number of rows updated: "+rowsEffected);
}
}
);
}
callback(null,"SUCCESS",failedItems);
}
else {
callback(null, "NO_ITEMS", []);
}
I missed if - else condition and invoking callback multiple times which was causing the issue.

Resources