Node.JS downloading hundreds of files simultaneously - node.js

I am trying to download more that 100 files at the same time. But when I execute the downloading function my macbook freezes(unable to execute new tasks) in windows also no download(but doesn't freeze) and no download progress in both case(idle network).
Here is my download module:
var express = require('express');
var router = express.Router();
var fs = require('fs');
var youtubedl = require('youtube-dl');
var links = require('../models/Links');
router.get('/', function (req, res, next) {
links.find({dlStatus: false}, function (err, docs) {
if (err) {
console.log(err);
res.end();
} else if (!docs) {
console.log('No incomplete downloads!');
res.end();
} else {
for (var i = 0; i < docs.length; i++) {
//todo scraping
var video = youtubedl(docs[i].url, [], {cwd: __dirname});
// Will be called when the download starts.
video.on('info', function (info) {
console.log('Download started');
console.log(info);
});
video.pipe(fs.createWriteStream('./downloads/' + docs[i].id + '-' + i + '.mp4'));
video.on('complete', function complete(info) {
links.findOneAndUpdate({url: info.webpage_url}, {dlStatus: true}, function (err, doc) {
if (err)console.log(err);
else console.log('Download completed!')
});
});
}
}
});
});
module.exports = router;
Now can anyone please help me here? I am using this module for downloading files.

The solution is using async in this case.
Try it this way....with async.each()
var express = require('express');
var router = express.Router();
var fs = require('fs');
var youtubedl = require('youtube-dl');
var links = require('../models/Links');
var async = require('async')
router.get('/', function (req, res, next) {
links.find({dlStatus: false}, function (err, docs) {
if (err) {
console.log(err);
res.end();
} else if (!docs) {
console.log('No incomplete downloads!');
res.end();
} else {
async.each(docs,function(doc,cb){
var video = youtubedl(doc.url, [], {cwd: __dirname});
// Will be called when the download starts.
video.on('info', function (info) {
console.log('Download started');
console.log(info);
});
video.pipe(fs.createWriteStream('./downloads/' + docs.id + '-' + i + '.mp4'));
video.on('complete', function complete(info) {
links.findOneAndUpdate({url: info.webpage_url}, {dlStatus: true}, function (err, doc) {
if (err){
console.log(err);
cb(err);
}
else {
console.log('Download completed!');
cb()
}
});
});
},function(err){
if(err)
return console.log(err);
console.log("Every thing is done,Here!!");
})
}
});
});
module.exports = router;
And you can process every thing in batch too using async.eachLimits().

Related

sending multiple respone from nodejs using python-shell

I am trying to execute few python script inside nodejs. The code is shown below. What I am trying to do is executing different python script inside a for loop one by one. and send the json response to client as soon as one script gets over.
var PythonShell = require('python-shell');
var express = require('express'), app = express();
app.get('/', function (req, res) {
res.setHeader('Content-Type', 'text/html');
pl_list=["test", "test2"]
for (var i=0; i<= pl_list.length-1; i++) {
output="";
var pyshell = new PythonShell('./'+pl_list[i]+'.py')
pyshell.on('message', function (message)
{console.log(message);output+=message;});
pyshell.end(function (err) {
if (err){
console.log('error occured ---- '+err);
}
else{
console.log('update finished');
res.write(JSON.stringify({"finsihed":true, "product_line":pl_list[i]}));
}
});
}
//res.end()
});
app.listen(5000, function () {
console.log('The web server is running. Please open http://localhost:5000/ in your browser.');
});
unfortunately I am getting the response as {"finsihed":true} actual output must be
{"finsihed":true, "product_line":"test"}{"finsihed":true, "product_line":"test2"}
can anybody tell me what I am doing wrong here. Thanks in advance!
The execution of your python scripts is asynchronous, so when you write the response to the client with this line, the value of i changed:
res.write(JSON.stringify({"finsihed":true, "product_line":pl_list[i]})
Just display the value of i with console.log before the above line and you will see that i equals 2 twice (due to the increment of your for-loop). And because pl_list[i] is undefined, the serialization of a JSON object removes the attribute "product_line".
If you want to "save" the value of i, you have to learn what closure is.
This code should work:
var PythonShell = require('python-shell');
var express = require('express'),
app = express();
app.get('/', function (req, res) {
res.setHeader('Content-Type', 'text/html');
var nbFinishedScripts = 0;
pl_list = ["test", "test2"]
for (var i = 0; i <= pl_list.length - 1; i++) {
output = "";
var pyshell = new PythonShell('./' + pl_list[i] + '.py')
pyshell.on('message', function (message)
{
console.log(message);
output += message;
});
// closure
(function (i) {
return function () {
pyshell.end(function (err) {
if (err) {
console.log('error occured ---- ' + err);
} else {
console.log('update finished');
res.write(JSON.stringify({
"finsihed": true,
"product_line": pl_list[i]
}));
}
nbFinishedScripts++;
// end the reponse when the number of finished scripts is equal to the number of scripts
if (nbFinishedScripts === pl_list.length) {
res.end();
}
});
};
})(i)(); // immediately invoke the function
}
});
app.listen(5000, function () {
console.log('The web server is running. Please open http://localhost:5000/ in your browser.');
});
Edit code:
var PythonShell = require('python-shell');
var express = require('express'),
app = express();
var executePythonScript = function (script) {
return new Promise(function (resolve, reject) {
var pyshell = new PythonShell('./' + script + '.py');
pyshell.end(function (err) {
if (err) {
reject(err);
} else {
resolve(script);
}
});
});
};
app.get('/', function (req, res) {
res.setHeader('Content-Type', 'text/html');
var pl_list = ["test", "test2"];
Promise
.all(pl_list.map(executePythonScript))
.then(function (scripts) {
scripts.forEach(function (script) {
res.write(JSON.stringify({
finsihed: true,
product_line: script
}));
});
res.end();
})
.catch(function (err) {
res.end();
});
});
app.listen(5000, function () {
console.log('The web server is running. Please open http://localhost:5000/ in your browser.');
});

async.eachSeries runs only once with async.waterfall inside for each iteration

I am new to async library. I have used async.eachSeries and async.waterfall for each iteration. I see, the async.waterfall runs only once.
Here is my code :
var fs = require('fs'),
async = require('async'),
Client = require('node-rest-client').Client;
// REST API Call and output in jsonOutput.results
console.log(jsonOutput.results.length); // jsonOutput.results has 124 records.
async.eachSeries(jsonOutput.results, function(account, callback) {
var dataObject = {};
dataObject.updatetime = new Date();
var setAccountInfoURL = ""; // Data Update REST API Request
async.waterfall([
function setAccountInfo(updateCallback) {
// client.get(setAccountInfoURL, function (data, response) {
// var jsonOutput = JSON.parse(data.toString('utf8'));
updateCallback(null, "output", account)
// });
},
function saveAccountInfo(jsonOutput, account, updateCallback) {
var debuglog = JSON.stringify(account) + "\n" + jsonOutput;
fs.appendFile("debuginfo.json", debuglog + "\n", function (err) {
if(err) {
console.log(err);
}
console.log("JSON saved to " + "debuginfo.json");
updateCallback(null);
});
}
],function asyncComplete(err) {
if (err) {
console.warn('Error setting account info.', err);
}
console.log('async completed');
});
}, function(err){
if (err) {
console.log('error in loop');
}
console.log('loop completed');
});
Output:
124
JSON saved to debuginfo.json
async completed
Any help is really appreciated.
I found my mistake. I missed calling the callback after each iteration just after async is completed.
var fs = require('fs'),
async = require('async'),
Client = require('node-rest-client').Client;
// REST API Call and output in jsonOutput.results
console.log(jsonOutput.results.length); // jsonOutput.results has 124 records.
async.eachSeries(jsonOutput.results, function(account, callback) {
var dataObject = {};
dataObject.updatetime = new Date();
var setAccountInfoURL = ""; // Data Update REST API Request
async.waterfall([
function setAccountInfo(updateCallback) {
// client.get(setAccountInfoURL, function (data, response) {
// var jsonOutput = JSON.parse(data.toString('utf8'));
updateCallback(null, "output", account)
// });
},
function saveAccountInfo(jsonOutput, account, updateCallback) {
var debuglog = JSON.stringify(account) + "\n" + jsonOutput;
fs.appendFile("debuginfo.json", debuglog + "\n", function (err) {
if(err) {
console.log(err);
}
console.log("JSON saved to " + "debuginfo.json");
updateCallback(null);
});
}
],function asyncComplete(err) {
if (err) {
console.warn('Error setting account info.', err);
}
console.log('async completed');
callback(null); // this is the change.
});
}, function(err){
if (err) {
console.log('error in loop');
}
console.log('loop completed');
});

rendering parties of other site with express

I have a basic Express application with one function that uses nodejs request and takes some divs using selectors. After that, I want to render this with jade.
var express = require('express');
var voc = require('vocabulaire');
var async = require('async');
var router = express.Router();
router.get('/', function (req, res) {
res.render('index', {title: 'Espace de la diffusion'});
});
var result;
router.get('/search/:mot', function (req, res) {
async.series([
function () {
result = main(['conj', req.params.mot]);
console.log('in 1');
},
function () {
res.render('index', {title: 'Espace de la diffusion', data: result});
res.send(html);
console.log('in 2');
},
]);
});
module.exports = router;
var request = require('request')
, cheerio = require('cheerio');
function doit(verbe, result) {
var url = 'http://www.babla.ru/%D1%81%D0%BF%D1%80%D1%8F%D0%B6%D0%B5%D0%BD%D0%B8%D1%8F/%D1%84%D1%80%D0%B0%D0%BD%D1%86%D1%83%D0%B7%D1%81%D0%BA%D0%B8%D0%B9/' + verbe;
request(url, function (err, resp, body) {
$ = cheerio.load(body);
var temps = $('.span4.result-left h5');
if (temps.length == 0) {
console.log('results not found');
}
else {
console.log('result found');
debugger;
return $('.span4.result-left');
}
});
}
function main(arg) {
switch (arg[0]) {
case 'conj':
return doit(arg[1]);
break;
default:
console.log('unknown parameter');
break;
}
}
I used async library for be sure that my result is ready to be rendered but in console I see next:
GET /search/est - - ms - -
in 1
result found
and debugger followed me to nodejs function makeTick()..
I don't know what to do.. help me please.
Your async.series() functions are missing the callback parameter that you need to call in order for the next function to execute. However, you don't really need async to just do a single async task:
main(['conj', req.params.mot], function(err, result) {
res.render('index', {title: 'Espace de la diffusion', err: err, data: result});
});
// ...
function doit(verbe, result, callback) {
var url = 'http://www.babla.ru/%D1%81%D0%BF%D1%80%D1%8F%D0%B6%D0%B5%D0%BD%D0%B8%D1%8F/%D1%84%D1%80%D0%B0%D0%BD%D1%86%D1%83%D0%B7%D1%81%D0%BA%D0%B8%D0%B9/' + verbe;
request(url, function (err, resp, body) {
if (err)
return callback && callback(err);
$ = cheerio.load(body);
var temps = $('.span4.result-left h5');
if (temps.length == 0) {
callback && callback();
}
else {
callback && callback(null, $('.span4.result-left'));
}
});
}
function main(arg, callback) {
switch (arg[0]) {
case 'conj':
doit(arg[1], callback);
break;
default:
callback && callback(new Error('unknown parameter'));
break;
}
}

"object is not a function" error during MongoDB document insertion from a CSV stream using async.queue

I'm trying MongoDB document insertion from a CSV stream using async.queue.
But I face this following error. I've tried all the remedies given in similar SO posts.
Exact error message is:
C:\Users\admin\node_modules\mongodb\lib\mongo_client.js:406
throw err
TypeError:object is not a function
at C:\Users\admin\Desktop\mynodefile.js:13:2
at C:\Users\admin\node_modules\mongodb\lib\mongo_client.js:403:11
at process._tickCallback(node.js:355:11)
node.js code I used:
var csv = require('csv');
var async = require('async');
var fs = require('fs');
var MongoClient = require('mongodb').MongoClient;
MongoClient.connect('mongodb://localhost:27017', function(err, db) {
if (err) throw err;
var collection = db.collection('myCSVs');
var queue = async.queue(collection.insert.bind(collection), 5);
csv()
.from.path('./input.csv', { columns: true })
.transform(function (data, index, cb) {
queue.push(data, function (err, res) {
if (err) return cb(err);
cb(null, res[0]);
});
})
.on('error', function (err) {
console.log('ERROR: ' + err.message);
})
.on('end', function () {
queue.drain = function() {
collection.count(function(err, count) {
console.log('Number of documents:', count);
db.close();
});
};
});
});
You haven't mentioned the database name in your MongoClient.connect function call. You can do so like this:
MongoClient.connect('mongodb://localhost:27017/database_name',function(err, db) {
Then you can do:
var collection = db.collection('myCSVs');
If myCSVs is a collection inside database_name
Or you can also do:
MongoClient.connect('mongodb://localhost:27017',function(err, mongoclient) {
var db = mongoclient.db('database_name');
var collection = db.collection('myCSVs');
});
You have to change
var queue = async.queue(collection.insert.bind(collection), 5);
Into:
var q = async.queue(function (task, callback) {
console.log('hello ' + task.name);
callback();
}, 2);
IN this line :
queue.push(data, function (err, res) {
if (err) return cb(err);
cb(null, res[0]);
});
you are calling push with data and with a callback, but its not implemented in your
var queue = async.queue(collection.insert.bind(collection), 5);

Closure find node.js, mongodb, express

Please help me. I need variable in my search use post:
app.post('/find', function(req, res) {
var id_school = req.body.std_id;
console.log('show '+ id_sekolah);
db.collection('ak_test_score', function(err, collection) {
collection.find({'std_id':id_school}).toArray(function(err, level) {
var a = level.std_id;
var b = level.school_name;
});
});
res.redirect('/test_score'); // send to my page to get
};
var test = a; // not defined variable a not have
app.get('/test_score', function(req, res) {
var id_school = test;
console.log('show '+ id_sekolah);
db.collection('ak_test_score', function(err, collection) {
collection.find({'std_id':id_school}).toArray(function(err, level) {
res.send(level)
});
});
};
I am using this for a website search using post.
app.post('/find', function(req, res) {
var id_school = req.body.std_id;
console.log('show '+ id_sekolah);
db.collection('ak_test_score', function(err, collection) {
collection.find({'std_id':id_school}).toArray(function(err, level) {
var a = level.std_id;
app.set('data',a);
var b = level.school_name;
});
});
res.redirect('/test_score'); ///// send to my page to get
};
app.get('/test_score', function(req, res) {
var id_school = app.get('data');
console.log('show '+ id_sekolah);
db.collection('ak_test_score', function(err, collection) {
collection.find({'std_id':id_school}).toArray(function(err, level) {
res.send(level)
});
});
};

Resources