Rendering parts of another site with Express - node.js

I have a basic Express application with one function that uses nodejs request and takes some divs using selectors. After that, I want to render this with jade.
var express = require('express');
var voc = require('vocabulaire');
var async = require('async');
// Router with two routes: the landing page and a word-search page.
var router = express.Router();
// GET / - render the index template with only a title.
router.get('/', function (req, res) {
res.render('index', {title: 'Espace de la diffusion'});
});
// Module-level holder for whatever main() returns; written by the search handler below.
var result;
// GET /search/:mot - run the lookup for the word in the URL, then render the template.
router.get('/search/:mot', function (req, res) {
async.series([
// Task 1: declares no callback parameter, so it has no way to signal completion to async.series.
function () {
result = main(['conj', req.params.mot]);
console.log('in 1');
},
// Task 2: renders the template and then also calls res.send with `html`,
// a name that is not defined anywhere in this snippet.
function () {
res.render('index', {title: 'Espace de la diffusion', data: result});
res.send(html);
console.log('in 2');
},
]);
});
module.exports = router;
var request = require('request')
, cheerio = require('cheerio');
// Request the conjugation page for `verbe` and look for result blocks in it.
// The `result` parameter is accepted but never read in this body.
// NOTE(review): the `return` below sits inside the request callback, so doit() itself
// returns nothing to its caller.
function doit(verbe, result) {
var url = 'http://www.babla.ru/%D1%81%D0%BF%D1%80%D1%8F%D0%B6%D0%B5%D0%BD%D0%B8%D1%8F/%D1%84%D1%80%D0%B0%D0%BD%D1%86%D1%83%D0%B7%D1%81%D0%BA%D0%B8%D0%B9/' + verbe;
request(url, function (err, resp, body) {
// `$` is assigned without a declaration; `err` is not checked before `body` is parsed.
$ = cheerio.load(body);
// The headings inside the result column decide whether anything was found.
var temps = $('.span4.result-left h5');
if (temps.length == 0) {
console.log('results not found');
}
else {
console.log('result found');
debugger;
return $('.span4.result-left');
}
});
}
// Dispatch on the first element of `arg`; the second element is passed to the handler.
// Returns whatever doit() returns for 'conj'; other commands only log a message.
function main(arg) {
switch (arg[0]) {
case 'conj':
return doit(arg[1]);
// Not reached: the return above already leaves the function.
break;
default:
console.log('unknown parameter');
break;
}
}
I used the async library to be sure that my result is ready to be rendered, but in the console I see the following:
GET /search/est - - ms - -
in 1
result found
and debugger followed me to nodejs function makeTick()..
I don't know what to do.. help me please.

Your async.series() functions are missing the callback parameter that you need to call in order for the next function to execute. However, you don't really need async to just do a single async task:
// Uses `req` and `res`, so this is meant to run where both are in scope (e.g. a route handler).
// Rendering happens inside the callback, i.e. only once main() reports back.
main(['conj', req.params.mot], function(err, result) {
res.render('index', {title: 'Espace de la diffusion', err: err, data: result});
});
// ...
/**
 * Fetch the conjugation page for a verb and hand the matching result blocks
 * to a Node-style callback.
 *
 * Two call shapes are accepted: doit(verbe, callback) and the three-argument
 * doit(verbe, result, callback). Without this, a two-argument call would put
 * the callback in the `result` slot and it would never be invoked.
 *
 * @param {string} verbe - Verb appended to the conjugation URL.
 * @param {*} [result] - Unused placeholder kept for backward compatibility.
 * @param {function} [callback] - Called with (err) when the request fails,
 *   with no arguments when the page has no result headings, and with
 *   (null, selection) otherwise, where selection is the match for
 *   '.span4.result-left'.
 */
function doit(verbe, result, callback) {
  // Two-argument form: the callback arrived in the `result` position.
  if (typeof result === 'function' && callback === undefined) {
    callback = result;
    result = undefined;
  }
  var url = 'http://www.babla.ru/%D1%81%D0%BF%D1%80%D1%8F%D0%B6%D0%B5%D0%BD%D0%B8%D1%8F/%D1%84%D1%80%D0%B0%D0%BD%D1%86%D1%83%D0%B7%D1%81%D0%BA%D0%B8%D0%B9/' + verbe;
  request(url, function (err, resp, body) {
    if (err) {
      return callback && callback(err);
    }
    // Declared locally so no implicit global `$` is created.
    var $ = cheerio.load(body);
    var temps = $('.span4.result-left h5');
    if (temps.length === 0) {
      // Nothing found: report completion without an error and without data.
      callback && callback();
    } else {
      callback && callback(null, $('.span4.result-left'));
    }
  });
}
/**
 * Route a command array to its handler.
 *
 * @param {Array} arg - [command, value]; only the 'conj' command is known.
 * @param {function} [callback] - Forwarded to doit() for 'conj'; for any other
 *   command it receives an Error with the message 'unknown parameter'.
 */
function main(arg, callback) {
  var command = arg[0];
  if (command === 'conj') {
    doit(arg[1], callback);
    return;
  }
  // Any other command is reported through the callback, when one was given.
  callback && callback(new Error('unknown parameter'));
}

Related

How to properly assign payload to GET function using express.js

I am currently learning to build a crawler using node + express + cheerio.
In the route I put this:
[index.js]
// Register the controller's `android` export as the handler for GET /api/crawler/android.
app.get('/api/crawler/android', crawlerController.android);
which calls into controller
[crawler-controller.js]
// Re-export the crawler module's `androidget` property as the `android` handler.
// NOTE(review): the crawler snippet shown with this question exports an object whose
// only key is `getAndroid` - confirm that `androidget` actually exists on the module.
var androidCrawler = require('../crawlers/android')
module.exports.android = androidCrawler.androidget;
then I invoke the crawler (based on cheerio)
[crawler.js]
var request = require('request');
var cheerio = require('cheerio');
// The request is issued as soon as this module is loaded. `androidget` holds the value
// returned by request(...) itself, not the object built inside the callback.
var androidget =request('https://www.developer-tech.com/categories/Android/', function (error, response, html){
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
// `result` is local to this callback.
var result = {result:[]};
// Collect title, first link and first image of every <article> element.
$('article').each(function (i, element) {
var Title = $(this).find("h2").text();
var Link = $(this).find("a").attr("href");
var Image = $(this).find("img").attr("src");
var payload = {
"Title":Title,
"Link":Link,
"Image":Image
};
result['result'].push(payload);
});
console.log("aaa", result);
console.log(typeof result);
// This return value goes back to whoever invoked the callback, not to the module's callers.
return result;
}});
module.exports = {
// The first parameter shadows the module-level `androidget` and is not used.
// `result` is read here although it is declared inside the request callback above.
getAndroid: function (androidget, res) {
res.send(JSON.stringify(result));
}
}
When I console log directly to crawler.js via terminal it return JSON object properly, but I think the way I export the function to be invoked by app.get is where I'm wrong and I can't figure it out.
Perhaps somebody could help me to properly invoke the crawler in my case?
There is no point of returning a result in a callback function, this will just do nothing.
What you can do is wrap your request in a function and call a callback that you create :
// file.js
/**
 * Issue the request for `url` and hand the computed result to `callback`.
 *
 * @param {string} url - Address passed to request().
 * @param {function} callback - Receives `result` once the response has been processed.
 */
const wrapFunction = (url, callback) => {
  // Parentheses are balanced here: the request callback is a plain arrow function
  // and the request(...) call is closed before the outer function ends.
  request(url, (error, response, html) => {
    // ... (build `result` from `html` here)
    callback(result);
  });
};
and then use it :
// just an example
// The second argument is the callback that wrapFunction invokes with the result.
wrapFunction(yourUrl, (result) => {
// deal with your result
})
When you have that, you can export it and then use it in your middleware / controller :
// file.js
// file.js exposes the wrapper as its only export.
module.exports = wrapFunction;
// index.js
const wrapFunction = require('file.js'); // here is your function
// The response is sent from inside the callback, i.e. only after wrapFunction delivers a result.
app.get('/yourRoute', (req, res) => {
wrapFunction(yourUrl, (result) => {
res.send(JSON.stringify(result));
});
})
You can also use Promises :
/**
 * Promise-returning variant of the request wrapper.
 *
 * @param {string} url - Address passed to request().
 * @returns {Promise} Resolves with `result`; rejects with the request error.
 */
const wrapFunction = (url) => {
  return new Promise((resolve, reject) => {
    request(url, (error, response, html) => {
      if (error) {
        // Return so the success path below never runs for a failed request.
        return reject(error);
      }
      // ... (build `result` from `html` here)
      resolve(result);
    });
  });
};
And then :
// Consume the Promise-returning wrapper: one handler for the result, one for the error.
wrapFunction(yourUrl).then(result => {
// deal with your result ...
}).catch(error => {
// deal with your error ...
});
Hope it helps,
Best regards

Mocha with chai and supertest: expected undefined to equal

I wrote the unit tests:
var app = require('../server');
var chai = require('chai');
var supertest = require("supertest")(app);
var GoogleUrl = require('google-url');
var config = require('../config');
var expect = chai.expect;
// Test suite for the URL endpoints. The three cases share the `url` object declared
// below and mutate it, so each case depends on the ones that ran before it.
describe('Urls Tests', function () {
// Payload sent to the API; replaced by the server's response in the first test.
var url = {
author : 'Alexey',
description : 'grrggr',
full_url : 'https://github.com',
date : '30-06-2017',
time : '18:21:27',
count_click : 0,
list_tags : [
'Sport',
'Football'
]
};
// Filled in by beforeEach; none of the supertest requests below attaches it.
var token;
// Log in before every test and remember the returned token.
// `agent` is not defined anywhere in this snippet.
beforeEach(function (done) {
agent
.post('http://localhost:8000/auth/login')
.send({email: 'Keane95#yandex.ru', password: '123456'})
.end(function (err, res) {
if (err) {
return done(err);
}
expect(res.body.userData).to.have.property('token');
token = res.body.userData.token;
done();
});
});
// Shortens the URL first, then posts the payload; expects status 401 and still
// asserts on the returned author field.
it('should create a url', function(done) {
var googleUrl = new GoogleUrl({
'key': config.get('google_key')
});
googleUrl.shorten(url.full_url, function (err, shortUrl) {
url.short_url = shortUrl;
supertest
.post('/urls/create')
.send(url)
.expect(401)
.end(function (err, res) {
if (err) return done(err);
expect(res.body.author).to.equal('Alexey');
// Later tests read url._id from this response body.
url = res.body;
done();
});
});
});
// Changes the description and appends a third tag, then PUTs the whole object.
it('should modify a url by id', function(done) {
url.description = 'Good description';
url.list_tags.push('Liverpool');
supertest
.put('/urls/' + url._id)
.send(url)
.expect(401)
.end(function(err, res) {
if (err) return done(err);
expect(res.body.description).to.equal('Good description');
expect(res.body.list_tags[2]).to.equal('Liverpool');
done();
});
});
// Increments the local counter and PUTs to the same '/urls/<id>' path as the previous test.
it('should modify a count of clicks', function(done) {
url.count_click++;
supertest
.put('/urls/' + url._id)
.send(url)
.expect(401)
.end(function(err, res) {
if (err) return done(err);
expect(res.body).to.equal('Count of the click is updated');
done();
});
});
});
I run to execute the unit tests and get the errors:
I read the articles by unit tests.
First article: http://developmentnow.com/2015/02/05/make-your-node-js-api-bulletproof-how-to-test-with-mocha-chai-and-supertest/
Second article: https://www.codementor.io/olatundegaruba/integration-testing-supertest-mocha-chai-6zbh6sefz
I don't understand why I get these errors. Please help me. I think I made a small mistake, but I cannot find it.
UPDATED
I added route:
var express = require('express');
var GoogleUrl = require('google-url');
var _ = require('lodash');
var token = require('../middlewares/token');
var Url = require('../models/url');
var config = require('../config');
// Route table for the URL endpoints. Every route runs token.required before its handler.
var router = express();
router.post('/create', token.required, createShortUrl);
// Registered before '/:id', so '/count/<id>' is matched by this more specific route first.
router.put('/count/:id', token.required, updateCountClick);
router.put('/:id', token.required, updateUrlById);
module.exports = router;
/**
 * POST /create handler: shorten req.body.full_url and store a new Url document.
 *
 * Responds 500 with the error when shortening or saving fails, otherwise 200
 * with the saved document. Exactly one response is sent per request.
 *
 * @param {Object} req - Reads body.list_tags, body.full_url, body.description
 *   and payload.username.
 * @param {Object} res - Express response.
 */
function createShortUrl(req, res) {
  // _.trim returns a new string, so its result has to be used; the bare call was a no-op.
  var tags = _.split(_.trim(req.body.list_tags), ',');
  // Drop the last element. NOTE(review): presumably the client sends a trailing
  // comma, leaving an empty final entry - confirm against the caller.
  tags.splice(tags.length - 1, 1);
  var date = returnDate();
  var time = returnTime();
  var googleUrl = new GoogleUrl({
    'key': config.get('google_key')
  });
  googleUrl.shorten(req.body.full_url, function (err, shortUrl) {
    if (err) {
      // Return here: otherwise the document is still saved and a second response is attempted.
      return res.status(500).json(err);
    }
    var url = new Url({
      'author': req.payload.username,
      'description': req.body.description,
      'full_url': req.body.full_url,
      'short_url': shortUrl,
      'list_tags': tags,
      'date': date,
      'time': time
    });
    url.save(function (err, url) {
      if (err) {
        return res.status(500).json(err);
      }
      return res.status(200).json(url);
    });
  });
}
/**
 * PUT /count/:id handler: store req.body.count_click + 1 as the document's click count.
 *
 * Responds 500 with the error on failure, 404 when no document has the given id,
 * and 200 with a confirmation message otherwise.
 *
 * @param {Object} req - Reads params.id and body.count_click.
 * @param {Object} res - Express response.
 */
function updateCountClick(req, res) {
  var count_click = req.body.count_click + 1;
  Url.findOneAndUpdate({_id: req.params.id}, {$set: {count_click: count_click}}, {new: true}, function (err, url) {
    if (err) {
      return res.status(500).json(err);
    }
    if (!url) {
      // Without this branch an unknown id produced no response and the request hung.
      return res.status(404).json('Url is not found');
    }
    return res.status(200).json('Count of the click is updated');
  });
}
/**
 * PUT /:id handler: replace the description and tag list of one Url document.
 *
 * Responds 500 with the error on failure, 404 when no document has the given id,
 * and 200 with the updated document otherwise.
 *
 * @param {Object} req - Reads params.id, body.description and body.list_tags.
 * @param {Object} res - Express response.
 */
function updateUrlById(req, res) {
  // _.trim returns a new string, so its result has to be used; the bare call was a no-op.
  var tags = _.split(_.trim(req.body.list_tags), ',');
  // Drop the last element. NOTE(review): presumably the client sends a trailing
  // comma, leaving an empty final entry - confirm against the caller.
  tags.splice(tags.length - 1, 1);
  Url.findOneAndUpdate({_id: req.params.id}, {$set: {description: req.body.description, list_tags: tags}}, {new: true}, function (err, url) {
    if (err) {
      return res.status(500).json(err);
    }
    if (!url) {
      // Without this branch an unknown id produced no response and the request hung.
      return res.status(404).json('Url is not found');
    }
    return res.status(200).json(url);
  });
}
UPDATED 2
Authorization was added:
// Holds the token returned by the login request below.
var token;
// Log in before every test and remember the returned token.
// `agent` is not defined in this snippet, and nothing shown here attaches `token` to a request.
beforeEach(function (done) {
agent
.post('http://localhost:8000/auth/login')
.send({email: 'Keane95#yandex.ru', password: '123456'})
.end(function (err, res) {
if (err) {
return done(err);
}
expect(res.body.userData).to.have.property('token');
token = res.body.userData.token;
done();
});
});
Also I updated code my unit-tests.
I can't see where in your code you send 401 and Url. So it seems that your test requests are getting rejected by token.required middleware with 401 status code (which means "unauthorized").
.send(url)
.expect(401) // why do you expect 401? You never send it inside your logic
So basically your test never hit actual code.
First of all, you do need to fake authorization to make token.required middleware happy.
Then expect 200 result
.send(url)
.expect(200) // normal execution flow of createShortUrl results in 200
.end(/* rest of your test logic */)

Node.JS downloading hundreds of files simultaneously

I am trying to download more than 100 files at the same time. But when I execute the download function, my MacBook freezes (unable to execute new tasks); on Windows nothing downloads either (though it doesn't freeze), and there is no download progress in either case (idle network).
Here is my download module:
var express = require('express');
var router = express.Router();
var fs = require('fs');
var youtubedl = require('youtube-dl');
var links = require('../models/Links');
// GET / - start a download for every link whose dlStatus is false.
router.get('/', function (req, res, next) {
links.find({dlStatus: false}, function (err, docs) {
if (err) {
console.log(err);
res.end();
} else if (!docs) {
console.log('No incomplete downloads!');
res.end();
} else {
// Every download is started in the same loop pass; nothing waits for an earlier one
// to finish. This branch also never sends a response on `res`.
for (var i = 0; i < docs.length; i++) {
//todo scraping
var video = youtubedl(docs[i].url, [], {cwd: __dirname});
// Will be called when the download starts.
video.on('info', function (info) {
console.log('Download started');
console.log(info);
});
// Output file name is built from the document id and its position in the result list.
video.pipe(fs.createWriteStream('./downloads/' + docs[i].id + '-' + i + '.mp4'));
// On 'complete', mark the link as downloaded; it is looked up by the page URL in `info`.
video.on('complete', function complete(info) {
links.findOneAndUpdate({url: info.webpage_url}, {dlStatus: true}, function (err, doc) {
if (err)console.log(err);
else console.log('Download completed!')
});
});
}
}
});
});
module.exports = router;
Now can anyone please help me here? I am using this module for downloading files.
The solution is using async in this case.
Try it this way....with async.each()
var express = require('express');
var router = express.Router();
var fs = require('fs');
var youtubedl = require('youtube-dl');
var links = require('../models/Links');
var async = require('async')
// GET / - start a download for every link whose dlStatus is false and log once all
// of them have reported completion (or the first error).
router.get('/', function (req, res, next) {
  links.find({dlStatus: false}, function (err, docs) {
    if (err) {
      console.log(err);
      res.end();
    } else if (!docs) {
      console.log('No incomplete downloads!');
      res.end();
    } else {
      // eachOf passes the array index as the second argument; the file name needs it.
      // NOTE(review): this branch sends no response on `res` - confirm that is intended.
      async.eachOf(docs, function (doc, i, cb) {
        var video = youtubedl(doc.url, [], {cwd: __dirname});
        // Will be called when the download starts.
        video.on('info', function (info) {
          console.log('Download started');
          console.log(info);
        });
        // Use the current document (`doc`), not the whole `docs` array, for the id.
        video.pipe(fs.createWriteStream('./downloads/' + doc.id + '-' + i + '.mp4'));
        video.on('complete', function complete(info) {
          // Named `updated` so it no longer shadows the `doc` being downloaded.
          links.findOneAndUpdate({url: info.webpage_url}, {dlStatus: true}, function (err, updated) {
            if (err) {
              console.log(err);
              cb(err);
            } else {
              console.log('Download completed!');
              cb();
            }
          });
        });
      }, function (err) {
        if (err) {
          return console.log(err);
        }
        console.log("Every thing is done,Here!!");
      });
    }
  });
});
module.exports = router;
And you can process everything in batches too, using async.eachLimit().

Node/Express function and callback are not breaking with return

I am creating a 'refresh data' function in Node and I cannot figure out where to place the callbacks and returns. The function continues to run. Below is a list of things the function should do. Could someone help out?
Check if a user has an api id in the local MongoDB
Call REST api with POST to receive token
Store token results in a MongoDB
Terminate function
./routes/index.js
// POST /refresh - load the refresh module and run it for this request.
// The handler never uses `res`; the completion callback only logs 'Done'.
router.post('/refresh', function(req, res) {
var refresh = require('../api/refresh');
refresh(req, function() { return console.log('Done'); });
});
../api/refresh.js
var callToken = require('./calltoken');
var User = require('../models/user'); // Mongoose Schema
module.exports = function(req, callback) {
User.findOne( {'username':req.body.username}, function(err, user) {
if(err) { console.log(err) }
if (user.api_id == 0) {
callToken.postToken(req.body.username, callback);
} else { // Do something else }
});
};
./calltoken.js
var request = require('request');
var Token = require('../models/token'); // Mongoose Schema
module.exports = {
// POST the user name to the token endpoint and store the returned token.
// `callback` is invoked only after a 201 response has been saved; no other path calls it.
postToken: function(user, callback) {
var send = {method:'POST', url:'address', formData:{name:user} };
request(send, function(err, res, body) {
// The error is only logged; the status check below still runs on `res`.
if(err) { console.log(err) }
if (res.statusCode == 201) {
var newToken = new Token();
// The body is parsed as JSON and the token read from access_token.token.
newToken.token = JSON.parse(body).access_token['token'];
newToken.save(function(err) {
// A save error is logged, and the callback is still called without it.
if(err) { console.log(err) }
return callback();
});
}
});
}
};
I'm not an expert in Express, but everywhere in your code where you have a line like if(err) { console.log(err) } you should stop execution (or maybe not - that is up to your app) and return 400 or 500 to the client. So it can be something like
if(err) {
console.log(err);
return callback(err); // NOTICE return here
}
On successful execution you should call return callback(null, result). Notice null as a first argument - it is according nodejs convention (error always goes as first argument).

Need to call two apis using node,js asynchronously and aggregate the response from both the apis

The code I wrote so far is as below.
I don't need the whole response but just part of it.
var request = require('request');
var async = require('async');
var asyncTasks = [];
var install;
// Two tasks, each of which waits 5 seconds and then issues a request.
// Neither task ever calls its `callback` parameter.
async.series([
function (callback) {
setTimeout(function () {
request('URL', function (error, response, body) {
if (!error && response.statusCode == 200) {
console.log(body); // Show the HTML for the Google homepage.
}
});
}, 5000);
},
function (callback) {
setTimeout(function () {
request('URL', function (error, response, body) {
if (!error && response.statusCode == 200) {
console.log(body); // Show the HTML for the Google homepage.
}
});
}, 5000);
}
],
// Final callback: logs whatever results async.series collected.
function (error, results) {
console.log(results);
});
One approach to do the above concurrently would be to use async.parallel - of the form of:
async.parallel([
function(){ ... },
function(){ ... }
], callback);
Another approach is to use a Promises library - BlueBird or Q are good choices.
Q.All is of the form of:
return Q.all([
promise1,
promise2
]).spread(function (resultFromPromise1, resultFromPromise2) {
// do something with the results...
});
You could use one of these approaches to parallelise the two calls. The outputs of each will give you an array containing the results of each call respectively.
Here is a simple illustration of each approach:
Using Async.js
var async = require('async');
/**
 * Demo task: after one second, report "complete: <count>" through a Node-style callback.
 *
 * @param {function} cb - Called as cb(null, message).
 * @param {*} count - Label appended to the message.
 */
var task = function (cb, count) {
  function finish() {
    cb(null, "complete: " + count);
  }
  setTimeout(finish, 1000);
};
// Build one task function per label and hand them all to async.parallel;
// the final callback logs the collected results.
async.parallel(
  ['one', 'two'].map(function (label) {
    return function (done) {
      task(done, label);
    };
  }),
  function (err, results) {
    console.log(results);
    //[ 'complete: one', 'complete: two' ]
  }
);
Using Q:
var Q = require('q');
/**
 * Demo task: after one second, resolve a Q deferred with the value of cb(null, count).
 *
 * @param {function} cb - Called as cb(null, count); its return value becomes the resolution.
 * @param {*} count - Passed through to cb.
 * @returns {Object} The deferred's promise.
 */
function task1(cb, count) {
  var pending = Q.defer();
  function settle() {
    var value = cb(null, count);
    return pending.resolve(value);
  }
  setTimeout(settle, 1000);
  return pending.promise;
}
/**
 * Format a completion message; the `err` argument is ignored.
 *
 * @param {*} err - Unused.
 * @param {*} count - Label appended to the message.
 * @returns {string} "complete: " followed by count.
 */
var myCb = function (err, count) {
  var prefix = "complete: ";
  return prefix + count;
};
// Start both tasks, wait for both promises, then log the combined results.
Q.all([
  task1(myCb, 'one'),
  task1(myCb, 'two')
]).then((results) => {
  console.log(results);
  //[ 'complete: one', 'complete: two' ]
});
Let me know if you are unclear.
Promises are there to help you out in such a case.
I would prefer to use 'Q' library.
I have modified your code to use Q library
var Q = require('q');
var request = require('request');
/**
 * Start req1() and req2() together and register handlers for the combined outcome.
 * Returns nothing; the handler bodies are placeholders.
 */
function makeCall() {
  var onAllDone = function (res1, res2) {
    // This block is executed once all the functions( Provided in Q.all() ) are finished its execution.
    // Use responses from called functions
  };
  var onFailure = function (err) {
    // Error, If any
  };
  var combined = Q.all([req1(), req2()]);
  combined.spread(onAllDone, onFailure);
}
/**
 * Shared implementation for req1/req2: issue a GET request and expose the
 * outcome as a Q promise.
 *
 * @param {string} url - Address to request.
 * @returns {Object} Promise resolved with the response body or rejected with the request error.
 */
function requestBody(url) {
  var defer = Q.defer();
  var options = {
    method: 'get', // Method to use
    url: url
  };
  request(options, function (err, res, body) {
    if (err) {
      return defer.reject(err);
    }
    return defer.resolve(body);
  });
  return defer.promise;
}

/**
 * First request.
 * @returns {Object} Promise for the response body.
 */
function req1() {
  var url = ''; // Specify URL
  return requestBody(url);
}

/**
 * Second request.
 * @returns {Object} Promise for the response body.
 */
function req2() {
  var url = ''; // Specify URL
  return requestBody(url);
}
You can find docs for Q library here : Q docs

Resources