Web Crawling Using Node js - node.js

I am scraping medium.com. I am trying to display all the links present on the website.
// Fetch the page at `url` and, on a 200 response, write the text and href of
// every <a> element to Links.json as pretty-printed JSON.
var url="https://medium.com/";
request(url,function(error,response,html){
if(!error && response.statusCode==200){
var $=cheerio.load(html);
// A single object is created here, once, before the loop runs.
var json={content:"",link:""};
var jsonObjects=[];
var links=$('a');
//console.log(links);
$(links).each(function(i,link){
// Each iteration overwrites the fields of that one shared object and pushes
// another reference to it, so every array entry ends up showing the values
// written by the last iteration.
json.content=$(link).text();
json.link=$(link).attr('href');
jsonObjects.push(json);
});
}
// `jsonObjects` is a function-scoped `var`, so it is visible here; it is still
// undefined when the request failed or the status was not 200.
fs.writeFile('Links.json',JSON.stringify(jsonObjects,null,4),function(err){
if(!err){
// NOTE(review): `res` is not defined in this snippet — presumably it comes
// from an enclosing route handler; confirm.
res.send("File written successfully!!!!")
}
})
})
When I use this code, only one link shows up again and again in the Links.json file.
[
{
"content": "About",
"link": "https://about.medium.com"
},
{
"content": "About",
"link": "https://about.medium.com"
},
{
"content": "About",
"link": "https://about.medium.com"
},
{
"content": "About",
"link": "https://about.medium.com"
},
{
"content": "About",
"link": "https://about.medium.com"
},
{
"content": "About",
"link": "https://about.medium.com"
},
{
"content": "About",
"link": "https://about.medium.com"
},
{
I have tried everything, but I am not even getting the links of the main page of medium.com.
Please help.

It's because you keep on editing the same object and pushing it into the array again and again.
var json={content:"",link:""};
should be inside your each loop.
// Fetch the Medium front page and write the text and href of every <a>
// element to Links.json as pretty-printed JSON.
var url = "https://medium.com/";
request(url, function (error, response, html) {
  // Declared before the status check so the write below always receives an
  // array, even when the request failed or returned a non-200 status.
  var jsonObjects = [];
  if (!error && response.statusCode == 200) {
    var $ = cheerio.load(html);
    $('a').each(function (i, anchor) {
      // Build a fresh object per anchor: pushing one shared object repeatedly
      // would make every array entry show the last anchor's values.
      jsonObjects.push({
        content: $(anchor).text(),
        link: $(anchor).attr('href')
      });
    });
  }
  fs.writeFile('Links.json', JSON.stringify(jsonObjects, null, 4), function (err) {
    if (!err) {
      // NOTE(review): `res` is not defined in this snippet — presumably it
      // comes from an enclosing route handler; confirm.
      res.send("File written successfully!!!!")
    } else {
      // Do not drop write failures silently.
      console.error(err);
    }
  })
})

Related

Mongodb Schema issue

Here I am using a Node.js application to get a JSON response from an external API. I need to capture a few key-value pairs from this response and save them to MongoDB.
I am getting the response properly, but I am unable to save the data in the database.
Requirement:
Each time I get this response from the external server, I need to save it in the collection, overwriting any documents that already exist there. The JSON response contains nearly 7 array items, and I need to save the corresponding key-value pairs from all of them automatically.
Model:
var mongoose = require('mongoose');

// Schema for the user data captured from the external users API.
// Every field is declared as an array type.
const getAllUsersDataSchema = new mongoose.Schema({
  userRefID: [],
  userName: [],
  divisionId: [{}],
  divisionName: [{}],
  emailId: [{}],
});

// Bind the model to a declared constant before exporting it. The previous
// chained assignment (`module.exports = getAllUsers = ...`) created an
// implicit global, which throws a ReferenceError in strict mode.
const getAllUsers = mongoose.model('getAllUsers', getAllUsersDataSchema);
module.exports = getAllUsers;
**API Call where i am capturing external API response:**
const express = require('express');
const router = express.Router();
const request = require('request');
const config = require('../config');
const fs = require('fs');
const getAllUsers = require ('../db/getAllUsersListmodel');
var mongoose = require('mongoose');
mongoose.connect ('mongodb://localhost/testdb',{ useUnifiedTopology: true , useNewUrlParser: true });
// GET / : reads a bearer token from ../teamlist.txt, requests the users list
// from the PureCloud API with it, sends the parsed response back to the client
// and stores selected fields through getAllUsers.create.
router.get('/', (req, res) => {
// the token is read synchronously from a file on every request
var data =fs.readFileSync('../teamlist.txt', {encoding:'utf8'} )
console.log(data);
if (data) {
request(
{
method: 'GET',
url: 'https://api.mypurecloud.com/api/v2/users',
headers: {
'Authorization': 'Bearer ' + data
}
},
// callback
(error, response, body) => {
// `error` is not checked before the body is parsed
let userInfoResponse = JSON.parse(body);
res.send(userInfoResponse);
console.log(userInfoResponse.entities.length)
console.log(userInfoResponse.entities[0].division.id)
// NOTE(review): `entities` is indexed as an array just above
// (entities[0]), but the properties below are read directly off
// `entities` itself rather than off its elements — confirm intent.
getAllUsers.create({
userRefID : userInfoResponse.entities.id,
userName: userInfoResponse.entities.name,
divisionId: userInfoResponse.entities.division.id,
divisionName:userInfoResponse.entities.division.name,
emailId:userInfoResponse.entities.primaryContactInfo.address
}, (error,post)=>{
// logs both the error (if any) and the created document
console.log(error,post);
});
}
);
}
// no token -> reply with a notice instead of calling the API
else {
res.send("Token Not Present - Kindly login in back");
}
//console.log(req.session.token);
});
The data is saved in the DB, but none of the array data is saved in it.
{
"_id" : ObjectId("5fd998d61439a434983702cd"),
"userRefID" : [ ],
"userName" : [ ],
"__v" : 0
}
This is the exact API JSON response that I am trying to save in the DB and use for future reference:
{
"entities": [
{
"id": "07f426ff-506f-4e5e-afdb-2c7397edac61",
"name": "EPS Purecloud Support",
"division": {
"id": "36852a81-ad7f-4c71-a1cd-7f431c05179f",
"name": "",
"selfUri": "/api/v2/authorization/divisions/36852a81-ad7f-4c71-a1cd-7f431c05179f"
},
"chat": {
"jabberId": "5dcc25e1db8c7e19238a287d#cognizant3.orgspan.com"
},
"email": "eps#genesys.com",
"primaryContactInfo": [
{
"address": "eps#genesys.com",
"mediaType": "EMAIL",
"type": "PRIMARY"
}
],
"addresses": [],
"state": "active",
"username": "eps#genesys.com",
"version": 3,
"acdAutoAnswer": false,
"selfUri": "/api/v2/users/07f426ff-506f-4e5e-afdb-2c7397edac61"
},
{
"id": "c5ce06dc-6265-4d16-be18-f5fc5a918295",
"name": "Generic",
"division": {
"id": "36852a81-ad7f-4c71-a1cd-7f431c05179f",
"name": "",
"selfUri": "/api/v2/authorization/divisions/36852a81-ad7f-4c71-a1cd-7f431c05179f"
},
"chat": {
"jabberId": "5ebab3dba6686314f6913b98#cognizant3.orgspan.com"
},
"email": "integration-generic-a03293c0-945d-11ea-a64c-ebeb45b9d295#webhook.com",
"primaryContactInfo": [
{
"address": "integration-generic-a03293c0-945d-11ea-a64c-ebeb45b9d295#webhook.com",
"mediaType": "EMAIL",
"type": "PRIMARY"
}
],
"addresses": [],
"state": "active",
"username": "integration-generic-a03293c0-945d-11ea-a64c-ebeb45b9d295#webhook.com",
"version": 2,
"acdAutoAnswer": false,
"selfUri": "/api/v2/users/c5ce06dc-6265-4d16-be18-f5fc5a918295"
},
{
/** 3rd User *********/
}
{
/** 4th User *********/
}
],
"pageSize": 25,
"pageNumber": 1,
"total": 7,
"firstUri": "/api/v2/users?pageSize=25&pageNumber=1",
"selfUri": "/api/v2/users?pageSize=25&pageNumber=1",
"lastUri": "/api/v2/users?pageSize=25&pageNumber=1",
"pageCount": 1
}
Entities is an array of objects, but you are trying to refer to its properties as an object:
userRefID : userInfoResponse.entities.id, // but "entities": [ {"id":"...", "name":"..."} ],
userName: userInfoResponse.entities.name,
You have to collect the data in a loop, and only then insert it into the database:
// Build one document per user from the `entities` array of the API response.
const usersArray = userInfoResponse.entities.map(el => {
  // primaryContactInfo is an array; fall back to an empty object when it is
  // missing or empty so one user without contact info does not abort the
  // whole import with a TypeError.
  const primaryContact = (el.primaryContactInfo || [])[0] || {};
  return {
    userRefID: el.id,
    userName: el.name,
    divisionId: el.division.id,
    divisionName: el.division.name,
    emailId: primaryContact.address
  };
});
// insertMany returns a promise when no callback is passed; log failures
// instead of leaving the rejection unhandled.
getAllUsers.insertMany(usersArray)
  .catch(err => console.error(err));

How to build a search endpoint in a API to find and filter results from a database

In my Node API and MongoDB, I'm trying to build an endpoint to search for data in the DB and get back the results to the client. My search goal is to show results from the Profile collection and in that way, I can build my queries to search by first name, surname, company and the combination of it as an example:
GET search?fn=joe or ?ln=doe or ?cp=Company or ?fn=...&ln=...&cp=...
Practically I can search in different ways and I can get for example all the people working for a company as a result of a search.
I would like to understand how I can achieve that with Mongoose/MongoDB, and also how to add an optional limit/pagination to the query for the returned results.
I made some simple attempts, but I got stuck because I do not really understand how to proceed.
// Search controller: looks up profiles by first name (fn), surname (ln)
// and/or company (cp), all taken from the request's query string.
const SearchController = {
// Responds 200 with the matching profiles, or 500 with the error message
// when the lookup throws.
async getQuery(req, res) {
try {
const { fn, ln, cp } = req.query;
// All three conditions are always part of the $or, including those whose
// query parameter was not supplied (their value is then undefined).
const searchResult = await Profile.find({
$or: [
{ firstname: fn },
{ surname: ln },
{
// NOTE(review): `experience` is an array of objects in the sample
// profile shown in this thread; the revised code further down uses
// the "experience.company" path instead — confirm this form
// matches anything.
experience: {
company: cp
}
}
]
});
res.status(200).json(searchResult);
} catch (err) {
res.status(500).json({ message: err.message });
}
}
};
The JSON of a profile:
{
"imageUrl": "https://i.pravatar.cc/300",
"posts": [
"5e3cacb751f4675e099cd043",
"5e3cacbf51f4675e099cd045",
"5e3cacc551f4675e099cd046"
],
"_id": "5e2c98fc3d785252ce5b5693",
"firstname": "Jakos",
"surname": "Lemi",
"email": "lemi#email.com",
"bio": "My bio bio",
"title": "Senior IT developer",
"area": "Copenhagen",
"username": "Jakos",
"experience": [
{
"image": "https://via.placeholder.com/150",
"createdAt": "2020-02-04T13:47:37.167Z",
"updatedAt": "2020-02-04T13:47:37.167Z",
"_id": "5e3975f95fbeec9095ff3d2f",
"role": "Developer",
"company": "Google",
"startDate": "2018-11-09T23:00:00.000Z",
"endDate": "2019-01-05T23:00:00.000Z",
"area": "Copenhagen"
},
{
"image": "https://via.placeholder.com/150",
"createdAt": "2020-02-04T13:59:27.412Z",
"updatedAt": "2020-02-04T13:59:27.412Z",
"_id": "5e3978bf5e399698e20c56d4",
"role": "Developer",
"company": "IBM",
"startDate": "2018-11-09T23:00:00.000Z",
"endDate": "2019-01-05T23:00:00.000Z",
"area": "Copenhagen"
},
{
"image": "https://via.placeholder.com/150",
"createdAt": "2020-02-07T16:35:43.754Z",
"updatedAt": "2020-02-07T16:35:43.754Z",
"_id": "5e3d91dfb3a7610ec6ad8ee3",
"role": "Developer",
"company": "IBM",
"startDate": "2018-11-10T00:00:00.000Z",
"endDate": "2019-01-06T00:00:00.000Z",
"area": "Copenhagen"
}
],
"createdAt": "2020-01-25T19:37:32.727Z",
"updatedAt": "2020-02-04T23:14:37.122Z",
"__v": 0
}
The expected result is that, for example, if I search for the first name Joe, I should get back all the profiles whose first name is Joe. The same applies to surname and company.
Please leave a comment if you need to see more of the original code.
EDITED: added the modified search code
// Models
const { Profile } = require("../models");
// Error handling
const { ErrorHandlers } = require("../utilities");
const SearchController = {
  /**
   * Search profiles by first name (`fn`), surname (`ln`) and/or company
   * (`cp`), taken from the query string. A profile matches when ANY of the
   * supplied criteria matches.
   *
   * Responses:
   *   200 - JSON array of matching profiles
   *   400 - no search criterion was supplied
   *   404 - the search produced no result
   *   500 - the lookup itself failed
   *
   * @param {object} req - request; reads `req.query.fn`, `.ln`, `.cp`
   * @param {object} res - response; uses `res.status(...).json(...)`
   */
  async getQuery(req, res) {
    try {
      const { fn, ln, cp } = req.query;

      // Only the criteria that were actually supplied take part in the $or.
      const conditions = [];
      if (fn) conditions.push({ firstname: fn });
      if (ln) conditions.push({ surname: ln });
      if (cp) conditions.push({ "experience.company": cp });

      // MongoDB rejects an empty $or array, so answer explicitly instead of
      // letting the query fail with a 500.
      if (conditions.length === 0) {
        return res
          .status(400)
          .json({ message: "Provide at least one of fn, ln or cp" });
      }

      // Await the query so the documents (not the query object) are sent.
      const searchResult = await Profile.find({ $or: conditions });

      // Reply 404 directly: throwing here would be caught below and turned
      // into a 500.
      if (searchResult.length === 0) {
        return res
          .status(404)
          .json({ message: "Query do not provided any result" });
      }

      res.status(200).json(searchResult);
    } catch (err) {
      res.status(500).json({ message: err.message });
    }
  }
};
module.exports = SearchController;
I tried a conditional query and modified your array search query so that it finds the company:
/**
 * Look up profiles matching ANY of the supplied criteria and log the result.
 * Falsy arguments (e.g. "") are ignored.
 *
 * @param {string} fn - first name to match against `firstname`
 * @param {string} ln - surname to match against `surname`
 * @param {string} cp - company to match against `experience.company`
 */
function findUser(fn, ln, cp) {
  const conditions = [];
  if (fn) conditions.push({ firstname: fn });
  if (ln) conditions.push({ surname: ln });
  // Dot notation reaches into the elements of the `experience` array.
  if (cp) conditions.push({ "experience.company": cp });

  // MongoDB rejects an empty $or array, so skip the query when no criterion
  // was supplied.
  if (conditions.length === 0) {
    console.error("findUser: no search criteria supplied");
    return;
  }

  Profile.find({ $or: conditions }, function (err, docs) {
    if (err) {
      console.error(err);
    } else {
      console.log(docs);
    }
  });
}
findUser("","","IBM")

Multiple records for nested/embedded schema is not getting inserted in mongodb, nodejs

Hi, I am new to Node.js and MongoDB. I have a JSON file with the structure below.
I have defined a shipment schema in which the "comments" section is a nested schema.
{
"buyerId": "B58",
"sellerId": "SL8",
"comments": {
"title": "title5",
"body": "body5",
"date": "12-07-2017"
}
}
I have defined one function like below
// POST handler: builds a shipment from the request body, appends one comment
// to it and saves it.
exports.post = function(req, res) {
// NOTE(review): this local array is never used below.
const comments = []
var s = new shipment();
s.sellerId = req.body.sellerId;
s.buyerId = req.body.buyerId;
s.poId = req.body.poId;
// Reads title/body/date directly off req.body.comments, i.e. treats it as a
// single object; exactly one entry is pushed regardless of the input shape.
s.comments.push({
title: req.body.comments.title,
body: req.body.comments.body,
date: req.body.comments.date
});
s.save(function(err) {
if (err) {
// There is no return after this send, so execution continues and the
// success response below is sent as well.
res.send(err);
}
console.log("added");
res.send({
message: 'shipment Created !'
})
})
}
The above 'post' function works properly when I have only one "comments" section; that is, the data gets properly
inserted into MongoDB, as shown below:
{
"_id": ObjectId("59689bc59058dbc812000002"),
"buyerId": "B58",
"sellerId": "SL8",
"comments": [{
"title": "title5",
"body": "body5",
"date": ISODate("2017-12-06T18:30:00Z"),
"_id": ObjectId("59689bc59058dbc812000003")
}],
"__v": 0
}
but when I have multiple "comments" section as shown below,
{
"buyerId": "B58",
"sellerId": "SL8",
"comments": [{
"title": "title5",
"body": "body5",
"date": "12-07-2017"
},
{
"title": "title8",
"body": "body7",
"date": "12-07-2017"
}
]
}
then no comments section gets inserted into the mongodb as shown below.
{
"_id": ObjectId("5968c04d4c02336800000002"),
"buyerId": "B57",
"sellerId": "SL7",
"comments": [{
"_id": ObjectId("5968c04d4c02336800000003")
}],
"__v": 0
}
What changes should I make to the function so that all the comments sections are inserted into MongoDB properly?
Instead of assigning the value of every property individually, create the instance and pass the request body directly into it.
const s = new shipment(req.body)
And then when you send data to the request, send in the following format
{ "buyerId": "B58", "sellerId": "SL8", "comments": [{ "title": "title5", "body": "body5", "date": "12-07-2017" }, { "title": "title8", "body": "body7", "date": "12-07-2017" } ] }
I tried like below and it worked.
// Append every comment of the request body to the shipment, copying only the
// title, body and date fields of each one.
var position = 0;
while (position < req.body.comments.length) {
  var entry = req.body.comments[position];
  s.comments.push({
    title: entry.title,
    body: entry.body,
    date: entry.date
  });
  position += 1;
}
Originally comments is an object; in the second example comments is an array.
Your function call
s.comments.push({
title: req.body.comments.title,
body: req.body.comments.body,
date: req.body.comments.date
})
will only work if comments is an object. Put it in a for loop to make it work with arrays, like so:
// Accept either a single comment object or an array of comments:
// [].concat wraps a lone object in an array and copies an existing array,
// and a missing `comments` field yields an empty list.
var incomingComments = [].concat(req.body.comments || []);
for (var i = 0; i < incomingComments.length; i++) {
  // Copy only the fields of a comment.
  s.comments.push({
    title: incomingComments[i].title,
    body: incomingComments[i].body,
    date: incomingComments[i].date
  })
}

Facebook messenger platform: generic template with quick replies

I was looking at some pretty popular bots like "The Guardian" and I noticed that whenever you get a generic template reply from it, it also displays some quick reply buttons (see the attached photo). How did "The Guardian" bot achieve this? How did it combine quick replies and a generic template? There must be two messages involved.
This worked for me in Dialogflow: return a similar JSON object from the backend to achieve the result:
{
"facebook": {
"attachment":{
"type":"template",
"payload":{
"template_type":"generic",
"elements":[
{
"title":"Welcome!",
"image_url":"https://petersfancybrownhats.com/company_image.png",
"subtitle":"We have the right hat for everyone.",
"default_action": {
"type": "web_url",
"url": "https://petersfancybrownhats.com/view?item=103",
"webview_height_ratio": "tall"
},
"buttons":[
{
"type":"web_url",
"url":"https://petersfancybrownhats.com",
"title":"View Website"
},{
"type":"postback",
"title":"Start Chatting",
"payload":"DEVELOPER_DEFINED_PAYLOAD"
}
]
}
]
}
},
"quick_replies":[
{
"content_type":"text",
"title":"Search",
"payload":"<POSTBACK_PAYLOAD>",
"image_url":"http://example.com/img/red.png"
},
{
"content_type":"location"
}
]
}
}
Quick replies are usually accompanied by a 'text' property that sends a text message before the quick reply. It appears you can substitute any template for that. For example, here is the request body for a generic template carousel with quick replies:
{
"recipient":{
"id":"{{PSID}}"
},
"messaging_type": "response",
"message":{
"quick_replies": [
{
"content_type":"text",
"title":"Quick Reply 1",
"image_url":"https://raw.githubusercontent.com/fbsamples/messenger-platform-samples/master/images/Messenger_Icon.png",
"payload":"payload1"
},
{
"content_type":"text",
"title":"Quick Reply 2",
"payload":"payload2"
}
],
"attachment":{
"type":"template",
"payload":{
"template_type":"generic",
"elements":[
{
"title":"This is a generic template",
"subtitle":"Plus a subtitle!",
"image_url":"https://raw.githubusercontent.com/fbsamples/messenger-platform-samples/master/images/Messenger_Icon.png",
"buttons":[
{
"type":"postback",
"title":"Postback Button",
"payload":"<POSTBACK_PAYLOAD>"
}
]
},
{
"title":"Another generic template",
"subtitle":"Plus a subtitle!",
"image_url":"https://raw.githubusercontent.com/fbsamples/messenger-platform-samples/master/images/Messenger_Icon.png",
"buttons":[
{
"type":"postback",
"title":"Postback Button",
"payload":"<POSTBACK_PAYLOAD>"
}
]
},
{
"title":"And another!",
"subtitle":"Plus a subtitle!",
"image_url":"https://raw.githubusercontent.com/fbsamples/messenger-platform-samples/master/images/Messenger_Icon.png",
"buttons":[
{
"type":"postback",
"title":"Postback Button",
"payload":"<POSTBACK_PAYLOAD>"
}
]
}
]
}
}
}
}
I have implemented the bot in nodejs and I am using a node module called messenger-bot which makes it easier to call the messenger bot API. Here's my customized code for you
const http = require('http')
const https = require('https')
const Bot = require('messenger-bot')
var bot = new Bot({
  token: 'your FB app token',
  verify: 'VERIFY_TOKEN'
})

// Follow-up message carrying the quick-reply buttons.
var quick_list = {
  "text": "Check the next article?",
  "quick_replies": [{
      "content_type": "text",
      "title": "More stories",
      "payload": "more stories"
    },
    {
      "content_type": "text",
      "title": "Sport",
      "payload": "sport"
    },
    {
      "content_type": "text",
      "title": "Business",
      "payload": "business"
    }
  ]
};

// Generic template message. Fill `elements` with your own cards; see
// https://developers.facebook.com/docs/messenger-platform/send-api-reference/generic-template
// The object passed to bot.sendMessage is the message itself, so it is not
// wrapped in an extra "message" key.
var generic_temp = {
  "attachment": {
    "type": "template",
    "payload": {
      "template_type": "generic",
      "elements": [
        // --- your elements ---
      ]
    }
  }
};

// Sends the quick replies to the given user.
function getQuickReplies(recipientId) {
  console.log("in next function");
  bot.sendMessage(recipientId, quick_list, (err) => { // this prints quick replies
    if (err) {
      console.error(err);
      return;
    }
    console.log("sending message");
  });
}

bot.on('postback', (payload, reply) => {
  var postback = payload.postback.payload;
  if (postback === "yes") {
    var recipientId = payload.sender.id;
    bot.sendMessage(recipientId, generic_temp, (err) => { // this prints generic template
      if (err) {
        console.error(err);
        return;
      }
      console.log("sending message");
      // Send the quick replies from inside the callback so they are only
      // requested once the generic template send has completed.
      getQuickReplies(recipientId);
    });
  }
});
references - Generic template, Quick replies, messenger-bot npm
Hope this helps! Happy coding ;)
NEW UPDATE
{
"facebook": {
"attachment":{
"type":"template",
"payload":{
"template_type":"generic",
"elements":[
{
"title":"Welcome!",
"image_url":"https://petersfancybrownhats.com/company_image.png",
"subtitle":"We have the right hat for everyone.",
"default_action": {
"type": "web_url",
"url": "https://petersfancybrownhats.com/view?item=103",
"webview_height_ratio": "tall"
},
"buttons":[
{
"type":"web_url",
"url":"https://petersfancybrownhats.com",
"title":"View Website"
},{
"type":"postback",
"title":"Start Chatting",
"payload":"DEVELOPER_DEFINED_PAYLOAD"
}
]
}
]
}
},
"quick_replies":[
{
"content_type":"text",
"title":"Red",
"payload":"<POSTBACK_PAYLOAD>",
"image_url":"http://example.com/img/red.png"
},{
"content_type":"text",
"title":"Green",
"payload":"<POSTBACK_PAYLOAD>",
"image_url":"http://example.com/img/green.png"
}
]
}
}

how to display the api data using jsreport studio

Html Code:
<h3>Hello Welcome</h3>
<div>{{jsonData}}</div>
Script Code:
/**
 * jsreport beforeRender hook: fetches the sample weather JSON and exposes it
 * to the template as `jsonData`.
 *
 * @param {object} req - render request; `req.data` is replaced with
 *   `{ jsonData: <response body> }`
 * @param {object} res - render response (unused)
 * @param {function} done - completion callback; receives the error when the
 *   HTTP request fails
 */
function beforeRender(req, res, done) {
  require('request')({
    // The query string must not contain whitespace: the stray space after
    // "?" is removed.
    url: 'http://samples.openweathermap.org/data/2.5/weather?lat=35&lon=139&appid=b1b15e88fa797225412429c1c50c122a1',
    json: true,
    method: 'GET'
  }, function (err, response, body) {
    // Report a failed request instead of rendering with an undefined body.
    if (err) {
      return done(err);
    }
    console.log(JSON.stringify(body));
    req.data = { jsonData: body };
    done();
  });
}
The api is returns following Json Data:
{
"coord": {
"lon": 139.01,
"lat": 35.02
},
"weather": [
{
"id": 800,
"main": "Clear",
"description": "clear sky",
"icon": "01n"
}
],
"base": "stations",
"main": {
"temp": 285.514,
"pressure": 1013.75,
"humidity": 100,
"temp_min": 285.514,
"temp_max": 285.514,
"sea_level": 1023.22,
"grnd_level": 1013.75
},
"wind": {
"speed": 5.52,
"deg": 311
},
"clouds": {
"all": 0
},
"dt": 1485792967,
"sys": {
"message": 0.0025,
"country": "JP",
"sunrise": 1485726240,
"sunset": 1485763863
},
"id": 1907296,
"name": "Tawarano",
"cod": 200
}
This is the jsreport generation code. I am now trying to get the data through the REST API. I don't understand how the API data gets printed in the console; I need help iterating over the JSON data shown above with jsrender and displaying it in the console.
The output of every jsreport HTTP/AJAX call is available in the script section, in the following object:
"jsreport.data"
This object holds all of your data. Just add the following line to the script to print it:
console.log("data is available are :",jsreport.data);

Resources