I am looking to scrape data from specific URLs and updating an existing data in MongoDB. I am also using CRON (not sure if that has any affect on the code). Below is the code:
module.exports = new CronJob('0 */59 * * * *', function() {
nightmare
.goto('http://www.xyzxyzxyz.com/')
.wait(2000)
.evaluate(function() {
var itemS = [];
var itemList = document.querySelectorAll('.item-list');
for(var i = 0; i < item-list.length; i++) {
var item = {};
item['name'] = itemList[i].querySelector('.item-title').innerHTML;
item['image'] = itemList[i].querySelector('.item-image').src;
itemS.push(item);
}
return itemS;
})
//.end() -- removed because CRON does not restart
.then(function(result) {
var id = mongoose.Types.ObjectId("589b8d8fe860591071d4e2c6");
mongooseModel.findByIdAndUpdate(id, {$set: {key: result}}, function(err, res) {
if(err) {
console.log(err);
} else {
console.log('success!');
}
});
console.log(result);
})
.catch(function(error) {
console.error('Search failed:', error);
});
}, null, true);
the results are console.log(ged) (console.log(result)) but the console.log("success!") callback does not execute.
Also, unrelated, when I pull images from some sites, I'm getting Data URLs instead of the img url. I tried putting the data URL directly into a tag, but the image shows up blank -- I also tried decoding the image, using online resources, but the image still does not render. any possible solutions for this?
Thanks so much, in advance!
Related
I am new to Javascript and am just getting familiar with promises so forgive my ignorance.
What I'm trying to do is request all the records from an Airtable base and filter them them based on a checkbox called "Email Sent" being unsent. When I have the records filtered there are linked records in 2 fields that I need to do additional requests for to get the values for. All of those values (various data, and recipients as an object) then are plugged into an HTML email template and fired off using AWS-SES. Then the "Email Sent" check box is checked for those records to prevent emails from being sent multiple times. The plan is to have this all running on an interval in AWS Lambda. And as records are added to the base they are automatically emailed to a list.
I am able to get the records, and filter them. I am also comfortable with executing the code within Lambda and using SES. But I have been struggling for days to get the values for the linked records. Both of which are lists of people and 1 of which is a recipient email address.
The numerous things I've tried end up either returning the original record called in the first step or returns undefined. I'm chaining a lot of promises and I don't think I'm doing it right because it seems they are running out of order. I'm also at the point of trying weird things like using a global array instead of an object that I'm passing down through the promises and updating.
Any help is appreciated. Thank you in advance.
var Airtable = require('airtable');
var base = new Airtable({apiKey: 'xxxxxxxxxx'}).base('xxxxxxxxxx');
var nodemailer = require("nodemailer")
var handlebars = require('handlebars');
const path = require('path');
var AWS = require("aws-sdk")
var ses = new AWS.SES();
var fs = require('fs');
var mainArray = [];
var readHTMLFile = function(path, callback) {
fs.readFile(path, {encoding: 'utf-8'}, function (err, html) {
if (err) {
throw err;
callback(err);
}
else {
callback(null, html);
}
});
};
function getRecords(){
return new Promise(function(resolve, reject) {
var reqArr = [];
base('Edit Requests').select({
// Selecting the first 3 records in Grid view:
maxRecords: 50,
view: "Grid view"
}).eachPage(function page(records, fetchNextPage) {
// This function (`page`) will get called for each page of records.
records.forEach(function(record) {
// console.log("108: ", record._rawJson.id)
var obj = {}
// obj = record.fields;
// obj.id = record._rawJson.id;
console.log("172", record.fields["Editor Preference"])
obj = record.fields;
obj.id = record._rawJson.id;
if(record.fields["Editor Preference"] != undefined){
obj["Editor Preference"] = obj["Editor Preference"]
// obj["Editor Preference"] = getEditorWrap(record.fields["Editor Preference"])
} else {
obj["Editor Preference"] = "";
}
if(record.fields["Production Manager"] != undefined){
obj["Production Manager"] = obj["Production Manager"]
} else {
obj["Production Manager"] = "";
}
mainArray.push(obj)
// console.log(record.fields["Email"])
// console.log('Retrieved', record.fields['Requested By']);
})
// To fetch the next page of records, call `fetchNextPage`.
// If there are more records, `page` will get called again.
// If there are no more records, `done` will get called.
fetchNextPage();
// console.log("123", reqArr)
// resolve(reqArr)
}, function done(err) {
if (err) { console.error(err); return; }
// console.log("123", mainArray)
resolve(mainArray)
});
// resolve(reqArr)
});
}
function filterRecords(arr){
return new Promise(function(resolve, reject) {
var filtered = []
mainArray = [];
// console.log("245", arr)
for (i in arr){
if(arr[i]['Email Sent'] == undefined){
// console.log("247", arr[i])
mainArray.push(arr[i])
};
}
console.log("filtered: ", mainArray)
resolve(mainArray)
});
}
function setObject(array){
return new Promise(function(resolve, reject) {
for (i in array){
var obj = array[i];
if(obj.id != undefined){
base('Editors').find(obj.id, function(err, record) {
if (err) { console.error(err); return; }
// console.log("281", record.fields);
});
}
}
resolve(mainArray)
});
}
function main1(){
return new Promise(function(resolve, reject) {
getRecords().
then(function(recordArr){
filterRecords(mainArray).
then(function(resultArr){
setObject(mainArray).
then(function(mainArray){
})
})
})
});
}
main1().
then(function(resultArray){
console.log(resultArray)
})
I'm new to node.js and currently working on a project using keystonejs cms and MongoDB. Now I'm stuck in getting data related to multiple collections. Because of this callback functions, I couldn't return an array with relational data. My code something similar to this sample code.
var getAgenda = function(id, callback){
callback = callback || function(){};
if(id){
AgendaDay.model.find({summit:id}).exec(function (err, results3) {
var arr_agenda = [];
var arr_agenda_item = [];
for(var key3 in results3){
AgendaItem.model.find({agendaDay:results3[key3]._id}).exec(function (err, results2){
for(var key2 in results2){
arr_agenda_item.push(
{
item_id: results2[key2]._id,
item_name: results2[key2].name,
from_time: results2[key2].time_from,
to_time: results2[key2].time_to,
desc: results2[key2].description,
fatured: results2[key2].featured,
}
);
}
arr_agenda.push(
{
name: results3[key3].name,
date: results3[key3].date,
description: results3[key3].description,
item_list:arr_agenda_item
}
);
return callback(arr_agenda);
});
}
});
}
}
exports.list = function (req, res) {
var mainarray = [];
Summit.model.find().exec(function (err, resultssummit) {
if (err) return res.json({ err: err });
if (!resultssummit) return res.json('not found');
Guest.model.find().exec(function (err, resultsguset) {
for(var key in resultssummit){
var agen_arr = [];
for(var i=0; i<resultssummit[key].guests.length; i++){
var sumid = resultssummit[key]._id;
//this is the function im trying get data and assign to mainarray
getAgenda(sumid, function(arr_agenda){
agen_arr = arr_agenda;
});
mainarray.push(
{
id: resultssummit[key]._id,
name: resultssummit[key].name,
agenda_data: agen_arr,
}
);
}
res.json({
summit: mainarray,
});
}
});
}
}
If anyone can help me out, that would be really great :)
You need to restructure this whole thing. You should not be calling mongo queries in a for loop and expecting their output at the end of the loop. Also, your response is in a for loop. That won't work.
I'll tell you how to do it. I cannot refactor all of that code for you.
Instead of putting mongodb queries in a for loop, you need to convert it in a single query. Just put the _ids in a single array and fire a single query.
AgendaItem.model.find({agendaDay:{$in:ARRAY_OF_IDS}})
You need to do the same thing for AgendaDay.model.find({summit:id}) as well.
I want to use an module to get and process data from my MongoDB database. (It should generate an object that represents my Express.js site's navbar)
I thought of doing something like this:
var nav = { Home: "/" };
module.exports = function() {
MongoClient.connect(process.env.MONGO_URL, function(err, db) {
assert.equal(err, null);
fetchData(db, function(articles, categories) {
combine(articles, categories, function(sitemap) {
// I got the data. What now?
console.log("NAV: ", nav);
})
});
});
};
var fetchData = function(db, callback) {
db.collection('articles').find({}).toArray(function(err, result) {
assert.equal(err);
articles = result;
db.collection('categories').find({}).toArray(function(err, result) {
assert.equal(err);
categories = result;
db.close();
callback(articles, categories);
});
});
};
var combine = function(articles, categories, callback) {
categories.forEach(function(category) {
nav[category.title] = {};
articles.forEach(function(article) {
if(article.category == category.name) {
nav[category.title][article.title] = "link";
}
})
});
callback(nav);
};
As of line 6, I do have all data I need.
(An object, currenty like { Home: '/', Uncategorized: { 'Hello world!': 'link' } })
But since I'm in an anonymous function, I don't know how to return that value. I mean, return would just return it the function that called it... And in the end, MongoClient.connect would receive my data.
If I set a variable instead, it would be set as module.exports returned before Node can even query the data from the database, right?
What can I do in order to make this work?
It should result in some kind of function, like
var nav = require('nav');
console.log(nav());
Thanks in advance!
Add another callback:
var nav = { Home: "/" };
module.exports = function(cb) {
MongoClient.connect(process.env.MONGO_URL, function(err, db) {
assert.equal(err, null);
fetchData(db, function(articles, categories) {
combine(articles, categories, function(sitemap) {
cb(sitemap);
})
});
})
});
And then use this way:
var nav = require('nav');
nav(function(sitemap){ console.log(sitemap); });
You can use mongoose module or monk module. These modules have been tested properly .
Just use
npm install mongoose or monk
The suggestion about mongoose is great and you can look into it, however I think you've already done the job with the fetching of the data from the db. You just need to access it in your main node flow.
You can try this:
module.exports.generateNav = function() {
MongoClient.connect(process.env.MONGO_URL, function(err, db) {
assert.equal(err, null);
var output = fetchData(db, function(articles, categories) {
combine(articles, categories, function(sitemap) {
})
});
return (output);
});
};
And then in your main application you can call it in the following way:
var nav = require('nav');
navigation = nav.generateNav();
console.log(navigation);
Check below algorithm...
users = getAllUsers();
for(i=0;i<users.length;i++)
{
contacts = getContactsOfUser(users[i].userId);
contactslength = contacts.length;
for(j=o;j<contactsLength;j++)
{
phones = getPhonesOfContacts(contacts[j].contactId);
contacts[j].phones = phones;
}
users[i].contacts = contacts;
}
return users;
I want to develop such same logic using node.js.
I have tried using async with foreach and concat and foreachseries functions. But all fail in the second level.
While pointer is getting contacts of one user, a value of i increases and the process is getting started for next users.
It is not waiting for the process of getting contacts & phones to complete for one user. and only after that starting the next user. I want to achieve this.
Actually, I want to get the users to object with proper
Means all the sequences are getting ruined, can anyone give me general idea how can I achieve such a series process. I am open to change my algorithm also.
In node.js you need to use asynchronous way. Your code should look something like:
var processUsesrs = function(callback) {
getAllUsers(function(err, users) {
async.forEach(users, function(user, callback) {
getContactsOfUser(users.userId, function(err, contacts) {
async.forEach(contacts, function(contact, callback) {
getPhonesOfContacts(contacts.contactId, function(err, phones) {
contact.phones = phones;
callback();
});
}, function(err) {
// All contacts are processed
user.contacts = contacts;
callback();
});
});
}, function(err) {
// All users are processed
// Here the finished result
callback(undefined, users);
});
});
};
processUsers(function(err, users) {
// users here
});
You could try this method without using async:
function getAllUserContacts(users, callback){
var index = 0;
var results = [];
var getUserContacts = function(){
getContactsOfUser(users[index].userId, function(contacts){
var index2 = 0;
var getContactsPhones = function(){
getPhonesOfContacts(contacts[index2].contactId, function(phones){
contacts[index2].phones = phones;
if(index2 === (contacts.length - 1)){
users[index].contacts = contacts;
if(index === (users.length - 1)){
callback(users)
} else {
index++;
getUserContacts();
}
}else{
index2++;
getContactsPhones();
}
});
}
getContactsPhones();
});
}
getUserContacts();
}
//calling the function
getAllUsers(function(users){
getAllUsersWithTheirContacts(users, function(usersWithContacts){
console.log(usersWithContacts);
})
})
//Asynchronous nested loop
async.eachSeries(allContact,function(item, cb){
async.eachSeries(item,function(secondItem,secondCb){
console.log(secondItem);
return secondCb();
}
return cb();
},function(){
console.log('after all process message');
});
In a node.js / Mongoose project, I have a schema which contains references to external image files.
var PageSchema = new Schema({
title: String
, media: {
digest: String
, name: String
}
});
Those files have additional properties which are stored in the file itself: url, width, height, exif fields, etc. Those fields will need to be populated before the model being sent to res.render().
For some fields, things are synchronous and a virtual just does the job:
PageSchema.virtual('media.url').get(function () {
return appPaths.fileUrl(this.media);
});
However, width / height, or exif fields require async calls. I thought of using middleware to populate them, but this does not seem to work:
PageSchema.post('init', function(next) {
var media = this.media;
var fileName = filedb.absoluteFilePath(media);
im.identify(fileName, function(err, features) {
if (err) {
media.width = 0;
media.height = 0;
} else {
media.width = features.width;
media.height = features.height;
}
next();
});
});
What am I doing wrong? Is there a common design pattern for solving this kind of problem? (Other than duplicating this information in the database itself?)
The real problem here is that mongoose currently seems to have a wonky implementation of post callbacks. While pre('init',function(next){ ... }); works as you expect, post('init',function(next){ ... }); does not actually get passed a next function. In fact, the post init callback does not receive any arguments whatsoever when it is called.
As such, I usually write a wrapper for my query callbacks to make a sort of DIY middleware:
var setAsyncVirtuals = function(callback){
return function(err, docs){
if(err) return callback(err);
var i = done = docs.length;
if(i > 0)
while(i--){
(function(i){
var filename = getFilename();
im.identify(filename, function(err, features) {
if (err) {
docs[i].media.width = 0;
docs[i].media.height = 0;
} else {
docs[i].media.width = features.width;
docs[i].media.height = features.height;
}
done--;
if(done <= 0) callback(null, docs);
});
})(i); // bind i to hold value for async call
}
else callback(null, docs);
}
}
then
Page.find({}, setAsyncVirtuals(function(err,docs){
res.send(docs); // these have media.width & media.height assigned
}));