I need to build an application that does these things (in order):
on load:
01- connect to MongoDB 'db'
02- creates a collection 'cas'
03- check if a web page has updates, if yes go to step 04, if not go to step 07
04- do web scraping (using Cheerio) of the web site and get a $ variable like that $ = cheerio.load(body);
05- elaborate this object to get only informations I'm interested in and organize them in a jsons object like this one:
var jsons = [
{year: 2015, country: Germany, value: 51},
{year: 2015, country: Austria, value: 12},
{year: 2016, country: Germany, value: 84},
{year: 2016, country: Bulgaria, value: 104},
...
];
06- insert each of these elements ({year: 2015, country: Germany, value: 51}, ...) in the collection 'cas' of database 'db'
07- download the data (for example in a csv file)
08- create a web page for data visualization of these data using D3.js
09- disconnect from 'db'
If Node.js were synchronous, I could write something like this:
var url = 'http://...';
var jsons = [];
connectDb('db');
createCollection('db', 'cas');
if(checkForUpdates(url)) {
var $ = scrape(url);
jsons = elaborate($);
for(var i = 0; i < jsons.length; i++) {
saveDocumentOnDbIfNotExistsYet('db', 'cas', jsons[i]);
}
}
downloadCollectionToFile('db', 'cas', './output/casData.csv');
createBarChart('./output/casData.csv');
disconnectDb('db');
But Node.js is asynchronous so this code would not work properly.
I've read that I can use Promise to get the code to run in a certain order.
I read the documentation about the Promise and some sites that showed simple tutorials.
The structure of a Promise is:
// some code (A)
var promise = new Promise(function(resolve, reject) {
// some code (B)
});
promise.then(function() {
// some code (C)
});
promise.catch(function() {
// some code (D)
});
// some code (E)
If I understood correctly, in this case the execution (if Node.js were synchronous) would be equivalent to:
// some code (A)
// some code (E)
if(some code (B) not produce errors) {
// some code (C)
}
else {
// some code (D)
}
or (swap between code A and E, because they are asynchronous)
// some code (E)
// some code (A)
if(some code (B) not produce errors) {
// some code (C)
}
else {
// some code (D)
}
So now I wonder what is the right structure for my application.
I thought about:
var cheerio = require('cheerio');
var express = require('express');
var fs = require('fs');
var MongoClient = require('mongodb').MongoClient;
var dbUrl = 'mongodb://localhost:27017/';
var dbName = 'db';
var collectionName = 'cas';
const app = express(); // run using > node app.js
// connect to db
var connect = function(url) {
return new Promise(function(resolve, reject) {
MongoClient.connect(url + dbName, function(err, db) {
if(err) {
reject(err);
}
else {
console.log('Connected');
resolve(db);
}
});
});
}
// create collection
connect.then(function(db) {
db.createCollection(collectionName, function(err, res) {
if(err) {
throw err;
}
else {
console.log('Collection', collectionName, 'created!');
}
});
});
// connection error
connect.catch(function(err) {
console.log('Error during connection...');
throw err;
});
It's right? If yes, how can I proceed with other steps?
I can I improve my code?
EDIT 1
Following the example of Андрей Щербаков, I modified my code in this way:
app.js:
// my files
var db = require('./middlewares/db.js');
var url = 'mongodb://localhost:27017/';
var dbName = 'db';
var collectionName = 'cas';
const start = async function() {
const connect = await db.connectToMongoDb(url, dbName);
const cas = await connect.createYourCollection(collectionName);
const isPageHasUpdates = oneMoreFunction(); // i don't know how you gonna check it
if(isPageHasUpdates) {
await step 4;
await step 5;
await step 6;
}
await step 7
return something; // if you want
}
start()
.then(res => console.log(res)) // here you can use result of your start function if you return something or skip this then
.catch(err => console.log(err)); // do something with your error
middlewares/db.js:
var MongoClient = require('mongodb').MongoClient;
let dbInstance;
var methods = {};
methods.connectToMongoDb = function(url, dbName) {
if(dbInstance) {
return dbInstance;
}
else {
MongoClient.connect(url + dbName, function(err, db) {
if(!err) {
dbInstance = db;
return db;
}
});
}
}
methods.createYourCollection = function(collectionName) {
?.createCollection(collectionName, function(err, res) {
if(err) {
throw err;
}
});
}
module.exports = methods;
But I'm not sure I'm doing well.
How can I separate function in different files? For example I want to put all the function about db in file middlewares/db.js. But I have some problems in line ?.createCollection(collectionName, function(err, res).
If you are running node version 7.6 or higher, better way will be to use async await which works with promises.
So your code will look like
const start = async() => {
const connect = await connectToMongoDb(url);
const cas = await connect.createYourCollection();
const isPageHasUpdates = oneMoreFunction(); // i don't know how you gonna check it
if(isPageHasUpdates) {
await step 4;
await step 5;
await step 6;
}
await step 7
return something; // if you want
}
start()
.then(res => console.log(res)) // here you can use result of your start function if you return something or skip this then
.catch(err => console.log(err)); // do something with your error
Sure any function you are gonna await should be promisified as you did with your connect function( but if you are using https://www.npmjs.com/package/mongodb functions already promisified)
Update
The best way will be to use mongoose, but if you want to work with native mongodb you can write your mongodb like this https://pastebin.com/BHHc0uVN (just an example)
You can expand this example as you want.
You can create function createCollection
const createCollection = (connection, collectionName) => {
return connection.createCollection(collectionName); // actually i'm not sure that this function exists in mongodb driver
}
And usage will be:
const mongodbLib = require('./lib/mongodb'); //path to db.js file
mongodbLib.init()
.then(connection => mongodbLib.createCollection(connection, 'cas'))
.then(() => doSmthElse())
Or if you are sure that init is done(you can do it once before you main script like starting server or whatever you doing)
const mongodbLib = require('./lib/mongodb'); //path to db.js file
const connection = mongodbLib.getConnection();
Or if you want to simple work with collection like in step 6, add your cas collection(like user in example file). But this you can use when your init function is done as well.
So usage will be
const mongodbLib = require('./lib/mongodb');
const cas = mongodbLib.collections.cas;
cas().insertMany(docs)
.then()
.catch()
Related
I'm just starting to use Mongodb without mongoose (to get away from the schemas), and wanted to create a simple module with various exported functions to use in the rest of my app. I've pasted the code below.
The problem I'm having is that the databasesList.databases comes back as undefined, and I'm not sure why. There should be 2 databases on my cluster, and one collection in each database.
As a tangential question, I thought maybe I would check the collections instead (now commented out), but though I found this page (https://docs.mongodb.com/manual/reference/method/db.getCollectionNames/) the function getCollectionNames seems not to exist. Now I'm wondering if I'm using the wrong documentation and that is why my databases are coming back undefined.
const client = new MongoClient(uri)
const connection = client.connect( function (err, database) {
if (err) throw err;
else if (!database) console.log('Unknown error connecting to database');
else {
console.log('Connected to MongoDB database server');
}
});
module.exports = {
getDatabaseList: function() {
console.log('start ' + client);
databasesList = client.db().admin().listDatabases();
//collectionList = client.db().getCollectionNames();
//console.log("Collections: " + collectionList);
console.log("Databases: " + databasesList.databases);
//databasesList.databases.forEach(db => console.log(` - ${db.name}`));
}
}```
your code is correct Just need to change few things.
module.exports = {
getDatabaseList: async function() {
console.log('start ' + client);
databasesList = await client.db().admin().listDatabases();
//collectionList = await client.db().getCollectionNames();
//console.log("Collections: " + collectionList);
console.log("Databases: " + databasesList.databases);
databasesList.databases.forEach(db => console.log(` - ${db.name}`));
}
}
You have to declare async function and use await also.
The async and await keywords enable asynchronous, promise-based behaviour to be written in a cleaner style, avoiding the need to explicitly configure promise chains.
You can use this modular approach to build your database access code:
index.js: Run your database application code, like list database names, collection names and read from a collection.
const connect = require('./database');
const dbFunctions = require('./dbFunctions');
const start = async function() {
const connection = await connect();
console.log('Connected...');
const dbNames = await dbFunctions.getDbNames(connection);
console.log(await dbNames.databases.map(e => e.name));
const colls = await dbFunctions.getCollNames(connection, 'test');
console.log(await colls.map(e => e.name));
console.log(await dbFunctions.getDocs(connection, 'test', 'test'));
};
start();
database.js:: Create a connection object. This connection is used for all your database access code. In general, a single connection creates a connection pool and this can be used throughout a small application
const { MongoClient } = require('mongodb');
const url = 'mongodb://localhost:27017/';
const opts = { useUnifiedTopology: true };
async function connect() {
console.log('Connecting to db server...');
return await MongoClient.connect(url, opts );
}
module.exports = connect;
dbFunctions.js:: Various functions to access database details, collection details and query a specific collection.
module.exports = {
// return list of database names
getDbNames: async function(conn) {
return await conn.db().admin().listDatabases( { nameOnly: true } );
},
// return collections list as an array for a given database
getCollNames: async function(conn, db) {
return await conn.db(db).listCollections().toArray();
},
// return documents as an array for a given database and collection
getDocs: async function(conn, db, coll) {
return await conn.db(db).collection(coll).find().toArray();
}
}
I think the rendering takes place before the searching of the string on the files, i have tried different methods but don't seems to get this working. any help will be appreciated. im a noob on to the nodejs. im trying to get the id of the user and query and get all the data and there after see if he is in any of the lists given and finally render the page.
const j = [];
let name = '';
const filename = [];
var ext = '';
module.exports = function(app, express) {
app.use(bodyParser.urlencoded({ extended: false }));
app.use(bodyParser.json());
app.post('/cusdetails', isLoggedIn, function (req, res) {
var cusid=req.body.cusid;
var insertQuerys = "SELECT * FROM customer WHERE cusid=? ORDER BY rowid DESC LIMIT 1";
connection.query(insertQuerys,[cusid],
function(err, rows){
rows.forEach( (row) => {
name=row.fncus;
});
fs.readdir('./views/iplist', function(err, files) {
if (err)
throw err;
for (var index in files) {
j.push(files[index])
}
j.forEach(function(value) {
var k = require('path').resolve(__dirname, '../views/iplist/',value);
fs.exists(k, function(fileok){
if(fileok) {
fs.readFile(k, function(err, content) {
if (err) throw err;
if (content.indexOf(name) > -1) {
ext = path.extname(k);
filename.push(path.basename(k, ext));
}
});
}
else {
console.log(" FileNotExist ");
}
});
});
});
console.log(filename);
res.render('cusdetails.ejs', {rows: rows, user:req.user , aml: filename });
});
})
You can create simple Promise wrapper and then use it inside async/await function to pause execution until resolved.
// use mysql2 package as it provides promise, less work to write promise wrappers
const mysql = require('mysql2/promise');
// create the connection to database
const connection = mysql.createConnection({
host: 'localhost',
user: 'root',
database: 'test'
});
// sample wrapper
function some(k) {
// more advisable to have local variables, why do you need this to be array?
var filename = [];
return new Promise((resolve, reject) => {
// doing this is also not recommended check nodejs documentation **fs.exists** for more info
fs.exists(k, function(fileok){
if(fileok) {
fs.readFile(k, function(err, content) {
if (err) reject(err);
if (content.indexOf(name) > -1) {
ext = path.extname(k);
filename.push(path.basename(k, ext));
resolve(filename)
}
});
}
else {
// reject(new Error("FileNotExist"))
console.log(" FileNotExist ");
}
});
})
}
// note the use of async
app.post('/cusdetails', isLoggedIn, async function (req, res) {
var cusid=req.body.cusid;
var insertQuerys = "SELECT * FROM customer WHERE cusid=? ORDER BY rowid DESC LIMIT 1";
// using await to pause excution, waits till query is finished
const [rows] = await connection.query(insertQuerys,[cusid])
rows.forEach( (row) => {
name=row.fncus;
});
// then you can
var result = await some(k)
...
Note however this way you loose the advantage of concurrent execution, as it's kindoff blocking. If the result of one call is not used in another, you can execute in parallel and await for result to achieve sequencing like
const [rows] = connection.query(insertQuerys,[cusid])
var result = some(k)
console.log(await rows) // do something
console.log(await result) // do something
JavaScript is asynchronous. This means that if you have a function with a callback (i.e. your query), the callback will be called asynchronously, at an unknown time, while the other code executes.
You need to look up some tutorials how to deal with callbacks, to get a proper understanding of it. Another method is using async/await and/or promises.
Basically, if you take the following code:
console.log("this will print first");
setTimeout(function () {
console.log("this will print last");
}, 1000);
console.log("this will print second");
If you run the code above, the top level is executed synchronously, so, it first calls console.log, then it executes setTimeout, which is synchronous. It sets a timeout, then says "I'm ready", and the code continues to the other console.log. After 1 second (1000 milliseconds), the callback in the setTimeout function is executed, and only then that console.log is called. You can not make the rest of the code wait this way, you need to restructure your code or read into promises.
I am trying to write a simple function to grab the id of a specific instance based on matching criteria from mongodb using the official node package 'mongodb'.
My function works as I can console log the data but I am unable to return the data to use it as I intended to do as you can see.
const mongo = require('mongodb').MongoClient;
const url = 'mongodb://localhost:27017';
// Function for finding database id of device based on deviceKey, The database is written into
// the code under the const 'db' as is the collection.
async function fetchId(deviceKey) {
const client = await mongo.connect(url, { useNewUrlParser: true });
const db = client.db('telcos');
const collection = db.collection('device');
try {
await collection.find({"deviceKey": deviceKey}).toArray((err, response) => {
if (err) throw err;
console.log(response[0]._id); // << works logs _id
return response[0]._id; // << does nothing... ?
})
} finally {
client.close();
}
}
// # fetchId() USAGE EXAMPLE
//
// fetchId(112233); < include deviceKey to extract id
//
// returns database id of device with deviceKey 112233
// Run test on fetchId() to see if it works
fetchId("112233")
.then(function(id) {
console.dir(id); // << undefined
})
.catch(function(error) {
console.log(error);
});
Why does my test return undefined but my console.log() inside the function works?
It looks like you're combining callback code with async/await code in an odd way. Your function fetchId isn't returning anything at all, which is why you don't see id after fetching.
try {
const response = await collection.find(...).toArray()
return response[0]._id
}...
If we weren't able to await collection.find(...).toArray() and needed to manually convert this from using callbacks to promises, we'd have to do something like:
function fetchId (id) {
// this function returns a promise
return new Promise((resolve, reject) => {
...
collection.find(...).toArray((err, response) => {
// within the callback, returning values doesn't do anything
if (err) return reject(err);
return resolve(response[0]._id);
})
});
}
You are returning a value but handled like a promise is being returned.Please try this code.I had not tested it.
const mongo = require('mongodb').MongoClient;
const url = 'mongodb://localhost:27017';
// Function for finding database id of device based on deviceKey, The database is written into
// the code under the const 'db' as is the collection.
async function fetchId(deviceKey) {
return new Promise((resolve,reject)=>{
const client = await mongo.connect(url, { useNewUrlParser: true });
const db = client.db('telcos');
const collection = db.collection('device');
try {
await collection.find({"deviceKey": deviceKey}).toArray((err, response) => {
if (err) throw err;
console.log(response[0]._id); // << works logs _id
return resolve(response[0]._id); // << does nothing... ?
})
}
catch(error){
return reject(error);
}
finally {
client.close();
}
});
}
// # fetchId() USAGE EXAMPLE
//
// fetchId(112233); < include deviceKey to extract id
//
// returns database id of device with deviceKey 112233
// Run test on fetchId() to see if it works
fetchId("112233")
.then(function(id) {
console.dir(id); // << undefined
})
.catch(function(error) {
console.log(error);
});
I have tried for more hours than I care to admit to get an openWhisk function to call a postgre sql datbase on Compose.io. Here is my code:
My latest incarnation is this:
function myAction(params) {
return new Promise(function(resolve, reject) {
console.log('Connecting to Compose database');
// console.log('Params ---> ', params);
var mysql = require('promise-mysql');
var fs = require('fs');
var pg = require('pg');
var request = require('request')
var Promise = require('promise/lib/es6-extensions');
var connString = "postgres:xxxx";
pg.connect(connString, function(err, client, done) {
console.log("connectiong..", err, client, done);
if (err) {
console.log('[connectToCompose] failed to fetch client from pool', err);
reject(err);
} else {
params.client = client; params.done = done;
console.log('[connectToCompose] obtained a Compose client');
return(params);
}
})
// params.client.done();
// console.log("closing connectiong");
})
}
exports.main = myAction;
I have a similar example where I connect to a different SQL database (not Compose) and use sql promise not postgre and it works. What am I doing wrong?
To work with OpenWhisk and any database offering, you need to use promisified JavaScript code; the event loop as it's used in ordinary nodejs isn't available. I have an example that uses pg-promise (taken more or less exactly from the project docs) and it works fine for me. Try something like this:
const promise = require('bluebird');
const initOptions = {
promiseLib: promise // overriding the default (ES6 Promise);
};
const pgp = require('pg-promise')(options);
const conn_info = {...connection info...};
const db = pgp(conn_info);
module.exports.main = function main(args) {
db.any('SELECT * FROM items')
.then(data => {
console.log('DATA:', data);
// return whatever data you wanted
resolve({message: 'success'});
})
.catch(error => {
console.log('ERROR:', error);
});
}
Not all of the dependencies here are available on OpenWhisk by default, so when you deploy the action, include both your *.js file and the whole of node_modules/ in a zip file, and upload that. It's definitely possible to use the Compose Postgres with OpenWhisk, if that helps to encourage you :)
I want to drop some mongodb collections, but that's an asynchronous task. The code will be:
var mongoose = require('mongoose');
mongoose.connect('mongo://localhost/xxx');
var conn = mongoose.connection;
['aaa','bbb','ccc'].forEach(function(name){
conn.collection(name).drop(function(err) {
console.log('dropped');
});
});
console.log('all dropped');
The console displays:
all dropped
dropped
dropped
dropped
What is the simplest way to make sure all dropped will be printed after all collections has been dropped? Any 3rd-party can be used to simplify the code.
Use Promises.
var mongoose = require('mongoose');
mongoose.connect('your MongoDB connection string');
var conn = mongoose.connection;
var promises = ['aaa', 'bbb', 'ccc'].map(function(name) {
return new Promise(function(resolve, reject) {
var collection = conn.collection(name);
collection.drop(function(err) {
if (err) { return reject(err); }
console.log('dropped ' + name);
resolve();
});
});
});
Promise.all(promises)
.then(function() { console.log('all dropped)'); })
.catch(console.error);
This drops each collection, printing “dropped” after each one, and then prints “all dropped” when complete. If an error occurs, it is displayed to stderr.
Previous answer (this pre-dates Node’s native support for Promises):
Use Q promises or Bluebird promises.
With Q:
var Q = require('q');
var mongoose = require('mongoose');
mongoose.connect('your MongoDB connection string');
var conn = mongoose.connection;
var promises = ['aaa','bbb','ccc'].map(function(name){
var collection = conn.collection(name);
return Q.ninvoke(collection, 'drop')
.then(function() { console.log('dropped ' + name); });
});
Q.all(promises)
.then(function() { console.log('all dropped'); })
.fail(console.error);
With Bluebird:
var Promise = require('bluebird');
var mongoose = Promise.promisifyAll(require('mongoose'));
mongoose.connect('your MongoDB connection string');
var conn = mongoose.connection;
var promises = ['aaa', 'bbb', 'ccc'].map(function(name) {
return conn.collection(name).dropAsync().then(function() {
console.log('dropped ' + name);
});
});
Promise.all(promises)
.then(function() { console.log('all dropped'); })
.error(console.error);
I see you are using mongoose so you are talking about server-side JavaScript. In that case I advice looking at async module and use async.parallel(...). You will find this module really helpful - it was developed to solve the problem you are struggling with. Your code may look like this
var async = require('async');
var calls = [];
['aaa','bbb','ccc'].forEach(function(name){
calls.push(function(callback) {
conn.collection(name).drop(function(err) {
if (err)
return callback(err);
console.log('dropped');
callback(null, name);
});
}
)});
async.parallel(calls, function(err, result) {
/* this code will run after all calls finished the job or
when any of the calls passes an error */
if (err)
return console.log(err);
console.log(result);
});
The way to do it is to pass the tasks a callback that updates a shared counter. When the shared counter reaches zero you know that all tasks have finished so you can continue with your normal flow.
var ntasks_left_to_go = 4;
var callback = function(){
ntasks_left_to_go -= 1;
if(ntasks_left_to_go <= 0){
console.log('All tasks have completed. Do your stuff');
}
}
task1(callback);
task2(callback);
task3(callback);
task4(callback);
Of course, there are many ways to make this kind of code more generic or reusable and any of the many async programing libraries out there should have at least one function to do this kind of thing.
Expanding upon #freakish answer, async also offers a each method, which seems especially suited for your case:
var async = require('async');
async.each(['aaa','bbb','ccc'], function(name, callback) {
conn.collection(name).drop( callback );
}, function(err) {
if( err ) { return console.log(err); }
console.log('all dropped');
});
IMHO, this makes the code both more efficient and more legible. I've taken the liberty of removing the console.log('dropped') - if you want it, use this instead:
var async = require('async');
async.each(['aaa','bbb','ccc'], function(name, callback) {
// if you really want the console.log( 'dropped' ),
// replace the 'callback' here with an anonymous function
conn.collection(name).drop( function(err) {
if( err ) { return callback(err); }
console.log('dropped');
callback()
});
}, function(err) {
if( err ) { return console.log(err); }
console.log('all dropped');
});
I do this without external libaries:
var yourArray = ['aaa','bbb','ccc'];
var counter = [];
yourArray.forEach(function(name){
conn.collection(name).drop(function(err) {
counter.push(true);
console.log('dropped');
if(counter.length === yourArray.length){
console.log('all dropped');
}
});
});
All answers are quite old. Since the beginning of 2013 Mongoose started to support promises gradually for all queries, so that would be the recommended way of structuring several async calls in the required order going forward I guess.
With deferred (another promise/deferred implementation) you can do:
// Setup 'pdrop', promise version of 'drop' method
var deferred = require('deferred');
mongoose.Collection.prototype.pdrop =
deferred.promisify(mongoose.Collection.prototype.drop);
// Drop collections:
deferred.map(['aaa','bbb','ccc'], function(name){
return conn.collection(name).pdrop()(function () {
console.log("dropped");
});
}).end(function () {
console.log("all dropped");
}, null);
If you are using Babel or such transpilers and using async/await you could do :
function onDrop() {
console.log("dropped");
}
async function dropAll( collections ) {
const drops = collections.map(col => conn.collection(col).drop(onDrop) );
await drops;
console.log("all dropped");
}