Does anybody know how one would go about using the $out operator to push the results of a MongoDB aggregation pipeline into a new collection in Node.js?
This is what I have:
var fs = require('fs');
var assert = require('assert');
var ObjectId = require('mongodb').ObjectID;
var MongoClient = require('mongodb').MongoClient,
    format = require('util').format;

var createGroups = function(db, callback) {
    db.collection('people').aggregate(
        [
            { $group: { "_id": "$code", "sendees": { $push: "$email" }, "count": { $sum: 1 } } }
        ]
    ).toArray(function(err, result) {
        assert.equal(err, null);
        console.log(result);
        callback(result);
    });
};

MongoClient.connect('mongodb://localhost:12121/systest', function(err, db) {
    assert.equal(null, err);
    createGroups(db, function() {
        db.close();
    });
});
This logs to the console exactly as I'd expect, but I'm having little luck exporting the results to a new collection.
Thanks!
You just need to supply $out in your aggregation. See the docs.
So your code should look like:
db.collection('people').aggregate(
    [
        {
            $group: {
                "_id": "$code",
                "sendees": { $push: "$email" },
                "count": { $sum: 1 }
            }
        },
        {
            $out: "collection name"
        }
    ]
)
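Note that, depending on your driver version, aggregate() may only return a cursor and the pipeline won't run until you iterate it, so keep the toArray() (or a callback) from your original code. With $out the result array is typically empty, since the grouped documents are written to the target collection instead. Roughly (the target collection name here is just an example):
db.collection('people').aggregate(
    [
        { $group: { "_id": "$code", "sendees": { $push: "$email" }, "count": { $sum: 1 } } },
        { $out: "people_grouped" }   // example target collection name
    ]
).toArray(function(err, result) {
    assert.equal(err, null);
    // "result" should be empty; the grouped documents are now in "people_grouped"
    callback(result);
});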
You could try using the mongo-aggregate-out package, which saves aggregation results to a collection for MongoDB versions < 2.6. If your MongoDB version is 2.6 or newer, the module behaves as a passthrough and uses the native $out feature:
var aggregateOut = require('mongo-aggregate-out'),
    pipelineArray = [
        {
            $group: {
                "_id": "$code",
                "sendees": { $push: "$email" },
                "count": { $sum: 1 }
            }
        }
    ];

var createGroups = function(db, callback) {
    aggregateOut(db.collection('people'), pipelineArray, { out: "newCollection" },
        function(err) {
            assert.equal(err, null);
            var cur = db.collection('newCollection').find();
            callback(cur);
        });
};
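The callback then receives a cursor over newCollection, which you can consume like any other cursor. For example, mirroring the connect block from your question:
MongoClient.connect('mongodb://localhost:12121/systest', function(err, db) {
    assert.equal(null, err);
    createGroups(db, function(cur) {
        cur.toArray(function(err, docs) {
            assert.equal(err, null);
            console.log(docs);
            db.close();
        });
    });
});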
Thanks for your answers, folks; both were very helpful. As it happens, I was using an outdated version of MongoDB from before $out was introduced, but once I upgraded and did as you suggested above, it worked perfectly.
Related
I'm using MongoDB with the Sails framework. Production.find({}) works normally,
but Production.aggregate([]) returns an error:
Production.aggregate() is not a function
module.exports = {
    list: function(req, res) {
        Production.aggregate([{
            $project: {
                data: { $substr: ["$pt", 0, 10] },
                prodTempo: { $substr: ["$sis", 0, 10] }
            }
        }])
        .exec(function(err, collection) {
            if (err) {
                res.send(500, { error: "DataBase Error" });
            }
            res.view('list', { producao: collection });
        });
    }
};
As of Sails v1.0 the .native() method is deprecated in favor of getDatastore().manager.
https://sailsjs.com/documentation/reference/waterline-orm/models/native
Due to a bug in the current version of sails-mongo (v1.0.1), which doesn't support the newly required cursor method, I've actually switched to using MongoDB views to manage aggregate queries.
The pattern below is "supposed" to work but currently returns no results, because calling toArray() on the result of aggregate() is not properly supported: it returns an AggregateCursor that does not support the toArray() method.
WHAT I ENDED UP DOING
const myView = sails.getDatastore().manager.collection("view_name");
myView.find({...match/filter criteria...}).toArray((err, results) => {
if (err) {
// handle error 2
}
// Do something with your results
});
I put the entire aggregate query into the MongoDB view and added extra fields to it to support whatever filter/match criteria I need. The only part of the "match" I did not move into the view is the set of dynamic fields I use in the find() call above. That's why you need the extra fields: find() can only query the fields the view exposes, not the underlying model.
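For reference, defining such a view in the mongo shell looks roughly like this; it's a sketch, the source collection name "production" is an assumption, and the pipeline is taken from the question:
// The aggregate pipeline lives in the view definition (MongoDB 3.4+),
// so the Sails code only needs a plain find() against "view_name".
db.createView("view_name", "production", [
    {
        $project: {
            data: { $substr: ["$pt", 0, 10] },
            prodTempo: { $substr: ["$sis", 0, 10] }
        }
    }
]);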
WHAT SHOULD HAVE WORKED
So the pattern for aggregate would now be as follows:
const aggregateArray = [
{
$project: {
data: { $substr: ['$pt', 0, 10] },
prodTempo: { $substr: ['$sis', 0, 10] }
}
}
];
sails.getDatastore('name of datastore').manager.collection('collection name')
.aggregate(aggregateArray)
.toArray((err, results) => {
if (err) {
// handle error 2
}
// Do something with your results
});
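If you hit the same cursor issue, one possible workaround (untested here, and assuming the returned AggregateCursor still exposes the driver's standard stream events) is to collect the results manually instead of calling toArray():
const cursor = sails.getDatastore('name of datastore').manager
    .collection('collection name')
    .aggregate(aggregateArray);

const results = [];
cursor.on('data', (doc) => results.push(doc));
cursor.on('error', (err) => { /* handle error */ });
cursor.on('end', () => {
    // Do something with your results
});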
For aggregations you need to call the native function first. Then it looks like this:
const aggregateArray = [
{
$project: {
data: { $substr: ['$pt', 0, 10] },
prodTempo: { $substr: ['$sis', 0, 10] }
}
}
];
Production.native(function(err, prodCollection) {
if (err) {
// handle error 1
} else {
prodCollection
.aggregate(aggregateArray)
.toArray((err, results) => {
if (err) {
// handle error 2
}
// Do something with your results
});
}
});
const regexForFileName = '.*' + fileName + '.*';
var db = model.getDatastore().manager;
var rawMongoCollection = db.collection(model.tableName);
rawMongoCollection.aggregate(
[
{
$project : {
"_id" : 0,
"fileId" : 1,
"fileName" : 1,
"fileSize" : 1,
"createdTime" : 1
}
},
{
$match : {
"fileName" : {
$regex: regexForFileName,
$options: 'i'
}
}
},
{
$sort: {
"createdTime" : -1
}
},
{
$skip: pageNumber * numberOfResultsPerPage
},
{
$limit: numberOfResultsPerPage
}
]
).toArray((err, results) => {
if (err) {
console.log(err);
}
console.log("results: " + JSON.stringify(results));
});
I am new to MongoDB and Node. I am trying to find the max value for a field (userId), but it returns nothing.
My code is
EventSchema.static("createUser", function(event, user, callback) {
    var That = this;
    var max_usr_Id = '';
    async.waterfall([
        function(callback) {
            That.find({ "userId": { "$ne": "" }, "$and": [{ "userId": { "$exists": 1 } }] })
                .sort({ "_id": -1 })
                .limit(1)
                .select("userId")
                .exec(function(err, doc) {
                    if (err) {
                        console.log('User ID ERROR-');
                        callback({ error: err, message: "Error getting max User ID" });
                    } else {
                        console.log('User ID-');
                        console.log(doc.userId);
                        max_usr_Id = doc.userId;
                        console.log(max_usr_Id);
                    }
                });
            console.log(max_usr_Id);
        }
    ]);
});
For some reason, control never reaches the find() callback. When I try the following query in the mongodb shell, it works.
db.users.find({
"userId" : {
"$ne" : ""
},
"$and" : [
{
"userId" : {
"$exists" : true
}
}
]
}).sort({
"_id" : -1.0
}).limit(1);
Any help is highly appreciated. Thanks in advance.
You aren't using $and the right way; try:
That.find({ $and: [
{ "userId": { $ne: "" } },
{ "userId": { $exists: true } }
] }).sort( ...
Take a look at the $and documentation.
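Since both conditions apply to the same userId field, you could equally combine the operators without $and:
That.find({ "userId": { $ne: "", $exists: true } }).sort( ...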
Edit
After seeing the comments, the problem must be in the way the logging is done. You need to call toArray to get the documents as an array, and then iterate over it (with forEach, for instance):
...find( ... ).toArray(function(err, docs) {
// Print each document returned
docs.forEach(function(doc) {
console.log(doc.userId);
});
});
I have two MongoDB collections, like this:
UserGroup collection
fields:
name: String
group_id: Number
User collection
fields:
user_name: String
group_id: Number
I want to generate a report like this:
ADMINISTRATORS
--------------------------
jlopez
rdiaz
OPERATORS
--------------------------
amiralles
dcamponits
But I get the following report:
ADMINISTRATORS
--------------------------
OPERATORS
--------------------------
jlopez
rdiaz
amiralles
dcamponits
Following is the code to generate the report:
UserGroup.find({}, (err, groups) => {
    for (var i in groups) {
        console.log(groups[i].name)
        console.log("--------------------")
        User.find({ group_id: groups[i].group_id }, (err, users) => {
            for (var j in users) {
                console.log(users[j].user_name)
            }
        })
    }
})
Clearly, this is a problem with Node.js/Mongoose asynchronicity.
QUESTION: How do I make the outer for loop wait until the inner loop finishes for each UserGroup?
Thanks in advance,
David.
You can run an aggregation pipeline that uses $lookup to do a "left-join" to another collection in the same database to filter in documents from the "joined" collection for processing. With this you won't need any async library:
UserGroup.aggregate([
    {
        "$lookup": {
            "from": "users",
            "localField": "group_id",
            "foreignField": "group_id",
            "as": "users"
        }
    },
    {
        "$project": {
            "_id": 0,
            "name": 1,
            "users": {
                "$map": {
                    "input": "$users",
                    "as": "user",
                    "in": "$$user.user_name"
                }
            }
        }
    }
], (err, groups) => {
    if (err) throw err;
    console.log(JSON.stringify(groups, null, 4));
})
Sample Output
[
    {
        "name": "ADMINISTRATORS",
        "users": ["jlopez", "rdiaz"]
    },
    {
        "name": "OPERATORS",
        "users": ["amiralles", "dcamponits"]
    }
]
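From there, printing the report in the format you described is just a loop over that result inside the aggregate callback (a quick sketch using the name and users fields shown above):
groups.forEach((group) => {
    console.log(group.name);
    console.log("--------------------------");
    group.users.forEach((userName) => console.log(userName));
});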
Add support for promises to mongoose. I use q, but you can use bluebird too.
mongoose.Promise = require('q').Promise;
Then you can use q.all to resolve once all of the user queries have completed.
var q = require('q');

UserGroup.find({}, (err, groups) => {
    var promises = [];

    // kick off one User query per group, in the same order as "groups"
    for (var i in groups) {
        promises.push(User.find({ group_id: groups[i].group_id }).exec());
    }

    // resolve once all of the user queries have completed
    q.all(promises).then(function(usersByGroup) {
        var indx = 0;
        usersByGroup.forEach(function(users) {
            var grp = groups[indx];
            console.log(grp.name);
            console.log("--------------------");
            for (var j in users) {
                console.log(users[j].user_name);
            }
            indx++;
        });
    });
});
This is a good use case for async; you can get a basic idea from the following code. It is based on async.each and async.waterfall. (Please add proper error handling to the following code yourself.)
UserGroup.find({}, (err, groups) => {
    async.each(groups, (group, callback) => {
        async.waterfall([
            (wCallback) => {
                User.find({ group_id: group.group_id }, wCallback)
            },
            (users, wCallback) => {
                console.log(group.name)
                console.log("--------------------")
                for (var j in users) {
                    console.log(users[j].user_name)
                }
                wCallback()
            }
        ], callback)
    })
})
How can I output the result of a MongoDB aggregation into a collection without replacing that collection with the output of another aggregation?
I need to use $out: 'tempCollection' because I have 500 million documents and am hitting the aggregation pipeline size limit.
var q = [
    { $match: query },
    { $group: { _id: '$hash' } },
    { $out: 'tempCollection' }
];

async.parallel([
    function(callback) {
        firstCollection.aggregate(q, callback);
    },
    function(callback) {
        secondCollection.aggregate(q, callback);
    },
    ...
], function() {
    // I want to get all from tempCollection (with pagination) here
});
The bottom line here is that the $out option only ever "replaces" output on the target collection. So to do anything else you must work through a client connection rather than just outputting to the server.
Your best option here with mongoose is to step straight into the underlying driver and get access to the node stream interface as supported by the driver.
Trivial example, but it shows the basic way to structure it:
var async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

mongoose.connect('mongodb://localhost/aggtest');

var testSchema = new Schema({}, { "_id": false, strict: false });

var ModelA = mongoose.model('ModelA', testSchema),
    ModelB = mongoose.model('ModelB', testSchema),
    ModelC = mongoose.model('ModelC', testSchema);

function processCursor(cursor, target, callback) {
    cursor.on("end", callback);
    cursor.on("error", callback);
    cursor.on("data", function(data) {
        cursor.pause();
        target.update(
            { "_id": data._id },
            { "$setOnInsert": { "_id": data._id } },
            { "upsert": true },
            function(err) {
                if (err) callback(err);
                cursor.resume();
            }
        );
    });
}

async.series(
    [
        // Clean data
        function(callback) {
            async.each([ModelA, ModelB, ModelC], function(model, callback) {
                model.remove({}, callback);
            }, callback);
        },

        // Sample data
        function(callback) {
            async.each([ModelA, ModelB], function(model, callback) {
                async.each([1, 2, 3], function(id, callback) {
                    model.create({ "_id": id }, callback);
                }, callback);
            }, callback);
        },

        // Run merge
        function(callback) {
            async.parallel(
                [
                    function(callback) {
                        var cursor = ModelA.collection.aggregate(
                            [
                                { "$group": { "_id": "$_id" } }
                            ],
                            { "batchSize": 25 }
                        );
                        processCursor(cursor, ModelC, callback)
                    },
                    function(callback) {
                        var cursor = ModelB.collection.aggregate(
                            [
                                { "$group": { "_id": "$_id" } }
                            ],
                            { "batchSize": 25 }
                        );
                        processCursor(cursor, ModelC, callback)
                    }
                ],
                callback
            );
        },

        // Get merged
        function(callback) {
            ModelC.find({}, function(err, results) {
                console.log(results);
                callback(err);
            });
        }
    ],
    function(err) {
        if (err) throw err;
        mongoose.disconnect();
    }
);
Outside of that, you are going to need to $out to "separate" collections and then merge them with a similar .update() process; to keep it "server side" you would need to use .eval().
It's not nice, but that is the only way to keep the operations on the server. You can also modify this with "Bulk" operations (again through the same native .collection interface) for a bit more throughput. But the options come down to "read through the client" or "eval".
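As a rough, untested sketch, a "Bulk" variation of processCursor above could batch the upserts like this (assuming target is a mongoose model, so target.collection exposes the native bulk API; the batch size of 500 is arbitrary):
function processCursorBulk(cursor, target, callback) {
    var bulk = target.collection.initializeUnorderedBulkOp(),
        pending = 0;

    cursor.on("error", callback);

    cursor.on("data", function(data) {
        // queue the same upsert as before, but don't send it yet
        bulk.find({ "_id": data._id })
            .upsert()
            .updateOne({ "$setOnInsert": { "_id": data._id } });
        pending++;

        if (pending >= 500) {            // flush in batches
            cursor.pause();
            bulk.execute(function(err) {
                if (err) return callback(err);
                bulk = target.collection.initializeUnorderedBulkOp();
                pending = 0;
                cursor.resume();
            });
        }
    });

    cursor.on("end", function() {
        if (pending > 0) {
            bulk.execute(callback);      // flush the remainder
        } else {
            callback();
        }
    });
}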
I have the following data in my MongoDB:
{
"_id" : ObjectId("54a0d4c5bffabd6a179834eb"),
"is_afternoon_scheduled" : true,
"employee_id" : ObjectId("546f0a06c7555ae310ae925a")
}
I would like to use populate with aggregate and fetch the complete employee information in the same response. I need help with this. My code is:
var mongoose = require("mongoose");
var empid = mongoose.Types.ObjectId("54a0d4c5bffabd6a179834eb");

Availability.aggregate()
    .match({ employee_id: empid })
    .group({ _id: "$employee_id", count: { $sum: 1 } })
    .exec(function(err, response) {
        if (err) console.log(err);
        res.json({ "message": "success", "data": response, "status_code": "200" });
    });
The response I am getting is:
{"message":"success","data":{"_id":"54a0d4c5bffabd6a179834eb","count":1},"status_code":"200"}
My expected response is:
{"message":"success","data":[{"_id":"54aa34fb09dc5a54232e44b0","count":1, "employee":{fname:abc,lname:abcl}}],"status_code":"200"}
You can call the model form of .populate() on the result objects from an aggregate operation. But the thing is you are going to need a model to represent the "Result" object returned by your aggregation in order to do so.
There are a couple of steps, best explained with a complete listing:
var async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

var employeeSchema = new Schema({
    "fname": String,
    "lname": String
});

var availSchema = new Schema({
    "is_afternoon_scheduled": Boolean,
    "employee_id": {
        "type": Schema.Types.ObjectId,
        "ref": "Employee"
    }
});

var resultSchema = new Schema({
    "_id": {
        "type": Schema.Types.ObjectId,
        "ref": "Employee"
    },
    "count": Number
});

var Employee = mongoose.model("Employee", employeeSchema);
var Availability = mongoose.model("Availability", availSchema);
var Result = mongoose.model("Result", resultSchema, null);

mongoose.connect('mongodb://localhost/aggtest');

async.series(
    [
        function(callback) {
            async.each([Employee, Availability], function(model, callback) {
                model.remove({}, function(err, count) {
                    console.log(count);
                    callback(err);
                });
            }, callback);
        },
        function(callback) {
            async.waterfall(
                [
                    function(callback) {
                        var employee = new Employee({
                            "fname": "abc",
                            "lname": "xyz"
                        });
                        employee.save(function(err, employee) {
                            console.log(employee);
                            callback(err, employee);
                        });
                    },
                    function(employee, callback) {
                        var avail = new Availability({
                            "is_afternoon_scheduled": true,
                            "employee_id": employee
                        });
                        avail.save(function(err, avail) {
                            console.log(avail);
                            callback(err);
                        });
                    }
                ],
                callback
            );
        },
        function(callback) {
            Availability.aggregate(
                [
                    { "$group": {
                        "_id": "$employee_id",
                        "count": { "$sum": 1 }
                    }}
                ],
                function(err, results) {
                    results = results.map(function(result) {
                        return new Result(result);
                    });
                    Employee.populate(results, { "path": "_id" }, function(err, results) {
                        console.log(results);
                        callback(err);
                    });
                }
            );
        }
    ],
    function(err, result) {
        if (err) throw err;
        mongoose.disconnect();
    }
);
That's the complete example, but the main point is what happens inside the aggregate callback:
function(err, results) {
    results = results.map(function(result) {
        return new Result(result);
    });
    Employee.populate(results, { "path": "_id" }, function(err, results) {
        console.log(results);
        callback(err);
    });
}
The first thing to be aware of is that the results returned by .aggregate() are not mongoose documents as they would be in a .find() query. This is because aggregation pipelines typically alter the document in results from what the original schema looked like. Since it is just a raw object, each element is re-cast as a mongoose document for the Result model type defined earlier.
Now in order to .populate() with data from Employee, the model form of this method is called on the array of results in document object form along with the "path" argument to the field to be populated.
The end result fills in the data as it comes from the Employee model it was related to:
[ { _id:
{ _id: 54ab2e3328f21063640cf446,
fname: 'abc',
lname: 'xyz',
__v: 0 },
count: 1 } ]
This is different from how you process results with .find(), but it is necessary to "re-cast" and call .populate() manually in this way because of how the results are returned.
This works like populate applied to an aggregate, using an inner query:
var mongoose = require("mongoose");
var empid = mongoose.Types.ObjectId("54a0d4c5bffabd6a179834eb");

Availability.aggregate()
    .match({ employee_id: empid })
    .group({ _id: "$employee_id", count: { $sum: 1 } })
    .exec(function(err, response) {
        if (err) console.log(err);
        if (response.length) {
            var x = 0;
            for (var i = 0; i < response.length; i++) {
                empID = response[i]._id;
                if (x === response.length - 1) {
                    User.find({ _id: empID }, function(err, users) {
                        res.json({ "message": "success", "data": users, "status_code": "200" });
                    });
                }
                x++;
            }
        }
    });