I'm trying to aggregate datas by Date in Mongo, but I can't quite achieve what I want.
Right now, I'm using this:
db.aggregData.aggregate( { $group: {_id: "$Date".toString(),
tweets: { $sum: "$CrawledTweets"} } },
{ $match:{ _id: {$gte: ISODate("2013-03-19T12:31:00.247Z") }}},
{ $sort: {Date:-1} }
)
It results with this:
"result" : [
{
"_id" : ISODate("2013-03-19T12:50:00.641Z"),
"tweets" : 114
},
{
"_id" : ISODate("2013-03-19T12:45:00.631Z"),
"tweets" : 114
},
{
"_id" : ISODate("2013-03-19T12:55:00.640Z"),
"tweets" : 123
},
{
"_id" : ISODate("2013-03-19T12:40:00.628Z"),
"tweets" : 91
},
{
"_id" : ISODate("2013-03-19T12:31:00.253Z"),
"tweets" : 43
},
{
"_id" : ISODate("2013-03-19T13:20:00.652Z"),
"tweets" : 125
},
{
"_id" : ISODate("2013-03-19T12:31:00.252Z"),
"tweets" : 30
}
],
"ok" : 1
It seems to do the job, but with further inspection, we see that there is repetition:
ISODate("2013-03-19T12:31:00.253Z") and ISODate("2013-03-19T12:31:00.252Z").
The only thing that changes is the last bit before the Z.
So here is my question. What is this part ? And how can I do to ignore it in the aggregation ?
Thank you in advance.
EDIT: I wanna aggregate by date, so whole year/month/day + hour and minute. I don't care of the rest.
EDIT: My db in on mongolab, so I'm on 2.2
Well, I did it another way: I save all my date with seconds/milliseconds at 0. So I can keep a simple aggregate, with not a little more code server side, thanks to moment.js
You are trying to aggregate by "whole" date, in other words to drop the time from ISODate(), right? There are several ways to do it, I describe them in detail on my blog in the post called
Stupid Date Tricks with Aggregation Framework.
You can see the full step-by-step breakdown there, but to summarize you have two choices:
if you don't care about the aggregated-on value to be an ISODate() then you can use the {$year}, {$month} and {$dayOfMonth} operators in {$project} phase to pull out just Y-M-D to then {$group} on.
if you do care about the grouped-on value staying an ISODate you can {$subtract} the time part in {$project} phase and be left with ISODate() type - the caveat is that this method requires MongoDB 2.4 (just released) which adds support for date arithmetic and for $millisecond operator (see exact code in the blog post).
Here is probably what you want:
db.aggregData.aggregate([
{
$project:{
CrawledTweets: 1,
newDate: {
year:{$year:"$Date"},
month: {$month:"$Date"},
day: {$dayOfMonth:"$Date"},
hour: {$hour: "$Date"},
min: {$minute: "$Date"}
}
}
},
{
$group: {
_id: "$newDate",
tweets: { $sum: "$CrawledTweets"}
}
}
])
Without being a Mongo expert and without knowing your db fields I'd come up with something like this. Perhaps you can build upon this:
db.aggregData.aggregate(
{
$project:{
CrawledTweets: 1,
groupedTime: {
year:{$year:"$_id"},
month: {$month:"$_id"},
day: {$dayOfMonth:"$_id"},
hour: {$hour: "$_id"},
min: {$minute: "$_id"}
}
}
},
{
$group: {
_id: { groupedTime: "$CrawledTweets" },
tweets: { $sum: "$tweets"}
}
}
)
You can now use the MongoDB date aggregation operators, I have a post on my blog that goes over the Schema setup, using it in Node.js, etc:
http://smyl.es/how-to-use-mongodb-date-aggregation-operators-in-node-js-with-mongoose-dayofmonth-dayofyear-dayofweek-etc/
Related
Below is an example of a document in a User collection below.
{
"_id" : 1,
"username" : bob,
"pause" : true,
"pause_date" : ISODate("2021-07-16T07:13:48.680Z"),
"learnt_item" : [
{
"memorized" : false,
"character" : "一",
"next_review" : ISODate("2021-07-20T11:02:44.979Z")
},
{
"memorized" : false,
"character" : "二",
"next_review" : ISODate("2021-07-20T11:02:44.979Z")
},
...
]
}
I need to update all the nested document in "learnt_item" if the "memorized" field is false.
The updates are:
"pause_date" to Null
"pause" to False
Update the ISOdate in "next_review" based on the duration that has passed between "pause_date" and the current time.
E.g. pause_date is 4 hours ago, then I want to add 4 hours to the "next_review"
I was able to achieve 1 & 2 using findOneAndUpdate with arrayFilters and also tested no.3 by updating the "next_review" field with a current date to make sure it is updating correctly.
User.findOneAndUpdate({"_id": req.user._id},
{$set:{"learnt_item.$[elem].next_review": DateTime.local(),"pause_date": null, "pause": value }},
{new:true, arrayFilters: [{"elem.memorized": false}]}).exec((err, doc) =>{if (err){res.send(err)} else {res.send(doc)}});
I was thinking of using the $add aggregation operator to increase the date base
"learnt_item.$[elem].next_review": {$add: ["$learnt_item.$[elem].next_review","$pause_date"]}
However, according to the documentation, arrayFilters is not available for updates that use an aggregation pipeline.
Is there another way more efficient way that I can update the ISOdate?
If you are running MongoDB 4.2 or later you can use a pipeline as the second parameter for the update function, this way you can use the operator $map with $cond to find the entries where the property memorized is equal to false and then add 4 days in milliseconds to the next_review date:
db.collection.update({
"_id": 1
},
[
{
$set: {
"pause_date": null,
"pause": false,
"learnt_item": {
$map: {
input: "$learnt_item",
as: "item",
in: {
$cond: [
{
$eq: [
"$$item.memorized",
false
]
},
{
memorized: "$$item.memorized",
character: "$$item.character",
next_review: {
$add: [
"$$item.next_review",
345600000
]
}
},
"$$item"
]
}
}
},
}
}
],
{
new: true,
});
You can check a running example here: https://mongoplayground.net/p/oHh1JWiP8vs
Imaging I have an array of objects, available before the aggregate query:
const groupBy = [
{
realm: 1,
latest_timestamp: 1318874398, //Date.now() values, usually different to each other
item_id: 1234, //always the same
},
{
realm: 2,
latest_timestamp: 1312467986, //actually it's $max timestamp field from the collection
item_id: 1234,
},
{
realm: ..., //there are many of them
latest_timestamp: ...,
item_id: 1234,
},
{
realm: 10,
latest_timestamp: 1318874398, //but sometimes then can be the same
item_id: 1234,
},
]
And collection (example set available on MongoPlayground) with the following schema:
{
realm: Number,
timestamp: Number,
item_id: Number,
field: Number, //any other useless fields in this case
}
My problem is, how to $group the values from the collection via the aggregation framework by using the already available set of data (from groupBy) ?
What have been tried already.
Okay, let skip crap ideas, like:
for (const element of groupBy) {
//array of `find` queries
}
My current working aggregation query is something like that:
//first stage
{
$match: {
"item": 1234
"realm" [1,2,3,4...,10]
}
},
{
$group: {
_id: {
realm: '$realm',
},
latest_timestamp: {
$max: '$timestamp',
},
data: {
$push: '$$ROOT',
},
},
},
{
$unwind: '$data',
},
{
$addFields: {
'data.latest_timestamp': {
$cond: {
if: {
$eq: ['$data.timestamp', '$latest_timestamp'],
},
then: '$latest_timestamp',
else: '$$REMOVE',
},
},
},
},
{
$replaceRoot: {
newRoot: '$data',
},
},
//At last, after this stages I can do useful job
but I found it a bit obsolete, and I already heard that using [.mapReduce][1] could solve my problem a bit faster, than this query. (But official docs doesn't sound promising about it) Does it true?
As for now, I am using 4 or 5 stages, before start working with useful (for me) documents.
Recent update:
I have checked the $facet stage and I found it curious for this certain case. Probably it will help me out.
For what it's worth:
After receiving documents after the necessary stages I am building a representative cluster chart, that you may also know as a heatmap
After that I was iterating each document (or array of objects) one-by-one to find their correct x and y coordinated in place which should be:
[
{
x: x (number, actual $price),
y: y (number, actual $realm),
value: price * quantity,
quantity: sum_of_quantity_on_price_level
}
]
As for now, it's old awful code with for...loop inside each other, but in the future, I will be using $facet => $bucket operators for that kind of job.
So, I have found an answer to my question in another, but relevant way.
I was thinking about using $facet operator and to be honest, it's still an option, but using it, as below is a bad practice.
//building $facet query before aggregation
const ObjectQuery = {}
for (const realm of realms) {
Object.assign(ObjectQuery, { `${realm.name}` : [ ... ] }
}
//mongoose query here
aggregation([{
$facet: ObjectQuery
},
...
])
So, I have chosen a $project stage and $switch operator to filter results, such as $groups do.
Also, using MapReduce could also solve this problem, but for some reason, the official Mongo docs recommends to avoid using it, and choose aggregation: $group and $merge operators instead.
I have a variable var correctAnswers;
In my MongoDB I have the following document (below). I am trying to write a query that takes all of the "correct" fields from the "quiz" field and put them into their own array, so I can set that array equal to var correctAnswers;.
"title" : "Economics questions"
"quiz": "[{
"question": "Which of these involves the analysis of of a business's financial statements, often used in stock valuation?",
"choices": ["Fundamental analysis", "Technical analysis"],
"correct": 0
}, {
"question": "What was the name of the bond purchasing program started by the U.S. Federal Reserve in response to the 2008 financial crisis?",
"choices": ["Stimulus Package", "Mercantilism", "Quantitative Easing"],
"correct": 2
}, {
"question": "Which term describes a debt security issued by a government, company, or other entity?",
"choices": ["Bond", "Stock", "Mutual fund"],
"correct": 0
}, {
"question": "Which of these companies has the largest market capitalization (as of October 2015)?",
"choices": ["Microsoft", "General Electric", "Apple", "Bank of America"],
"correct": 2
}, {
"question": "Which of these is a measure of the size of an economy?",
"choices": ["Unemployment rate", "Purchasing power index", "Gross Domestic Product"],
"correct": 2
}]"
How should I go about that, or can someone point me in the right direction? I have tried projections, but should I do an aggregation? Thank you for any help.
Edit for clarity: the output I am looking for in this example is an array, [0,2,0,2,2]
you can get this result
[{correct:0},{correct:2},{correct:0},{correct:2}] but [0,2,0,2,2] type of result is not possible unless we use distinct
db.quiz.aggregate(
// Initial document match (uses index, if a suitable one is available)
{ $match: {
"title" : "Economics questions"
}},
// Convert embedded array into stream of documents
{ $unwind: '$quiz' },
},
// Note: Could add a `$group` by _id here if multiple matches are expected
// Final projection: exclude fields with 0, include fields with 1
{ $project: {
_id: 0,
score: "$quiz.correct"
}} )
db.users.find( { }, { "quiz.correct": 1,"_id":0 } )
// above query will return following output :
{
"quiz" : [
{
"correct" : 0
},
{
"correct" : 2
},
{
"correct" : 0
},
{
"correct" : 2
},
{
"correct" : 2
}
]
}
Process this output as per requirement in the node js.
Try this:
db.getCollection('quize').aggregate([
{$match:{_id: id }},
{$unwind:'$quiz'},
{$group:{
_id:null,
score: {$push:"$quiz.correct"}
}}
])
It will give you the expected output.
One way to achieve this through aggregation
db.collectionName.aggregate([
// use index key in match pipeline,just for e.g using title here
{ $match: { "title" : "Economics questions" }},
{ $unwind: "$quiz" },
{ $group: {
_id:null,
quiz: { $push: "$quiz.correct" }
}
},
//this is not required, use projection only if you want to exclude/include fields
{
$project: {_id: 0, quiz: 1}
}
])
Above query will give you the following output
{
"quiz" : [ 0, 2, 0, 2, 2 ]
}
Then simply process this output as per your need.
I have a collection in mongodb as mentioned below.
{ "UserID" : "User1",
"TaskId" : "Task1",
"SubTaskID" : "Subtask1",
"StartDate" : ISODate("2016-02-06T05:00:00Z"),
"EndDate" : ISODate("2016-02-06T05:00:00Z"),
"Hours" : 8, //no of hours worked between StartDate and EndDate on the subtask
"Department:"DEPT1"; //I can have multiple departments
"__v" : 0}
Now I want to calculate the total number of hours worked between startdate and enddate and the total number of hours entered for that subtask in the department by all members within that duration. My output should look like
{
"TaskId":"TaskId1",
"UserId":"UserId1",
"startDate":"2016-02-06",
"endDate":"2016-02-06",
"totalHours": 10.0,
"deptTotalHours":100.0,
"subtasks": [
{
"SubTaskID": "SubTask1",
"totalHours": 4.0,
"deptTotalHours":40.0
},
{
"SubTaskID": "SubTask2",
"totalHours": 6.0,
"deptTotalHours":60.0
}]
}
I have tried with $match,$project and $group but I could not be ble to get the response in the specified format. Can someone please suggest me how to get the response in the above specified format?
Use the following pipeline operation which will give you all the information required for subtask. Then totalHours, deptTotalHours across all subtasks is just sum of records from the sub task result (which you can do it in your application). Rest of the information is available as part of your input itself.
pipeline = [
{
$match: {} // Add criteria based on start and end date, don't include UserID criteria here
},
{
$group: {
_id: '$SubTaskID',
deptTotalHours: {$sum: '$Hours'},
totalHours: {$sum: {$cond:[
{$eq: ['$UserID', inputUserId]},
'$Hours',
0.0
]}
}
}
}
];
I have a database with 800+ different bars, clubs and restaurants across Australia.
I want to build a list of links for my website counting the number of different venues across different suburbs and primary categories.
Like this:
Restaurants, Bowen Hills (15)
Restaurants, Dawes Point (6)
Clubs, Sydney (138)
I could do it the hard way by first getting all venues. Then run a Venue.distinct('details.location.suburb') to get all the unique suburbs.
From here I could run subsequent queries to get the count for the number of venues in that particular suburb and category.
It will be a lot of calls though. There's got to be better way?
Can the Mongo aggregation framework help here?
It seems to be impossible to do this in a single query.
Here's the Venue model:
{
"name" : "Johnny's Bar & Grill",
"meta" : {
"category" : {
"all" : [
"restaurant",
"bar"
],
"primary" : "restaurant"
}
},
"details" : {
"location" : {
"streetNumber" : "180",
"streetName" : "abbotsford road",
"suburb" : "bowen hills",
"city" : "brisbane",
"postcode" : "4006",
"state" : "qld",
"country" : "australia"
},
"contact" : {
"phone" : [
"(07) 5555 5555"
]
}
}
}
}
Here's the prettified solution from BatScream that I ended up using:
Venue.aggregate([
{
$group: {
_id: {
primary: '$meta.category.primary',
suburb: '$details.location.suburb',
country: '$details.location.country',
state: '$details.location.state',
city: '$details.location.city'
},
count: {
$sum: 1
},
type: {
$first: '$meta.category.primary'
}
}
},
{
$sort: {
count: -1
}
},
{
$limit: 50
},
// Reshapes each document in the stream, such as by adding new fields or removing existing fields. For each input document, outputs one document.
{
$project: {
_id: 0,
type : '$type',
location : '$_id.suburb',
count: 1
}
}
],
function(err, res){
next(err, res);
});
}
You can get a very useful and easily transformable output using the following aggregation operation.
Group the records based on their country, category, state, city and
suburb.
Get the count of the records in each group.
Obtain the type of the group from the first record of the group.
Project the necessary fields.
Code:
db.collection.aggregate([
{$group:{"_id":{"primary":"$meta.category.primary",
"suburb":"$details.location.suburb",
"country":"$details.location.country",
"state":"$details.location.state",
"city":"$details.location.city"},
"count":{$sum:1},
"type":{$first:"$meta.category.primary"}}},
{$sort:{"count":-1}},
{$project:{"_id":0,
"type":"$type",
"location":"$_id.suburb",
"count":1}}
])
sample o/p:
{ "count" : 1, "type" : "restaurant", "location" : "bowen hills" }