Loop through Mongo Collection and update a field in every document - node.js

I have dates in one collection that were inserted incorrectly and are stored as simple "2015-09-10" strings.
I'd like to update them to a proper ISO Date format.
I've tried looping through the collection with forEach(), but I don't know the shell well enough to update each document.
So far I'm at this point:
db.getCollection('schedules').find({}).forEach(function (doc) {
    doc.time = new Date( doc.time ).toUTCString();
    printjson( doc.time );
    // ^ This just prints "Invalid Date"
    // Also none of the below work when I try saving them
    //doc.save();
    //db.getCollection('schedules').save(doc);
});
What's missing here?

The best way to do this is with "Bulk" operations:
var collection = db.getCollection('schedules');
var bulkOp = collection.initializeOrderedBulkOp();
var count = 0;

collection.find().forEach(function(doc) {
    bulkOp.find({ '_id': doc._id }).updateOne({
        '$set': { 'time': new Date(doc.time) }
    });
    count++;

    if (count % 100 === 0) {
        // Execute per 100 operations and re-init
        bulkOp.execute();
        bulkOp = collection.initializeOrderedBulkOp();
    }
});

// Flush whatever is left in the queue (skip if it was just flushed,
// since executing an empty bulk operation throws an error)
if (count % 100 !== 0) {
    bulkOp.execute();
}
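For reference, in MongoDB 3.2+ shells and drivers the same batching can be done with bulkWrite(); a minimal sketch, assuming the same schedules collection and time field as above:

var collection = db.getCollection('schedules');
var ops = [];

collection.find().forEach(function (doc) {
    ops.push({
        updateOne: {
            filter: { _id: doc._id },
            update: { $set: { time: new Date(doc.time) } }
        }
    });
    if (ops.length === 100) {
        collection.bulkWrite(ops); // flush every 100 queued updates
        ops = [];
    }
});

if (ops.length > 0) {
    collection.bulkWrite(ops); // flush the remainder
}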

Just write a for loop using .find() and update each result. For instance, in Python/PyMongo, let's assume we have a collection called 'movies' that we want to update by adding a field called 'reviews', whose value should be an array of five objects with 'name' and 'rating' fields. We'll use Faker and random to create some random data for these fields:
from pymongo import MongoClient
from faker import Faker
import random

faker = Faker()
client = MongoClient()
db = client.test

for res in db.movies.find():
    db.movies.update_one(res, {'$set': {'reviews': [{'name': faker.name(), 'rating': random.randint(1, 5)} for _ in range(5)]}})
Note that if you're using native Mongo, you should use updateOne instead of update_one. I presume a similar approach works in JavaScript, just using the for (let res of db.movies.find()) syntax.
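For completeness, a rough equivalent in the mongo shell; the movies collection and reviews field are just the example names from above, and the random data generation is simplified rather than using Faker:

db.movies.find().forEach(function (doc) {
    var reviews = [];
    for (var i = 0; i < 5; i++) {
        reviews.push({ name: 'reviewer' + i, rating: Math.floor(Math.random() * 5) + 1 });
    }
    db.movies.updateOne({ _id: doc._id }, { $set: { reviews: reviews } });
});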

Related

How to search data in mongodb with dynamic fields using mongoose?

I have a Node.js API in which the user sends the required fields as an array, to be fetched from the MongoDB database. I need to find the data for those fields using a find query. I've written a forEach statement to loop through that array and got the array elements, but when I insert the array elements into the query, it doesn't give the required results. Could anyone please help me resolve the issue by looking at the code below?
templateLevelGraphData: async function(tid, payload) {
    let err, templateData, respData = [], test, currentValue;
    [err, templateData] = await to(Template.findById(tid));
    var templateId = templateData.templateId;
    payload.variables.forEach(async data => {
        console.log(data); // data has the array elements like variables=["humidity"]
        [err, currentValue] = await to(mongoose.connection.db.collection(templateId).find({}, {data: 1}).sort({"entryDayTime": -1}).limit(1).toArray());
        console.log(currentValue);
    });
    return "success";
}
The expected output is,
[ { humidity: 36 } ]
But I'm getting only _id like,
[ { _id: 5dce3a2df89ab63ee4d95495 } ]
I think data is not being applied in the query, even though console.log(data) prints the correct array elements, like humidity. What do I need to do to make it work?
When you pass {data: 1}, the projection key is the literal string "data", not the value of your data variable, so MongoDB looks for a field named data. Projections expect the actual field names.
You have to build an object whose keys are the elements of the array, each set to 1.
const projection = data.reduce((a,b) => (a[b]=1, a), {});
[...] .find({}, projection) [...]
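For example, with data = ["humidity", "temperature"], the reduce call produces the projection { humidity: 1, temperature: 1 }.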
Actually, I got the solution:
for (let i = 0; i < payload.variables.length; i++) {
    var test = '{"' + payload.variables[i] + '":1,"_id":0}';
    var query = JSON.parse(test);
    [err, currentValue] = await to(mongoose.connection.db.collection(templateId).find({"deviceId": deviceId}, query).sort({"entryDayTime": -1}).limit(1).toArray());
    console.log(currentValue); // It's giving the solution
}
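As a side note, the same projection can be built without string concatenation and JSON.parse by using a computed property name (plain JavaScript, keeping the same find() call shape as in the snippet above):

for (let i = 0; i < payload.variables.length; i++) {
    const query = { [payload.variables[i]]: 1, "_id": 0 };
    [err, currentValue] = await to(mongoose.connection.db.collection(templateId).find({"deviceId": deviceId}, query).sort({"entryDayTime": -1}).limit(1).toArray());
    console.log(currentValue);
}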

CoreMongooseArray to Normal Array

I'm shortlisting 2 elements from one schema and want to update them in another schema. For that I used the slice method to take the first 2 elements from an array, but I'm getting
CoreMongooseArray ['element1','element2']
instead of ["element1", "element2"]
How do I remove the "CoreMongooseArray" wrapper?
connection.connectedusers.find({}, async (err, docs) => {
    if (err) throw err;
    var users = docs[0].connectArray;
    if (docs[0] != null && users.length >= 2) {
        var shortListed = users.slice(0, 2);
        try {
            await connection.chatschema.updateMany({}, { $push: { usersConnected: [shortListed] } }, { upsert: true });
        } catch (err) {
            res.status(201).json(err);
        }
    }
});
You need to add lean() to your query.
From the docs:
Documents returned from queries with the lean option enabled are plain javascript objects, not Mongoose Documents. They have no save method, getters/setters, virtuals, or other Mongoose features.
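A minimal sketch of that suggestion, assuming connectedusers is a Mongoose model as in the question:

const docs = await connection.connectedusers.find({}).lean();
// docs[0].connectArray is now a plain JavaScript array, not a CoreMongooseArray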
If you already have the Mongoose array and would like to convert it to a simple JS array:
const jsArray = mongooseArray.toObject();
https://mongoosejs.com/docs/api/array.html#mongoosearray_MongooseArray-toObject
For some reason .toObject() didn't work for me. The lean() option works, but it's not suitable when you already have an object with a Mongoose array in it. So if you already have a Mongoose array and just want to convert it to a plain JS array, you can use the following code:
function mongooseArrayToArray(mongooseArray) {
    const array = [];
    for (let i = 0; i < mongooseArray.length; i += 1) {
        array.push(mongooseArray[i]);
    }
    return array;
}
usage:
const array = mongooseArrayToArray(mongooseArray);
If you just want to convert the CoreMongooseArray to a normal array without changing anything else:
const jsArray = [...mongooseArray];

MongoDB: How to get timestamp and ObjectID from newest document in collection?

I would like to get the most recently created documents in a collection and return their ObjectIDs and timestamps. For example, if yesterday I created 10 documents, I would like to return them with a db.collection query and then get:
const lastTimeStamp = will be the timestamp from the last created element
const lastTimeStampArray = will be array of timestamps from yesterdays records
const lastObjectId = will be ObjectID of last created document
const lastObjectIdsArray = array of last objectIds
I am using:
MongoDB's _id field has the creation date stored in itself: the timestamp is contained in the first 4 bytes of a MongoDB ObjectId.
You can use the ObjectId.getTimestamp() function to get the time from a document's _id.
Sorting on an _id field that stores ObjectId values is therefore roughly equivalent to sorting by creation time.
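For example, in the mongo shell (using the _id value from the earlier question purely as an illustration):

ObjectId("5dce3a2df89ab63ee4d95495").getTimestamp()
// returns the creation time encoded in that id as an ISODate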
For your question:
// To get lastTimeStamp
db.collection.find().sort({ '_id': -1 }).limit(1).forEach(
    function(doc) {
        lastTimeStamp = doc._id.getTimestamp();
    }
)

// To get lastObjectId
db.collection.find().sort({ '_id': -1 }).limit(1).forEach(
    function(doc) {
        lastObjectId = doc._id;
    }
)
Now, getting all the records inserted yesterday takes a bit more work: you need to build _id boundaries for yesterday's time range and then extract the information you need from the matching records.
// To get lastTimeStampArray and lastObjectIdsArray
var yesterdayStart = new Date();
yesterdayStart.setDate(yesterdayStart.getDate() - 1);
yesterdayStart.setHours(0, 0, 0, 0);
var startId = Math.floor(yesterdayStart.getTime() / 1000).toString(16) + "0000000000000000";

var yesterdayEnd = new Date();
yesterdayEnd.setDate(yesterdayEnd.getDate() - 1);
yesterdayEnd.setHours(23, 59, 59, 999);
var endId = Math.floor(yesterdayEnd.getTime() / 1000).toString(16) + "0000000000000000";

var lastTimeStampArray = [];
var lastObjectIdsArray = [];

db.getCollection("records").find({
    _id: {
        $gte: ObjectId(startId),
        $lte: ObjectId(endId)
    }
}).forEach(function(doc) {
    lastObjectIdsArray.push(doc._id);
    lastTimeStampArray.push(doc._id.getTimestamp());
});
These are mongo shell commands; you can adapt them to Node.js accordingly.
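If you are doing this from Node.js, the driver can also build the boundary ObjectIds for you; a minimal sketch, assuming the official mongodb driver, an already-connected Db instance named db, and a records collection (ObjectId.createFromTime takes a Unix timestamp in seconds):

const { ObjectId } = require('mongodb');

async function yesterdaysIdsAndTimestamps(db) {
    const yesterdayStart = new Date();
    yesterdayStart.setDate(yesterdayStart.getDate() - 1);
    yesterdayStart.setHours(0, 0, 0, 0);

    const yesterdayEnd = new Date();
    yesterdayEnd.setDate(yesterdayEnd.getDate() - 1);
    yesterdayEnd.setHours(23, 59, 59, 999);

    const startId = ObjectId.createFromTime(Math.floor(yesterdayStart.getTime() / 1000));
    const endId = ObjectId.createFromTime(Math.floor(yesterdayEnd.getTime() / 1000));

    const docs = await db.collection('records')
        .find({ _id: { $gte: startId, $lte: endId } })
        .toArray();

    return {
        lastObjectIdsArray: docs.map(doc => doc._id),
        lastTimeStampArray: docs.map(doc => doc._id.getTimestamp())
    };
}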
You can get the last inserted record with its timestamp using the following:
db.collection.find().sort({ '_id': -1 }).limit(1).forEach(
    function(doc) {
        print("record: " + doc._id.getTimestamp());
    })
_id is the MongoDB ObjectId.

Nested transactions with pg-promise

I am using NodeJS, PostgreSQL and the amazing pg-promise library. In my case, I want to execute three main queries:
Insert one tweet in the table 'tweets'.
In case there are hashtags in the tweet, insert them into another table 'hashtags'.
Then link both tweet and hashtag in a third table 'hashtagmap' (a many-to-many relational table).
Here is a sample of the request's body (JSON):
{
    "id": "12344444",
    "created_at": "1999-01-08 04:05:06 -8:00",
    "userid": "#postman",
    "tweet": "This is the first test from postman!",
    "coordinates": "",
    "favorite_count": "0",
    "retweet_count": "2",
    "hashtags": {
        "0": {
            "name": "test",
            "relevancetraffic": "f",
            "relevancedisaster": "f"
        },
        "1": {
            "name": "postman",
            "relevancetraffic": "f",
            "relevancedisaster": "f"
        },
        "2": {
            "name": "bestApp",
            "relevancetraffic": "f",
            "relevancedisaster": "f"
        }
    }
}
All the fields above should go into the table "tweets", except hashtags, which in turn should go into the table "hashtags".
Here is the code I am using, based on the Nested transactions example from the pg-promise docs, inside a NodeJS module. I guess I need nested transactions because I need to know both tweet_id and hashtag_id in order to link them in the hashtagmap table.
// Columns
var tweetCols = ['id', 'created_at', 'userid', 'tweet', 'coordinates', 'favorite_count', 'retweet_count'];
var hashtagCols = ['name', 'relevancetraffic', 'relevancedisaster'];

// pgp Column Sets
var cs_tweets = new pgp.helpers.ColumnSet(tweetCols, {table: 'tweets'});
var cs_hashtags = new pgp.helpers.ColumnSet(hashtagCols, {table: 'hashtags'});

return {
    // Transactions
    add: body =>
        rep.tx(t => {
            return t.one(pgp.helpers.insert(body, cs_tweets) + " ON CONFLICT(id) DO UPDATE SET coordinates = " + body.coordinates + " RETURNING id")
                .then(tweet => {
                    var queries = [];
                    for (var i = 0; i < body.hashtags.length; i++) {
                        queries.push(
                            t.tx(t1 => {
                                return t1.one(pgp.helpers.insert(body.hashtags[i], cs_hashtags) + "ON CONFLICT(name) DO UPDATE SET fool ='f' RETURNING id")
                                    .then(hash => {
                                        t1.tx(t2 => {
                                            return t2.none("INSERT INTO hashtagmap(tweetid,hashtagid) VALUES(" + tweet.id + "," + hash.id + ") ON CONFLICT DO NOTHING");
                                        });
                                    });
                            }));
                    }
                    return t.batch(queries);
                });
        })
}
The problem is that with this code I can successfully insert the tweet, but nothing happens after that. I cannot insert the hashtags nor link the hashtags to the tweet.
Sorry, I am new to coding, so I guess I didn't understand how to properly return from the transaction and how to perform this simple task. Hope you can help me.
Thank you in advance.
Jean
Improving on Jean Phelippe's own answer:
// Columns
var tweetCols = ['id', 'created_at', 'userid', 'tweet', 'coordinates', 'favorite_count', 'retweet_count'];
var hashtagCols = ['name', 'relevancetraffic', 'relevancedisaster'];

// pgp Column Sets
var cs_tweets = new pgp.helpers.ColumnSet(tweetCols, {table: 'tweets'});
var cs_hashtags = new pgp.helpers.ColumnSet(hashtagCols, {table: 'hashtags'});

return {
    /* Tweets */

    // Add a new tweet and update the corresponding hash tags
    add: body =>
        db.tx(t => {
            return t.one(pgp.helpers.insert(body, cs_tweets) + ' ON CONFLICT(id) DO UPDATE SET coordinates = ' + body.coordinates + ' RETURNING id')
                .then(tweet => {
                    var queries = Object.keys(body.hashtags).map(key => {
                        return t.one(pgp.helpers.insert(body.hashtags[key], cs_hashtags) + ' ON CONFLICT(name) DO UPDATE SET fool = $1 RETURNING id', 'f')
                            .then(hash => {
                                return t.none('INSERT INTO hashtagmap(tweetid, hashtagid) VALUES($1, $2) ON CONFLICT DO NOTHING', [+tweet.id, +hash.id]);
                            });
                    });
                    return t.batch(queries);
                });
        })
            .then(data => {
                // transaction was committed;
                // data = [null, null,...] as per t.none('INSERT INTO hashtagmap...
            })
            .catch(error => {
                // transaction rolled back
            })
},
NOTES:
As per my notes earlier, you must chain all queries, or else you will end up with loose promises.
Stay away from nested transactions, unless you understand exactly how they work in PostgreSQL (read this, and specifically the Limitations section).
Avoid manual query formatting; it is not safe. Always rely on the library's query formatting.
Unless you are passing the result of the transaction somewhere else, you should at least provide a .catch handler.
P.S. The syntax +tweet.id is just a shorthand numeric conversion (like Number(tweet.id)), in case those values are strings ;)
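To illustrate the note about manual query formatting, the concatenation of body.coordinates in the tweet insert can be replaced with pg-promise's own named-parameter formatting; a minimal sketch, reusing the cs_tweets ColumnSet defined above:

const insertTweet = pgp.helpers.insert(body, cs_tweets) +
    ' ON CONFLICT(id) DO UPDATE SET coordinates = ${coordinates} RETURNING id';

t.one(insertTweet, { coordinates: body.coordinates })
    .then(tweet => {
        // same chaining as in the transaction above
    });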
For those who face a similar problem, I will post the answer.
Firstly, my mistakes:
In the for loop: body.hashtags.length doesn't exist because I am dealing with an object (very basic mistake here). Changed to Object.keys(body.hashtags).length.
Why use so many transactions? Following the answer by vitaly-t in Interdependent Transactions with pg-promise, I removed the extra transactions. It wasn't clear to me before how you can open one transaction and use the result of one query in another query within the same transaction.
Here is the final code:
// Columns
var tweetCols = ['id', 'created_at', 'userid', 'tweet', 'coordinates', 'favorite_count', 'retweet_count'];
var hashtagCols = ['name', 'relevancetraffic', 'relevancedisaster'];

// pgp Column Sets
var cs_tweets = new pgp.helpers.ColumnSet(tweetCols, {table: 'tweets'});
var cs_hashtags = new pgp.helpers.ColumnSet(hashtagCols, {table: 'hashtags'});

return {
    /* Tweets */

    // Add a new tweet and update the corresponding hashtags
    add: body =>
        rep.tx(t => {
            return t.one(pgp.helpers.insert(body, cs_tweets) + " ON CONFLICT(id) DO UPDATE SET coordinates = " + body.coordinates + " RETURNING id")
                .then(tweet => {
                    var queries = [];
                    for (var i = 0; i < Object.keys(body.hashtags).length; i++) {
                        queries.push(
                            t.one(pgp.helpers.insert(body.hashtags[i], cs_hashtags) + "ON CONFLICT(name) DO UPDATE SET fool ='f' RETURNING id")
                                .then(hash => {
                                    t.none("INSERT INTO hashtagmap(tweetid,hashtagid) VALUES(" + tweet.id + "," + hash.id + ") ON CONFLICT DO NOTHING");
                                })
                        );
                    }
                    return t.batch(queries);
                });
        }),

MongoDB race conditions or concurrency issues

I have the following code in my chat application, based on NodeJS and MongoDB, to change the admin for a room:
export function setAdmin(room, user) {
    const userGuid = getGuid(user);
    if (room.users && room.users.length) {
        // filter current and new
        room.users = room.users.filter(guid =>
            guid !== room.adminGuid && guid !== userGuid
        );
    } else {
        room.users = [];
    }
    room.users.push(userGuid);
    room.adminGuid = userGuid;
    return roomsCollection.save(room, { w: 1 });
}
Each room has only 2 users: an admin and a customer.
To update several rooms:
const rooms = await roomsCollection.find({ adminGuid: currentAdminGuid }).toArray();
for (const room of rooms) {
    await setAdmin(room, newAdminUser);
}
I had some problems with my application under high load. They were resolved with the help of indexes.
But afterwards, while working with a MongoDB dump, I found out that I have rooms with 3 user guids in the room.users array. I think save works as an update for an existing document, but how does it update the array? If it uses $set, why do I have such 3-user rooms? Any thoughts on how this could be possible?
save() behaves as an insert if the document does not exist. You have to use update() with {upsert: false}.
Some resources on updating arrays: Array Query, array Operators.
A simple tutorial on nesting an array inside a document, here.
An advanced example of dealing with multiple nested arrays, here.
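A minimal sketch of that suggestion, assuming the Node.js driver and the same room/user fields as in the question; the new users array is still computed in application code, but it is written with updateOne and $set on the existing _id instead of save(), with upsert: false so a missing room is never re-inserted:

export async function setAdmin(room, user) {
    const userGuid = getGuid(user);
    // drop the current admin and the new user, then append the new admin
    const users = (room.users || []).filter(
        guid => guid !== room.adminGuid && guid !== userGuid
    );
    users.push(userGuid);
    return roomsCollection.updateOne(
        { _id: room._id },
        { $set: { users: users, adminGuid: userGuid } },
        { upsert: false }
    );
}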
