Elasticsearch Reindex missing out some records - node.js

I have an import job that runs once a week and inserts all the records from MongoDB into Elasticsearch.
What I am doing is the following:
Records already exist in 'main' index
I insert all the new records into 'main-temp' index
I delete the 'main' index
I reindex 'main-temp' to 'main'
I delete the 'main-temp' index
I am running the operation locally on the same data set.
What I am noticing is that the number of records in the new 'main' index does not match the number of records that were imported into the 'main-temp' index.
Here is the code that I am using:
try {
  await client.indices.delete({ index: 'main' })
  Logger.info('Old Index Deleted')
  await client.indices.create({ index: 'main' })
  Logger.info('New Index Created')
  await client.reindex({
    waitForCompletion: true,
    refresh: true,
    body: {
      source: {
        index: 'main-temp'
      },
      dest: {
        index: 'main'
      }
    }
  })
  Logger.info('Temp Index Reindexed/Cloned')
  await client.indices.delete({ index: 'main-temp' })
  Logger.info('Temp Index Deleted')
} catch (e) {
  Logger.error(e)
}
I am using Elasticsearch 6.8.9, so I can't use the Clone API since it is part of 7.x.
Check the screenshot below for the results; the thing is, every time it reindexes the number of records is different (usually smaller by a few thousand).
https://i.stack.imgur.com/g1u0J.png
UPDATE: Here is what I get from reindex as a response (if I do let result = await):
Sometimes it gets the correct number, sometimes not.
{
  took: 22357,
  timed_out: false,
  total: 673637,
  updated: 0,
  created: 673637,
  deleted: 0,
  batches: 674,
  version_conflicts: 0,
  noops: 0,
  retries: { bulk: 0, search: 0 },
  throttled_millis: 0,
  requests_per_second: -1,
  throttled_until_millis: 0,
  failures: []
}

I fixed this by introducing delays after creating/deleting the old index and after reindexing.
Here is the code:
try {
  await client.indices.delete({ index: 'main' })
  Logger.info('Old Index Deleted')
  await client.indices.create({ index: 'main' })
  Logger.info('New Index Created')
  await new Promise(resolve => setTimeout(resolve, 10000))
  await client.reindex({
    waitForCompletion: true,
    refresh: true,
    body: {
      source: {
        index: 'main-temp'
      },
      dest: {
        index: 'main'
      }
    }
  })
  await new Promise(resolve => setTimeout(resolve, 15000))
  Logger.info('Temp Index Reindexed/Cloned')
  await client.indices.delete({ index: 'main-temp' })
  Logger.info('Temp Index Deleted')
} catch (e) {
  Logger.error(e)
}
It seems Elasticsearch needs some time before everything is in sync.
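As an alternative to fixed sleeps, here is a rough sketch (not from the original post) that refreshes the temp index up front and verifies the document counts before deleting it, using the same client's refresh and count calls:

// Sketch: verify counts instead of waiting a fixed amount of time.
// Assumes the same legacy Elasticsearch JS client and Logger used above.
await client.indices.refresh({ index: 'main-temp' }) // make every temp doc searchable/countable
const { count: expected } = await client.count({ index: 'main-temp' })

const result = await client.reindex({
  waitForCompletion: true,
  refresh: true,
  body: { source: { index: 'main-temp' }, dest: { index: 'main' } }
})
Logger.info(`Reindex created ${result.created} of ${expected} docs`)

const { count: actual } = await client.count({ index: 'main' })
if (actual !== expected) {
  // keep 'main-temp' around so nothing is lost and the run can be retried
  throw new Error(`Count mismatch after reindex: expected ${expected}, got ${actual}`)
}
await client.indices.delete({ index: 'main-temp' })
Logger.info('Temp Index Deleted')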

Related

MongoDB: How to update documents already found

I'm building a movie rating system.
After the user ID, content ID, and rating are registered in the rating document, the rating in the content document is updated.
How can I update the content document that I have already found, as in the code below?
router.post('/', authenticate, async (req: IRequest<IRating>, res) => {
  try {
    const document = await Rating.create({
      contentId: req.body.contentId,
      contentType: req.body.contentType,
      rating: req.body.rating,
      review: req.body.review,
      userId: req.user?._id,
    });
    const content = await Content.findOne({
      type: req.body.contentType,
      _id: document._id,
    });
    if (content) {
      await content.updateOne(
        {},
        {
          average: (content.average + document.rating) / content.count + 1,
          count: content.count + 1,
        },
      );
    }
    res.json({ success: true, document, content });
  } catch (err) {
    res.status(404).json({ success: false, message: 'server error' });
  }
});
You can update with an aggregation pipeline instead of using two queries, which for your current code could look like this:
await Content.findOneAndUpdate(
  {
    type: req.body.contentType,
    _id: document._id,
  },
  [{
    $set: {
      average: { $divide: [{ $add: ["$average", document.rating] },
                           { $add: ["$count", 1] }] },
      count: { $add: ["$count", 1] }
    }
  }]
)
But I don't think this is the right way to calculate an average. You should multiply the existing "$average" by the existing "$count" before adding the new document.rating, and only then divide by the new count.
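A sketch of that corrected running-average calculation, keeping the pipeline approach (assumptions: MongoDB 4.2+ for pipeline updates, top-level average and count fields on Content, and that the content is looked up by its contentId rather than by the rating's _id):

// Running average: (old_average * old_count + new_rating) / (old_count + 1)
const content = await Content.findOneAndUpdate(
  { type: req.body.contentType, _id: req.body.contentId }, // assumption: match the content's own id
  [{
    $set: {
      average: {
        $divide: [
          { $add: [{ $multiply: ["$average", "$count"] }, document.rating] },
          { $add: ["$count", 1] }
        ]
      },
      count: { $add: ["$count", 1] }
    }
  }],
  { new: true } // return the updated content document
);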

How to show list of field values of a collection of objects

I am making an API that shows a collection of ads with MongoDB and Node.js.
I need to display the list of the collection's tags as a JSON string.
Example: 'home', 'mobile', 'motor'
This is the API initializer code:
const readline = require('readline');
const Product = require('./models/Product');

async function main() {
  const advance = await question('Are you sure to continue with the deletion of the database? (yes or no) ')
  if (!advance) {
    process.exit();
  }
  const connection = require('./lib/connectMongoose')
  await initProducts();
  connection.close();
}

async function initProducts() {
  const deleted = await Product.deleteMany();
  console.log(`Remove ${deleted.deletedCount} products.`);
  const inserted = await Product.insertMany([
    {name: 'Table', sale: true, price: 150, photo: 'Table.png', tags: ['home']},
    {name: 'Iphone 13 pro', sale: false, price: 950, photo: 'Iphone 13 pro.png', tags: ['mobile']},
    {name: 'Car Mini Cooper', sale: true, price: 1000, photo: 'Car Mini Cooper.png', tags: ['motor']}
  ]);
  console.log(`Create ${inserted.length} products.`)
}

main().catch(err => console.log('Hubo un error', err))

function question(text) {
  return new Promise((resolve, reject) => {
    const interface = readline.createInterface({
      input: process.stdin,
      output: process.stdout
    });
    interface.question(text, answer => {
      interface.close();
      if (answer.toLowerCase() === 'yes') {
        resolve(true);
        return;
      }
      resolve(false);
    })
  })
}
I need to find a MongoDB method that, when the API route is called, returns in JSON format the list of all the tags that the collection includes.
If I've understood correctly, one option is to $unwind the tags array to get each tag as a separate string and then $group, adding the tags to a set to avoid duplicates.
db.collection.aggregate([
  {
    "$unwind": "$tags"
  },
  {
    "$group": {
      "_id": null,
      "tags": {
        "$addToSet": "$tags"
      }
    }
  }
])
I think this works, but running $unwind and $group over the entire collection is not always a good idea; it may be slow.
Example here
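For reference, a rough sketch of wiring this aggregation into an API route with Mongoose (the route path and router setup are assumptions, not part of the original question):

const router = require('express').Router(); // assumption: an Express-style router
const Product = require('./models/Product');

// GET /tags -> { "tags": ["home", "mobile", "motor"] }
router.get('/tags', async (req, res) => {
  try {
    const [result] = await Product.aggregate([
      { $unwind: '$tags' },
      { $group: { _id: null, tags: { $addToSet: '$tags' } } }
    ]);
    res.json({ tags: result ? result.tags : [] });
  } catch (err) {
    res.status(500).json({ message: 'server error' });
  }
});

A simpler alternative may be Product.distinct('tags'), which returns the de-duplicated list of tag values in a single call.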

Mongoose: updateMany() is not working as expected

I'm using Mongoose to handle my DB queries. I'm trying to update a set of records in their entirety using this method. My code looks like this:
// prepare database query
const filter = { type: 'company' };
const update = req.body.payload; // payload contains the array of objects (i.e. updated records)
const options = { new: true, runValidators: true }
// find and update the taxonomy record
await Taxonomy.updateMany(filter, update, options);
But whenever I run this query I'm getting following error in the console:
Error [MongooseError]: Invalid update pipeline operator: "_id"
I suppose there is something wrong in my update payload. The req.body.payload looks like this:
[
  {
    _id: '5ef3d08c745428001d92f896',
    type: 'company',
    name: 'Company Size',
    __v: 0
  },
  {
    _id: '5ef3cdc5745428001d92f893',
    type: 'company',
    name: 'Company Industry',
    __v: 0
  }
]
Can you please tell me what actually is wrong here?
This is not the right usage of updateMany() - it is meant to apply a single update to many documents.
To update many documents with different values, use bulkWrite() (docs):
async function myUpdateMany(Model, objectsArray) {
  try {
    let ops = []
    for (let obj of (objectsArray || [])) {
      ops.push({
        updateOne: {
          filter: { platformId: obj.platformId },
          update: obj,
          upsert: false, // set "true" if you want to add a new document if it doesn't exist
        }
      })
    }
    return await Model.bulkWrite(ops, { ordered: false });
  } catch (err) {
    throw Error("myUpdateMany error: " + err)
  }
}
Regarding runValidators, according to this, it seems to work by default.
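Applied to the payload from the question, a sketch could look like this (assuming each document should be matched by its own _id and its remaining fields written back):

// One updateOne op per payload document, matched by _id.
const ops = req.body.payload.map(({ _id, ...fields }) => ({
  updateOne: {
    filter: { _id },
    update: { $set: fields }, // don't attempt to overwrite _id itself
    upsert: false
  }
}));

await Taxonomy.bulkWrite(ops, { ordered: false });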

How to use mongoose populate and mongoose pagination together?

Full code here: https://github.com/kenpeter/dl_r18_img_back
There is a nice package to hook pagination with mongoose:
According to this, it seems I can use populate with pagination, but I am not able to get it working.
list: function(page = 1, limit = 100) {
  return new Promise(function(resolve, reject) {
    let options = {
      page: parseInt(page),
      limit: parseInt(limit),
      sort: {
        createdDate: -1 // Sort by Date Added DESC
      }
    };
    /*
    Image
      .paginate({}, options)
      .then(function(res) {
        resolve && resolve(res);
      });
    */
    // NOT WORKING!!!!!
    Image
      .populate('category')
      .execPopulate()
      .paginate({}, options)
      .then(function(res) {
        resolve && resolve(res);
      });
    /*
    Image
      .find({})
      .populate('category')
      .exec()
      .then(function(res) {
        resolve && resolve(res);
      });
    */
  });
}
As you can see, there are two commented-out code blocks (Image.xxxxx). They work individually.
How do I put them together?
The documentation says to pass a populate option for populating.
Here is a snippet from the doc:
var options = {
  select: 'title date author',
  sort: { date: -1 },
  populate: 'author',
  lean: true,
  offset: 20,
  limit: 10
};
Maybe use populate: 'category'.
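Putting that together with the list function from the question, a sketch might look like this (assuming the installed plugin is mongoose-paginate and that it forwards the populate option to the underlying query):

list: function(page = 1, limit = 100) {
  let options = {
    page: parseInt(page),
    limit: parseInt(limit),
    sort: { createdDate: -1 }, // Sort by Date Added DESC
    populate: 'category'       // assumption: the plugin passes this through to Query#populate
  };
  // paginate() already returns a promise, so no manual new Promise wrapper is needed
  return Image.paginate({}, options);
}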

Conditionally create new entries using promises in Waterline (Sails.js)

I have an array of "products".
I want to save these products to the database if the database is empty, and when all of the DB operations finish I want to display a message.
I could not manage to do it using Bluebird promises (using .all or .map). I was able to create a single item by just returning Product.create(products[0]), but I can't wrap my head around the rest; I am new to promises.
This is the bootstrap file of my Sails.js project, but the question is really about how to use Bluebird promises. How can I wait for multiple async tasks (creating 3 products) to finish and then continue?
products = [
  {
    barcode: 'ABC',
    description: 'seed1',
    price: 1
  },
  {
    barcode: 'DEF',
    description: 'seed2',
    price: 2
  },
  {
    barcode: 'GHI',
    description: 'seed3',
    price: 3
  }
];

Product.count()
  .then(function(numProducts) {
    if (numProducts > 0) {
      // if database is not empty, do nothing
      console.log('Number of product records in db: ', numProducts);
    } else {
      // if database is empty, create seed data
      console.log('There are no product records in db.');
      // ???
      return Promise.map(function(product) {
        return Product.create(product);
      });
    }
  })
  .then(function(input) {
    // q2) Also here how can decide to show proper message
    //console.log("No seed products created (no need, db already populated).");
    // vs
    console.log("Seed products created.");
  })
  .catch(function(err) {
    console.log("ERROR: Failed to create seed data.");
  });
Figured it out...
products = [
  {
    barcode: 'ABC',
    description: 'seed1',
    price: 1
  },
  {
    barcode: 'DEF',
    description: 'seed2',
    price: 2
  },
  {
    barcode: 'GHI',
    description: 'seed3',
    price: 3
  }
];

Product.count()
  .then(function(numProducts) {
    if (numProducts > 0) {
      // if database is not empty, do nothing
      console.log('Number of product records in db: ', numProducts);
      return [];
    } else {
      // if database is empty, create seed data
      console.log('There are no product records in db.');
      return products;
    }
  })
  .map(function(product) {
    console.log("Product created: ", product);
    return Product.create(product);
  })
  .then(function(input) {
    console.log("Seed production complete.");
  })
  .catch(function(err) {
    console.log("ERROR: Failed to create seed data.");
  });
