Extend sphinx search output with index names - node.js

I have two Sphinx indexes: index1 and index2.
When I search in index1, I get two matches:
{ error: '',
warning: '',
status: [ 0 ],
fields: [ 'name' ],
attrs: [],
matches:
[ { id: 5731, weight: 2, attrs: {} },
{ id: 17236, weight: 2, attrs: {} } ],
total: 2,
total_found: 2,
time: 0,
words: [ { word: '*foo*', docs: 2, hits: 4 } ] }
Now I can fetch those 2 records from the database and return them to the client.
When I search for the same term in index2, I get three matches:
{ error: '',
warning: '',
status: [ 0 ],
fields: [ 'name' ],
attrs: [],
matches:
[ { id: 28, weight: 1, attrs: {} },
{ id: 41, weight: 1, attrs: {} },
{ id: 42, weight: 1, attrs: {} } ],
total: 3,
total_found: 3,
time: 0,
words: [ { word: '*foo*', docs: 3, hits: 3 } ] }
Now I can fetch those 3 records from the database and return them to the client.
When I search in all indexes, I get five records:
{ error: '',
warning: '',
status: [ 0 ],
fields: [ 'name' ],
attrs: [],
matches:
[ { id: 5731, weight: 2, attrs: {} },
{ id: 17236, weight: 2, attrs: {} },
{ id: 28, weight: 1, attrs: {} },
{ id: 41, weight: 1, attrs: {} },
{ id: 42, weight: 1, attrs: {} } ],
total: 5,
total_found: 5,
time: 0,
words: [ { word: '*foo*', docs: 5, hits: 7 } ] }
The problem is that the indexes are built on different database tables, so I don't know what to do with the matches, because the ids refer to different tables.
How can I get the index names (or sources, or anything similar) along with the search results, so that I know exactly what has been found?
I'm using the sphinxapi Node.js client, if it matters.

Add an explicit attribute to the indexes :)
# Tag every row with a constant "idx" attribute so that each match can be
# traced back to the source (and therefore the table) it was indexed from.
source index1 {
    sql_query = SELECT id, title, 1 as idx FROM ...
    # idx is declared as a 2-bit unsigned attribute, which is enough for 1 and 2.
    sql_attr_uint = idx:2
}

source index2 {
    sql_query = SELECT id, title, 2 as idx FROM ...
    sql_attr_uint = idx:2
}
(the number in sql_attr_uint is the number of bits for the attribute)

Related

Mongoose DB - Find inside an array

I have the following data inside a mongoose collection:
{
map: 'Sydney',
Postcode: 2000,
mapItems: [
{
team: 'NONE',
teamIcon: 20,
x: 0.6092914,
y: 0.28168318,
flags: 0
},
{
team: 'Alpha',
teamIcon: 33,
x: 0.63026464,
y: 0.41642973,
flags: 0
},
{
team: 'Bravo',
teamIcon: 20,
x: 0.63026464,
y: 0.41642973,
flags: 0
},
{
team: 'Alpha',
teamIcon: 20,
x: 0.63026464,
y: 0.41642973,
flags: 0
}
}
I'm trying to return just the mapItems whose team is "Alpha" and whose teamIcon is 33 or 52.
// Looks up documents through dyDB.find() and logs every document returned.
// NOTE(review): the sample document stores the team name under `team`, while
// this filter tests `mapItems.teamId` — confirm which field name is intended.
// NOTE(review): presumably each dotted `mapItems.<field>` condition may be
// satisfied by a different array element, so together they do not require one
// element to meet all of them — verify against the MongoDB array-query docs.
dyDB
.find({
// Either teamIcon value is acceptable.
$or: [
{
"mapItems.teamIcon": 33,
},
{
"mapItems.teamIcon": 52,
},
],
$and: [
{
// NOTE(review): the `}` closing this line and the `},` on the next line look
// unbalanced — one of them appears to be a paste error.
"mapItems.teamId": "Alpha" },
},
],
})
.then((data) => {
// Print each document in the result set.
for (const dyn of data) {
console.log(dyn);
}
});
But it just returns everything and doesn't seem to filter anything. I'm not sure what else to try. Can anyone give me some pointers?
You have to put the $or inside an $and, and wrap both in $elemMatch so that they apply to the same array element:
// Select documents that contain at least one mapItems entry which, by itself,
// has teamIcon 52 or 33 and belongs to team "Alpha".
const wantedIcon = { $or: [{ teamIcon: 52 }, { teamIcon: 33 }] };
const onAlphaTeam = { team: "Alpha" };
const alphaItemFilter = {
  mapItems: {
    $elemMatch: { $and: [wantedIcon, onAlphaTeam] },
  },
};

dyDB.find(alphaItemFilter).then((docs) => {
  // Log every matching document in the order it was returned.
  for (const doc of docs) {
    console.log(doc);
  }
});

Counting value of subdocuments in mongoose subquery

I have a collection of documents that look like this:
[
{ group_id: 1, value: 'foo' },
{ group_id: 1, value: 'bar' },
{ group_id: 1, value: 'bar' },
{ group_id: 1, value: 'bar' },
{ group_id: 2, value: 'bar' },
{ group_id: 2, value: 'foo' },
{ group_id: 2, value: 'foo' },
{ group_id: 2, value: 'foo' }
]
For each group_id I want to return the value that occurs the most. So my output should look something like this...
[
{ group_id: 1, maxValue: 'bar', maxValueCount: 3 },
{ group_id: 2, maxValue: 'foo', maxValueCount: 3 }
]
How would you do this using the Mongoose aggregate function?
Update:
This is as far as I've gotten, I just need to return the value of the maximum count now...
// Stage: one output document per (group_id, value) pair, with its occurrence count.
const countByGroupAndValue = {
  $group: {
    _id: { id: '$group_id', value: '$value' },
    count: { $sum: 1 },
  },
};

const records = await Record.aggregate([countByGroupAndValue]);
You may achieve your goal by
grouping by group_id/value (and counting the occurrences of value)
then grouping by group_id and pushing all the found values within with their count
finally keeping from the array the value having the max count
// Sample data for the mongo shell: group 1 holds one 'foo' and four 'bar',
// group 2 holds one 'bar' and three 'foo'.
const data = [
  { group_id: 1, value: 'foo' },
  { group_id: 1, value: 'bar' },
  { group_id: 1, value: 'bar' },
  { group_id: 1, value: 'bar' },
  { group_id: 1, value: 'bar' }, // one more added
  { group_id: 2, value: 'bar' },
  { group_id: 2, value: 'foo' },
  { group_id: 2, value: 'foo' },
  { group_id: 2, value: 'foo' },
];

// Start from an empty collection so that repeated runs give the same result.
db.products.remove({});
db.products.insert(data);

const stages = [
  // 1. Count the occurrences of every (group_id, value) pair.
  {
    $group: {
      _id: {
        group_id: '$group_id',
        value: '$value',
      },
      n: { $sum: 1 },
    },
  },
  // 2. Regroup by group_id alone, collecting each value together with its count.
  {
    $group: {
      _id: '$_id.group_id',
      values: {
        $push: {
          value: '$_id.value',
          n: '$n',
        },
      },
    },
  },
  // 3. Keep, per group, the entry with the largest count. After the $group above
  //    the group key lives in _id, so it is copied into group_id explicitly.
  {
    $project: {
      _id: 0,
      group_id: '$_id',
      best: {
        $reduce: {
          input: '$values',
          initialValue: { n: 0, value: '' },
          in: {
            // Strict "less than": on a tie the entry seen first is kept.
            $cond: [
              { $lt: ['$$value.n', '$$this.n'] },
              '$$this',
              '$$value',
            ],
          },
        },
      },
    },
  },
  // 4. Shape the output as { group_id, maxValue, maxValueCount }.
  {
    $project: {
      group_id: 1,
      maxValue: '$best.value',
      maxValueCount: '$best.n',
    },
  },
];

printjson(db.products.aggregate(stages).toArray());
playground

$lookup on object array with table

I have a data object that contains arrays, and I have a providers table.
An array item matches a provider row when the item's Id equals the provider's id (Id == id).
If the id is repeated in the providers table, take the number of occurrences as memberCounts; otherwise memberCounts = 0.
Add memberCounts to each item of the data object.
data object
// Sample items grouped by category; each item has an Id and a name.
const data = {
  Milk: [
    { Id: 1, name: 'a' },
    { Id: 2, name: 'b' },
    { Id: 3, name: 'c' },
    { Id: 4, name: 'd' },
    { Id: 52, name: 'e' },
  ],
  Grocery: [
    { Id: 8, name: '2a' },
    { Id: 22, name: '2b' },
    { Id: 32, name: '2c' },
    { Id: 42, name: '2d' },
  ],
};
providers table
// Sample provider rows: ids may repeat, and every row has status 'active'.
const providers = [1, 1, 1, 1, 4, 2, 3, 3, 52, 1].map((id) => ({
  id,
  status: 'active',
}));
Here is the JavaScript code.
This code works well, but I want to perform this with MongoDB queries so that performance is good.
Is it possible to do this with a MongoDB query? I need to convert the JavaScript code to a MongoDB query.
/**
 * Annotates every item in `data` with the number of providers that reference it.
 *
 * Each item gains `memberCounts` (how many providers have an `id` equal to the
 * item's `Id`) and `new` (true when no provider references the item). Items are
 * updated in place and the same `data` object is returned.
 *
 * @param {Object<string, Array<{Id: number}>>} data - Item lists keyed by category.
 * @param {Array<{id: number}>} providers - Provider rows; an `id` may repeat.
 * @returns {Object} The `data` object that was passed in.
 */
function getMembersWithVendors(data, providers) {
  // Tally the providers per id once, so that each item needs a single lookup
  // instead of a scan over the whole providers list.
  const countsById = new Map();
  for (const { id } of providers) {
    countsById.set(id, (countsById.get(id) ?? 0) + 1);
  }

  for (const items of Object.values(data)) {
    for (const item of items) {
      const count = countsById.get(item.Id) ?? 0;
      item.memberCounts = count;
      item.new = count === 0;
    }
  }
  return data;
}
output
{ Milk:
[ { Id: 1, name: 'a', memberCounts: 5 },
{ Id: 2, name: 'b', memberCounts: 1 },
{ Id: 3, name: 'c', memberCounts: 2 },
{ Id: 4, name: 'd', memberCounts: 1 },
{ Id: 52, name: 'e', memberCounts: 1 } ],
Grocery:
[ { Id: 8, name: '2a', memberCounts: 0 },
{ Id: 22, name: '2b', memberCounts: 0 },
{ Id: 32, name: '2c', memberCounts: 0 },
{ Id: 42, name: '2d', memberCounts: 0 } ] }
Thanks !!
this code is working good but I want perform this with mongodb queries. So that performance is good
It is not a good idea to do all of the operations in the query; it may cause performance issues because your input data is so big. But you can improve some things:
First, $group by id and get the count; this returns the unique ids and their total counts.
// Stage: one result per distinct provider id, with the number of rows sharing it.
const countPerProviderId = {
  $group: {
    _id: "$id",
    count: { $sum: 1 },
  },
};

let providers = await db.providers.aggregate([countPerProviderId]);
Then iterate over the object's arrays,
find the matching entry in providers by id,
and take the count from the matching document:
// Give every item the count of its matching provider group, or 0 when none matches.
Object.values(data).forEach((items) => {
  for (const item of items) {
    const group = providers.find((candidate) => candidate._id === item.Id);
    item.memberCounts = group ? group.count : 0;
  }
});
console.log(data);
Repl Playground

Mongodb use findAndModify on specific document among multiple match

I have documents that look like below
// Sample documents: each has an id, the ids of its parents, and a children count.
let object = [
  { id: 4, parents: [1, 2], children: 2 },
  { id: 5, parents: [1, 2], children: 1 },
  { id: 8, parents: [1, 2, 4], children: 0 },
  { id: 9, parents: [1, 2, 4], children: 0 },
  { id: 10, parents: [1, 2, 5], children: 0 },
];
I would like to use findAndModify to update the children value. It has to be findAndModify because I will be working in a multithreaded environment, so selection and update must happen in a single transaction.
The document I am looking for has '2' in its parents array and a children value less than 2; among the documents that satisfy this, it is the one with the highest children value and the lowest parents count. I would then like to increment the children value of that first matching document.
The query that I have come up with so far is below:
// Select documents whose parents array contains 2 and whose children value is
// below 2. An equality condition on an array field matches when any element
// equals the value; $elemMatch expects a query document, not a bare value.
let query = {
  parents: 2,
  children: { $lt: 2 },
};
which satisfies the first few conditions, but unfortunately I don't know how I can select
{
id: 5,
parents:
[
1, 2,
],
children: 1
},
out of
{
id: 8,
parents:
[
1, 2, 4
],
children: 0
},
{
id: 9,
parents:
[
1, 2, 4
],
children: 0
},
{
id: 10,
parents:
[
1,2,5
],
children:0
}
which is what is currently selected by my query. Again, it has to be a single transaction, so writing another set of queries is not possible.
As to why it has to be a single transaction, refer to this post: How to limit maximum reference of the parental node in mongodb
Try this
// Sketch of a findAndModify call: the filter requires children < 2 and a
// parents array holding exactly two elements ($size: 2).
// NOTE(review): `update: { according to you }` is a placeholder, not valid
// syntax — substitute the real update document (the question asks for an
// increment of children).
// NOTE(review): the filter does not check that parents contains 2, and no sort
// is given, so which matching document is modified is left open — confirm.
query.findAndModify({
query: { $and: [ { children :{$lt : 2 }}, { parents: { $size: 2 } } ] },
update: { according to you }
});

MongoDB sort used more than the maximum RAM, but is using an index?

I'm debugging a site search feature with paginated results that is failing after an arbitrary number of pages.
search: Executor error during find command: OperationFailed: Sort operation used more than the maximum 33554432 bytes of RAM. Add an index, or specify a smaller limit.
However, I'm confident that it is using an index:
winningPlan:
{ stage: 'PROJECTION',
transformBy:
{ score: { '$meta': 'textScore' },
shootId: 1,
title: 1,
publishDate: 1,
'media.images.indexImage': 1,
'media.images.adImage': 1,
'media.images.galleryImages': 1,
'media.images.portrait': 1,
'media.promos.images.410': 1,
'media.full.full.length': 1,
contentProducer: 1,
performers: 1,
site: 1 },
inputStage:
{ stage: 'TEXT',
indexPrefix: {},
indexName: 'textSearchIndex',
parsedTextQuery:
{ terms: [ 'milk' ],
negatedTerms: [],
phrases: [],
negatedPhrases: [] },
textIndexVersion: 3,
inputStage:
{ stage: 'TEXT_MATCH',
inputStage:
{ stage: 'TEXT_OR',
filter:
{ '$and':
[ { 'status.publishStatus': { '$eq': 'PUBLISHED' } },
{ publishDate: { '$lte': 2017-03-17T21:30:29.254Z } } ] },
inputStage:
{ stage: 'IXSCAN',
keyPattern:
{ _fts: 'text',
_ftsx: 1,
'status.publishStatus': 1,
publishDate: -1 },
indexName: 'textSearchIndex',
isMultiKey: true,
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'backward',
indexBounds: {} } } } } },
rejectedPlans: [] },
The collection is ~1gb, 35k objects, average size 28.3 KiB.
The index, which is 22.8 MiB:
{
"description" : "text",
"performers.performerName" : "text",
"site.shortName" : "text",
"summary" : "text",
"title" : "text",
"status.publishStatus" : 1,
"publishDate" : -1
}
The query:
{
'status.publishStatus': 'PUBLISHED',
'publishDate': {
'$lte': new Date("Fri, 17 Mar 2017 21:30:29 GMT")
},
'$text': {
'$search': 'milk'
}
}
The sort:
{
score: {
$meta: 'textScore'
},
publishDate: -1
}
Practically, it's doing a find, sort, skip, limit, lean, select and finally exec.
Is it actually indexed? If it's not, what should I do differently?
If it is indexed, should I be increasing the internalQueryExecMaxBlockingSortBytes ?

Resources