Multi-field, multi-word, match without query_string - search

I would like to be able to match a multi word search against multiple fields where every word searched is contained in any of the fields, any combination. The catch is I would like to avoid using query_string.
curl -X POST "http://localhost:9200/index/document/1" -d '{"id":1,"firstname":"john","middlename":"clark","lastname":"smith"}'
curl -X POST "http://localhost:9200/index/document/2" -d '{"id":2,"firstname":"john","middlename":"paladini","lastname":"miranda"}'
I would like the search for 'John Smith' to match only document 1. The following query does what I need but I would rather avoid using query_string in case the user passes "OR", "AND" and any of the other advanced params.
curl -X GET 'http://localhost:9200/index/_search?per_page=10&pretty' -d '{
"query": {
"query_string": {
"query": "john smith",
"default_operator": "AND",
"fields": [
"firstname",
"lastname",
"middlename"
]
}
}
}'

What you are looking for is the multi-match query, but it doesn't perform in quite the way you would like.
Compare the output of validate for multi_match vs query_string.
multi_match (with operator and) will make sure that ALL terms exist in at least one field:
curl -XGET 'http://127.0.0.1:9200/_validate/query?pretty=1&explain=true' -d '
{
"multi_match" : {
"operator" : "and",
"fields" : [
"firstname",
"lastname"
],
"query" : "john smith"
}
}
'
# {
# "_shards" : {
# "failed" : 0,
# "successful" : 1,
# "total" : 1
# },
# "explanations" : [
# {
# "index" : "test",
# "explanation" : "((+lastname:john +lastname:smith) | (+firstname:john +firstname:smith))",
# "valid" : true
# }
# ],
# "valid" : true
# }
While query_string (with default_operator AND) will check that EACH term exists in at least one field:
curl -XGET 'http://127.0.0.1:9200/_validate/query?pretty=1&explain=true' -d '
{
"query_string" : {
"fields" : [
"firstname",
"lastname"
],
"query" : "john smith",
"default_operator" : "AND"
}
}
'
# {
# "_shards" : {
# "failed" : 0,
# "successful" : 1,
# "total" : 1
# },
# "explanations" : [
# {
# "index" : "test",
# "explanation" : "+(firstname:john | lastname:john) +(firstname:smith | lastname:smith)",
# "valid" : true
# }
# ],
# "valid" : true
# }
So you have a few choices to achieve what you are after:
Preparse the search terms, to remove things like wildcards, etc, before using the query_string
Preparse the search terms to extract each word, then generate a multi_match query per word
Use index_name in your mapping for the name fields to index their data into a single field, which you can then use for search. (like your own custom all field):
As follows:
curl -XPUT 'http://127.0.0.1:9200/test/?pretty=1' -d '
{
"mappings" : {
"test" : {
"properties" : {
"firstname" : {
"index_name" : "name",
"type" : "string"
},
"lastname" : {
"index_name" : "name",
"type" : "string"
}
}
}
}
}
'
curl -XPOST 'http://127.0.0.1:9200/test/test?pretty=1' -d '
{
"firstname" : "john",
"lastname" : "smith"
}
'
curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1' -d '
{
"query" : {
"match" : {
"name" : {
"operator" : "and",
"query" : "john smith"
}
}
}
}
'
# {
# "hits" : {
# "hits" : [
# {
# "_source" : {
# "firstname" : "john",
# "lastname" : "smith"
# },
# "_score" : 0.2712221,
# "_index" : "test",
# "_id" : "VJFU_RWbRNaeHF9wNM8fRA",
# "_type" : "test"
# }
# ],
# "max_score" : 0.2712221,
# "total" : 1
# },
# "timed_out" : false,
# "_shards" : {
# "failed" : 0,
# "successful" : 5,
# "total" : 5
# },
# "took" : 33
# }
Note however, that firstname and lastname are no longer searchable independently. The data for both fields has been indexed into name.
You could use multi-fields with the path parameter to make them searchable both independently and together, as follows:
curl -XPUT 'http://127.0.0.1:9200/test/?pretty=1' -d '
{
"mappings" : {
"test" : {
"properties" : {
"firstname" : {
"fields" : {
"firstname" : {
"type" : "string"
},
"any_name" : {
"type" : "string"
}
},
"path" : "just_name",
"type" : "multi_field"
},
"lastname" : {
"fields" : {
"any_name" : {
"type" : "string"
},
"lastname" : {
"type" : "string"
}
},
"path" : "just_name",
"type" : "multi_field"
}
}
}
}
}
'
curl -XPOST 'http://127.0.0.1:9200/test/test?pretty=1' -d '
{
"firstname" : "john",
"lastname" : "smith"
}
'
Searching the any_name field works:
curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1' -d '
{
"query" : {
"match" : {
"any_name" : {
"operator" : "and",
"query" : "john smith"
}
}
}
}
'
# {
# "hits" : {
# "hits" : [
# {
# "_source" : {
# "firstname" : "john",
# "lastname" : "smith"
# },
# "_score" : 0.2712221,
# "_index" : "test",
# "_id" : "Xf9qqKt0TpCuyLWioNh-iQ",
# "_type" : "test"
# }
# ],
# "max_score" : 0.2712221,
# "total" : 1
# },
# "timed_out" : false,
# "_shards" : {
# "failed" : 0,
# "successful" : 5,
# "total" : 5
# },
# "took" : 11
# }
Searching firstname for john AND smith doesn't work:
curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1' -d '
{
"query" : {
"match" : {
"firstname" : {
"operator" : "and",
"query" : "john smith"
}
}
}
}
'
# {
# "hits" : {
# "hits" : [],
# "max_score" : null,
# "total" : 0
# },
# "timed_out" : false,
# "_shards" : {
# "failed" : 0,
# "successful" : 5,
# "total" : 5
# },
# "took" : 2
# }
But searching firstname for just john works correctly:
curl -XGET 'http://127.0.0.1:9200/test/test/_search?pretty=1' -d '
{
"query" : {
"match" : {
"firstname" : {
"operator" : "and",
"query" : "john"
}
}
}
}
'
# {
# "hits" : {
# "hits" : [
# {
# "_source" : {
# "firstname" : "john",
# "lastname" : "smith"
# },
# "_score" : 0.30685282,
# "_index" : "test",
# "_id" : "Xf9qqKt0TpCuyLWioNh-iQ",
# "_type" : "test"
# }
# ],
# "max_score" : 0.30685282,
# "total" : 1
# },
# "timed_out" : false,
# "_shards" : {
# "failed" : 0,
# "successful" : 5,
# "total" : 5
# },
# "took" : 3
# }

I would rather avoid using query_string in case the user passes "OR", "AND" and any of the other advanced params.
In my experience, escaping the special characters with backslash is a simple and effective solution. The list can be found in the documentation http://lucene.apache.org/core/4_5_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package_description, plus AND/OR/NOT/TO.

Nowadays you can use cross_fields type in multi_match
GET /_validate/query?explain
{
"query": {
"multi_match": {
"query": "peter smith",
"type": "cross_fields",
"operator": "and",
"fields": [ "firstname", "lastname", "middlename" ]
}
}
}
Cross-fields take a term-centric approach. It treats all of the fields as one big field, and looks for each term in any field.
One thing to note though is that if you want it to work optimally, all fields analyzed should have the same analyzer (standard, english, etc.):
For the cross_fields query type to work optimally, all fields should
have the same analyzer. Fields that share an analyzer are grouped
together as blended fields.
If you include fields with a different analysis chain, they will be
added to the query in the same way as for best_fields. For instance,
if we added the title field to the preceding query (assuming it uses a
different analyzer), the explanation would be as follows:
(+title:peter +title:smith) ( +blended("peter", fields: [first_name,
last_name]) +blended("smith", fields: [first_name, last_name]) )

I think "match" query is what you are looking for:
"The match family of queries does not go through a “query parsing” process. It does not support field name prefixes, wildcard characters, or other “advance” features. For this reason, chances of it failing are very small / non existent, and it provides an excellent behavior when it comes to just analyze and run that text as a query behavior (which is usually what a text search box does)"
http://www.elasticsearch.org/guide/reference/query-dsl/match-query.html

Related

How to fetch particular array elements from object's array in mongo db

I've a row, which contains data like:
{
"_id" : ObjectId("5bcef76b0c9a4c194cf6d0a7"),
"userid" : ObjectId("5bc5ae4355418805b8caabb3"),
"transactionid" : "ch_1DON67EzCa9AoDtY51kialzs",
"adminid" : [
"5b5af339bc69c511816e8a2f",
"5b87948d97b752099c086708"
],
"amount" : 3220,
"ispaid" : true,
"products" : [
{
"productid" : ObjectId("5bceba35003c87043997a1d4"),
"quantity" : 2,
"price" : 200,
"type" : "product",
"isCanceled" : false,
"isDeliverd" : false,
"createdby" : "schooladmin",
"userid" : ObjectId("5b87948d97b752099c086708"),
"isReadyToPickup" : false,
"name" : "The Second Product",
"description" : " "
},
{
"productid" : ObjectId("5bc5b2df55418805b8caabbd"),
"quantity" : 2,
"price" : 100,
"type" : "product",
"isCanceled" : false,
"isDeliverd" : false,
"createdby" : "superadmin",
"userid" : ObjectId("5b5af339bc69c511816e8a2f")
},
{
"productid" : ObjectId("5bc5bc5fe84c3d028aaa269c"),
"quantity" : 2,
"price" : 100,
"type" : "product",
"isCanceled" : false,
"isDeliverd" : false,
"createdby" : "superadmin",
"userid" : ObjectId("5b5af339bc69c511816e8a2f")
}
],
"paymentUsing" : "card",
"cardBrand" : "Visa",
"country" : "US",
"paymentDate" : "2018-10-23T10:26:51.856Z"
}
I want to search across all elements of the products array, and if any of them match, store each entire matching element in an array variable.
Suppose I search for the string The Second Product in the name field
of the products array. It should then return the whole matching element, like:
[
{
"productid" : ObjectId("5bceba35003c87043997a1d4"),
"quantity" : 2,
"price" : 200,
"type" : "product",
"isCanceled" : false,
"isDeliverd" : false,
"createdby" : "schooladmin",
"userid" : ObjectId("5b87948d97b752099c086708"),
"isReadyToPickup" : false,
"name" : "The Second Product",
"description" : " "
}
]
And if two or more elements are found, it should return all of them in the array accordingly.
Please suggest me the solution.
Thank You in Advance. :)
Something like this:
db.collection.find({products: {$elemMatch: {name:'The Second Product'}}})
Using aggregation pipeline we can get the desired result.
Approach 1:
A simpler approach if we don't have nested arrays
$filter is used within $project to get the desired result
db.collection_name.aggregate([
{
$project: {
products: {
$filter: {
input: "$products",
as: "product",
cond: {
$eq: [
"$$product.name",
"The Second Product"
]
}
}
}
}
}
])
Approach 2:
$unwind to unwind the products array
$match to retrieve the matching documents
$project to project only the required output elements
db.collection_name.aggregate([
{ $unwind: "$products" },
{ $match:{"products.name":"The Second Product"} },
{ $project:{products:1}}
])
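Output of the above queries (the shape shown is for Approach 2; Approach 1 returns the same element, but with products as an array of the matching elements):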
{
"_id" : ObjectId("5bcef76b0c9a4c194cf6d0a7"),
"products" : {
"productid" : ObjectId("5bceba35003c87043997a1d4"),
"quantity" : 2,
"price" : 200,
"type" : "product",
"isCanceled" : false,
"isDeliverd" : false,
"createdby" : "schooladmin",
"userid" : ObjectId("5b87948d97b752099c086708"),
"isReadyToPickup" : false,
"name" : "The Second Product",
"description" : " "
}
}

How do I access an element in a nested schema?

{
"_id" : ObjectId("5b8d1ecbb745685c31ad8603"),
"name" : "abc",
"email" : "abc#gmail.com",
"projectDetails" : [
{
"technologies" : [
"abc",
"abc"
],
"_id" : ObjectId("5b8d1ecbb745685c31ad8604"),
"projectName" : "abc",
"projectDescription" : "abc",
"manager" : "abc",
"mentor" : "abc"
}
],
"__v" : 0
}
Here, projectDetails is an array of objects. I want to update the element "projectName" in projectDetails. How do I write a PUT request for the same in Postman?
Try below query
db.users.update({ "email" : "abc#gmail.com","projectDetails._id":ObjectId("5b8d1ecbb745685c31ad8604")},{ $set: { "projectDetails.$.projectName" : "test" } })
Your url should be like this
http://localhost:3000/project/5b8d1ecbb745685c31ad8604
Your put request as mentioned below
router.route("/updateProject",function(req,res){
var id = req.query.project_id; // Check syntax for framework you are using
});

How to push new field in array mongodb

Imagine I have database like
{
"_id" : ObjectId("594994d1ab6c161168aa5969"),
"user_id" : ObjectId("594994d1ab6c161168aa5968"),
"active" : [
{
"type" : "active"
},
{
"type" : "inactive"
}
]
}
I want to add a note field to the first object in the active array. Here is the result I expect:
{
"_id" : ObjectId("594994d1ab6c161168aa5969"),
"user_id" : ObjectId("594994d1ab6c161168aa5968"),
"active" : [
{
"type" : "active",
"note": [{
content: 'here is content',
title : ' here is title'
}]
},
{
"type" : "inactive"
}
]
}
Here is code I tried
db.collection('..').update({'user_id' : ObjectId(userId)} ,
{$push:{ "active.0": "note": [{
content: 'here is content',
title : ' here is title'
}] } )
But I get the error: The field 'active.0' must be an array but is of type object in document. What am I doing wrong? Please help.
Starting with your document like this:
{
"_id" : ObjectId("594994d1ab6c161168aa5969"),
"user_id" : ObjectId("594994d1ab6c161168aa5968"),
"active" : [
{
"type" : "active"
},
{
"type" : "inactive"
}
]
}
You run the $push like this:
db.collection.update(
{ "_id" : ObjectId("594994d1ab6c161168aa5969") },
{ "$push":{ "active.0.note": { content: 'here is content', title : ' here is title' } } }
)
Which creates the new array within the first element like so:
{
"_id" : ObjectId("594994d1ab6c161168aa5969"),
"user_id" : ObjectId("594994d1ab6c161168aa5968"),
"active" : [
{
"type" : "active",
"note" : [
{
"content" : "here is content",
"title" : " here is title"
}
]
},
{
"type" : "inactive"
}
]
}
Everything here is cut and paste from my shell.

How to find subdocuments within an array where the array in each subdocument contains a certain keyword?

I am creating twitter clone using mongodb and want to search tweets which have certain hashtag in them. I came up with following document structure for my database -
{
"_id" : ObjectId("56f88c038c297eb4048e5dff"),
"twitterHandle" : "abhayp",
"firstName" : "Abhay",
"lastName" : "Pendawale",
"emailID" : "abhayp#gmail.com",
"password" : "278a2c36eeebde495853b14e6e5525fd12074229",
"phoneNumber" : "12394872398",
"location" : "San Jose",
"birthYear" : 1992,
"birthMonth" : 0,
"birthDay" : 1,
"followers" : [
"abhayp",
"deepakp",
"john",
"madhuras",
"nupurs",
"vgalgali"
],
"following" : [
"abhayp",
"abhinavk",
"ankitac",
"anupd",
"arpits"
],
"tweets" : [
{
"tweet_id" : "3f0fe01f8231356f784d07111efdf9d8ead28133",
"tweet_text" : "This new twitter sounds good!",
"created_on" : ISODate("2016-03-13T01:47:37Z"),
"firstName" : "Abhay",
"lastName" : "Pendawale",
"twitterHandle" : "abhayp",
"tags" : [ ]
},
{
"tweet_id" : "4e57b6d7d6b47d69054f0be55c238e8038751d84",
"tweet_text" : "#CMPE273 Node.js is #awesome",
"created_on" : ISODate("2016-03-07T23:16:39Z"),
"firstName" : "Abhay",
"lastName" : "Pendawale",
"twitterHandle" : "abhayp",
"tags" : [
"awesome",
"CMPE273"
]
},
{
"tweet_id" : "e5facd5f37c44313d5be02ffe0a3ca7190affd6b",
"tweet_text" : "Getting an incredible welcome in #Pune #travel #adventure #film #india ",
"created_on" : ISODate("2016-03-07T23:37:27Z"),
"firstName" : "Abhay",
"lastName" : "Pendawale",
"twitterHandle" : "abhayp",
"tags" : [
"adventure",
"film",
"india",
"Pune",
"travel"
]
},
{
"tweet_id" : "f5a735c1f747732f3e04f6cb2c092ff44750c0fd",
"tweet_text" : "The D Day today!\n#TheDDay",
"created_on" : ISODate("2016-03-18T22:24:57Z"),
"firstName" : "Abhay",
"lastName" : "Pendawale",
"twitterHandle" : "abhayp",
"tags" : [ ]
}
]}
I want to find out the tweets which have "Pune" in the tags array of the tweet. I tried following command
db.users.find({ "tweets.tags" : {$all: ["Pune"] }}, { tweets : 1 }).pretty();
but this command returns ALL tweets of users who have a 'Pune' entry in the tags of one of their tweets. How can I search for only those tweets which have a 'Pune' entry in their tags array?
Note: I don't want users who have tweeted #Pune, rather I want all tweets which contain #Pune. The duplicate marked question does not solve this problem.
Running following query -
db.users.aggregate([{
$match: {
'tweets.tags': 'Pune'
}
}, {
$project: {
tweets: {
$filter: {
input: '$tweets',
as: 'tweet',
cond: {
$eq: ['$$tweet.tags', 'Pune']
}
}
}
}
}]);
returns following results -
{ "_id" : ObjectId("56f88c038c297eb4048e5df1"), "tweets" : [ ] }
{ "_id" : ObjectId("56f88c038c297eb4048e5dff"), "tweets" : [ ] }
{ "_id" : ObjectId("56f88c038c297eb4048e5e07"), "tweets" : [ ] }
Which is certainly not what I want! :(
I don't think that's possible, besides using some convoluted aggregation stages.
The projection operator is close but only returns the first match.
My advice would be to put the tweets in a separate collection, that would be much more convenient to browse them, and probably more scalable. You're already duplicating the relevant user infos in them, so you don't have to change their content.

Update array object based on id?

I'm having a bit of a mongo issue. I was wondering if there was a way to do the following in a single mongo console command rather than multiple find and update calls.
{
"_id" : ObjectId("50b429ba0e27b508d854483e"),
"array" : [
{
"id" : "1",
"letter" : "a"
},
{
"id" : "2",
"letter" : "b"
}
],
"tester" : "tom"
}
I want to update the object with this new array item
{
"id": "2",
"letter": "c"
}
I used the following, but $addToSet is limited: it won't insert an item into the array if it is already there, but it also will not update an item based on an identifier. In this case I would really like to update this entry based on the id.
db.soup.update({
"tester": "tom"
}, {
$addToSet: {
"array": {
"id": "2",
"letter": "c"
}
}
});
This gives me:
{
"_id" : ObjectId("50b429ba0e27b508d854483e"),
"array" : [
{
"id" : "1",
"letter" : "a"
},
{
"id" : "2",
"letter" : "b"
},
{
"id" : "2",
"letter" : "c"
}
],
"tester" : "tom"
}
When what I really wanted was:
{
"_id" : ObjectId("50b429ba0e27b508d854483e"),
"array" : [
{
"id" : "1",
"letter" : "a"
},
{
"id" : "2",
"letter" : "c"
}
],
"tester" : "tom"
}
You can use the $ positional operator to do this:
db.soup.update(
{_id: ObjectId("50b429ba0e27b508d854483e"), 'array.id': '2'},
{$set: {'array.$.letter': 'c'}})
The $ in the update object acts as a placeholder for the first element of array to match the query selector.
Here you go:
> db.collection.insert( { array : [ { id : 1, letter : 'a' }, { id : 2, letter : 'b' } ], tester : 'tom' } );
> db.collection.findOne();
{
"_id" : ObjectId("50b431a69a0358d590a2f5f0"),
"array" : [
{
"id" : 1,
"letter" : "a"
},
{
"id" : 2,
"letter" : "b"
}
],
"tester" : "tom"
}
> db.collection.update( { tester : 'tom' }, { $set : { 'array.1' : { id : 2, letter : 'c' } } }, false, true );
> db.collection.findOne();
{
"_id" : ObjectId("50b431a69a0358d590a2f5f0"),
"array" : [
{
"id" : 1,
"letter" : "a"
},
{
"id" : 2,
"letter" : "c"
}
],
"tester" : "tom"
}
The trick lies in the trailing false, true arguments.
That is: false for upsert, true for update multiple.
For more details check out:
http://www.mongodb.org/display/DOCS/Updating#Updating-update%28%29

Resources