Map/reduce in CouchDB take a field if id contains a subString - couchdb

I'll explain: I have this function
function (doc) {
if(doc.MovieId == "1721")
emit(doc.Rating, 1);
}
but it returns some documents that are not relevant (for example, they don't have the Rating field). My document _id is composed of partitionName:id, so I thought of using if(doc.MovieId == "1721" && doc._id.contains("ratings")){...} but it doesn't work.
Is there a way to do this?
-----EDIT 1-----
The docs in the circle are not relevant.
Do you need the schema of the JSON document?
-----EDIT 2-----
the following documents are NOT RELEVANT
1.
{
"_id": "movies : 1721",
"_rev": "1-d7e0e3c8152d6978073d280e0aef7457",
"MovieId": "1721",
"Title": "Titanic (1997)",
"Genres": [
"Drama",
"Romance"
]
}
2.
{
"_id": "tags : 1490",
"_rev": "1-14c20c9cfb3ee1964a298777f80333d5",
"MovieId": "1721",
"UserId": "474",
"Tag": "shipwreck",
"Timestamp": "1138031879"
}
3.
{
"_id": "tags : 2791",
"_rev": "1-e4d6c9573fcdae726a69d5fc6255de27",
"MovieId": "1721",
"UserId": "537",
"Tag": "romance",
"Timestamp": "1424141922"
}
Documents like this are RELEVANT:
{
"_id": "ratings : 31662",
"_rev": "1-446665286337faaf51e23e40b527ec2d",
"MovieId": "1721",
"UserId": "219",
"Rating": "0.5",
"Timestamp": "1214043346"
}

The following view should emit only documents whose _id starts with "ratings :":
function (doc) {
var id_prefix = "ratings :";
if(doc._id.substr(0, id_prefix.length) === id_prefix && doc.MovieId == "1721")
emit(doc.Rating, 1);
}

Related

Elasticsearch - Query based on text length

I'm using the official Elasticsearch NodeJS client library, to query the following index structure:
{
"_index": "articles",
"_type": "context",
"_id": "1",
"_version": 1,
"found": true,
"_source": {
"article": "this is a paragraph",
"topic": "topic A"
}
}
{
"_index": "articles",
"_type": "context",
"_id": "2",
"_version": 1,
"found": true,
"_source": {
"article": "this is a paragraph this is a paragraph this is a paragraph",
"topic": "topic B"
}
}
I would like to query my index using the term "this is a paragraph" and boost the result with the most similar text length, IE: document _id:1
Can I do this without re-indexing and adding a field to my index (as described here)?
The below query uses Groovy to look at the length of the actual text indexed into ES (using _source.article.length()) and at the length of the text to be searched. As a very simple basic query, I used match_phrase and then rescored the documents based on how long the text to search is compared to how long the original text is.
GET /articles/context/_search
{
"query": {
"function_score": {
"query": {
"match_phrase": {
"article": "this is a paragraph"
}
},
"functions": [
{
"script_score": {
"script": {
"inline": "text_to_search_length=text_to_search.length(); compared_length=_source.article.length();return (compared_length-text_to_search_length).abs()",
"params": {
"text_to_search": "this is a paragraph"
}
}
}
}
]
}
},
"sort": [
{
"_score": {
"order": "asc"
}
}
]
}

Cloudant/CouchDB query by content of nested array

In my Cloudant database I have objects like these. I'd like to query for objects based on properties in a nested array.
In the example below, how do I query for all objects where there is a vote with userId=="user1"? The query should return both objects. When I search for userId "user2", it should return only the first one, as the second object only has votes from user1 and user4.
{
"_id": "1",
"votes": [
{
"userId": "user1",
"comment": ""
},
{
"userId": "user2",
"comment": ""
},
{
"userId": "user3",
"comment": ""
}
]
}
{
"_id": "2",
"votes": [
{
"userId": "user1",
"comment": ""
},
{
"userId": "user4",
"comment": ""
}
]
}
This view returns a list of all votes (where a vote is a userId and _id pair) sorted by userId. To get only user1's votes, query it with ?key="user1":
function(doc) {
for(i in doc.votes)
emit(doc.votes[i].userId, doc._id);
}
/dbName/_design/foo/_view/bar?key="user1"
{"total_rows":5,"offset":0,"rows":[
{"id":"1","key":"user1","value":"1"},
{"id":"2","key":"user1","value":"2"}
]}

How to return a summary of distinct values held within CouchDB

Given a collection of objects held within CouchDB that look like this:
{
"_id": "-5739365454",
"_rev": "1-d55b2651b214113bab0b4c18a401eaef",
"userId": "mike",
"remoteAddr": "77.102.112.205"
}
{
"_id": "-573936626",
"_rev": "1-778bbf1c0e5cfe8bbd83dbe618e8b549",
"userId": "mike",
"remoteAddr": "77.102.112.205"
}
...
{
"_id": "-573954545",
"_rev": "1-4350856f40c091156f8bb984a5559ebd",
"userId": "jeff",
"remoteAddr": "77.102.112.205"
}
I wish to create an output view that gives me a set of results that looks something like the following (pseudo output - not syntactically valid):
userId count
mike 2
jeff 1
...
So, one 'row'/element would exist in the output for each distinct userId in the DB, along with the number of times that userId exists in the DB.
So far I have the following:
{
"_id": "_design/myObj",
"_rev": "4-cd468634f4fb6bef500753fedcb35e27",
"views": {
"by_userId": {
"map": "function(doc) { if (doc.userId) { emit (doc.userId, doc._id)} }",
"reduce": "_stats"
}
},
"language": "javascript"
}
... but this is not working as required. Any assistance gratefully received!
You could try using the built-in _count reduce function. For instance:
`
{
"_id": "_design/myObj",
"_rev": "4-cd468634f4fb6bef500753fedcb35e27",
"views": {
"by_userId": {
"map": "function(doc) { if (doc.userId) { emit (doc.userId, null)} }",
"reduce": "_count"
}
},
"language": "javascript"
}`

Elasticsearch (n) documents per property value

I have an Elasticsearch instance with about 3.5Million records and I want a fast way to return (n) records per property value in any given search.
Example
Document 1:
{
"id": 1,
"gender": "male",
"name": "Joe"
}
Document 2:
{
"id": 2,
"gender": "male",
"name": "John"
}
Document 3:
{
"id": 3,
"gender": "female",
"name": "Jill"
}
Document 4:
{
"id": 4,
"gender": "female",
"name": "Joan"
}
Assuming a match_all search
I only want to return 1 document for each value of the gender property:
For instance, return only doc 1 and doc 3
This would obviously be spread across a much larger result set, but the result should still scale to (n) docs per unique property value.
Any help with this is much appreciated.
E
Use an aggregation for "gender" field together with a "top_hits" aggregation to return n hits per each "gender" value:
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"should": [{}]
}
}
}
},
"aggs": {
"by_gender": {
"terms": {
"field": "gender"
},
"aggs": {
"first_hit": {
"top_hits": {"size":1}
}
}
}
}
}

CouchDB's Linked Documents in a View

I'm having a hard time getting my head around CouchDB's linked documents feature.
I have two types of data being stored in a single CouchDB database:
{
"id":"1",
"type": "track",
"title": "Bohemian Rhapsody"
}
{
"id":"2",
"type": "artist",
"name": "Queen",
"tracks": ["1"]
}
I'm under the impression that I can write a view like the one below and get the following documents emitted:
{
"id":"2",
"type": "artist",
"name": "Queen",
"tracks": [
{
"id":"1",
"type": "track",
"title": "Bohemian Rhapsody"
}
]
}
I've been trying this view, but it's not working the way I'm expecting:
// CouchDB map function: for each document whose type is 'artist', emits the
// document's _id as key and a copy of the document whose tracks array has been
// replaced by {_id: ...} stubs.
function(doc) {
if(doc.type == 'artist') {
// One {_id: <track id>} stub per entry in doc.tracks.
var tracks = [];
for(var i = 0; i < doc.tracks.length; i++) {
tracks.push({_id:doc.tracks[i]});
}
// NOTE(review): uneval is not standard JavaScript (presumably the SpiderMonkey
// extension); this eval/uneval round-trip appears intended to clone doc — confirm.
// newdoc is assigned without var, so it is an implicit global.
newdoc = eval(uneval(doc));
newdoc.tracks = tracks;
emit(doc._id,newdoc);
}
}
example here: http://jphastings.iriscouch.com/_utils/database.html?music/_design/test/_view/linked
This isn't returning what I'd hope - do you have any suggestions? Thanks
Okay, I finally understand what you are trying to do. Yes, this is possible. Here is how.
You have 2 documents
{
"_id":"anyvalue",
"type": "track",
"title": "Bohemian Rhapsody"
}
{
"_id":"2",
"type": "artist",
"name": "Queen",
"tracks": ["anyvalue"]
}
1) What you were doing wrong was not having quotes around the values in tracks (the items in the array).
2) The reference id must be _id for this to work. The difference is worth noting, since you can have an id field, but only _id is used to identify documents.
For the result you want this view would suffice
function(doc) {
if (doc.type === 'artist') {
for (var i in doc.tracks) {
var id = doc.tracks[i];
emit(id, { _id: id });
}
}
}
What you want to do is call emit inside the for loop to emit the id of every 'track' of each artist.
Then query the CouchDB view with the include_docs=true parameter. Here is the final result for the database that you created on Iris Couch.
http://jphastings.iriscouch.com/music/_design/test/_view/nested?reduce=false&include_docs=true
{
"total_rows": 3,
"offset": 0,
"rows": [
{
"id": "0b86008d8490abf0b7e4f15f0c6a50a7",
"key": "0b86008d8490abf0b7e4f15f0c6a463b",
"value": {
"_id": "0b86008d8490abf0b7e4f15f0c6a463b"
},
"doc": {
"_id": "0b86008d8490abf0b7e4f15f0c6a463b",
"_rev": "3-7e4ba3bfedd29a07898125c09dd7262e",
"type": "track",
"title": "Boheniam Rhapsody"
}
},
{
"id": "0b86008d8490abf0b7e4f15f0c6a50a7",
"key": "0b86008d8490abf0b7e4f15f0c6a5ae2",
"value": {
"_id": "0b86008d8490abf0b7e4f15f0c6a5ae2"
},
"doc": {
"_id": "0b86008d8490abf0b7e4f15f0c6a5ae2",
"_rev": "2-b3989dd37ef4d8ed58516835900b549e",
"type": "track",
"title": "Another one bites the dust"
}
},
{
"id": "0b86008d8490abf0b7e4f15f0c6a695e",
"key": "0b86008d8490abf0b7e4f15f0c6a6353",
"value": {
"_id": "0b86008d8490abf0b7e4f15f0c6a6353"
},
"doc": {
"_id": "0b86008d8490abf0b7e4f15f0c6a6353",
"_rev": "2-0383f18c198b813943615d2bf59c212a",
"type": "track",
"title": "Stripper Vicar"
}
}
]
}
Jason explains it wonderfully in this post
Best way to do one-to-many "JOIN" in CouchDB
this link is also helpful for entity relationships in couch db
http://wiki.apache.org/couchdb/EntityRelationship

Resources