Map/Reduce differences between Couchbase & CloudAnt - couchdb

I've been playing around with Couchbase Server and now just tried replicating my local db to Cloudant, but am getting conflicting results for my map/reduce function pair to build a set of unique tags with their associated projects...
// map.js
function(doc) {
if (doc.tags) {
for(var t in doc.tags) {
emit(doc.tags[t], doc._id);
}
}
}
// reduce.js
function(key,values,rereduce) {
if (!rereduce) {
var res=[];
for(var v in values) {
res.push(values[v]);
}
return res;
} else {
return values.length;
}
}
In Cloudbase server this returns JSON like:
{"rows":[
{"key":"3d","value":["project1","project3","project8","project10"]},
{"key":"agents","value":["project2"]},
{"key":"fabrication","value":["project3","project5"]}
]}
That's exactly what I wanted & expected. However, the same query on the Cloudant replica, returns this:
{"rows":[
{"key":"3d","value":4},
{"key":"agents","value":1},
{"key":"fabrication","value":2}
]}
So it somehow only returns the length of the value array... Highly confusing & am grateful for any insights by some M&R ninjas... ;)

It looks like this is exactly the behavior you would expect given your reduce function. The key part is this:
else {
return values.length;
}
In Cloudant, rereduce is always called (since the reduce needs to span over multiple shards.) In this case, rereduce calls values.length, which will only return the length of the array.

I prefer to reduce/re-reduce implicitly rather than depending on the rereduce parameter.
function(doc) { // map
if (doc.tags) {
for(var t in doc.tags) {
emit(doc.tags[t], {id:doc._id, tag:doc.tags[t]});
}
}
}
Then reduce checks whether it is accumulating document ids from the identical tag, or whether it is just counting different tags.
function(keys, vals, rereduce) {
var initial_tag = vals[0].tag;
return vals.reduce(function(state, val) {
if(initial_tag && val.tag === initial_tag) {
// Accumulate ids which produced this tag.
var ids = state.ids;
if(!ids)
ids = [ state.id ]; // Build initial list from the state's id.
return { tag: val.tag,
, ids: ids.concat([val.id])
};
} else {
var state_count = state.ids ? state.ids.length : state;
var val_count = val.ids ? val.ids.length : val;
return state_count + val_count;
}
})
}
(I didn't test this code, but you get the idea. As long as the tag value is the same, it doesn't matter whether it's a reduce or rereduce. Once different tags start reducing together, it detects that because the tag value will change. So at that point just start accumulating.
I have used this trick before, although IMO it's rarely worth it.
Also in your specific case, this is a dangerous reduce function. You are building a wide list to see all the docs that have a tag. CouchDB likes tall lists, not fat lists. If you want to see all the docs that have a tag, you could map them.
for(var a = 0; a < doc.tags.length; a++) {
emit(doc.tags[a], doc._id);
}
Now you can query /db/_design/app/_view/docs_by_tag?key="3d" and you should get
{"total_rows":287,"offset":30,"rows":[
{"id":"project1","key":"3d","value":"project1"}
{"id":"project3","key":"3d","value":"project3"}
{"id":"project8","key":"3d","value":"project8"}
{"id":"project10","key":"3d","value":"project10"}
]}

Related

How do I filter keys from JSON in Node.js?

I'm trying to select certain keys from an JSON array, and filter the rest.
var json = JSON.stringify(body);
which is:
{
"FirstName":"foo",
"typeform_form_submits":{
"foo":true,
"bar":true,
"baz":true
},
"more keys": "foo",
"unwanted key": "foo"
}
Want I want:
{
"FirstName":"foo",
"typeform_form_submits":{
"foo":true,
"bar":true,
"baz":true
}
}
I've checked out How to filter JSON data in node.js?, but I'm looking to do this without any packages.
Now you can use Object.fromEntries like so:
Object.fromEntries(Object.entries(raw).filter(([key]) => wantedKeys.includes(key)))
You need to filter your obj before passing it to json stringify:
const rawJson = {
"FirstName":"foo",
"typeform_form_submits":{
"foo":true,
"bar":true,
"baz":true
},
"more keys": "foo",
"unwanted key": "foo"
};
// This array will serve as a whitelist to select keys you want to keep in rawJson
const filterArray = [
"FirstName",
"typeform_form_submits",
];
// this function filters source keys (one level deep) according to whitelist
function filterObj(source, whiteList) {
const res = {};
// iterate over each keys of source
Object.keys(source).forEach((key) => {
// if whiteList contains the current key, add this key to res
if (whiteList.indexOf(key) !== -1) {
res[key] = source[key];
}
});
return res;
}
// outputs the desired result
console.log(JSON.stringify(filterObj(rawJson, filterArray)));
var raw = {
"FirstName":"foo",
"typeform_form_submits":{
"foo":true,
"bar":true,
"baz":true
},
"more keys": "foo",
"unwanted key": "foo"
}
var wantedKeys =["FirstName","typeform_form_submits" ]
var opObj = {}
Object.keys(raw).forEach( key => {
if(wantedKeys.includes(key)){
opObj[key] = raw[key]
}
})
console.log(JSON.stringify(opObj))
I know this question was asked aways back, but I wanted to just toss out there, since nobody else did:
If you're bound and determined to do this with stringify, one of its less-well-known capabilities involves replacer, it's second parameter. For example:
// Creating a demo data set
let dataToReduce = {a:1, b:2, c:3, d:4, e:5};
console.log('Demo data:', dataToReduce);
// Providing an array to reduce the results down to only those specified.
let reducedData = JSON.stringify(dataToReduce, ['a','c','e']);
console.log('Using [reducer] as an array of IDs:', reducedData);
// Running a function against the key/value pairs to reduce the results down to those desired.
let processedData = JSON.stringify(dataToReduce, (key, value) => (value%2 === 0) ? undefined: value);
console.log('Using [reducer] as an operation on the values:', processedData);
// And, of course, restoring them back to their original object format:
console.log('Restoration of the results:', '\nreducedData:', JSON.parse(reducedData), '\nprocessedData:', JSON.parse(processedData));
In the above code snippet, the key value pairs are filtered using stringify exclusively:
In the first case, by providing an array of strings, representing the keys you wish to preserve (as you were requesting)
In the second, by running a function against the values, and dynamically determining those to keep (which you didn't request, but is part of the same property, and may help someone else)
In the third, their respective conversions back to JSON (using .parse()).
Now, I want to stress that I'm not advocating this as the appropriate method to reduce an object (though it will make a clean SHALLOW copy of said object, and is actually surprisingly performant), if only from an obscurity/readability standpoint, but it IS a totally-effective (and mainstream; that is: it's built into the language, not a hack) option/tool to add to the arsenal.

Adding functions in search index of loudant

I have a Json document in cloudant as:
{
"_id": "3-f812228f45b5f4e4962505561953ew245",
"_rev": "3-f812228f45b5f4e496250556195372b2",
"wiki_page": "http://en.wikipedia.org/wiki/African_lion",
"name": "african lion",
"class": "mammal",
"diet": "herbivore"
}
I want to make a search index that can search this document when I input queries as "afrian lion" or "lion african",...
I make a function that can return all cases of permutation in "doc.name" for indexing (This function works well and it also had been checked in pure JS environment). However, it does't work in cloudant, the output return null when i input a query.
This is a code that I made in search index:
function(doc){
var list = [];
function permute(ss, used, res, level, list){
if(level==ss.length&&res!==""){
list.push(res);
return;
}
for(var i=0; i<ss.length; i++){
console.log("loops");
if (used[i]===true){
continue;
}
if(level>=0){
if (res!="" && list.indexOf(res)<0){
list.push(res.trim());
}
used[i]=true;
permute(ss, used, res+" "+ss[i], level+1, list)
used[i]=false;
}
}
}
function permuteword(s){
var ss=s.split(" ");
var used = [];
var res = "";
list = [];
permute(ss, used, res, 0, list);
console.log(list);
}
var contentIndex=[];
contentIndex=permuteword("african lion");
for(var i=0; i<contentIndex.length; i++){
index("default", contentIndex[i]);
}
}
How can i solve the problem?
Update
Your update looks good, but there is still one issue: you are not returning the list from the permuteword function. I believe you also need to remove calls to console.log. Once I did these two things I was able to get it to work with Cloudant using the following search queries (I also changed your hard-coded call with "african lion" back to doc.name):
default:"african"
default:"african lion"
default:"lion"
default:"lion african"
Here is the final script:
function(doc){
var list = [];
function permute(ss, used, res, level, list){
if(level==ss.length&&res!==""){
list.push(res);
return;
}
for(var i=0; i<ss.length; i++){
if (used[i]===true){
continue;
}
if(level>=0){
if (res!="" && list.indexOf(res)<0){
list.push(res.trim());
}
used[i]=true;
permute(ss, used, res+" "+ss[i], level+1, list)
used[i]=false;
}
}
}
function permuteword(s){
var ss=s.split(" ");
var used = [];
var res = "";
list = [];
permute(ss, used, res, 0, list);
return list;
}
if (doc.name) {
var contentIndex=permuteword(doc.name);
for(var i=0; i<contentIndex.length; i++){
index("default", contentIndex[i]);
}
}
}
Updated JSFiddle:
https://jsfiddle.net/14e7L3gw/1/
Original Answer
I believe there are issues with your Javascript. The permuteword function is not returning any results. See this JSFiddle:
https://jsfiddle.net/14e7L3gw/
Note: I added some logging and commented out the call to index. Run with your browser debugger to see the output.
Here is what is happening:
The first call to permuteword calls permute(["african","lion"], [], "", 0, []);
The first if in permuteword fails because level (0) != ss.length() (2) and res == "".
Then the function loops through ss, but never does anything because level = 0.
Ultimately permuteword returns an empty array, so nothing gets indexed.

Mongodb $where query always true with nodejs

When I query my database with a function passed in the "$where" clause in nodejs, it always return me all documents in the db.
For example, if I do
var stream = timetables.find({$where: function() { return false; }}).stream();
it return me all the documents.
Instead, if I do
var stream = timetables.find({$where: 'function() { return false; }'}).stream();
the function is really executed, and this code doesn't return any document.
The problem is that if I convert in string my function the context's bindinds are removed, and I need them for more complex query. For example:
var n = 1;
var f = function() { return this.number == n; }
var stream = timetables.find({$where: f.toString()}).stream();
// error: n is not defined
Is this a normal behaviour? How can I solve my problem?
Please excuse me for my poor english!
First off, keep in mind that the $where operator should almost never be used for the reasons explained here (credit goes to #WiredPrairie).
Back to your issue, the approach you'd like to take won't work even in the mongodb shell (which explicitly allows naked js functions with the $where operator). The javascript code provided to the $where operator is executed on the mongo server and won't have access to the enclosing environment (the "context bindings").
> db.test.insert({a: 42})
> db.test.find({a: 42})
{ "_id" : ObjectId("5150433c73f604984a7dff91"), "a" : 42 }
> db.test.find({$where: function() { return this.a == 42 }}) // works
{ "_id" : ObjectId("5150433c73f604984a7dff91"), "a" : 42 }
> var local_var = 42
> db.test.find({$where: function() { return this.a == local_var }})
error: {
"$err" : "error on invocation of $where function:\nJS Error: ReferenceError: local_var is not defined nofile_b:1",
"code" : 10071
}
Moreover it looks like that the node.js native mongo driver behaves differently from the shell in that it doesn't automatically serialize a js function you provide in the query object and instead it likely drops the clause altogether. This will leave you with the equivalent of timetables.find({}) which will return all the documents in the collection.
This one is works for me , Just try to store a query as a string in one variable then concat your variable in query string,
var local_var = 42
var query = "{$where: function() { return this.a == "+local_var+"}}"
db.test.find(query)
Store your query into a varibale and use that variable at your find query. It works..... :D
The context will always be that of the mongo database, since the function is executed there. There is no way to share the context between the two instances. You have to rethink the way you query and come up with a different strategy.
You can use a wrapper to pass basic JSON objects, ie. (pardon coffee-script):
# That's the main wrapper.
wrap = (f, args...) ->
"function() { return (#{f}).apply(this, #{JSON.stringify(args)}) }"
# Example 1
where1 = (flag) ->
#myattr == 'foo' or flag
# Example 2 with different arguments
where2 = (foo, options = {}) ->
if foo == options.bar or #_id % 2 == 0
true
else
false
db.collection('coll1').count $where: wrap(where1, true), (err, count) ->
console.log err, count
db.collection('coll1').count $where: wrap(where2, true, bar: true), (err, count) ->
console.log err, count
Your functions are going to be passed as something like:
function () {
return (function (flag) {
return this.myattr === 'foo' || flag;
}).apply(this, [true])
}
...and example 2:
function () {
return (
function (foo, options) {
if (options == null) {
options = {};
}
if (foo === options.bar || this._id % 2 === 0) {
return true;
} else {
return false;
}
}
).apply(this, [ true, { "bar": true } ])
}
This is how it is supposed to be. The drivers don't translate the client code into the mongo function javascript code.
I'm assuming you are using Mongoose to query your database.
If you take a look at the actual Query object implementation, you'll find that only strings are valid arguments for the where prototype.
When using the where clause, you should use it along with the standard operators such as gt, lt that operates on in the path created by the where function.
Remember that Mongoose querying, as in Mongo, is by example, you may want to reconsider your query specification in a more descriptive fashion.

how to find specific string in key value pair in mongodb

i am having data in mongodb like that
[
{
"name":"silvester",
"product":"laptop,iphone,mobile,phone"
},
{
"name":"john",
"product":"cycle,bus,phone,laptop"
},
{
"name":"franklin",
"product":"cycle,phone"
}
]
How to find that laptop is in product key.
if product key look like this
{
"name":"XXX",
"product":"laptop"
}
I can easily find that name by using this db.collection.find("product":"laptop");
So how to find this?
Also let me know this three website name running under using backbone.js and node.js and mongodb technology such as www.trello.com .
sorry for my worst english..
Using regex with mongodb
This worked for me
db.collection.find({"product": /laptop/})
Updated Answer
If you wish to use variables, try something like this:
var abc = "laptop";
// other stuff
userdetails.find({"product":new RegExp(abc)}).toArray(function(err,result){
if (err) console.log ("error: "+err);
else
{
// if you want the length
console.log(result.length);
// if you actually want to see the results
for (var i = 0; i < result.length; i++)
{
console.log(result[i]);
}
}
});
Updated One More Time
var abc = "laptop";
// other stuff
// note this is case sensitive. if abc = "Laptop", it will not find it
// to make it case insensitive, you'll need to edit the RegExp constructor
// to this: new RegExp("^"+abc+",|, "+abc+"(?!\w)", "i")
userdetails.find({"product":new RegExp("^"+abc+",|, "+abc+"(?!\w)")}).toArray(function(err,result){
if (err) console.log ("error: "+err);
else
{
// if you want the length
console.log(result.length);
// if you actually want to see the results
for (var i = 0; i < result.length; i++)
{
console.log(result[i]);
}
}
});
regex will work perfectly fine. there is also good news for you as monogdb will be releasing full text search index in the upcoming version
http://docs.mongodb.org/manual/release-notes/2.4/#text-indexes

Getting a list of documents with a maximum field value from CouchDB view

Let's say I have blog entries like these in my CouchDB database:
{"name":"Mary", "postdate":"20110412", "subject":"this", "message":"blah"}
{"name":"Joe", "postdate":"20110411", "subject":"that", "message":"yadda"}
{"name":"Mary", "postdate":"20110411", "subject":"and this", "message":"blah-blah"}
{"name":"Joe", "postdate":"20110410", "subject":"And other thing", "message":"yada-yada"}
{"name":"Jane", "postdate":"20110409", "subject":"Serious stuff", "message":"Not really"}
It's pretty easy to get a list of all posts. But how do I get a list of latest posts from all the users?
Like that:
{"name":"Mary", "postdate":"20110412", "subject":"this", "message":"blah"}
{"name":"Joe", "postdate":"20110411", "subject":"that", "message":"yadda"}
{"name":"Jane", "postdate":"20110409", "subject":"Serious stuff", "message":"Not really"}
Try with this map function:
function(doc) {
if (doc.postdate && doc.name) {
emit([doc.name, doc.postdate], 1);
}
}
and the following reduce function:
function(keys, values, rereduce) {
var max = 0,
ks = rereduce ? values : keys;
for (var i = 1, l = ks.length; i < l; ++i) {
if (ks[max][0][1] < ks[i][0][1]) max = i;
}
return ks[max];
}
and query it with group_level=1. It gives you the _id of the posts, then you can retrieve them all with a single query with the keys parameter or using a POST.
I am not sure if this is the best approach, but it seems to work.
UPDATE: fixed map to handle rereduce correctly.
You're going to emit the postdate as the key because keys are sorted. For example, this is what your map function will look like...
function(doc) {
if(doc.postdate) {
emit(doc.postdate, doc);
}
}
That will give you all the docs sorted ascending by postdate. If you want descending then query with ?descending=true
Cheers.

Resources