MongoDB group by Functionalities MongoDB group by Functionalities mongodb mongodb

MongoDB group by Functionalities


MongoDB group by is very limited in most cases, for instance

- the result set must be lesser than 10000 keys.- it will not work in sharded environments

So its better to use map reduce. so the query would be like this

map = function() { emit({a:true,b:true},{count:1}); }

reduce = function(k, values) {    var result = {count: 0};    values.forEach(function(value) {        result.count += value.count;    });    return result;}

and then

db.list.mapReduce(map,reduce,{out: { inline : 1}})

Its a untested version. let me know if it works

EDIT:

The earlier map function was faulty. Thats why you are not getting the results. it should have been

map = function () {    emit({a:this.a, b:this.b}, {count:1});}

Test data:

> db.multi_group.insert({a:1,b:2})> db.multi_group.insert({a:2,b:2})> db.multi_group.insert({a:3,b:2})> db.multi_group.insert({a:1,b:2})> db.multi_group.insert({a:3,b:2})> db.multi_group.insert({a:7,b:2})> db.multi_group.mapReduce(map,reduce,{out: { inline : 1}}){    "results" : [        {            "_id" : {                "a" : 1,                "b" : 2            },            "value" : {                "count" : 2            }        },        {            "_id" : {                "a" : 2,                "b" : 2            },            "value" : {                "count" : 1            }        },        {            "_id" : {                "a" : 3,                "b" : 2            },            "value" : {                "count" : 2            }        },        {            "_id" : {                "a" : 7,                "b" : 2            },            "value" : {                "count" : 1            }        }    ],    "timeMillis" : 1,    "counts" : {        "input" : 6,        "emit" : 6,        "reduce" : 2,        "output" : 4    },    "ok" : 1,}

EDIT2:

Complete solution including applying having count >= 2

map = function () {    emit({a:this.a, b:this.b}, {count:1,_id:this._id});}reduce = function(k, values) {    var result = {count: 0,_id:[]};    values.forEach(function(value) {        result.count += value.count;        result._id.push(value._id);    });    return result;}>db.multi_group.mapReduce(map,reduce,{out: { replace : "multi_result"}})> db.multi_result.find({'value.count' : {$gte : 2}}){ "_id" : { "a" : 1, "b" : 2 }, "value" : { "_id" : [   ObjectId("4f0adf2884025491024f994c"),   ObjectId("4f0adf3284025491024f994f") ], "count" : 2 } }{ "_id" : { "a" : 3, "b" : 2 }, "value" : { "_id" : [   ObjectId("4f0adf3084025491024f994e"),   ObjectId("4f0adf3584025491024f9950") ], "count" : 2 } }


You should use MapReduce instead. Group has its limitations.

In future you'll be able to use the Aggregation Framework. But for now, use map/reduce.


Depends on the number of your groups, you might find a simpler and faster solution than group or MapReduce by using distinct:

var res = [];for( var cur_a = db.list.distinct('a'); cur_a.hasNext(); ) {  var a = cur_a.next();  for( var cur_b = db.list.distinct('b'); cur_b.hasNext(); ) {    var b = cur_b.next();    var cnt = db.list.count({'a':a,'b':b})    if (cnt > 2)      res.push({ 'a': a, 'b' : b 'cnt': cnt}  }} 

It will be faster if you have indexes on a and b

db.list.ensureIndex({'a':1,'b':1})