MongoDB group by Functionalities

后端 未结 3 1028
不思量自难忘°
不思量自难忘° 2021-01-31 07:00

In MySQL

-- Count rows per (a, b) pair and keep only the pairs that occur more than twice.
SELECT a, b, COUNT(1) AS cnt
FROM list
GROUP BY a, b
HAVING cnt > 2;

I need to run the equivalent group-by query, including the HAVING condition, in MongoDB.

相关标签:
3条回答
  • 2021-01-31 07:16

    Depending on the number of groups, you might find a simpler and faster solution than group or MapReduce by using distinct:

    // Brute-force equivalent of GROUP BY a, b HAVING cnt > 2: count the
    // documents for every combination of a distinct `a` and a distinct `b`,
    // and keep the combinations that occur more than twice in `res`.
    var res = [];
    // distinct() returns a plain array (not a cursor), so it is iterated with
    // forEach. The list of `b` values does not change between outer
    // iterations, so it is fetched once up front.
    var distinctB = db.list.distinct('b');
    db.list.distinct('a').forEach(function (a) {
      distinctB.forEach(function (b) {
        var cnt = db.list.count({ 'a': a, 'b': b });
        if (cnt > 2) {
          res.push({ 'a': a, 'b': b, 'cnt': cnt });
        }
      });
    });
    

    It will be faster if you have an index on a and b:

    // Compound index on (a, b) to support the equality lookups on both fields.
    // createIndex() replaces the deprecated ensureIndex() helper.
    db.list.createIndex({ 'a': 1, 'b': 1 });
    
    0 讨论(0)
  • 2021-01-31 07:23

    MongoDB's group command is quite limited; for instance:

    - the result set must contain fewer than 10,000 keys.
    - it will not work in sharded environments.
    

    So it's better to use map-reduce. The query would look like this:

    // Map step of the first attempt. It emits the constant key
    // {a: true, b: true} with the value {count: 1} for every document, so all
    // documents share a single key rather than being keyed by their own
    // a and b values.
    map = function() { emit({a:true,b:true},{count:1}); }

    /**
     * Reduce step: sums the `count` field of every value received for a key.
     *
     * @param {Object} k - Key the values belong to (not used in the body).
     * @param {Array<{count: number}>} values - Values to combine.
     * @returns {{count: number}} Object holding the summed count. It has the
     *     same shape as the input values, so a previous result can be fed
     *     back in as one of the values.
     */
    // Declared with `var` rather than assigned to an undeclared name, so the
    // snippet does not rely on implicit globals and can be re-run as-is.
    var reduce = function (k, values) {
        var result = { count: 0 };
        values.forEach(function (value) {
            result.count += value.count;
        });
        return result;
    };
    

    and then

    // Run the map-reduce job over `list`, asking for inline output.
    db.list.mapReduce(map, reduce, { out: { inline: 1 } });
    

    This is an untested version. Let me know if it works.

    EDIT:

    The earlier map function was faulty. That's why you are not getting the results. It should have been:

    /**
     * Map step: emits one {count: 1} value per document, keyed by the
     * document's own `a` and `b` fields. The document is read from `this`.
     */
    // Declared with `var` rather than assigned to an undeclared name, so the
    // snippet does not rely on implicit globals and can be re-run as-is.
    var map = function () {
        emit({ a: this.a, b: this.b }, { count: 1 });
    };
    

    Test data:

    > db.multi_group.insert({a:1,b:2})
    > db.multi_group.insert({a:2,b:2})
    > db.multi_group.insert({a:3,b:2})
    > db.multi_group.insert({a:1,b:2})
    > db.multi_group.insert({a:3,b:2})
    > db.multi_group.insert({a:7,b:2})
    
    
    > db.multi_group.mapReduce(map,reduce,{out: { inline : 1}})
    {
        "results" : [
            {
                "_id" : {
                    "a" : 1,
                    "b" : 2
                },
                "value" : {
                    "count" : 2
                }
            },
            {
                "_id" : {
                    "a" : 2,
                    "b" : 2
                },
                "value" : {
                    "count" : 1
                }
            },
            {
                "_id" : {
                    "a" : 3,
                    "b" : 2
                },
                "value" : {
                    "count" : 2
                }
            },
            {
                "_id" : {
                    "a" : 7,
                    "b" : 2
                },
                "value" : {
                    "count" : 1
                }
            }
        ],
        "timeMillis" : 1,
        "counts" : {
            "input" : 6,
            "emit" : 6,
            "reduce" : 2,
            "output" : 4
        },
        "ok" : 1,
    }
    

    EDIT2:

    Complete solution, including applying the HAVING count >= 2 condition:

    /**
     * Map step: emits, per document, a count of 1 and the document's _id,
     * keyed by the document's `a` and `b` fields.
     *
     * The _id is wrapped in a one-element array so the emitted value has the
     * same shape as what `reduce` returns. Groups with a single document then
     * end up with the same value layout as groups that went through `reduce`.
     */
    var map = function () {
        emit({ a: this.a, b: this.b }, { count: 1, _id: [this._id] });
    };

    /**
     * Reduce step: sums the counts and collects the _ids of all values
     * received for a key.
     *
     * @param {Object} k - Key the values belong to (not used in the body).
     * @param {Array<{count: number, _id: *}>} values - Values to combine.
     * @returns {{count: number, _id: Array}} Summed count and flat list of _ids.
     */
    var reduce = function (k, values) {
        var result = { count: 0, _id: [] };
        values.forEach(function (value) {
            result.count += value.count;
            // concat() appends the elements of an array argument and appends
            // a non-array argument as a single item, so the list stays flat
            // when an earlier reduce result is passed back in.
            result._id = result._id.concat(value._id);
        });
        return result;
    };
    
    >db.multi_group.mapReduce(map,reduce,{out: { replace : "multi_result"}})
    
    > db.multi_result.find({'value.count' : {$gte : 2}})
    { "_id" : { "a" : 1, "b" : 2 }, "value" : { "_id" : [   ObjectId("4f0adf2884025491024f994c"),   ObjectId("4f0adf3284025491024f994f") ], "count" : 2 } }
    { "_id" : { "a" : 3, "b" : 2 }, "value" : { "_id" : [   ObjectId("4f0adf3084025491024f994e"),   ObjectId("4f0adf3584025491024f9950") ], "count" : 2 } }
    
    0 讨论(0)
  • 2021-01-31 07:23

    You should use MapReduce instead. Group has its limitations.

    In the future you'll be able to use the Aggregation Framework, but for now, use map/reduce.

    0 讨论(0)
提交回复
热议问题