MongoDB group and subgroup counts

﹥>﹥吖頭↗ 提交于 2019-12-05 08:33:10

Thanks, I think I got it:

// Count files per status, broken down by mimetype.
//
// Stage 1 counts the documents for every distinct (status, ingest.mimetype)
// pair. Stage 2 regroups those counts by status and collects one
// { mimetype, sum } entry per pair into the `mimetype` array.
//
// The pipeline is passed as a single array, and stage 1 no longer carries a
// `$push` of every mimetype value: stage 2 never read that array, so it only
// cost memory without affecting the result.
db.files.aggregate([
  {
    $group: {
      _id: { status: "$status", mimetype: "$ingest.mimetype" },
      total: { $sum: 1 }
    }
  },
  {
    $group: {
      _id: { status: "$_id.status" },
      // Each (status, mimetype) pair appears once after stage 1, so every
      // entry added here is already distinct.
      mimetype: { $addToSet: { mimetype: "$_id.mimetype", sum: "$total" } }
    }
  }
]);

which returns

{ "_id" : { "status" : "transcoded" }, "mimetype" : [ { "mimetype" : "audio/mpeg", "sum" : 2 } ] }
{ "_id" : { "status" : "edited" }, "mimetype" : [ { "mimetype" : "audio/flac", "sum" : 5 }, { "mimetype" : "video/mp4", "sum" : 1982 }, { "mimetype" : "audio/x-ms-wma", "sum" : 185 }, { "mimetype" : "video/ogg", "sum" : 2 }, { "mimetype" : "audio/mp3", "sum" : 151 }, { "mimetype" : "audio/mp4", "sum" : 52 }, { "sum" : 146 }, { "mimetype" : "video/x-msvideo", "sum" : 14 }, { "mimetype" : "audio/wav", "sum" : 2106 }, { "mimetype" : "video/x-ms-wma", "sum" : 1 }, { "mimetype" : "audio/ogg", "sum" : 6 }, { "mimetype" : "audio/mpeg", "sum" : 2481 }, { "mimetype" : "audio/x-m4a", "sum" : 783 }, { "mimetype" : "application/octet-stream", "sum" : 34 }, { "mimetype" : "audio/amr", "sum" : 16 }, { "mimetype" : "audio/basic", "sum" : 2 }, { "mimetype" : "audio/x-aiff", "sum" : 41 }, { "mimetype" : "video/mpeg", "sum" : 7 }, { "mimetype" : "video/x-ms-wmv", "sum" : 31 }, { "mimetype" : "audio/aac", "sum" : 1 }, { "mimetype" : "video/quicktime", "sum" : 377 }, { "mimetype" : "audio/m4a", "sum" : 1 }, { "mimetype" : "video/3gpp", "sum" : 11 }, { "mimetype" : "video/x-flv", "sum" : 2 } ] }
{ "_id" : { "status" : "transcoding error" }, "mimetype" : [ { "mimetype" : "video/mp4", "sum" : 52 }, { "mimetype" : "audio/wav", "sum" : 36 }, { "mimetype" : "audio/aac", "sum" : 1 }, { "mimetype" : "audio/mpeg", "sum" : 20 }, { "mimetype" : "application/mxf", "sum" : 4 }, { "mimetype" : "audio/mp3", "sum" : 1 }, { "mimetype" : "video/mpeg", "sum" : 3 }, { "mimetype" : "video/x-ms-wmv", "sum" : 1 }, { "mimetype" : "audio/mp4", "sum" : 2 }, { "mimetype" : "video/quicktime", "sum" : 76 }, { "mimetype" : "image/jpeg", "sum" : 1 }, { "mimetype" : "application/octet-stream", "sum" : 9 }, { "sum" : 348 }, { "mimetype" : "video/x-matroska", "sum" : 3 }, { "mimetype" : "text/html", "sum" : 7 }, { "mimetype" : "audio/x-m4a", "sum" : 9 }, { "mimetype" : "audio/aiff", "sum" : 1 } ] }
{ "_id" : { "status" : "transcoding" }, "mimetype" : [ { "mimetype" : "audio/mpeg", "sum" : 20 }, { "mimetype" : "audio/ogg", "sum" : 1 }, { "sum" : 147 }, { "mimetype" : "image/jpeg", "sum" : 1 }, { "mimetype" : "audio/x-m4a", "sum" : 15 }, { "mimetype" : "audio/wav", "sum" : 22 }, { "mimetype" : "video/mp4", "sum" : 12 }, { "mimetype" : "audio/x-ms-wma", "sum" : 2 }, { "mimetype" : "video/quicktime", "sum" : 5 }, { "mimetype" : "application/mxf", "sum" : 1 }, { "mimetype" : "audio/mp3", "sum" : 4 } ] }
{ "_id" : { "status" : "received" }, "mimetype" : [ { "mimetype" : "video/x-ms-wmv", "sum" : 1 }, { "sum" : 16 }, { "mimetype" : "audio/wav", "sum" : 160 }, { "mimetype" : "video/3gpp", "sum" : 1 }, { "mimetype" : "audio/mp4", "sum" : 3 }, { "mimetype" : "video/quicktime", "sum" : 24 }, { "mimetype" : "video/mp4", "sum" : 2929 }, { "mimetype" : "audio/x-m4a", "sum" : 48 }, { "mimetype" : "audio/x-aiff", "sum" : 6 }, { "mimetype" : "audio/ogg", "sum" : 2 }, { "mimetype" : "audio/mp3", "sum" : 4 }, { "mimetype" : "audio/mpeg", "sum" : 199 }, { "mimetype" : "audio/flac", "sum" : 2 }, { "mimetype" : "audio/x-ms-wma", "sum" : 7 } ] }
{ "_id" : { "status" : "blocked" }, "mimetype" : [ { "mimetype" : "audio/wav", "sum" : 92 }, { "mimetype" : "audio/x-wav", "sum" : 20 }, { "mimetype" : "audio/mp4", "sum" : 3 }, { "mimetype" : "video/mp4", "sum" : 63 }, { "mimetype" : "audio/x-m4a", "sum" : 50 }, { "mimetype" : "application/octet-stream", "sum" : 1 }, { "mimetype" : "audio/mp3", "sum" : 40 }, { "mimetype" : "video/x-ms-wmv", "sum" : 1 }, { "mimetype" : "video/quicktime", "sum" : 8 }, { "mimetype" : "video/mpeg", "sum" : 2 }, { "sum" : 50 }, { "mimetype" : "audio/mpeg", "sum" : 163 }, { "mimetype" : "audio/basic", "sum" : 2 }, { "mimetype" : "audio/x-ms-wma", "sum" : 14 }, { "mimetype" : "audio/amr", "sum" : 3 }, { "mimetype" : "audio/x-aiff", "sum" : 3 } ] }
{ "_id" : { "status" : "transcribing" }, "mimetype" : [ { "mimetype" : "audio/mp3", "sum" : 55 }, { "mimetype" : "application/octet-stream", "sum" : 1 }, { "mimetype" : "audio/mp4", "sum" : 1 }, { "mimetype" : "audio/wav", "sum" : 58 }, { "mimetype" : "video/mp4", "sum" : 22 }, { "mimetype" : "audio/x-m4a", "sum" : 7 }, { "sum" : 10 }, { "mimetype" : "video/quicktime", "sum" : 3 }, { "mimetype" : "audio/x-aiff", "sum" : 8 }, { "mimetype" : "audio/x-ms-wma", "sum" : 1 }, { "mimetype" : "audio/mpeg", "sum" : 28 } ] }
{ "_id" : { "status" : null }, "mimetype" : [ { "sum" : 2 } ] }
{ "_id" : { "status" : "transcribed" }, "mimetype" : [ { "mimetype" : "audio/wav", "sum" : 3767 }, { "mimetype" : "video/quicktime", "sum" : 218 }, { "mimetype" : "audio/x-aiff", "sum" : 59 }, { "mimetype" : "video/x-ms-wmv", "sum" : 26 }, { "sum" : 280 }, { "mimetype" : "audio/x-m4a", "sum" : 691 }, { "mimetype" : "audio/x-ms-wma", "sum" : 119 }, { "mimetype" : "audio/mp3", "sum" : 186 }, { "mimetype" : "video/mp4", "sum" : 907 }, { "mimetype" : "audio/x-wav", "sum" : 1 }, { "mimetype" : "video/mpeg", "sum" : 7 }, { "mimetype" : "audio/amr", "sum" : 40 }, { "mimetype" : "audio/mp4", "sum" : 70 }, { "mimetype" : "application/octet-stream", "sum" : 26 }, { "mimetype" : "application/x-wav", "sum" : 1 }, { "mimetype" : "audio/caf", "sum" : 1 }, { "mimetype" : "audio/3gpp", "sum" : 1 }, { "mimetype" : "video/3gpp", "sum" : 21 }, { "mimetype" : "audio/ogg", "sum" : 4 }, { "mimetype" : "audio/mpeg", "sum" : 2464 }, { "mimetype" : "audio/aac", "sum" : 1 }, { "mimetype" : "audio/flac", "sum" : 3 }, { "mimetype" : "video/x-ms-wma", "sum" : 3 }, { "mimetype" : "audio/basic", "sum" : 1 }, { "mimetype" : "video/x-msvideo", "sum" : 3 }, { "mimetype" : "video/webm", "sum" : 2 } ] }

After the $match stage, you need a $group stage that groups all the documents by the status key. Within that $group stage, push the mimetypes onto a list and count the documents per status. You will need that list further down the pipeline.

The next stage should be an $unwind stage that flattens the list created in the previous stage, i.e. it produces a copy of each document per array entry.

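You then need another $group stage that groups the flattened documents from the previous stage by the status and mimetype keys. In its output, count is the number of documents for each status/mimetype pair, and total is the number of documents for the whole status.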

The following example demonstrates this:

db.files.aggregate([
    // Stage 1: keep only files created on or after 2016-01-01T00:00:00Z.
    { $match: { created: { $gte: ISODate("2016-01-01T00:00:00Z") } } },

    // Stage 2: one document per status, holding the list of mimetypes pushed
    // from its files and the number of files with that status.
    {
        $group: {
            _id: "$status",
            mimetypes: { $push: "$ingest.mimetype" },
            total: { $sum: 1 }
        }
    },

    // Stage 3: emit one document per entry of the mimetypes list.
    { $unwind: "$mimetypes" },

    // Stage 4: regroup by (status, mimetype). `count` is the number of
    // unwound entries for the pair; `total` carries over the per-status total
    // computed in stage 2 (identical on every document of the same status).
    {
        $group: {
            _id: { status: "$_id", mimetype: "$mimetypes" },
            total: { $first: "$total" },
            count: { $sum: 1 }
        }
    }
])
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!