I have the following document type
{
"_id" : "-fA2845ORqeyMUItKXfqZw",
"user" : "553247ffdc8a4ade4bb09c5e",
"state" : 2,
"metadata" : {
"language" : "en-US"
},
"pipeline" : {
"api" : "http://localhost:4000",
"provider" : 0
},
"ingest" : {
"mimetype" : "audio/mpeg",
"size" : 92794268,
"client" : "computer",
"isWriteable" : true
},
"assembly" : {
"ok" : "ASSEMBLY_EXECUTING",
"message" : "The assembly is currently being executed.",
"assembly_id" : "23f29680b32911e59d48f358b85e112d",
"parent_id" : null,
"template_id" : null,
"bytes_received" : 1265,
"bytes_expected" : null,
"upload_duration" : 0.079,
"client_agent" : null,
"client_referer" : null,
"start_date" : "2016/01/04 21:21:38 GMT",
"is_infinite" : false,
"has_dupe_jobs" : false,
"execution_start" : "2016/01/04 21:21:38 GMT",
"execution_duration" : 0,
"notify_start" : null,
"notify_duration" : null,
"last_job_completed" : null,
"fields" : {
},
"export" : null,
"job" : null,
"transcript" : null,
"status" : "transcoding",
"created" : ISODate("2016-01-04T21:21:38.364Z"),
"updated" : ISODate("2016-01-04T21:21:38.364Z")
}
i like to get a breakdown of the file extensions and group these by status
> db.files.aggregate([{"$match": {"created": { "$gte" : ISODate("2016-01-01T00:00:00Z")}}},{"$group" : {_id:"$status", count:{$sum:1}}}]);
{ "_id" : "transcoding error", "mimetype": ["audio/mpeg": 85, "audio/mp4": 65, .... ]}
{ "_id" : "edited", "mimetype": ["audio/mpeg": 85, "audio/mp4": 65, .... ] }
{ "_id" : "transcribed", "mimetype": ["audio/mpeg": 85, "audio/mp4": 65, .... ] }
{ "_id" : "transcribing","mimetype": ["audio/mpeg": 85, "audio/mp4": 65, .... ] }
{ "_id" : "received", "mimetype": ["audio/mpeg": 85, "audio/mp4": 65, .... ] }
{ "_id" : "transcoding", "mimetype": ["audio/mpeg": 85, "audio/mp4": 65, .... ] }
{ "_id" : "blocked", "mimetype": ["audio/mpeg": 85, "audio/mp4": 65, .... ] }
so for each status, i would like to subgroup by "mimetype"
type?
any advise much appreciated
thanks, i think i got it
db.files.aggregate(
{
$group: {
_id: { status: "$status", mimetype: "$ingest.mimetype" },
"mimetypes": { "$push": "$ingest.mimetype" },
"total": { "$sum": 1 }
}
},
{
$group: {
_id: { status: "$_id.status" },
mimetype: { $addToSet: { mimetype: "$_id.mimetype", sum:"$total" } }
}
}
);
which returns
{ "_id" : { "status" : "transcoded" }, "mimetype" : [ { "mimetype" : "audio/mpeg", "sum" : 2 } ] }
{ "_id" : { "status" : "edited" }, "mimetype" : [ { "mimetype" : "audio/flac", "sum" : 5 }, { "mimetype" : "video/mp4", "sum" : 1982 }, { "mimetype" : "audio/x-ms-wma", "sum" : 185 }, { "mimetype" : "video/ogg", "sum" : 2 }, { "mimetype" : "audio/mp3", "sum" : 151 }, { "mimetype" : "audio/mp4", "sum" : 52 }, { "sum" : 146 }, { "mimetype" : "video/x-msvideo", "sum" : 14 }, { "mimetype" : "audio/wav", "sum" : 2106 }, { "mimetype" : "video/x-ms-wma", "sum" : 1 }, { "mimetype" : "audio/ogg", "sum" : 6 }, { "mimetype" : "audio/mpeg", "sum" : 2481 }, { "mimetype" : "audio/x-m4a", "sum" : 783 }, { "mimetype" : "application/octet-stream", "sum" : 34 }, { "mimetype" : "audio/amr", "sum" : 16 }, { "mimetype" : "audio/basic", "sum" : 2 }, { "mimetype" : "audio/x-aiff", "sum" : 41 }, { "mimetype" : "video/mpeg", "sum" : 7 }, { "mimetype" : "video/x-ms-wmv", "sum" : 31 }, { "mimetype" : "audio/aac", "sum" : 1 }, { "mimetype" : "video/quicktime", "sum" : 377 }, { "mimetype" : "audio/m4a", "sum" : 1 }, { "mimetype" : "video/3gpp", "sum" : 11 }, { "mimetype" : "video/x-flv", "sum" : 2 } ] }
{ "_id" : { "status" : "transcoding error" }, "mimetype" : [ { "mimetype" : "video/mp4", "sum" : 52 }, { "mimetype" : "audio/wav", "sum" : 36 }, { "mimetype" : "audio/aac", "sum" : 1 }, { "mimetype" : "audio/mpeg", "sum" : 20 }, { "mimetype" : "application/mxf", "sum" : 4 }, { "mimetype" : "audio/mp3", "sum" : 1 }, { "mimetype" : "video/mpeg", "sum" : 3 }, { "mimetype" : "video/x-ms-wmv", "sum" : 1 }, { "mimetype" : "audio/mp4", "sum" : 2 }, { "mimetype" : "video/quicktime", "sum" : 76 }, { "mimetype" : "image/jpeg", "sum" : 1 }, { "mimetype" : "application/octet-stream", "sum" : 9 }, { "sum" : 348 }, { "mimetype" : "video/x-matroska", "sum" : 3 }, { "mimetype" : "text/html", "sum" : 7 }, { "mimetype" : "audio/x-m4a", "sum" : 9 }, { "mimetype" : "audio/aiff", "sum" : 1 } ] }
{ "_id" : { "status" : "transcoding" }, "mimetype" : [ { "mimetype" : "audio/mpeg", "sum" : 20 }, { "mimetype" : "audio/ogg", "sum" : 1 }, { "sum" : 147 }, { "mimetype" : "image/jpeg", "sum" : 1 }, { "mimetype" : "audio/x-m4a", "sum" : 15 }, { "mimetype" : "audio/wav", "sum" : 22 }, { "mimetype" : "video/mp4", "sum" : 12 }, { "mimetype" : "audio/x-ms-wma", "sum" : 2 }, { "mimetype" : "video/quicktime", "sum" : 5 }, { "mimetype" : "application/mxf", "sum" : 1 }, { "mimetype" : "audio/mp3", "sum" : 4 } ] }
{ "_id" : { "status" : "received" }, "mimetype" : [ { "mimetype" : "video/x-ms-wmv", "sum" : 1 }, { "sum" : 16 }, { "mimetype" : "audio/wav", "sum" : 160 }, { "mimetype" : "video/3gpp", "sum" : 1 }, { "mimetype" : "audio/mp4", "sum" : 3 }, { "mimetype" : "video/quicktime", "sum" : 24 }, { "mimetype" : "video/mp4", "sum" : 2929 }, { "mimetype" : "audio/x-m4a", "sum" : 48 }, { "mimetype" : "audio/x-aiff", "sum" : 6 }, { "mimetype" : "audio/ogg", "sum" : 2 }, { "mimetype" : "audio/mp3", "sum" : 4 }, { "mimetype" : "audio/mpeg", "sum" : 199 }, { "mimetype" : "audio/flac", "sum" : 2 }, { "mimetype" : "audio/x-ms-wma", "sum" : 7 } ] }
{ "_id" : { "status" : "blocked" }, "mimetype" : [ { "mimetype" : "audio/wav", "sum" : 92 }, { "mimetype" : "audio/x-wav", "sum" : 20 }, { "mimetype" : "audio/mp4", "sum" : 3 }, { "mimetype" : "video/mp4", "sum" : 63 }, { "mimetype" : "audio/x-m4a", "sum" : 50 }, { "mimetype" : "application/octet-stream", "sum" : 1 }, { "mimetype" : "audio/mp3", "sum" : 40 }, { "mimetype" : "video/x-ms-wmv", "sum" : 1 }, { "mimetype" : "video/quicktime", "sum" : 8 }, { "mimetype" : "video/mpeg", "sum" : 2 }, { "sum" : 50 }, { "mimetype" : "audio/mpeg", "sum" : 163 }, { "mimetype" : "audio/basic", "sum" : 2 }, { "mimetype" : "audio/x-ms-wma", "sum" : 14 }, { "mimetype" : "audio/amr", "sum" : 3 }, { "mimetype" : "audio/x-aiff", "sum" : 3 } ] }
{ "_id" : { "status" : "transcribing" }, "mimetype" : [ { "mimetype" : "audio/mp3", "sum" : 55 }, { "mimetype" : "application/octet-stream", "sum" : 1 }, { "mimetype" : "audio/mp4", "sum" : 1 }, { "mimetype" : "audio/wav", "sum" : 58 }, { "mimetype" : "video/mp4", "sum" : 22 }, { "mimetype" : "audio/x-m4a", "sum" : 7 }, { "sum" : 10 }, { "mimetype" : "video/quicktime", "sum" : 3 }, { "mimetype" : "audio/x-aiff", "sum" : 8 }, { "mimetype" : "audio/x-ms-wma", "sum" : 1 }, { "mimetype" : "audio/mpeg", "sum" : 28 } ] }
{ "_id" : { "status" : null }, "mimetype" : [ { "sum" : 2 } ] }
{ "_id" : { "status" : "transcribed" }, "mimetype" : [ { "mimetype" : "audio/wav", "sum" : 3767 }, { "mimetype" : "video/quicktime", "sum" : 218 }, { "mimetype" : "audio/x-aiff", "sum" : 59 }, { "mimetype" : "video/x-ms-wmv", "sum" : 26 }, { "sum" : 280 }, { "mimetype" : "audio/x-m4a", "sum" : 691 }, { "mimetype" : "audio/x-ms-wma", "sum" : 119 }, { "mimetype" : "audio/mp3", "sum" : 186 }, { "mimetype" : "video/mp4", "sum" : 907 }, { "mimetype" : "audio/x-wav", "sum" : 1 }, { "mimetype" : "video/mpeg", "sum" : 7 }, { "mimetype" : "audio/amr", "sum" : 40 }, { "mimetype" : "audio/mp4", "sum" : 70 }, { "mimetype" : "application/octet-stream", "sum" : 26 }, { "mimetype" : "application/x-wav", "sum" : 1 }, { "mimetype" : "audio/caf", "sum" : 1 }, { "mimetype" : "audio/3gpp", "sum" : 1 }, { "mimetype" : "video/3gpp", "sum" : 21 }, { "mimetype" : "audio/ogg", "sum" : 4 }, { "mimetype" : "audio/mpeg", "sum" : 2464 }, { "mimetype" : "audio/aac", "sum" : 1 }, { "mimetype" : "audio/flac", "sum" : 3 }, { "mimetype" : "video/x-ms-wma", "sum" : 3 }, { "mimetype" : "audio/basic", "sum" : 1 }, { "mimetype" : "video/x-msvideo", "sum" : 3 }, { "mimetype" : "video/webm", "sum" : 2 } ] }
After the $match pipeline, you would need a $group
pipeline stage which groups all the documents by the status
key. Within the group pipeline add the mimetypes
to a list. You would need that list later on down the pipeline.
The preceding pipeline should be an $unwind
pipeline that flattens the list created before i.e. it produces a copy of each document per array entry.
You would need another $group
step which then groups the flattened documents from the previous pipeline by the mimetype
and status
keys.
The following example demonstrates this:
db.files.aggregate([
{ "$match": { "created": { "$gte" : ISODate("2016-01-01T00:00:00Z") } } },
{
"$group": {
"_id": "$status",
"mimetypes": { "$push": "$ingest.mimetype" },
"total": { "$sum": 1 }
}
},
{ "$unwind": "$mimetypes" },
{
"$group": {
"_id": {
"status": "$_id",
"mimetype": "$mimetypes"
},
"total": { "$first": "$total" },
"count": { "$sum": 1 }
}
}
])
来源:https://stackoverflow.com/questions/39660473/mongodb-group-and-subgroup-counts