I need to extract a part of a string that matches a regex and return it.
I have a set of documents such as:
{\"_id\" :12121, \"fileName\" : \"apple.d
It will be possible to do this in the upcoming version of MongoDB(as the time of this writing) using the aggregation framework and the $indexOfCP
operator. Until then, your best bet here is MapReduce
.
var mapper = function() {
emit(this._id, this.fileName.substring(this.fileName.indexOf(".")))
};
db.coll.mapReduce(mapper,
function(key, value) {},
{ "out": { "inline": 1 }}
)["results"]
Which yields:
[
{
"_id" : 12121,
"value" : ".doc"
},
{
"_id" : 12125,
"value" : ".txt"
},
{
"_id" : 12126,
"value" : ".pdf"
},
{
"_id" : 12127,
"value" : ".txt"
}
]
For completness here is the solution using the aggregation framework*
db.coll.aggregate(
[
{ "$match": { "name": /\.[0-9a-z]+$/i } },
{ "$group": {
"_id": null,
"extension": {
"$push": {
"$substr": [
"$fileName",
{ "$indexOfCP": [ "$fileName", "." ] },
-1
]
}
}
}}
])
which produces:
{
"_id" : null,
"extensions" : [ ".doc", ".txt", ".pdf", ".txt" ]
}
*current development version of MongoDB (as the time of this writing).