For example, I have the following documents in my collection:
{
\"_id\" : \"GuqXmAkkARqhBDqhy\",
\"beatmapset_id\" : \"342537\",
\"version\" : \"MX\"
First you need to update your documents and change difficultyrating
and beatmapset_id
to float point number. To do that you need to loop over each document using the .forEach method and update each document with "Bulk" operations for maximum efficiency..
var bulk = db.collection.initializeOrderedBulkOp();
var count = 0;
db.collection.find().forEach(function(doc) {
bulk.find({ '_id': doc._id }).update({
'$set': {
'beatmapset_id': parseFloat(doc.beatmapset_id),
'difficultyrating': parseFloat(doc.difficultyrating)
}
});
count++;
if(count % 100 == 0) {
bulk.execute();
bulk = db.collection.initializeOrderedBulkOp();
}
})
if(count > 0) {
bulk.execute();
}
Now and since The "dropDups" syntax for index creation has been "deprecated" as of MongoDB 2.6 and removed in MongoDB 3.0. This is how you can remove the dups.
The main idea here is first sort your document by difficultyrating
in descending order.
bulk = db.collection.initializeUnorderedBulkOp();
count = 0;
db.collection.aggregate([
{ '$sort': { 'difficultyrating': -1 }},
{ '$group': { '_id': '$beatmapset_id', 'ids': { '$push': '$_id' }, 'count': { '$sum': 1 }}},
{ '$match': { 'count': { '$gt': 1 }}}
]).forEach(function(doc) {
doc.ids.shift();
bulk.find({'_id': { '$in': doc.ids }}).remove();
count++;
if(count === 100) {
bulk.execute();
bulk = db.collection.initializeUnorderedBulkOp();
}
})
if(count !== 0) {
bulk.execute();
}
This answer cover the topic for more detail.