Random record from MongoDB

后端 未结 27 1983
栀梦
栀梦 2020-11-22 01:22

I am looking to get a random record from a huge (100 million record) mongodb.

What is the fastest and most efficient way to do so? The data is already t

27条回答
  •  别那么骄傲
    2020-11-22 01:51

    This works nicely: it's fast, works with multiple documents, and doesn't require pre-populating the rand field, which will eventually populate itself:

    1. add index to .rand field on your collection
    2. use find and refresh, something like:
    // Install packages:
    //   npm install mongodb async
    // Add index in mongo:
    //   db.ensureIndex('mycollection', { rand: 1 })
    
    var mongodb = require('mongodb')
    var async = require('async')
    
    // Find up to n random documents by using the indexed "rand" field, then
    // re-randomize the "rand" value of every returned document so that later
    // calls return a different selection.
    //
    // collection - MongoDB driver collection to query and update.
    // n          - maximum number of documents to return.
    // fields     - projection passed through to collection.find(); it must keep
    //              _id, which is needed to refresh the fetched documents.
    // done       - callback invoked as done(err, result). result is the array of
    //              documents gathered so far (fewer than n if the collection
    //              does not hold enough matching documents).
    function findAndRefreshRand (collection, n, fields, done) {
      var result = []
      var rand = Math.random()

      // Build a series task that appends documents matching `criteria` to
      // `result`. The limit is computed when the task RUNS, not when it is
      // created, so each step only asks for the documents still missing and the
      // total never exceeds n. If nothing is missing, the query is skipped.
      var appender = function (criteria, sort) {
        return function (next) {
          var remaining = n - result.length
          if (remaining > 0) {
            var options = { limit: remaining }
            if (sort) {
              options.sort = sort
            }
            collection.find(criteria, fields, options).toArray(
              function (err, docs) {
                if (!err && Array.isArray(docs)) {
                  Array.prototype.push.apply(result, docs)
                }
                next(err)
              }
            )
          } else {
            async.nextTick(next)
          }
        }
      }

      async.series([

        // Fetch docs with uninitialized .rand.
        // NOTE: You can comment out this step if all docs have initialized .rand = Math.random()
        appender({ rand: { $exists: false } }),

        // Fetch on one side of random number.
        appender({ rand: { $gte: rand } }, { rand: 1 }),

        // Continue fetch on the other side.
        appender({ rand: { $lt: rand } }, { rand: -1 }),

        // Refresh fetched docs, if any.
        function (next) {
          if (result.length > 0) {
            // { w: 0 } is passed as the bulk write-concern option
            // (unacknowledged writes), as in the original snippet.
            var batch = collection.initializeUnorderedBulkOp({ w: 0 })
            for (var i = 0; i < result.length; ++i) {
              // $set changes only .rand; an update document without an
              // operator would replace the whole document.
              batch.find({ _id: result[i]._id }).updateOne({ $set: { rand: Math.random() } })
            }
            batch.execute(next)
          } else {
            async.nextTick(next)
          }
        }

      ], function (err) {
        done(err, result)
      })
    }
    
    // Example usage: connect, print up to 1024 random profiles (or the error),
    // then close the connection.
    mongodb.MongoClient.connect('mongodb://localhost:27017/core-development', function (connectErr, db) {
      if (connectErr) {
        console.error(connectErr)
        return
      }

      var projection = { _id: true, rand: true }
      var profiles = db.collection('profiles')

      findAndRefreshRand(profiles, 1024, projection, function (findErr, docs) {
        if (findErr) {
          console.error(findErr)
        } else {
          console.log(docs)
        }
        // Close the connection whether or not the lookup succeeded.
        db.close()
      })
    })
    

    P.S. The question "How to find random records in mongodb" is marked as a duplicate of this question. The difference is that this question asks explicitly about a single record, whereas the other one asks explicitly about getting multiple random documents.

提交回复
热议问题