Bulk upsert in MongoDB using mongoose

后端 未结 6 553
自闭症患者
自闭症患者 2020-11-27 04:53

Is there any option to perform bulk upserts with mongoose? So basically, having an array, insert each element if it does not exist, or update it if it exists? (I am using custom _ids.)

相关标签:
6条回答
  • 2020-11-27 05:07

    If you're not seeing the bulk methods on your db.collection — i.e. you're getting an error to the effect of "xxx variable has no method: initializeOrderedBulkOp()" —

    Try updating your mongoose version. Apparently older mongoose versions don't pass through all of the underlying mongo db.collection methods.

    npm install mongoose

    took care of it for me.

    0 讨论(0)
  • 2020-11-27 05:12

    I have released a plugin for Mongoose that exposes a static upsertMany method to perform bulk upsert operations with a promise interface.

    An added benefit of using this plugin over initializing your own bulk op on the underlying collection is that this plugin converts your data to Mongoose models first, and then back to plain objects before the upsert. This ensures Mongoose schema validation is applied, and the data is depopulated and fit for raw insertion.

    https://github.com/meanie/mongoose-upsert-many https://www.npmjs.com/package/@meanie/mongoose-upsert-many

    Hope it helps!

    0 讨论(0)
  • 2020-11-27 05:19
    // One updateOne-with-upsert operation per document, sent to MongoDB
    // in a single bulkWrite round trip.
    await Model.bulkWrite(
        docs.map((document) => ({
            updateOne: {
                filter: { id: document.id },
                update: document,
                upsert: true
            }
        }))
    )
    
    

    Or more verbose:

    // Assemble the per-document upsert operations first...
    const bulkOps = []
    for (const doc of docs) {
        bulkOps.push({
            updateOne: {
                filter: { id: doc.id },
                update: doc,
                upsert: true
            }
        })
    }

    // ...then run them as one bulk write, logging the outcome either way.
    Model.bulkWrite(bulkOps)
        .then((bulkWriteOpResult) => console.log('BULK update OK:', bulkWriteOpResult))
        .catch((err) => console.error('BULK update error:', err))
    

    https://stackoverflow.com/a/60330161/5318303

    0 讨论(0)
  • 2020-11-27 05:22

    You don't need to manage limit (1000) as @neil-lunn suggested. Mongoose does this already. I used his great answer as a basis for this complete Promise-based implementation & example:

    // Third-party deps: bluebird supplies the Promise implementation used
    // below; mongoose is the ODM whose raw bulk API we drop down to.
    var Promise = require('bluebird');
    var mongoose = require('mongoose');
    
    // Minimal Show model. NOTE(review): a plain object (not a mongoose.Schema)
    // is passed here, relying on mongoose to build the schema implicitly.
    var Show = mongoose.model('Show', {
      "id": Number,
      "title": String,
      "provider":  {'type':String, 'default':'eztv'}
    });
    
    /**
     * Promisified wrapper around mongoose.connect.
     * @param  {String}  uri      MongoDB connection string
     * @param  {Object}  options  Options forwarded to mongoose.connect
     * @return {Promise} Resolves with the mongoose connection; rejects on error
     */
    function connect(uri, options){
      return new Promise(function (resolve, reject) {
        mongoose.connect(uri, options, function (connectErr) {
          // Callback-style API adapted to a Promise: first arg is the error.
          return connectErr ? reject(connectErr) : resolve(mongoose.connection);
        });
      });
    }
    
    /**
     * Bulk-upsert an array of records in one unordered bulk operation.
     * @param  {Array}    records  List of plain records to upsert
     * @param  {Model}    Model    Mongoose model whose raw collection is used
     * @param  {String}   [match]  Field name used to match existing docs (default: 'id')
     * @return {Promise}  Resolves with the BulkWriteResult (or null for empty
     *                    input); rejects on a driver error
     */
    function save(records, Model, match){
      match = match || 'id';
      return new Promise(function(resolve, reject){
        // Executing an empty bulk op fails with "Invalid Operation, no
        // operations specified" — short-circuit instead of erroring.
        if (!records || records.length === 0) return resolve(null);
        var bulk = Model.collection.initializeUnorderedBulkOp();
        records.forEach(function(record){
          // Match on the chosen key; insert if absent, replace wholesale if present.
          var query = {};
          query[match] = record[match];
          bulk.find(query).upsert().updateOne( record );
        });
        bulk.execute(function(err, bulkres){
            if (err) return reject(err);
            resolve(bulkres);
        });
      });
    }
    
    /**
     * Map an EZTV show record onto the Show model's plain-object shape.
     * @param  {Object} show EZTV show (expects at least {id, title})
     * @return {Object}      {title, id: Number, provider: 'eztv'}
     */
    function mapEZ(show){
      var mapped = {
        id: Number(show.id),     // coerce string ids to numbers
        title: show.title,
        provider: 'eztv'
      };
      return mapped;
    }
    
    // if you are not using EZTV, put shows in here
    var shows = []; // giant array of {id: X, title: "X"}
    
    // The eztv block is intentionally commented out; uncomment it (and the
    // closing `// });` below) to fetch and upsert all EZTV shows instead.
    // var eztv = require('eztv');
    // eztv.getShows({}, function(err, shows){
    //   if(err) return console.log('EZ Error:', err);
    
    //   var shows = shows.map(mapEZ);
      // Connect, bulk-upsert every show, then close the connection whether
      // the bulk op succeeded or failed (second callback is the rejection handler).
      console.log('found', shows.length, 'shows.');
      connect('mongodb://localhost/tv', {}).then(function(db){
        save(shows, Show).then(function(bulkRes){
          console.log('Bulk complete.', bulkRes);
          db.close();
        }, function(err){
            console.log('Bulk Error:', err);
            db.close();
        });
      }, function(err){
        console.log('DB Error:', err);
      });
    
    // });
    

    This has the bonus of closing the connection when it's done, displaying any errors if you care, but ignoring them if not (error callbacks in Promises are optional.) It's also very fast. Just leaving this here to share my findings. You can uncomment the eztv stuff if you want to save all eztv shows to a database, as an example.

    0 讨论(0)
  • 2020-11-27 05:23

    Not in "mongoose" specifically, or at least not yet as of writing. The MongoDB shell as of the 2.6 release actually uses the "Bulk operations API" "under the hood", as it were, for all of the general helper methods. In its implementation, it tries to do this first, and if an older version server is detected then there is a "fallback" to the legacy implementation.

    All of the mongoose methods "currently" use the "legacy" implementation or the write concern response and the basic legacy methods. But there is a .collection accessor from any given mongoose model that essentially accesses the "collection object" from the underlying "node native driver" on which mongoose is implemented itself:

     // mongoose plus a schema-less model ("strict": false accepts any document
     // shape), explicitly bound to the existing "sample" collection.
     var mongoose = require('mongoose'),
         Schema = mongoose.Schema;
    
     mongoose.connect('mongodb://localhost/test');
    
     var sampleSchema  = new Schema({},{ "strict": false });
    
     var Sample = mongoose.model( "Sample", sampleSchema, "sample" );
    
     // Bulk ops go through the raw driver collection, so this must run only
     // after the connection is open — mongoose does not queue native-driver
     // calls the way it queues its own model methods.
     mongoose.connection.on("open", function(err,conn) {

        var bulk = Sample.collection.initializeOrderedBulkOp();
        var counter = 0;

        // representing a long loop
        for ( var x = 0; x < 100000; x++ ) {

            // FIX: the original snippet had `});` here, a stray brace that
            // made the whole example a syntax error.
            bulk.find(/* some search */).upsert().updateOne(
                /* update conditions */
            );
            counter++;

            // Flush every 1000 queued ops to cap client-side memory.
            // NOTE(review): execute() is async but the loop keeps running
            // synchronously, so the re-init inside the callback races with
            // later iterations — verify before using this pattern verbatim.
            if ( counter % 1000 == 0 )
                bulk.execute(function(err,result) {
                    bulk = Sample.collection.initializeOrderedBulkOp();
                });
        }

        // Flush any remainder that never hit the 1000 boundary.
        if ( counter % 1000 != 0 )
            bulk.execute(function(err,result) {
               // maybe do something with result
            });

     });
    

    The main catch there being that "mongoose methods" are actually aware that a connection may not actually be made yet and "queue" until this is complete. The native driver you are "digging into" does not make this distinction.

    So you really have to be aware that the connection is established in some way or form. But you can use the native driver methods as long as you are careful with what you are doing.

    0 讨论(0)
  • 2020-11-27 05:31

    I had to achieve this recently while storing products in my ecommerce app. My database used to timeout as I had to upsert 10000 items every 4 hours. One option for me was to set the socketTimeoutMS and connectTimeoutMS in mongoose while connecting to the database but it sorta felt hacky and I did not want to manipulate connection timeout defaults of the database. I also see that the solution by @neil lunn takes a simple sync approach of taking a modulus inside the for loop. Here is an async version of mine that I believe does the job much better

    let BATCH_SIZE = 500
    // NOTE(review): extending Array.prototype is an anti-pattern, but it is
    // kept because upsertDiscountedProducts() calls products.chunk(...).
    // Defined via Object.defineProperty so the method is NON-ENUMERABLE and
    // no longer leaks into every `for...in` loop over any array (the original
    // plain assignment did). Also rejects non-positive sizes, which used to
    // spin forever.
    Object.defineProperty(Array.prototype, 'chunk', {
        configurable: true,
        writable: true,
        /**
         * Split the array into consecutive slices of `groupsize` elements;
         * the final slice may be shorter.
         * @param  {Number} groupsize  Positive chunk length
         * @return {Array}  Array of sub-arrays
         */
        value: function (groupsize) {
            if (!(groupsize > 0)) {
                throw new TypeError('chunk(groupsize): groupsize must be a positive number');
            }
            const sets = [];
            for (let start = 0; start < this.length; start += groupsize) {
                sets.push(this.slice(start, start + groupsize));
            }
            return sets;
        }
    })
    
    /**
     * Chunked bulk-upsert of discounted products, followed by removal of
     * stale or zero-discount items.
     *
     * Splits `products` into BATCH_SIZE chunks and executes one unordered
     * bulk op per chunk, chaining chunks via setTimeout so each batch's
     * execute() finishes before the next begins. Aggregate counters are
     * accumulated across batches; if at least one batch succeeded, every
     * product not touched in this run (updatedAt < timestamp) or with a
     * discount of 0 is deleted.
     *
     * NOTE(review): relies on the external `models.Product` mongoose model
     * and on Array.prototype.chunk being defined — confirm both at call site.
     *
     * @param {Array} products  Plain product objects, each carrying an _id
     */
    function upsertDiscountedProducts(products) {

        //Take the input array of products and divide it into chunks of BATCH_SIZE

        let chunks = products.chunk(BATCH_SIZE), current = 0

        console.log('Number of chunks ', chunks.length)

        let bulk = models.Product.collection.initializeUnorderedBulkOp();

        //Get the current time as timestamp; doubles as the marker that
        //distinguishes rows touched by THIS run from stale rows (see remove()).
        let timestamp = new Date(),

            //Keep track of the number of items being looped
            pendingCount = 0,
            inserted = 0,
            upserted = 0,
            matched = 0,
            modified = 0,
            removed = 0,

            //If at least one upsert was performed
            upsertHappened = false;

        //Call the load function to get started
        load()

        // Process chunk `current`: queue one upsert per item, execute the
        // bulk op, then advance via next(). Falls through to finish() once
        // every chunk has been handled.
        function load() {

            //If we have a chunk to process
            if (current < chunks.length) {
                console.log('Current value ', current)

                for (let i = 0; i < chunks[current].length; i++) {
                    //For each item set the updated timestamp to the current time
                    let item = chunks[current][i]

                    //Set the updated timestamp on each item
                    item.updatedAt = timestamp;

                    bulk.find({ _id: item._id })
                        .upsert()
                        .updateOne({
                            "$set": item,

                            //If the item is being newly inserted, set a created timestamp on it
                            "$setOnInsert": {
                                "createdAt": timestamp
                            }
                        })
                }

                //Execute the bulk operation for the current chunk.
                //A failed chunk is logged but does not stop the run — we
                //still move on to the next chunk.
                bulk.execute((error, result) => {
                    if (error) {
                        console.error('Error while inserting products' + JSON.stringify(error))
                        next()
                    }
                    else {

                        //At least one upsert has happened
                        upsertHappened = true;
                        inserted += result.nInserted
                        upserted += result.nUpserted
                        matched += result.nMatched
                        modified += result.nModified
                        removed += result.nRemoved

                        //Move to the next chunk
                        next()
                    }
                })



            }
            else {
                console.log("Calling finish")
                finish()
            }

        }

        // Advance to the next chunk with a fresh bulk op; setTimeout(load, 0)
        // yields to the event loop instead of recursing synchronously.
        function next() {
            current++;

            //Reassign bulk to a new object and call load once again on the new object after incrementing chunk
            bulk = models.Product.collection.initializeUnorderedBulkOp();
            setTimeout(load, 0)
        }

        // Report aggregate counters, then trigger the cleanup pass — but only
        // if some chunk actually succeeded, so a totally failed run never
        // wipes the collection.
        function finish() {

            console.log('Inserted ', inserted + ' Upserted ', upserted, ' Matched ', matched, ' Modified ', modified, ' Removed ', removed)

            //If at least one chunk was inserted, remove all items with a 0% discount or not updated in the latest upsert
            if (upsertHappened) {
                console.log("Calling remove")
                remove()
            }


        }

        /**
         * Remove all the items that were not updated in the recent upsert
         * (updatedAt predates this run's timestamp) or whose discount is 0.
         */
        function remove() {

            models.Product.remove(
                {
                    "$or":
                    [{
                        "updatedAt": { "$lt": timestamp }
                    },
                    {
                        "discount": { "$eq": 0 }
                    }]
                }, (error, obj) => {
                    if (error) {
                        console.log('Error while removing', JSON.stringify(error))
                    }
                    else {
                        if (obj.result.n === 0) {
                            console.log('Nothing was removed')
                        } else {
                            console.log('Removed ' + obj.result.n + ' documents')
                        }
                    }
                }
            )
        }
    }
    
    0 讨论(0)
提交回复
热议问题