mongodb move documents from one collection to another collection

前端 未结 15 1417
感情败类
感情败类 2020-11-30 22:10

How can documents be moved from one collection to another collection in MongoDB? For example: I have a lot of documents in …

相关标签:
15条回答
  • 2020-11-30 22:46

    $out is used to create a new collection from the aggregation's output, so use $out:

    // Write every document of oldCollection into newCollection via the $out stage.
    // NOTE(review): $out is documented to replace an existing target collection — confirm newCollection is safe to overwrite.
    db.getCollection("oldCollection").aggregate([{ $out: "newCollection" }]);
    

    then use drop

    // Remove the source collection once its documents have been copied elsewhere.
    db.getCollection("oldCollection").drop();
    
    0 讨论(0)
  • 2020-11-30 22:50

    I had 2297 collections holding 15 million documents, but some of the collections were empty.

    Using copyTo alone, the script failed, but it worked with this optimized script:

    // Copy every non-empty collection of the current database into 'master-collection'.
    db.getCollectionNames().forEach(function(name) {
        var source = db.getCollection(name);
        // Empty collections are skipped; only collections with documents are copied.
        if (source.count() === 0) {
          return;
        }
        source.copyTo('master-collection');
        print('Copied collection ' + name);
    });
    

    all works fine for me.

    NB: copyTo is deprecated because it blocks read/write operations, so I think it is only fine if you accept that the database is not usable during this operation.

    0 讨论(0)
  • 2020-11-30 22:51

    The bulk operations @markus-w-mahlberg showed (and @mark-mullin refined) are efficient but unsafe as written. If the bulkInsert fails, the bulkRemove will still continue. To make sure you don't lose any records when moving, use this instead:

    /**
     * Upserts a batch of documents into `collection` using one unordered bulk operation.
     *
     * @param {Object} collection - Target collection; must provide initializeUnorderedBulkOp().
     * @param {Object} documents - Cursor or array of documents; must provide forEach().
     * @returns {Array} The `_id` of every document queued for insertion, in iteration order.
     */
    function insertBatch(collection, documents) {
      var bulkInsert = collection.initializeUnorderedBulkOp();
      var insertedIds = [];
      documents.forEach(function(doc) {
        // Replace-with-upsert keyed on _id, so a document that already exists
        // in the target does not raise a duplicate key error.
        bulkInsert.find({_id: doc._id}).upsert().replaceOne(doc);
        insertedIds.push(doc._id);
      });
      // Executing a bulk operation with nothing queued is an error, so an
      // empty batch is a no-op instead of a failure.
      if (insertedIds.length > 0) {
        bulkInsert.execute();
      }
      return insertedIds;
    }
    
    /**
     * Removes a batch of documents from `collection`, matched by `_id`, using
     * one unordered bulk operation.
     *
     * @param {Object} collection - Collection to delete from; must provide initializeUnorderedBulkOp().
     * @param {Object} documents - Cursor or array of documents; must provide forEach().
     */
    function deleteBatch(collection, documents) {
      var bulkRemove = collection.initializeUnorderedBulkOp();
      var queued = 0;
      documents.forEach(function(doc) {
        bulkRemove.find({_id: doc._id}).removeOne();
        queued += 1;
      });
      // Executing a bulk operation with nothing queued is an error, so an
      // empty batch is a no-op instead of a failure.
      if (queued > 0) {
        bulkRemove.execute();
      }
    }
    
    /**
     * Moves every document matching `filter` from `sourceCollection` to
     * `targetCollection` in batches: each batch is copied with insertBatch()
     * and then only the documents found in the target are removed from the
     * source with deleteBatch().
     *
     * @param {Object} sourceCollection - Collection to move documents out of.
     * @param {Object} targetCollection - Collection to move documents into.
     * @param {Object} filter - Query selecting the documents to move.
     * @param {number} batchSize - Passed to limit() for each batch read from the source.
     */
    function moveDocuments(sourceCollection, targetCollection, filter, batchSize) {
      print("Moving " + sourceCollection.find(filter).count() + " documents from " + sourceCollection + " to " + targetCollection);
      // Declared locally: assigning these without a declaration would create
      // globals (and throws a ReferenceError in strict mode).
      var count;
      var sourceDocs;
      var idsOfCopiedDocs;
      var targetDocs;
      while ((count = sourceCollection.find(filter).count()) > 0) {
        print(count + " documents remaining");
        sourceDocs = sourceCollection.find(filter).limit(batchSize);
        idsOfCopiedDocs = insertBatch(targetCollection, sourceDocs);

        // Re-read the copied documents from the target so that only documents
        // that really exist there are deleted from the source.
        targetDocs = targetCollection.find({_id: {$in: idsOfCopiedDocs}});
        deleteBatch(sourceCollection, targetDocs);
      }
      print("Done!");
    }

    0 讨论(0)
  • 2020-11-30 22:51

    It can be done on the server-side using the $merge operator (starting from MongoDB 4.2).

    // Server-side copy: $merge inserts each sourceColl document into targetColl,
    // matching on _id, and is configured to fail when a matching _id already exists.
    db.sourceColl.aggregate([
      {
        $merge: {
          into: "targetColl",
          on: "_id",
          whenMatched: "fail",
          whenNotMatched: "insert"
        }
      }
    ]);
    // Then empty the source collection.
    db.sourceColl.deleteMany({});
    
    0 讨论(0)
  • 2020-11-30 22:53

    This is a restatement of @Markus W Mahlberg's answer.

    Returning the favor - as a function

    /**
     * Moves every document matching `filter` from `sourceCollection` to
     * `targetCollection` using two unordered bulk operations (one insert
     * batch on the target, one remove batch on the source).
     *
     * @param {Object} sourceCollection - Collection to move documents out of.
     * @param {Object} targetCollection - Collection to move documents into.
     * @param {Object} filter - Query selecting the documents to move.
     */
    function moveDocuments(sourceCollection,targetCollection,filter) {
        var bulkInsert = targetCollection.initializeUnorderedBulkOp();
        var bulkRemove = sourceCollection.initializeUnorderedBulkOp();
        var queued = 0;
        sourceCollection.find(filter).forEach(function(doc) {
            bulkInsert.insert(doc);
            bulkRemove.find({_id:doc._id}).removeOne();
            queued += 1;
        });
        // Executing a bulk operation with nothing queued is an error, so a
        // filter that matches no documents is a no-op instead of a failure.
        if (queued === 0) {
            return;
        }
        // Insert first: if this call throws, the removal below is never reached.
        bulkInsert.execute();
        bulkRemove.execute();
    }
    

    An example use

    // Filter selecting documents that have a top-level `dsid` field.
    var x = {dsid:{$exists: true}};
    // Move the matching documents from `pictures` to `artifacts`.
    moveDocuments(db.pictures,db.artifacts,x)
    

    to move all documents that have top level element dsid from the pictures to the artifacts collection

    0 讨论(0)
  • 2020-11-30 22:53

    Here's an update to @jasongarber's answer which uses the more recent mongo 'bulkWrite' operation (Read docs here), and also keeps the whole process asynchronous so you can run it as part of a wider script which depends on its completion.

    /**
     * Copies every document matching `filter` into `targetCollection`, then
     * deletes from `sourceCollection` the documents that are found in the
     * target by their copied ids.
     *
     * @param {Object} sourceCollection - Collection to move documents out of.
     * @param {Object} targetCollection - Collection to move documents into.
     * @param {Object} filter - Query selecting the documents to move.
     * @returns {Promise<void>} Resolves once the copy and the delete have finished.
     */
    async function moveDocuments (sourceCollection, targetCollection, filter) {
      const matchingDocs = await sourceCollection.find(filter)
      const total = await matchingDocs.count()
      const from = sourceCollection.collectionName
      const to = targetCollection.collectionName

      console.log(`Moving ${total} documents from ${from} to ${to}`)

      const copiedIds = await insertDocuments(targetCollection, matchingDocs)

      // Look the copied ids up in the target and delete exactly those from the source.
      const copiedQuery = {_id: {$in: copiedIds}}
      const copiedDocs = await targetCollection.find(copiedQuery)
      await deleteDocuments(sourceCollection, copiedDocs)

      console.log('Done!')
    }
    
    /**
     * Upserts every document yielded by `documents` into `collection` with a
     * single unordered bulkWrite.
     *
     * @param {Object} collection - Target collection; must provide bulkWrite().
     * @param {Object} documents - Cursor-like object whose forEach() can be awaited.
     * @returns {Promise<Array>} The `_id` of every document visited, in iteration order.
     */
    async function insertDocuments (collection, documents) {
      const copiedIds = []
      const operations = []

      await documents.forEach(document => {
        const id = document._id

        copiedIds.push(id)
        // Replace-with-upsert keyed on _id: inserts when absent, overwrites when present.
        operations.push({
          replaceOne: {
            filter: {_id: id},
            replacement: document,
            upsert: true,
          },
        })
      })

      // Nothing visited: skip the bulkWrite call entirely.
      if (operations.length > 0) {
        await collection.bulkWrite(operations, {ordered: false})
      }

      return copiedIds
    }
    
    /**
     * Deletes from `collection` every document yielded by `documents`,
     * matched by `_id`, with a single unordered bulkWrite.
     *
     * @param {Object} collection - Collection to delete from; must provide bulkWrite().
     * @param {Object} documents - Cursor-like object whose forEach() can be awaited.
     * @returns {Promise<void>} Resolves once the deletes have been issued.
     */
    async function deleteDocuments (collection, documents) {
      const removals = []

      await documents.forEach(document => {
        removals.push({deleteOne: {filter: {_id: document._id}}})
      })

      // Nothing visited: skip the bulkWrite call entirely.
      if (removals.length === 0) return

      await collection.bulkWrite(removals, {ordered: false})
    }
    
    0 讨论(0)
提交回复
热议问题