MongoDB: move documents from one collection to another

感情败类 2020-11-30 22:10

How can documents be moved from one collection to another in MongoDB? For example: I have a lot of documents in …

15 Answers
  • 2020-11-30 22:53

    In my case forEach didn't work, so I had to make some changes.

    var kittySchema = new mongoose.Schema({
        name: String
    });

    var Kitten = mongoose.model('Kitten', kittySchema);

    var catSchema = new mongoose.Schema({
        name: String
    });

    var Cat = mongoose.model('Cat', catSchema);
    

    These are the models for both collections.

    function Recursion() {
        Kitten.findOne().lean().exec(function (error, results) {
            if (error || !results) {
                // Nothing left to move (or the query failed)
                console.log("No object found");
                return;
            }
            var objectResponse = results;
            var RequiredId = objectResponse._id;
            delete objectResponse._id;
            var swap = new Cat(objectResponse);
            swap.save(function (err) {
                if (err) {
                    return err;
                }
                console.log("SUCCESSFUL");
                Kitten.deleteOne({ _id: RequiredId }, function (err) {
                    if (!err) {
                        console.log('notification!');
                    }
                    else {
                        return err;
                    }
                });
                Recursion();
            });
        });
    }
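
    For completeness, a minimal sketch of how this might be invoked, assuming a local MongoDB instance and the callback-style mongoose API used above (the connection URI and database name are placeholders):

        var mongoose = require('mongoose');

        // Placeholder URI: point this at your own database.
        mongoose.connect('mongodb://localhost:27017/test', function (err) {
            if (err) {
                return console.error(err);
            }
            // Moves one document per iteration until Kitten.findOne() finds nothing.
            Recursion();
        });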
    
  • 2020-11-30 22:56

    You can use a range query to get the data from sourceCollection, keep the cursor in a variable, loop over it, and insert each document into the target collection:

     var cursor = db.sourceCollection.find({
            "Timestamp": {
                  $gte: ISODate("2014-09-01T00:00:00Z"),
                  $lt: ISODate("2014-10-01T00:00:00Z")
            }
     });

     cursor.forEach(function (doc) {
        db.targetCollection.insert(doc);
     });
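
    If round trips are a concern, a variant of the same idea that batches the inserts with insertMany (available in the mongo shell on MongoDB 3.2+; the batch size of 1000 is arbitrary) might look like this:

        var batch = [];
        db.sourceCollection.find({
            "Timestamp": {
                $gte: ISODate("2014-09-01T00:00:00Z"),
                $lt: ISODate("2014-10-01T00:00:00Z")
            }
        }).forEach(function (doc) {
            batch.push(doc);
            if (batch.length === 1000) {
                db.targetCollection.insertMany(batch);
                batch = [];
            }
        });
        if (batch.length > 0) {
            db.targetCollection.insertMany(batch); // flush the final partial batch
        }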
    

    Hope it helps!

  • 2020-11-30 22:56

    I do like the response from @markus-w-mahlberg, but at times I have seen the need to keep things a bit simpler for people. As such, I have a couple of functions below. You could naturally wrap things with bulk operators as he did, but this code works with new and old Mongo systems equally.

    function parseNS(ns) {
        // Expects "db.collection". Collection names that themselves contain dots
        // must be passed as an array instead, e.g. ["foodb", "foocollection.month.day.year"].
        var database, collection;
        if (ns instanceof Array) {
            database = ns[0];
            collection = ns[1];
        }
        else {
            var tNS = ns.split(".");
            if (tNS.length > 2) {
                print('ERROR: NS had more than 1 period in it, please pass it as ["dbname", "coll.name.with.dots"]!');
                return false;
            }
            database = tNS[0];
            collection = tNS[1];
        }
        return { database: database, collection: collection };
    }
    
    function insertFromCollection(sourceNS, destNS, query, batchSize, pauseMS) {
        // Parse and check namespaces
        var srcNS = parseNS(sourceNS);
        destNS = parseNS(destNS);
        if (srcNS == false || destNS == false) { return false; }

        var batchBucket = new Array();
        var totalToProcess = db.getSiblingDB(srcNS.database).getCollection(srcNS.collection).find(query, { _id: 1 }).count();
        var currentCount = 0;
        print("Processed " + currentCount + "/" + totalToProcess + "...");
        db.getSiblingDB(srcNS.database).getCollection(srcNS.collection).find(query).addOption(DBQuery.Option.noTimeout).forEach(function (doc) {
            batchBucket.push(doc);
            if (batchBucket.length > batchSize) {
                db.getSiblingDB(destNS.database).getCollection(destNS.collection).insert(batchBucket);
                currentCount += batchBucket.length;
                batchBucket = [];
                sleep(pauseMS);
                print("Processed " + currentCount + "/" + totalToProcess + "...");
            }
        });
        // Flush whatever is left in the final, partially filled batch
        if (batchBucket.length > 0) {
            db.getSiblingDB(destNS.database).getCollection(destNS.collection).insert(batchBucket);
            currentCount += batchBucket.length;
        }
        print("Completed " + currentCount + "/" + totalToProcess);
    }

    /** Example Usage:
            insertFromCollection("foo.bar", "foo2.bar", {"type": "archive"}, 1000, 20);
    */

    You could obviously add a db.getSiblingDB(srcNS.database).getCollection(srcNS.collection).remove(query, true) if you wanted to also remove the records after they are copied to the new location. The code can easily be built like that to make it restartable; a sketch of that follows.
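
    As a sketch of that idea (the moveFromCollection wrapper name is mine, not part of the original functions):

        function moveFromCollection(sourceNS, destNS, query, batchSize, pauseMS) {
            // Copy first; remove from the source only after the copy completes,
            // so a failure part-way through leaves the source collection intact.
            if (insertFromCollection(sourceNS, destNS, query, batchSize, pauseMS) == false) {
                return false;
            }
            var srcNS = parseNS(sourceNS);
            db.getSiblingDB(srcNS.database).getCollection(srcNS.collection).remove(query);
        }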

  • 2020-11-30 23:00

    Insert and remove:

    var documentsToMove = db.collectionA.find({});
    documentsToMove.forEach(function(doc) {
        db.collectionB.insert(doc);
        db.collectionA.remove(doc);
    });
    

    Note: this method might be quite slow for large collections or collections holding large documents; a faster variant is sketched below.
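
    If the whole collection is being moved and it fits in memory, a hedged sketch of a faster variant (insertMany needs MongoDB 3.2+, and this is only safe when nothing else writes to collectionA in the meantime):

        // Read everything at once, insert in a single batch, then clear the source.
        var docs = db.collectionA.find({}).toArray();
        if (docs.length > 0) {
            db.collectionB.insertMany(docs);
            db.collectionA.remove({});
        }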

  • 2020-11-30 23:01

    From a performance point of view it may be better to remove many documents with a single command (especially if you have indexes for the query part) rather than deleting them one by one.

    For example:

    // Note: $gte/$lt must be nested under a field name; "timestamp" here is a
    // placeholder for whatever field the range applies to.
    db.source.find({ timestamp: { $gte: start, $lt: end } }).forEach(function (doc) {
       db.target.insert(doc);
    });
    db.source.remove({ timestamp: { $gte: start, $lt: end } });
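
    On MongoDB 4.2+ the copy step can also run entirely server-side with an aggregation; this is a sketch of an alternative not in the original answer, with timestamp again standing in for your actual date field:

        // Server-side copy via $merge (MongoDB 4.2+), then one remove for the same range.
        db.source.aggregate([
            { $match: { timestamp: { $gte: start, $lt: end } } },
            { $merge: { into: "target", whenMatched: "replace", whenNotMatched: "insert" } }
        ]);
        db.source.remove({ timestamp: { $gte: start, $lt: end } });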
    
  • 2020-11-30 23:02

    I planned to archive 1000 records at a time using the bulk insert and bulk delete methods of pymongo.

    For both the source and the target:

    1. Create MongoDB objects to connect to the database.

    2. Instantiate the bulk objects. Note: I created backup bulk objects too. These help me roll back the insertions or removals when an error occurs. Example:

       For source:

           # replace this with your MongoDB object creation logic
           source_db_obj = db_help.create_db_obj(source_db, source_col)
           source_bulk = source_db_obj.initialize_ordered_bulk_op()
           source_bulk_bak = source_db_obj.initialize_ordered_bulk_op()

       For target:

           # replace this with your MongoDB object creation logic
           target_db_obj = db_help.create_db_obj(target_db, target_col)
           target_bulk = target_db_obj.initialize_ordered_bulk_op()
           target_bulk_bak = target_db_obj.initialize_ordered_bulk_op()

    3. Obtain the source records that match the filter criteria:

      source_find_results = source_db_obj.find(filter)

    4. Loop through the source records:

       Create the target and source bulk operations.

       Append an archived_at field with the current datetime for the target collection:

           # replace this with your logic to obtain the UTC time
           doc['archived_at'] = db_help.getUTCTime()
           target_bulk.insert(doc)
           source_bulk.find({'_id': doc['_id']}).remove_one()

       For rollback in case of any errors or exceptions, create the target_bulk_bak and source_bulk_bak operations:

           target_bulk_bak.find({'_id': doc['_id']}).remove_one()
           # remove the extra field before re-inserting into the source
           doc.pop('archived_at', None)
           source_bulk_bak.insert(doc)

    5. When the record count reaches 1000, execute the target bulk insertion and the source bulk removal. Note: this method takes the target_bulk and source_bulk objects for execution.

      execute_bulk_insert_remove(source_bulk, target_bulk)

    6. When an exception occurs, execute the target_bulk_bak removals and source_bulk_bak insertions. This rolls back the changes. Since MongoDB doesn't have rollback, I came up with this hack.

      execute_bulk_insert_remove(source_bulk_bak, target_bulk_bak)

    7. Finally re-initialize the source and target bulk and bulk_bak objects. This is necessary because you can use them only once.

    8. Complete code

          from datetime import datetime
          from pymongo.errors import BulkWriteError

          # db_help and logger are the author's own helpers (connection and logging)

          def execute_bulk_insert_remove(source_bulk, target_bulk):
              try:
                  target_bulk.execute()
                  source_bulk.execute()
              except BulkWriteError as bwe:
                  raise Exception(
                      "could not archive document, reason:    {}".format(bwe.details))
      
          def archive_bulk_immediate(filter, source_db, source_col, target_db, target_col):
              """
              filter: filter criteria for backup
              source_db: source database name
              source_col: source collection name
              target_db: target database name
              target_col: target collection name
              """
              count = 0
              bulk_count = 1000
      
              source_db_obj = db_help.create_db_obj(source_db, source_col)
              source_bulk = source_db_obj.initialize_ordered_bulk_op()
              source_bulk_bak = source_db_obj.initialize_ordered_bulk_op()
      
              target_db_obj = db_help.create_db_obj(target_db, target_col)
              target_bulk = target_db_obj.initialize_ordered_bulk_op()
              target_bulk_bak = target_db_obj.initialize_ordered_bulk_op()
      
              source_find_results = source_db_obj.find(filter)
      
              start = datetime.now()
      
              for doc in source_find_results:
                  doc['archived_at'] = db_help.getUTCTime()
      
                  target_bulk.insert(doc)
                  source_bulk.find({'_id': doc['_id']}).remove_one()
                  target_bulk_bak.find({'_id': doc['_id']}).remove_one()
                  doc.pop('archived_at', None)
                  source_bulk_bak.insert(doc)
      
                  count += 1
      
                  if count % 1000 == 0:
                      logger.info("count: {}".format(count))
                      try:
                          execute_bulk_insert_remove(source_bulk, target_bulk)
                      except BulkWriteError as bwe:
                          execute_bulk_insert_remove(source_bulk_bak, target_bulk_bak)
                          logger.info("Bulk Write Error: {}".format(bwe.details))
                          raise
      
                      source_bulk = source_db_obj.initialize_ordered_bulk_op()
                      source_bulk_bak = source_db_obj.initialize_ordered_bulk_op()
      
                      target_bulk = target_db_obj.initialize_ordered_bulk_op()
                      target_bulk_bak = target_db_obj.initialize_ordered_bulk_op()
      
              end = datetime.now()
      
              logger.info("archived {} documents to {} in ms.".format(
                  count, target_col, (end - start)))
      