Reshape documents by splitting a field value

后端 未结 1 1928
不思量自难忘°
不思量自难忘° 2021-01-06 01:32

Suppose we have a collection of raw data:

{ \"person\": \"David, age 102\"}
{ \"person\": \"Max, age 8\" }

and we\'d like to transform that

相关标签:
1条回答
  • 2021-01-06 01:57

    The optimal way in MongoDB version 3.4.

    This version of mongod provides the $split operator which, of course split the string as shown here.

    We then assign the the newly computed value to a variable using the $let variable operator. The new value can then be use in the in expression to return the "name" and the "age" values using the $arrayElemAt operator to return the element at a specified index; 0 for the first element and -1 for the last element.

    Note that in the in expression we need to split the last element in order to return the string of integer.

    Finally we need to iterate the Cursor object and cast the convert the string of integer to numeric using Number or parseInt and use bulk operation and the bulkWrite() method to $set the value for those field for maximum efficiency.

    let requests = [];
    db.coll.aggregate(
        [
            { "$project": {  
                "person": { 
                    "$let": { 
                        "vars": { 
                            "infos":  { "$split": [ "$person", "," ] } 
                        }, 
                        "in": { 
                            "name": { "$arrayElemAt": [ "$$infos", 0 ] }, 
                            "age": { 
                                "$arrayElemAt": [ 
                                    { "$split": [ 
                                        { "$arrayElemAt": [ "$$infos", -1 ] }, 
                                        " " 
                                    ]}, 
                                    -1 
                                ] 
                            } 
                        } 
                    } 
                }  
            }}
        ] 
    ).forEach(document => { 
        requests.push({ 
            "updateOne": { 
                "filter": { "_id": document._id }, 
                "update": { 
                    "$set": { 
                        "name": document.person.name, 
                        "age": Number(document.person.age) 
                    },
                    "$unset": { "person": " " }
                } 
            } 
        }); 
        if ( requests.length === 500 ) { 
            // Execute per 500 ops and re-init
            db.coll.bulkWrite(requests); 
            requests = []; 
        }} 
    );
    
     // Clean up queues
    if(requests.length > 0) {
        db.coll.bulkWrite(requests);
    }
    

    MongoDB 3.2 or newer.

    MongoDB 3.2 deprecates the old Bulk() API and its associated methods and provides the bulkWrite() method but it doesn't provide the $split operator so the only option we have here is to use the mapReduce() method to transform our data then update the collection using bulk operation.

    var mapFunction = function() { 
        var person = {}, 
        infos = this.person.split(/[,\s]+/); 
        person["name"] = infos[0]; 
        person["age"] = infos[2]; 
        emit(this._id, person); 
    };
    
    var results = db.coll.mapReduce(
        mapFunction, 
        function(key, val) {}, 
        { "out": { "inline": 1 } }
    )["results"];
    
    results.forEach(document => { 
        requests.push({ 
            "updateOne": { 
                "filter": { "_id": document._id }, 
                "update": { 
                    "$set": { 
                        "name": document.value.name, 
                        "age": Number(document.value.age) 
                    }, 
                    "$unset": { "person": " " }
                } 
            } 
        }); 
        if ( requests.length === 500 ) { 
            // Execute per 500 operations and re-init
            db.coll.bulkWrite(requests); 
            requests = []; 
        }} 
    );
    
    // Clean up queues
    if(requests.length > 0) {
        db.coll.bulkWrite(requests);
    }
    

    MongoDB version 2.6 or 3.0.

    We need to use the now deprecated Bulk API.

    var bulkOp = db.coll.initializeUnorderedBulkOp();
    var count = 0;
    
    results.forEach(function(document) { 
        bulkOp.find({ "_id": document._id}).updateOne(
            { 
                "$set": { 
                    "name": document.value.name, 
                    "age": Number(document.value.age)
                },
                "$unset": { "person": " " }
            }
        );
        count++;
        if (count === 500 ) {
            // Execute per 500 operations and re-init
            bulkOp.execute();
            bulkOp = db.coll.initializeUnorderedBulkOp();
        }
    });
    
    // clean up queues
    if (count > 0 ) {
        bulkOp.execute();
    }
    
    0 讨论(0)
提交回复
热议问题