Flattening mongoDB schema

后端 未结 2 1292
刺人心
刺人心 2021-01-24 15:11

I have an existing deeply nested mongoDB schema that I must flatten as I have a complex query that cannot be efficiently made with the current structure. Here is the MWE of the

相关标签:
2条回答
  • 2021-01-24 15:35

    The following

    // Flatten only the "a" entries: unwind each nested array level in turn
    // (tests -> tests.details -> tests.details.a), then regroup by the
    // original document _id.
    db.collection.aggregate(
        [{$unwind:"$tests"},
        {$unwind:"$tests.details"},
        {$unwind:"$tests.details.a"},
        // Reassemble the unwound rows per source document, pushing one
        // {aPos, aSize, aUnit} object into "tests" for each unwound "a" element.
        {$group:{
            _id:"$_id",
            "tests": {"$push":{
                "aPos":"$tests.details.a.pos",
                "aSize":"$tests.details.a.size",
                "aUnit":"$tests.details.a.unit"
            }}}},
        ])
    

    produces:

    { "_id" : ObjectId("58e574a768afb6085ec3a388"), "tests" : [ { "aPos" : "Far", "aSize" : "5", "aUnit" : "08" } ] }
    

    The above only yielded one set of field:value pairs; doing multiple $unwind at the same level did not work:

    // Attempt to flatten "a" and "b" in one pass by unwinding both arrays
    // before grouping. Kept as a counter-example: it does not give the
    // desired result.
    // NOTE(review): with data shaped like the sample on this page, each
    // "details" element holds only one of a/b/c/d, so a row that survives the
    // details.a unwind has no details.b. $unwind drops rows whose path is
    // missing unless preserveNullAndEmptyArrays is set, which is presumably
    // why nothing useful comes back -- confirm against the real data.
    db.collection.aggregate(
        [{$unwind:"$tests"},
        {$unwind:"$tests.details"},
        {$unwind:"$tests.details.a"},
        {$unwind:"$tests.details.b"},
        // Regroup per source document, pushing the a* and b* fields together.
        {$group:{
            _id:"$_id",
            "tests": {"$push":{
                "aPos":"$tests.details.a.pos",
                "aSize":"$tests.details.a.size",
                "aUnit":"$tests.details.a.unit",
                "bPos":"$tests.details.b.pos",
                "bSize":"$tests.details.b.size",
                "bUnit":"$tests.details.b.unit"
            }}}},
        ])  //does not run
    

    Therefore, there needs to be another aggregation stage of $facet to carry out similar steps for details.b, details.c and details.d.

    0 讨论(0)
  • 2021-01-24 15:45

    New Response

    Print the data

    // Dry run: flatten every "details" entry in memory and print the
    // resulting document. Nothing is written back to the collection.
    db.test.find().forEach((record) => {
      record.details = record.details.map((entry) => {
        // Take the key list up front: flattened keys are added to, and the
        // group keys removed from, this same object while we walk it.
        const groupNames = Object.keys(entry).filter((name) => name !== "_id");

        for (const groupName of groupNames) {
          entry[groupName].forEach((element) => {
            for (const field of Object.keys(element)) {
              if (field === "_id") {
                continue;
              }
              // Group "a" + field "pos" becomes "aPos"; a later element of
              // the same group overwrites an earlier one.
              const flatKey = `${groupName}${field.charAt(0).toUpperCase()}${field.slice(1)}`;
              entry[flatKey] = element[field];
            }
          });
          delete entry[groupName];
        }

        return entry;
      });

      printjson(record);
    });
    

    Update the data

    // Flatten every "details" entry and write the result back to the
    // collection, sending the updates in batches of 500 via bulkWrite().
    //
    // Pending bulk operations. Must be declared before the loop; without
    // this declaration the first reference to `ops` throws a ReferenceError.
    let ops = [];

    db.test.find().forEach(doc => {
      doc.details = doc.details.map( detail => {
        // Object.keys() returns a snapshot, so adding flattened keys and
        // deleting the group key below does not disturb the iteration.
        Object.keys(detail).filter( k => k !== "_id" ).forEach( k => {
          detail[k].forEach( item => {
            Object.keys(item).filter( i => i !== "_id" ).forEach( inner => {
              // Group "a" + field "pos" becomes "aPos".
              detail[k + inner.charAt(0).toUpperCase() + inner.slice(1)]
                = item[inner];
            });
          });
          delete detail[k];
        });
        return detail;
      });

      // Target the document's own top-level "details" field. A "$set" path of
      // "doc.details" would instead create a new embedded "doc" object and
      // leave the original nested "details" array untouched.
      ops.push({ "updateOne": {
        "filter": { "_id": doc._id },
        "update": { "$set": { "details": doc.details } }
      }});

      // Flush a full batch and start a new one.
      if ( ops.length >= 500 ) {
        db.test.bulkWrite(ops);
        ops = [];
      }
    });

    // Flush whatever is left over after the last full batch.
    if ( ops.length > 0 ) {
      db.test.bulkWrite(ops);
      ops = [];
    }
    

    Output Form

    {
        "_id" : ObjectId("58e574a768afb6085ec3a388"),
        "details" : [
            {
              "_id" : ObjectId("58e55f0f68afb6085ec3a2cc"),
              "aUnit" : "08",
              "aSize" : "5",
              "aPos" : "Far",
              "bUnit" : "08",
              "bSize" : "5",
              "bPos" : "Far",
              "cUnit" : "08",
              "cSize" : "3",
              "cPos" : "Far",
              "dUnit" : "08",
              "dSize" : "5",
              "dPos" : "Far"
            }
        ]
    }
    

    Original Data

    {
        "_id" : ObjectId("58e574a768afb6085ec3a388"),
        "tests" : [
          {
            "_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
            "details" : [
              {
                "a" : [
                  {
                    "unit" : "08",
                    "size" : "5",
                    "pos" : "Far",
                    "_id" : ObjectId("58e542fb68afb6085ec3a1d6")
                  }
                ]
              },
              {
                "b" : [
                  {
                    "pos" : "Drive Side Far",
                    "size" : "5",
                    "unit" : "08",
                    "_id" : ObjectId("58e542fb68afb6085ec3a1d3")
                  }
                ]
              },
              {
                "c" : [
                  {
                    "pos" : "Far",
                    "size" : "3",
                    "unit" : "08",
                    "_id" : ObjectId("58e542fb68afb6085ec3a1d4")
                  }
                ]
              },
              {
                "d" : [
                  {
                    "pos" : "Far",
                    "size" : "5",
                    "unit" : "08",
                    "_id" : ObjectId("58e542fb68afb6085ec3a1d5")
                  }
                ]
              }
            ]
          }
        ]
    }
    

    Original Answer

    If you are trying to "update" your data, then it's a lot more involved than what you are trying. You have several arrays and you need to actually "traverse" the array elements rather than trying to access them directly.

    Here's just a sample to "print out" the "flattened" data:

    // Dry run: hoist the fields of every tests[].details[].<group>[] element
    // onto the enclosing "tests" entry, drop "details", and print the result.
    // Nothing is written back to the collection.
    db.test.find().forEach((record) => {
      record.tests = record.tests.map((testEntry) => {
        testEntry.details.forEach((group) => {
          for (const groupName of Object.keys(group)) {
            group[groupName].forEach((element) => {
              for (const field of Object.keys(element)) {
                if (field === "_id") {
                  continue;
                }
                // Group "a" + field "pos" becomes "aPos"; the last value
                // seen for a given key wins.
                const flatKey = `${groupName}${field.charAt(0).toUpperCase()}${field.slice(1)}`;
                testEntry[flatKey] = element[field];
              }
            });
          }
        });

        delete testEntry.details;
        return testEntry;
      });

      printjson(record);
    });
    

    Which I believe gives the structure you are looking for:

    {
        "_id" : ObjectId("58e574a768afb6085ec3a388"),
        "tests" : [
            {
                "_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
                "aUnit" : "08",
                "aSize" : "5",
                "aPos" : "Far",
                "bPos" : "Drive Side Far",
                "bSize" : "5",
                "bUnit" : "08",
                "cPos" : "Far",
                "cSize" : "3",
                "cUnit" : "08",
                "dPos" : "Far",
                "dSize" : "5",
                "dUnit" : "08"
            }
        ]
    
    }
    

    Now I'm not taking into account any possibility that inside your "details" array the documents with keys like "a" etc could maybe appear multiple times. So I am just considering that there is only ever 1 document inside there which has an "a" or a "b" etc, and the last found value matching that key is always assigned when adding the new keys to the top level of the "details" documents.

    If your actual case varies, then you would need to modify various .forEach() loops inside there to also use the "index" as a parameter and include that index value as part of the key name, i.e.:

    "a0Unit": "08",
    "a0Size": "05",
    "a1Unit": "09",
    "a1Size": "06"
    

    But that is a detail you will have to work out if necessary since this would differ from how the data is presented in the question.

    If however this is a perfect fit for what you want to update to, then simply run the loop with .bulkWrite() statements executing at regular intervals:

    // Pending bulk operations; sent to the server whenever 500 have piled up.
    let ops = [];

    db.test.find().forEach((record) => {
      // Hoist the fields of every tests[].details[].<group>[] element onto
      // the enclosing "tests" entry, then drop the nested "details" array.
      record.tests = record.tests.map((testEntry) => {
        testEntry.details.forEach((group) => {
          for (const groupName of Object.keys(group)) {
            group[groupName].forEach((element) => {
              for (const field of Object.keys(element)) {
                if (field === "_id") {
                  continue;
                }
                // Group "a" + field "pos" becomes "aPos".
                const flatKey = `${groupName}${field.charAt(0).toUpperCase()}${field.slice(1)}`;
                testEntry[flatKey] = element[field];
              }
            });
          }
        });

        delete testEntry.details;
        return testEntry;
      });

      // Queue a replacement of the whole "tests" array for this document.
      ops.push({
        "updateOne": {
          "filter": { "_id": record._id },
          "update": { "$set": { "tests": record.tests } }
        }
      });

      if (ops.length >= 500) {
        db.test.bulkWrite(ops);
        ops = [];
      }
    });

    // Send the final, partially filled batch.
    if (ops.length > 0) {
      db.test.bulkWrite(ops);
      ops = [];
    }
    

    It also appears from the _id fields present in each array member document that you are using mongoose. So whatever you do, do not try and run the code using mongoose itself. It's a "one off" bulk update of your data and should be run directly from the shell. Then of course you will need to modify your schema to suit the new structure.

    But this is why you should run through your data in the shell with the printjson() method first.

    0 讨论(0)
提交回复
热议问题