I have an existing, deeply nested MongoDB schema that I must flatten, because a complex query I need cannot be made efficiently against the current structure. Here is an MWE of what I have tried; a sample document in the current structure is shown further below.
The following aggregation:
// First attempt: unwind down to the "a" entries, then regroup per document,
// lifting the fields of each "a" entry into flat aPos / aSize / aUnit keys.
db.collection.aggregate([
  { "$unwind": "$tests" },
  { "$unwind": "$tests.details" },
  { "$unwind": "$tests.details.a" },
  {
    "$group": {
      "_id": "$_id",
      "tests": {
        "$push": {
          "aPos": "$tests.details.a.pos",
          "aSize": "$tests.details.a.size",
          "aUnit": "$tests.details.a.unit"
        }
      }
    }
  }
])
produces:
{ "_id" : ObjectId("58e574a768afb6085ec3a388"), "tests" : [ { "aPos" : "Far", "aSize" : "5", "aUnit" : "08" } ] }
The above only yielded one set of field:value pairs; doing multiple $unwind at the same level did not work:
// Second attempt: additionally unwind the "b" entries so that the "a" and "b"
// fields can be pushed side by side into the same flattened object.
//
// Reported by the author as not working.
// NOTE(review): in the sample document every element of tests.details holds
// only one of a / b / c / d, so presumably no element survives both the "a"
// and the "b" unwind and nothing is left to group -- confirm.
db.collection.aggregate([
  { "$unwind": "$tests" },
  { "$unwind": "$tests.details" },
  { "$unwind": "$tests.details.a" },
  { "$unwind": "$tests.details.b" },
  {
    "$group": {
      "_id": "$_id",
      "tests": {
        "$push": {
          "aPos": "$tests.details.a.pos",
          "aSize": "$tests.details.a.size",
          "aUnit": "$tests.details.a.unit",
          "bPos": "$tests.details.b.pos",
          "bSize": "$tests.details.b.size",
          "bUnit": "$tests.details.b.unit"
        }
      }
    }
  }
])
Therefore, there needs to be another aggregation stage of $facet to carry out similar steps for details.b, details.c and details.d.
Print the data
// Dry run: flatten every entry of doc.details in memory and print the
// reshaped document. Nothing is written back to the collection.
db.test.find().forEach((doc) => {
  doc.details.forEach((detail) => {
    // The key list is taken once up front, so the flattened keys added below
    // are not visited again.
    const groupNames = Object.keys(detail).filter((name) => name !== "_id");

    for (const group of groupNames) {
      detail[group].forEach((entry) => {
        for (const field of Object.keys(entry)) {
          if (field === "_id") {
            continue;
          }
          // group "a" + field "pos" becomes "aPos"
          const flatKey = group + field.charAt(0).toUpperCase() + field.slice(1);
          detail[flatKey] = entry[field];
        }
      });
      // The nested array has been lifted onto the detail itself.
      delete detail[group];
    }
  });

  printjson(doc);
});
Update the data
// One-off migration: flatten every entry of "details" (e.g. a[0].pos -> aPos)
// and write the result back in batches of up to 500 updateOne operations.
// Meant to be run after the printjson() preview has been checked.

// Must exist before the loop: the first iteration reads it, and reading an
// undeclared name raises a ReferenceError.
let ops = [];

db.test.find().forEach(doc => {
  // Nothing to flatten on documents that carry no "details" array.
  if ( !Array.isArray(doc.details) ) {
    return;
  }

  doc.details.forEach( detail => {
    // Only array-valued keys are nested groups. Skipping everything else
    // ("_id" as well as scalars that were already flattened) lets the script
    // be re-run after a batch failed part-way through the collection.
    Object.keys(detail)
      .filter( k => k !== "_id" && Array.isArray(detail[k]) )
      .forEach( k => {
        detail[k].forEach( item => {
          Object.keys(item).filter( i => i !== "_id" ).forEach( inner => {
            // group "a" + field "pos" -> "aPos"
            detail[k + inner.charAt(0).toUpperCase() + inner.slice(1)]
              = item[inner];
          });
        });
        delete detail[k];
      });
  });

  ops.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      // The path is relative to the stored document, so the field to replace
      // is "details"; "doc.details" would create a new embedded "doc" field
      // and leave the original array untouched.
      "update": { "$set": { "details": doc.details } }
    }
  });

  // Send a full batch and start collecting the next one.
  if ( ops.length >= 500 ) {
    db.test.bulkWrite(ops);
    ops = [];
  }
});

// Flush whatever is left over from the last, partial batch.
if ( ops.length > 0 ) {
  db.test.bulkWrite(ops);
  ops = [];
}
Output Form
{
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"details" : [
{
"_id" : ObjectId("58e55f0f68afb6085ec3a2cc"),
"aUnit" : "08",
"aSize" : "5",
"aPos" : "Far",
"bUnit" : "08",
"bSize" : "5",
"bPos" : "Far",
"cUnit" : "08",
"cSize" : "3",
"cPos" : "Far",
"dUnit" : "08",
"dSize" : "5",
"dPos" : "Far"
}
]
}
{
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"tests" : [
{
"_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
"details" : [
{
"a" : [
{
"unit" : "08",
"size" : "5",
"pos" : "Far",
"_id" : ObjectId("58e542fb68afb6085ec3a1d6")
}
]
},
{
"b" : [
{
"pos" : "Drive Side Far",
"size" : "5",
"unit" : "08",
"_id" : ObjectId("58e542fb68afb6085ec3a1d3")
}
]
},
{
"c" : [
{
"pos" : "Far",
"size" : "3",
"unit" : "08",
"_id" : ObjectId("58e542fb68afb6085ec3a1d4")
}
]
},
{
"d" : [
{
"pos" : "Far",
"size" : "5",
"unit" : "08",
"_id" : ObjectId("58e542fb68afb6085ec3a1d5")
}
]
}
]
}
]
}
If you are trying to "update" your data, then it's a lot more involved than what you are attempting. You have several nested arrays, and you need to actually "traverse" the array elements rather than trying to access them directly.
Here's just a sample to "print out" the "flattened" data:
// Dry run: lift every value nested under a test's "details" onto the test
// document itself and print the reshaped document. The collection is only
// read, never written.
db.test.find().forEach((doc) => {
  doc.tests.forEach((test) => {
    test.details.forEach((detail) => {
      for (const group of Object.keys(detail)) {
        detail[group].forEach((entry) => {
          for (const field of Object.keys(entry)) {
            if (field === '_id') {
              continue;
            }
            // group "a" + field "pos" becomes "aPos"; a later entry with the
            // same group and field overwrites an earlier one.
            const flatKey = group + field.charAt(0).toUpperCase() + field.slice(1);
            test[flatKey] = entry[field];
          }
        });
      }
    });
    // Everything useful has been copied up, so drop the nested array.
    delete test.details;
  });

  printjson(doc);
});
Which I believe gives the structure you are looking for:
{
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"tests" : [
{
"_id" : ObjectId("58e542fb68afb6085ec3a1d2"),
"aUnit" : "08",
"aSize" : "5",
"aPos" : "Far",
"bPos" : "Drive Side Far",
"bSize" : "5",
"bUnit" : "08",
"cPos" : "Far",
"cSize" : "3",
"cUnit" : "08",
"dPos" : "Far",
"dSize" : "5",
"dUnit" : "08"
}
]
}
Now I'm not taking into account any possibility that inside your "details" array the documents with keys like "a" etc. could appear multiple times. So I am just considering that there is only ever 1 document inside there which has an "a" or a "b" etc., and the last found value matching that key is always assigned when adding the new keys to the top level of the "details" documents.
If your actual case varies, then you would need to modify the various .forEach() loops inside there to also use the "index" as a parameter and include that index value as part of the key name, i.e.:
"a0Unit": "08",
"a0Size": "05",
"a1Unit": "09",
"a1Size": "06"
But that is a detail you will have to work out if necessary since this would differ from how the data is presented in the question.
If however this is a perfect fit for what you want to update to, then simply run the loop with .bulkWrite() statements executing at regular intervals:
// One-off migration: flatten each test's nested "details" onto the test
// document and write the new "tests" array back, sending the updates in
// batches of up to 500 operations.
let ops = [];

db.test.find().forEach((doc) => {
  doc.tests.forEach((test) => {
    test.details.forEach((detail) => {
      for (const group of Object.keys(detail)) {
        detail[group].forEach((entry) => {
          for (const field of Object.keys(entry)) {
            if (field === '_id') {
              continue;
            }
            // group "a" + field "pos" becomes "aPos"
            const flatKey = group + field.charAt(0).toUpperCase() + field.slice(1);
            test[flatKey] = entry[field];
          }
        });
      }
    });
    // The nested array is replaced by the flat keys copied above.
    delete test.details;
  });

  // Queue the replacement of this document's "tests" array.
  ops.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      "update": { "$set": { "tests": doc.tests } }
    }
  });

  // Send a full batch and start collecting the next one.
  if (ops.length >= 500) {
    db.test.bulkWrite(ops);
    ops = [];
  }
});

// Flush the final, partial batch.
if (ops.length > 0) {
  db.test.bulkWrite(ops);
  ops = [];
}
It also appears from the _id
fields present in each array member document that you are using mongoose. So whatever you do, do not try and run the code using mongoose itself. It's a "one off" bulk update of your data and should be run directly from the shell. Then of course you will need to modify your schema to suit the new structure.
But this is why you should run through your data in the shell with the printjson()
method first.