How to obtain a master structure for a json file?

问题

I have a JSON file as follows:

[
    {
        "dog": "lmn",
        "tiger": [
            {
                "bengoltiger": {
                    "height": {
                        "x": 4
                    }
                },
                "indiantiger": {
                    "paw": "a",
                    "foor": "b"
                }
            },
            {
                "bengoltiger": {
                    "width": {
                        "a": 8
                    }
                },
                "indiantiger": {
                    "b": 3
                }
            }
        ]
    },
    {
        "dog": "pqr",
        "tiger": [
            {
                "bengoltiger": {
                    "width": {
                        "m": 3
                    }
                },
                "indiantiger": {
                    "paw": "a",
                    "foor": "b"
                }
            },
            {
                "bengoltiger": {
                    "height": {
                        "n": 8
                    }
                },
                "indiantiger": {
                    "b": 3
                }
            }
        ],
        "lion": 90
    }
]

I want to transform this to obtain all possible properties of any object at any nesting level. For arrays, the first object should contain all the properties. The values themselves are unimportant, but the solution below keeps the first value encountered for each property (for example, "lmn" is preserved for the "dog" property). Expected output:

[
    {
        "dog": "lmn",
        "tiger": [
            {
                "bengoltiger": {
                    "height": {
                        "x": 4,
                        "n": 8
                    },
                    "width": {
                        "a": 8,
                        "m": 3
                    }
                },
                "indiantiger": {
                    "paw": "a",
                    "foor": "b",
                    "b": 3
                }
            }
        ],
        "lion": 90
    }
]

Here's the recursive function I had written before I ran into this nesting problem:

/**
 * Collapses a JSON-like value, in place, into its "master structure".
 *
 * Every array is reduced to a single first element that carries the union of
 * the properties found in all of its elements, at every nesting level. When
 * the same property appears more than once, the first value encountered wins.
 *
 * @param {*} json - Array or plain object to consolidate; it is mutated.
 *   Primitives and null are ignored.
 * @returns {undefined}
 */
function consolidateArray(json) {
  const isPlainContainer = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  // Folds `source` into `target` without overwriting anything `target` already
  // has. Arrays are concatenated so that the later consolidation pass can
  // collapse them; plain objects are merged key by key.
  const absorb = (target, source) => {
    if (target === source) {
      return; // aliased value: nothing new to add, and appending would never end
    }
    if (Array.isArray(target) && Array.isArray(source)) {
      for (const item of source) {
        target.push(item);
      }
      return;
    }
    if (isPlainContainer(target) && isPlainContainer(source)) {
      for (const key of Object.keys(source)) {
        if (Object.prototype.hasOwnProperty.call(target, key)) {
          absorb(target[key], source[key]);
        } else {
          target[key] = source[key];
        }
      }
    }
    // Mismatched shapes or primitives: keep the first-encountered value.
  };

  if (Array.isArray(json)) {
    if (json.length === 0) {
      return;
    }
    const reference = json[0];
    for (let index = 1; index < json.length; index += 1) {
      absorb(reference, json[index]);
    }
    json.splice(1);
    // Plain recursive call: `this` is undefined in strict mode and modules.
    consolidateArray(reference);
  } else if (isPlainContainer(json)) {
    for (const key of Object.keys(json)) {
      consolidateArray(json[key]);
    }
  }
}
  
// Sample input from the question: two top-level entries whose nested "tiger"
// arrays each hold objects with partially overlapping properties.
var json = [
    {
        "dog": "lmn",
        "tiger": [
            {
                "bengoltiger": {
                    "height": {
                        "x": 4
                    }
                },
                "indiantiger": {
                    "paw": "a",
                    "foor": "b"
                }
            },
            {
                "bengoltiger": {
                    "width": {
                        "a": 8
                    }
                },
                "indiantiger": {
                    "b": 3
                }
            }
        ]
    },
    {
        "dog": "pqr",
        "tiger": [
            {
                "bengoltiger": {
                    "width": {
                        "m": 3
                    }
                },
                "indiantiger": {
                    "paw": "a",
                    "foor": "b"
                }
            },
            {
                "bengoltiger": {
                    "height": {
                        "n": 8
                    }
                },
                "indiantiger": {
                    "b": 3
                }
            }
        ],
        "lion": 90
    }
];
// consolidateArray works in place, so `json` itself holds the result afterwards.
consolidateArray(json);
// Show the consolidated structure as pretty-printed JSON (alert is a browser global).
alert(JSON.stringify(json, null, 2));

回答1:

This was an interesting problem. Here's what I came up with:

// Utility functions

// Short predicate: true only for values that are integers of type number.
const isInt = (value) => Number.isInteger(value);

/**
 * Reads the value found by following the keys in `ps` through `obj`.
 * A falsy intermediate value is treated as an empty object, so a missing
 * branch yields `undefined` instead of throwing.
 *
 * @param {Array<string|number>} [ps=[]] - Keys to follow, outermost first.
 * @param {*} [obj={}] - Value to read from.
 * @returns {*} The value at the end of the path, or `undefined`.
 */
const path = (ps = [], obj = {}) => {
  let cursor = obj;
  ps.forEach((step) => {
    const holder = cursor || {};
    cursor = holder[step];
  });
  return cursor;
};

/**
 * Returns a shallow copy of `obj` with `prop` set to `val`; `obj` itself is
 * left untouched. Nested values are shared by reference with the original.
 *
 * When `prop` is an integer and `obj` is an array, the result is an array
 * with `val` stored at exactly that index. Writing past the current end
 * extends the copy instead of appending at the wrong position.
 * Otherwise the result is a plain object.
 *
 * @param {string|number} prop - Property name or array index to set.
 * @param {*} val - Value to store.
 * @param {Object|Array} obj - Source value to copy.
 * @returns {Object|Array} The updated copy.
 */
const assoc = (prop, val, obj) => {
  if (isInt(prop) && Array.isArray(obj)) {
    const copy = obj.slice();
    copy[prop] = val;
    return copy;
  }
  return {...obj, [prop]: val};
};

/**
 * Returns a copy of `obj` in which the value at the given path is set to
 * `val`, creating intermediate nodes as needed. A missing (or falsy) node is
 * created as an array when the next key is an integer and as an object
 * otherwise.
 *
 * The input is never modified: missing intermediate nodes are built on the
 * side rather than being assigned into `obj`.
 *
 * @param {Array<string|number>} path - Keys to follow; an empty path returns
 *   `obj` unchanged.
 * @param {*} val - Value to store at the end of the path.
 * @param {Object|Array} obj - Source value to copy.
 * @returns {Object|Array} The updated copy.
 */
const assocPath = ([p = undefined, ...ps], val, obj) => {
  // Loose null check on purpose: both undefined and null end the path.
  if (p == null) {
    return obj;
  }
  if (ps.length === 0) {
    return assoc(p, val, obj);
  }
  const child = obj[p] || (isInt(ps[0]) ? [] : {});
  return assoc(p, assocPath(ps, val, child), obj);
};


// Helper functions

/**
 * Yields the path to every leaf of `o`. A leaf is a primitive (including
 * null) or an empty object/array. Each path is an array of keys in which
 * array positions are numbers and object keys are strings.
 *
 * Only index keys of real arrays are converted to numbers. Object keys that
 * merely look numeric (such as "7" or "") stay strings, so they are not
 * mistaken for array positions later on.
 *
 * @param {*} o - Value to walk.
 * @param {Array<string|number>} [p=[]] - Path prefix accumulated so far.
 * @yields {Array<string|number>} One path per leaf.
 */
function* getPaths(o, p = []) {
  const keys = Object(o) === o ? Object.keys(o) : [];
  if (keys.length === 0) {
    yield p;
    return;
  }
  const inArray = Array.isArray(o);
  for (const k of keys) {
    // Canonical non-negative integer strings are the only real array indexes.
    const step = inArray && /^(?:0|[1-9]\d*)$/.test(k) ? Number(k) : k;
    yield* getPaths(o[k], [...p, step]);
  }
}

/**
 * Maps a path to its canonical form by replacing every array position
 * (an integer of type number) with 0, so that paths through different
 * elements of the same array become identical.
 *
 * String keys are kept as they are, even when they look numeric ("5") or are
 * empty (""): they name object properties, not array positions.
 *
 * @param {Array<string|number>} path - Path made of numbers and strings.
 * @returns {Array<string|number>} The canonical path.
 */
const canonicalPath = (path) =>
  path.map((step) => (isInt(step) ? 0 : step));

/**
 * Pairs each path with its canonical form and drops every path whose
 * canonical form has already been seen. The first path for a given canonical
 * form is kept, and results come back in first-seen order.
 *
 * A Map is used for the bookkeeping so that keys such as "toString" or
 * "constructor" cannot be confused with members inherited by a plain object,
 * and so that each path is processed in constant time.
 *
 * Canonical paths are compared by joining them with "\u0000"; keys that
 * themselves contain that character can therefore still collide.
 *
 * @param {Array<Array<string|number>>} xs - Paths to deduplicate.
 * @returns {Array<{canonical: Array<string|number>, path: Array<string|number>}>}
 */
const splitPaths = (xs) => {
  const firstSeen = new Map();
  for (const p of xs) {
    const canonical = canonicalPath(p);
    const key = canonical.join('\u0000');
    if (!firstSeen.has(key)) {
      firstSeen.set(key, {canonical, path: p});
    }
  }
  return [...firstSeen.values()];
};


// Main function

/**
 * Builds the canonical ("master") representation of `data`: every array is
 * reduced to one element that carries all properties found in any of its
 * elements, using the first value encountered for each property.
 *
 * @param {Object|Array} data - Source structure; values are read via `path`.
 * @returns {Object|Array} A new array when `data` is an array, otherwise a
 *   new object.
 */
const canonicalRep = (data) => {
  const entries = splitPaths(Array.from(getPaths(data)));
  let result = Array.isArray(data) ? [] : {};
  for (const entry of entries) {
    // Read the value at its real location, store it at the canonical one.
    const value = path(entry.path, data);
    result = assocPath(entry.canonical, value, result);
  }
  return result;
};


  // Test

// Sample input from the question, written on one line: two top-level entries
// whose "tiger" arrays hold objects with partially overlapping properties.
const data = [{"dog": "lmn", "tiger": [{"bengoltiger": {"height": {"x": 4}}, "indiantiger": {"foor": "b", "paw": "a"}}, {"bengoltiger": {"width": {"a": 8}}, "indiantiger": {"b": 3}}]}, {"dog": "pqr", "lion": 90, "tiger": [{"bengoltiger": {"width": {"m": 3}}, "indiantiger": {"foor": "b", "paw": "a"}}, {"bengoltiger": {"height": {"n": 8}}, "indiantiger": {"b": 3}}]}]

// Print the canonical representation built from the sample.
console .log (
  canonicalRep (data)
)

The first few functions are plain utility functions that I would keep in a system library. They have plenty of uses outside this code:

isInt is simply a first-class function alias to Number.isInteger

path finds the nested property of an object along a given pathway

path(['b', 1, 'c'], {a: 10, b: [{c: 20, d: 30}, {c: 40}], e: 50}) //=> 40

assoc returns a new object cloning your original, but with the value of a certain property set to or replaced with the supplied one.
```
assoc('c', 42, {a: 1, b: 2, c: 3, d: 4}) //=> {a: 1, b: 2, c: 42, d: 4}
```
Note that internal objects are shared by reference where possible.

assocPath does this same thing, but with a deeper path, building nodes as needed.

assocPath(['a', 'b', 1, 'c', 'd'], 42, {a: {b: [{x: 1}, {x: 2}], e: 3}})
    //=> {a: {b: [{x: 1}, {c: {d: 42}, x: 2}], e: 3}}

Except for isInt, these borrow their APIs from Ramda. (Disclaimer: I'm a Ramda author.) But these are unique implementations.

The next function, getPaths, is an adaptation of one from another SO answer. It lists all the paths in your object in the format used by path and assocPath, returning an array of values which are integers if the relevant nested object is an array and strings otherwise. Unlike the function from which it was borrowed, it only returns paths to leaf values.

For your original object, it returns an iterator for this data:

[
  [0, "dog"], 
  [0, "tiger", 0, "bengoltiger", "height", "x"], 
  [0, "tiger", 0, "indiantiger", "foor"], 
  [0, "tiger", 0, "indiantiger", "paw"], 
  [0, "tiger", 1, "bengoltiger", "width", "a"], 
  [0, "tiger", 1, "indiantiger", "b"], 
  [1, "dog"], 
  [1, "lion"], 
  [1, "tiger", 0, "bengoltiger", "width", "m"], 
  [1, "tiger", 0, "indiantiger", "foor"], 
  [1, "tiger", 0, "indiantiger", "paw"], 
  [1, "tiger", 1, "bengoltiger", "height", "n"], 
  [1, "tiger", 1, "indiantiger", "b"]
]

If I wanted to spend more time on this, I would replace that version of getPaths with a non-generator version, just to keep this code consistent. It shouldn't be hard, but I'm not interested in spending more time on it.

We can't use those results directly to build your output, since they refer to array elements beyond the first one. That's where splitPaths and its helper canonicalPath come in. We create the canonical paths by replacing all integers with 0, giving us a data structure like this:

[{
  canonical: [0, "dog"],
  path:      [0, "dog"]
}, {
  canonical: [0, "tiger", 0, "bengoltiger", "height", "x"],
  path:      [0, "tiger", 0, "bengoltiger", "height", "x"]
}, {
  canonical: [0, "tiger", 0, "indiantiger", "foor"], 
  path:      [0, "tiger", 0, "indiantiger", "foor"]
}, {
  canonical: [0, "tiger", 0, "indiantiger", "paw"],
  path:      [0, "tiger", 0, "indiantiger", "paw"]
}, {
  canonical: [0, "tiger", 0, "bengoltiger", "width", "a"], 
  path:      [0, "tiger", 1, "bengoltiger", "width", "a"]
}, {
  canonical: [0, "tiger", 0, "indiantiger", "b"], 
  path:      [0, "tiger", 1, "indiantiger", "b"]
}, {
  canonical: [0, "lion"], 
  path:      [1, "lion"]
}, {
  canonical: [0, "tiger", 0, "bengoltiger", "width", "m"], 
  path:      [1, "tiger", 0, "bengoltiger", "width", "m"]
}, {
  canonical: [0, "tiger", 0, "bengoltiger", "height", "n"], 
  path:      [1, "tiger", 1, "bengoltiger", "height", "n"]
}]

Note that this function also removes duplicate canonical paths. We originally had both [0, "tiger", 0, "indiantiger", "foor"] and [1, "tiger", 0, "indiantiger", "foor"], but the output only contains the first one.

It does this by storing them in an object under a key created by joining the path together with the non-printable character \u0000. This was the easiest way to accomplish this task, but there is an extremely unlikely failure mode possible ¹ so if we really wanted we could do a more sophisticated duplicate checking. I wouldn't bother.

Finally, the main function, canonicalRep builds a representation out of your object by calling splitPaths and folding over the result, using canonical to say where to put the new data, and applying the path function to your path property and the original object.

Our final output, as requested, looks like this:

[
    {
        dog: "lmn",
        lion: 90,
        tiger: [
            {
                bengoltiger: {
                    height: {
                        n: 8,
                        x: 4
                    },
                    width: {
                        a: 8,
                        m: 3
                    }
                },
                indiantiger: {
                    b: 3,
                    foor: "b",
                    paw: "a"
                }
            }
        ]
    }
]

What's fascinating for me is that I saw this as an interesting programming challenge, although I couldn't really imagine any practical uses for it. But now that I've coded it, I realize it will solve a problem in my current project that I'd put aside a few weeks ago. I will probably implement this on Monday!

¹That failure mode could happen if you had certain nodes containing that separator, \u0000. For instance, if you had paths [...nodes, "abc\u0000", "def", ...nodes] and [...nodes, "abc", "\u0000def", ...nodes], they would both map to "...abc\u0000\u0000def...". If this is a real concern, we could certainly use other forms of deduplication.

回答2:

General logic using this new JNode IIFE, with comments. If something is unclear, ask someone cleverer than me ;-)

Levels start from 1, as there is no root object at the start.

// Shared with l2(), which deletes merged top-level elements from it.
var json;
/**
 * Demo driver: loads the sample document into the global `json`, indexes
 * every node visited by the JNode iterator by level and key, runs l2() once
 * per level, then logs the resulting `json` as pretty-printed JSON.
 */
function DamnDemo() {
    json = DemoJSON();
    // NOTE(review): `it2` is assigned here but never read in this function.
    var it = new JNode(json), it2 = it;
    var levelKeys = []; /* A bit crazy structure:
    [
      levelN:{
               keyA:[JNode, JNode,...],
               keyB:[JNode, JNode,...],
               ...
             },
      levelM:...
    ]
    */
    do {
        var el = levelKeys[it.level]; // array of level say LevelN or undefined
        el = levelKeys[it.level] = el || {}; // set 2 empty it if does not exist
        el = el[it.key] = el[it.key] || []; // key array in say levelN
        el.push(it); // save current node indexing by level, key -> array
    } while (it = it.DepthFirst()) // traverse all nodes; DepthFirst() returns 0 (falsy) when there is nothing left
    for(var l1 in levelKeys) { // let start simply by iterating levels
        l2(levelKeys[l1]);
    }
    console.log(JSON.stringify(json, null, 2));
}

/**
 * Processes one level of the index built by DamnDemo().
 *
 * Walks the enumerable keys of `arr`. As soon as a value is a JNode it
 * returns 1; otherwise it adds up the recursive results in `len` and collects
 * the visited values in `items`.
 *
 * Only when `len` is exactly 2 does it act. If the first collected entry's
 * leading node has a numeric key, or both leading nodes have equal keys, the
 * properties missing from the first node's `obj` are moved over from the
 * second node's `obj` via moveMissing(). The second node is then deleted,
 * either from the global `json` (level 1) or from its parent's `obj`, and the
 * array length is shortened under the conditions noted inline. Otherwise the
 * two keys are only logged.
 *
 * Side effects: mutates the global `json` and the node objects, and writes to
 * the console.
 *
 * @param {Object|Array} arr - A level entry of the index (key -> array of
 *   JNode) or, on recursion, one of those arrays.
 * @returns {number} 1 when a JNode is met directly, otherwise the summed count.
 */
function l2(arr) { // fun starts here...
    var len = 0, items = []; // size of arr, his items to simple Array
    for(var ln in arr) { // It's a kind of magic here ;-) Hate recursion, but who want to rewrite it ;-)
        if (arr[ln] instanceof JNode) return 1; // End of chain - our JNode for traverse of length 1
        len += l2(arr[ln]);
        items.push(arr[ln]);
    }
    if (len == 2) { // we care only about 2 items to move (getting even 3-5)
        //console.log(JSON.stringify(json));
        if (!isNaN(items[0][0].key) || (items[0][0].key == items[1][0].key)) { // key is number -> ignore || string -> must be same
            console.log("Keys 2B moved:", items[0][0].key, items[1][0].key, "/ level:", items[0][0].level);
            var src = items[1][0]; // 2nd similar JNode
            moveMissing(items[0][0].obj, src.obj); // move to 1st
            //console.log(JSON.stringify(json));
            if (src.level == 1) { // top level cleaning
                delete src.obj;
                delete json[src.key]; // remove array element
                if (!json[json.length-1]) json.length--; // fix length - hope it was last one (there are other options, but do not want to overcomplicate logic)
            } else {
                var parent = src.parent;
                var end = 0;
                for(var i in parent.obj) {
                    end++;
                    if (parent.obj[i] == src.obj) { // we found removed in parent's array
                        delete src.obj; // delete this empty object
                        delete parent.obj[i]; // and link on
                        end = 1; // stupid marker
                    }
                }
                if (end == 1 && parent.obj instanceof Array) parent.obj.length--; // fix length - now only when we are on last element
            }
        } else console.log("Keys left:", items[0][0].key, items[1][0].key, "/ level:", items[0][0].level); // keys did not match - do not screw it up, but report it
    }
    return len;
}

/**
 * Moves into `dest` everything from `src` that `dest` does not have yet,
 * recursing into nested objects. Moved entries are deleted from `src`.
 *
 * A destination value counts as missing only when it is `undefined` or
 * `null`. Existing falsy values such as `0`, `""` or `false` are kept, so the
 * first value encountered always wins.
 *
 * Object-valued source entries are always removed from `src` after being
 * moved or merged. Primitive source entries stay in `src` when `dest`
 * already has a value for that key.
 *
 * @param {Object|Array} dest - Target that receives the missing entries; mutated.
 * @param {Object|Array} src - Source that entries are taken from; mutated.
 * @returns {undefined}
 */
function moveMissing(dest, src) {
    for (const key in src) {
        const incoming = src[key];
        const existing = dest[key];
        const isMissing = existing === undefined || existing === null;
        if (incoming instanceof Object) {
            if (isMissing) {
                dest[key] = incoming;
            } else if (existing instanceof Object) {
                // Both sides are objects: let the merge bubble down.
                moveMissing(existing, incoming);
            }
            // Otherwise dest holds a primitive: keep it, nothing to merge into.
            delete src[key];
        } else if (isMissing) {
            dest[key] = incoming;
            delete src[key];
        }
    }
}

// JSON_Node_Iterator_IIFE.js
'use strict';
/**
 * JNode: a linked-node iterator over a JSON-like value.
 *
 * Each node records its `key`, the value stored under that key (`obj`), its
 * `parent` node, its previous/next siblings (`pred` / `next`) and, once
 * Down() has been called, the first node of its children (`node`).
 *
 * NOTE(review): the IIFE declares a `jsNode` parameter but is invoked without
 * arguments, and the parameter is not used inside.
 */
var JNode = (function (jsNode) {

    /**
     * Two modes, selected by the arguments:
     *  - fill mode (`parent` given and `fill` truthy): initialises this node
     *    from `parent`, `key` and `obj` and returns it;
     *  - build mode (otherwise): creates one fill-mode node per enumerable
     *    key of `json`, links them as siblings and returns the FIRST of them
     *    instead of `this`.
     *
     * NOTE(review): when `json` has no enumerable keys, `current` stays
     * undefined, so `new JNode(...)` evaluates to the uninitialised instance
     * (no key/obj/parent set) — confirm callers can cope with that.
     */
    function JNode(json, parent, pred, key, obj, fill) {
        // `pred` is reset here, so the value passed by the caller is ignored;
        // sibling links are set by the build loop below.
        var node, pred = null;
        if (parent === undefined) {
            parent = null;
        } else if (fill) {
            this.parent = parent;
            this.pred = pred;
            this.node = null;
            this.next = null;
            this.key = key;
            this.obj = obj;
            return this;
        }
        var current;
        var parse = (json instanceof Array);
        for (var child in json) {
            if (parse) child = parseInt(child); // array keys become numbers
            var sub = json[child];
            node = new JNode(null, parent, pred, child, sub, true);
            if (pred) {
                pred.next = node;
                node.pred = pred;
            }
            if (!current) current = node; // remember the first sibling
            pred = node;
        }
        return current;
    }

    JNode.prototype = {
        // The child chain if it was already built, else whether `obj` is an object.
        get hasNode() {
            if (this.node) return this.node;
            return (this.obj instanceof Object);
        },
        // Truthy when the key is set and is not a number.
        get hasOwnKey() { return this.key && (typeof this.key != "number"); },
        // Depth of this node: 1 plus the number of parent links above it.
        get level() {
            var level = 1, i = this;
            while(i = i.parent) level++;
            return level;
        },
        // Builds the child chain for `obj` on first use, caches it in `node`, and returns it.
        Down: function() {
            if (!this.node && this.obj instanceof Object) {
                this.node = new JNode(this.obj, this);
            }
            return this.node;
        },
        // Serialises this node and its following siblings; only child chains
        // that were already built via Down() are descended into, the rest
        // are written as "undefined".
        Stringify: function() { // Raw test stringify - #s are taken same as strings
            var res;
            if (typeof this.key == "number") {
                res = '[';
                var i = this;
                do {
                    if (i.node) res += i.node.Stringify();
                    else res += "undefined";
                    i = i.next;
                    if (i) res += ','
                } while(i);
                res += ']';
            } else {
                res = '{' + '"' + this.key + '":';
                res += (this.node?this.node.Stringify():this.hasNode?"undefined":'"'+this.obj+'"');
                var i = this;
                while (i = i.next) {
                    res += ',' + '"' + i.key + '":';
                    if (i.node) res += i.node.Stringify();
                    else {
                        if (i.obj instanceof Object) res += "undefined";
                        else res += '"' + i.obj + '"';
                    }
                };
                res += '}';
            }
            return res;
        },
        // Returns the next node in depth-first order: first child, else next
        // sibling, else the next sibling of the nearest ancestor that has
        // one. Returns 0 when the traversal is finished.
        DepthFirst: function () {
            if (this == null) return 0; // exit sign
            if (this.node != null || this.obj instanceof Object) {
                return this.Down(); // moved down
            } else if (this.next != null) {
                return this.next;// moved right
            } else {
                var i = this;
                while (i != null) {
                    if (i.next != null) {
                        return i.next; // returned up & moved next
                    }
                    i = i.parent;
                }
            }
            return 0; // exit sign
        }
    }

    return JNode;
})();

// Run the demo. DemoJSON is declared further down but is hoisted, so it is
// already callable here.
DamnDemo();
/**
 * Returns a freshly built copy of the sample document from the question:
 * two entries whose "tiger" arrays hold partially overlapping objects.
 * A new structure is created on every call, so callers may mutate it freely.
 *
 * @returns {Array<Object>} The sample data.
 */
function DemoJSON() {
    const firstEntry = {
        "dog": "lmn",
        "tiger": [
            {
                "bengoltiger": { "height": { "x": 4 } },
                "indiantiger": { "paw": "a", "foor": "b" }
            },
            {
                "bengoltiger": { "width": { "a": 8 } },
                "indiantiger": { "b": 3 }
            }
        ]
    };
    const secondEntry = {
        "dog": "pqr",
        "tiger": [
            {
                "bengoltiger": { "width": { "m": 3 } },
                "indiantiger": { "paw": "a", "foor": "b" }
            },
            {
                "bengoltiger": { "height": { "n": 8 } },
                "indiantiger": { "b": 3 }
            }
        ],
        "lion": 90
    };
    return [firstEntry, secondEntry];
}

来源：https://stackoverflow.com/questions/58008420/how-to-obtain-a-master-structure-for-a-json-file

标签

javascript

json

recursion

tree