Fastest way to flatten / un-flatten nested JSON objects

前端 未结 13 1235
孤城傲影
孤城傲影 2020-11-21 20:32

I threw some code together to flatten and un-flatten complex/nested JSON objects. It works, but it's a bit slow (triggers the 'long script' warning).

For the flat

相关标签:
13条回答
  • 2020-11-21 21:20

    I wrote two functions to flatten and unflatten a JSON object.


    Flatten a JSON object:

    /**
     * Flattens a nested structure into a single-level object whose keys are
     * paths: ".name" segments for object properties and "[index]" segments
     * for array elements.
     *
     * Empty arrays and empty objects are stored as-is under their own path;
     * a top-level value with nothing to enumerate is stored under "".
     *
     * @param {*} table value to flatten
     * @returns {Object} map from path string to leaf value
     */
    var flatten = (function (isArray, wrapped) {
        // Stores `value` under `key` when it is a primitive (Object(value)
        // yields a different wrapper), otherwise descends into it.
        function visit(key, accumulator, value) {
            if (wrapped(value) !== value) accumulator[key] = value;
            else reduce(key, accumulator, value);
        }

        function reduce(path, accumulator, table) {
            if (isArray(table)) {
                var count = table.length;

                // An empty array has no children to emit, so keep it as a leaf.
                if (!count) {
                    accumulator[path] = table;
                    return accumulator;
                }

                for (var position = 0; position < count; position++) {
                    visit(path + "[" + position + "]", accumulator, table[position]);
                }

                return accumulator;
            }

            // Top-level keys are emitted without a leading dot.
            var prefix = path ? path + "." : "";
            var sawKey = false;

            for (var name in table) {
                sawKey = true;
                visit(prefix + name, accumulator, table[name]);
            }

            // Nothing enumerable: record the value itself under its path.
            if (!sawKey) accumulator[path] = table;

            return accumulator;
        }

        return function (table) {
            return reduce("", {}, table);
        };
    }(Array.isArray, Object));
    

    Performance:

    1. It's faster than the current solution in Opera. The current solution is 26% slower in Opera.
    2. It's faster than the current solution in Firefox. The current solution is 9% slower in Firefox.
    3. It's faster than the current solution in Chrome. The current solution is 29% slower in Chrome.

    Unflatten a JSON object:

    /**
     * Rebuilds a nested structure from a flattened table whose keys use
     * ".name" segments for object properties and "[index]" segments for
     * array elements.
     *
     * The tree is assembled under the "" key of a holder object, which is
     * what gets returned; an empty table therefore yields undefined.
     *
     * @param {Object} table map from path string to leaf value
     * @returns {*} the reconstructed value
     */
    function unflatten(table) {
        var holder = {};

        for (var path in table) {
            var node = holder;
            var key = "";
            var pos = 0;
            var size = path.length;

            while (pos < size) {
                if (path.charAt(pos) === "[") {
                    // "[index]" segment: the pending key must refer to an array.
                    var open = pos + 1;
                    var close = path.indexOf("]", open);

                    node = node[key] = node[key] || [];
                    key = path.slice(open, close);
                    pos = close + 1;
                } else {
                    // Name segment: the pending key must refer to an object.
                    node = node[key] = node[key] || {};

                    var from = path.charAt(pos) === "." ? pos + 1 : pos;
                    var nextBracket = path.indexOf("[", from);
                    var nextDot = path.indexOf(".", from);
                    var stop;

                    // The name ends at the nearest "[" or ".", else at the end of the path.
                    if (nextBracket < 0 && nextDot < 0) stop = size;
                    else if (nextBracket < 0) stop = nextDot;
                    else if (nextDot < 0) stop = nextBracket;
                    else stop = Math.min(nextBracket, nextDot);

                    key = path.slice(from, stop);
                    pos = stop;
                }
            }

            node[key] = table[path];
        }

        return holder[""];
    }
    

    Performance:

    1. It's faster than the current solution in Opera. The current solution is 5% slower in Opera.
    2. It's slower than the current solution in Firefox. My solution is 26% slower in Firefox.
    3. It's slower than the current solution in Chrome. My solution is 6% slower in Chrome.

    Flatten and unflatten a JSON object:

    Overall my solution performs either equally well or even better than the current solution.

    Performance:

    1. It's faster than the current solution in Opera. The current solution is 21% slower in Opera.
    2. It's as fast as the current solution in Firefox.
    3. It's faster than the current solution in Chrome. The current solution is 20% slower in Chrome.

    Output format:

    A flattened object uses the dot notation for object properties and the bracket notation for array indices:

    1. {foo:{bar:false}} => {"foo.bar":false}
    2. {a:[{b:["c","d"]}]} => {"a[0].b[0]":"c","a[0].b[1]":"d"}
    3. [1,[2,[3,4],5],6] => {"[0]":1,"[1][0]":2,"[1][1][0]":3,"[1][1][1]":4,"[1][2]":5,"[2]":6}

    In my opinion this format is better than only using the dot notation:

    1. {foo:{bar:false}} => {"foo.bar":false}
    2. {a:[{b:["c","d"]}]} => {"a.0.b.0":"c","a.0.b.1":"d"}
    3. [1,[2,[3,4],5],6] => {"0":1,"1.0":2,"1.1.0":3,"1.1.1":4,"1.2":5,"2":6}

    Advantages:

    1. Flattening an object is faster than the current solution.
    2. Flattening and unflattening an object is as fast as or faster than the current solution.
    3. Flattened objects use both the dot notation and the bracket notation for readability.

    Disadvantages:

    1. Unflattening an object is slower than the current solution in most (but not all) cases.

    The current JSFiddle demo gave the following values as output:

    Nested : 132175 : 63
    Flattened : 132175 : 564
    Nested : 132175 : 54
    Flattened : 132175 : 508
    

    My updated JSFiddle demo gave the following values as output:

    Nested : 132175 : 59
    Flattened : 132175 : 514
    Nested : 132175 : 60
    Flattened : 132175 : 451
    

    I'm not really sure what that means, so I'll stick with the jsPerf results. After all jsPerf is a performance benchmarking utility. JSFiddle is not.

    0 讨论(0)
  • 2020-11-21 21:20

    I added +/- 10-15% efficiency to the selected answer by minor code refactoring and moving the recursive function outside of the function namespace.

    See my question: Are namespaced functions reevaluated on every call? for why this slows nested functions down.

    /**
     * Recursive worker for flatten(): writes every leaf of `obj` into `target`
     * under its path, using ".name" for object properties and "[index]" for
     * array elements.
     *
     * Only arrays and plain objects (prototype Object.prototype or null) are
     * descended into; everything else, including null and undefined, is
     * stored as a leaf. Empty arrays are stored as []. Empty objects are
     * stored as {} only when they have a non-empty path.
     *
     * @param {Object} target accumulator that receives the flattened entries
     * @param {*} obj value currently being flattened
     * @param {?string} path path of `obj` so far; null/"" means the root
     */
    function _flatten (target, obj, path) {
      var i, empty, proto;
      // A missing path means the root; use "" so keys never start with "null".
      var prefix = path || '';

      if (Array.isArray(obj)) {
        i = obj.length;
        if (i > 0) {
          // Reverse iteration is kept from the original, so key insertion
          // order in `target` is unchanged.
          while (i--) {
            _flatten(target, obj[i], prefix + '[' + i + ']');
          }
        } else {
          target[prefix] = [];
        }
        return;
      }

      // Checking the prototype instead of `obj.constructor` avoids throwing
      // on null/undefined and is not fooled by an own "constructor" key.
      proto = (obj !== null && typeof obj === 'object') ? Object.getPrototypeOf(obj) : undefined;
      if (proto === Object.prototype || proto === null) {
        empty = true;
        for (i in obj) {
          empty = false;
          _flatten(target, obj[i], path ? path + '.' + i : i);
        }
        if (empty && path) {
          target[path] = {};
        }
        return;
      }

      // Primitives, null, undefined and non-plain objects are leaves.
      target[prefix] = obj;
    }

    /**
     * Flattens nested data into a single-level object keyed by path.
     *
     * @param {*} data value to flatten
     * @returns {Object} map from path string to leaf value
     */
    function flatten (data) {
      var result = {};
      _flatten(result, data, null);
      return result;
    }
    

    See benchmark.

    0 讨论(0)
  • 2020-11-21 21:25

    Here's my much shorter implementation:

    /**
     * Rebuilds a nested structure from a flattened object whose keys use
     * ".name" segments for object properties and "[index]" segments for
     * array elements.
     *
     * Non-objects and arrays are returned unchanged. The tree is assembled
     * under the "" key of a holder object. A flattened root value (key "")
     * is returned as-is even when it is falsy (0, "", false, null). An input
     * with no keys yields an empty object.
     *
     * NOTE(security): keys are used as property names without filtering, so
     * a path containing "__proto__" can reach Object.prototype. Do not call
     * this on untrusted input.
     *
     * @param {*} data flattened object
     * @returns {*} the reconstructed value
     */
    Object.unflatten = function(data) {
        "use strict";
        if (Object(data) !== data || Array.isArray(data))
            return data;
        var regex = /\.?([^.\[\]]+)|\[(\d+)\]/g,
            resultholder = {};
        for (var p in data) {
            var cur = resultholder,
                prop = "",
                m;
            // Each match is one path segment: m[1] is a property name and
            // m[2] an array index. The segment type decides which container
            // is created for the *previous* key.
            while (m = regex.exec(p)) {
                cur = cur[prop] || (cur[prop] = (m[2] ? [] : {}));
                prop = m[2] || m[1];
            }
            cur[prop] = data[p];
        }
        // Test for presence rather than truthiness so a falsy root value is
        // not mistaken for "no root was produced".
        return Object.prototype.hasOwnProperty.call(resultholder, "")
            ? resultholder[""]
            : resultholder;
    };
    

    flatten hasn't changed much (and I'm not sure whether you really need those isEmpty cases):

    /**
     * Flattens nested data into a single-level object keyed by path, using
     * ".name" for object properties and "[index]" for array elements.
     *
     * Primitives are stored under their path ("" for a primitive root).
     * Empty arrays are stored as []. Empty objects are stored as {} unless
     * they are the root.
     *
     * @param {*} data value to flatten
     * @returns {Object} map from path string to leaf value
     */
    Object.flatten = function(data) {
        var flat = {};

        function walk(node, path) {
            // Object(x) returns x itself only for non-primitives.
            if (Object(node) !== node) {
                flat[path] = node;
                return;
            }

            if (Array.isArray(node)) {
                var count = node.length;
                if (count === 0) {
                    flat[path] = [];
                    return;
                }
                for (var idx = 0; idx < count; idx++) {
                    walk(node[idx], path + "[" + idx + "]");
                }
                return;
            }

            var sawKey = false;
            for (var key in node) {
                sawKey = true;
                // Top-level keys are emitted without a leading dot.
                walk(node[key], path ? path + "." + key : key);
            }
            if (!sawKey && path) {
                flat[path] = {};
            }
        }

        walk(data, "");
        return flat;
    };
    

    Together, they run your benchmark in about half the time (Opera 12.16: ~900ms instead of ~1900ms, Chrome 29: ~800ms instead of ~1600ms).

    Note: This and most other solutions answered here focus on speed, are susceptible to prototype pollution, and should not be used on untrusted objects.

    0 讨论(0)
  • 2020-11-21 21:25

    Here's another approach that runs slower (about 1000ms) than the above answer, but has an interesting idea :-)

    Instead of iterating through each property chain, it just picks the last property and uses a look-up table for the rest to store the intermediate results. This look-up table is iterated until there are no property chains left and all values reside on unconcatenated properties.

    /**
     * Rebuilds a nested structure from a flattened object by repeatedly
     * peeling the LAST segment off each path and attaching the value to a
     * container registered for the remaining prefix. Prefixes are queued and
     * processed the same way until only top-level keys remain.
     *
     * Differences from the naive version:
     *  - intermediate containers live in a private look-up table, so no
     *    extra keys are added to `data`;
     *  - prefixes are looked up as own properties only, so a path such as
     *    "__proto__.x" or "toString.x" no longer writes to inherited objects;
     *  - the "" key (a flattened root value) is supported;
     *  - an input with no keys yields {} instead of undefined;
     *  - the queue is walked by index instead of Array#shift.
     *
     * Non-objects and arrays are returned unchanged.
     *
     * @param {*} data flattened object
     * @returns {*} the reconstructed value
     * @throws {TypeError} if a key does not end in a valid path segment
     */
    JSON.unflatten = function(data) {
        "use strict";
        if (Object(data) !== data || Array.isArray(data))
            return data;
        var regex = /\.?([^.\[\]]+)$|\[(\d+)\]$/,
            hasOwn = Object.prototype.hasOwnProperty,
            props = Object.keys(data),
            // Null prototype: arbitrary path strings are safe as keys here.
            nodes = Object.create(null),
            hasResult = false,
            result, p, m, rest, target;
        // props grows while we iterate: every new prefix is appended once.
        for (var i = 0; i < props.length; i++) {
            p = props[i];
            if (p === "") {
                // Flattened root value (primitive or empty array).
                result = data[p];
                hasResult = true;
                continue;
            }
            m = regex.exec(p);
            if (m === null)
                throw new TypeError("Malformed flattened key: " + p);
            if (m.index) {
                rest = p.slice(0, m.index);
                if (hasOwn.call(data, rest)) {
                    // The input itself supplies a value for this prefix.
                    target = data[rest];
                } else {
                    if (!(rest in nodes)) {
                        // The segment type (index vs. name) picks the container.
                        nodes[rest] = m[2] ? [] : {};
                        props.push(rest);
                    }
                    target = nodes[rest];
                }
            } else {
                // Top-level segment: attach to the root, created on first use.
                if (!hasResult) {
                    result = m[2] ? [] : {};
                    hasResult = true;
                }
                target = result;
            }
            target[m[2] || m[1]] = hasOwn.call(data, p) ? data[p] : nodes[p];
        }
        return hasResult ? result : {};
    };
    

    It currently uses the data input parameter for the table, and puts lots of properties on it - a non-destructive version should be possible as well. Maybe a clever lastIndexOf usage performs better than the regex (depends on the regex engine).

    See it in action here.

    0 讨论(0)
  • 2020-11-21 21:27

    3 ½ Years later...

    For my own project I wanted to flatten JSON objects in mongoDB dot notation and came up with a simple solution:

    /**
     * Recursively flattens a JSON object using dot notation.
     *
     * NOTE: input must be an object as described by the JSON spec. Arbitrary
     * JS objects (e.g. {a: () => 42}) may result in unexpected output.
     * MOREOVER, it removes keys with empty objects/arrays as value (see
     * examples below).
     *
     * @example
     * // returns {a:1, 'b.0.c': 2, 'b.0.d.e': 3, 'b.1': 4}
     * flatten({a: 1, b: [{c: 2, d: {e: 3}}, 4]})
     * // returns {a:1, 'b.0.c': 2, 'b.0.d.e.0': true, 'b.0.d.e.1': false, 'b.0.d.e.2.f': 1}
     * flatten({a: 1, b: [{c: 2, d: {e: [true, false, {f: 1}]}}]})
     * // returns {a: 1}
     * flatten({a: 1, b: [], c: {}})
     *
     * @param {*} obj item to be flattened
     * @param {string[]} [prefix=[]] chain of prefixes joined with a dot and prepended to key
     * @param {Object} [current={}] result of flatten during the recursion
     * @returns {Object} `current`, holding one entry per leaf value
     *
     * @see https://docs.mongodb.com/manual/core/document/#dot-notation
     */
    function flatten (obj, prefix, current) {
      prefix = prefix || []
      current = current || {}

      // Remember kids, null is also an object!
      if (typeof (obj) === 'object' && obj !== null) {
        Object.keys(obj).forEach(key => {
          // Recurse by name: `this` is undefined when flatten() is called as
          // a plain function in strict mode / ES modules, so `this.flatten`
          // would throw.
          flatten(obj[key], prefix.concat(key), current)
        })
      } else {
        current[prefix.join('.')] = obj
      }

      return current
    }
    

    Features and/or caveats

    • It only accepts JSON objects. So if you pass something like {a: () => {}} you might not get what you wanted!
    • It removes empty arrays and objects. So this {a: {}, b: []} is flattened to {}.
    0 讨论(0)
  • 2020-11-21 21:29

    You can use https://github.com/hughsk/flat

    Take a nested Javascript object and flatten it, or unflatten an object with delimited keys.

    Example from the doc

    // Usage example for the third-party `flat` package (not part of the
    // standard library; presumably installed via npm; verify against the
    // package README). The commented objects below are the results shown
    // in the quoted example, not values checked here.

    // The module's export is used directly as the flatten function.
    var flatten = require('flat')
    
    // Nested keys are joined with "." in the result shown below.
    flatten({
        key1: {
            keyA: 'valueI'
        },
        key2: {
            keyB: 'valueII'
        },
        key3: { a: { b: { c: 2 } } }
    })
    
    // {
    //   'key1.keyA': 'valueI',
    //   'key2.keyB': 'valueII',
    //   'key3.a.b.c': 2
    // }
    
    
    // The inverse operation is exposed as the `unflatten` property.
    var unflatten = require('flat').unflatten
    
    // Per the result shown below, a delimited key and an already-nested
    // value under the same prefix end up merged into one object.
    unflatten({
        'three.levels.deep': 42,
        'three.levels': {
            nested: true
        }
    })
    
    // {
    //     three: {
    //         levels: {
    //             deep: 42,
    //             nested: true
    //         }
    //     }
    // }
    
    
    0 讨论(0)
提交回复
热议问题