Compare arrays as (multi-) sets

前端 未结 8 1659
梦如初夏
梦如初夏 2021-01-04 21:42

I'm looking for an efficient way to find out whether two arrays contain the same number of equal elements (in the == sense), in any order:

foo = {         


        
相关标签:
8条回答
  • 2021-01-04 21:49

    Thanks everyone for sharing ideas! I've come up with the following:

    /**
     * Tells whether two arrays hold the same elements with the same
     * multiplicities, regardless of order.
     *
     * Primitives are matched by type and string form (so 1 and "1" differ,
     * while NaN matches NaN); objects, functions and symbols are matched by
     * identity.
     *
     * @param {Array} a - first array (not modified)
     * @param {Array} b - second array (not modified)
     * @returns {boolean} true when both arrays describe the same multiset
     */
    function sameElements(a, b) {
        // Arrays of different length can never hold the same multiset.
        if (a.length !== b.length) {
            return false;
        }
        var hash = function(x) {
            var type = typeof x;
            // Reference-like values have no usable string form, so they are
            // keyed by their first position in `a` (-1 when absent from `a`).
            var byIdentity = (type === "object" || type === "function" || type === "symbol");
            return type + (byIdentity ? a.indexOf(x) : String(x));
        };
        var keysA = a.map(hash).sort();
        var keysB = b.map(hash).sort();
        // Compare key by key: joining the keys into one string would let
        // elements that contain the separator collide with other arrays.
        return keysA.every(function(key, i) {
            return key === keysB[i];
        });
    }
    

    This isn't the fastest solution, but IMO it is the most readable one so far.

    0 讨论(0)
  • 2021-01-04 21:50

    Like this perhaps?

    // Sample data: `a` and `b` hold the same object reference `foo` plus the
    // numbers 1, 2 and 3, listed in different orders.
    // `bar` is declared but not referenced anywhere else in this snippet.
    var foo = {}; var bar=[];
    var a = [3,2,1,foo]; var b = [foo,1,2,3];
    
    /**
     * Checks whether arrays `a` and `b` contain the same elements the same
     * number of times, in any order. Elements are matched with indexOf,
     * i.e. by strict equality. Neither input array is modified.
     *
     * @param {Array} a - first array
     * @param {Array} b - second array
     * @returns {boolean} true when every element of `b` was paired with one of `a`
     */
    function comp(a,b)
    {
        // Arrays of unequal size cannot match.
        if (a.length != b.length) {
            return false;
        }

        // Work on a copy so the caller's `b` keeps its contents.
        var remaining = b.slice(0);

        a.forEach(function(item) {
            var pos = remaining.indexOf(item);
            if (pos === -1) {
                return; // no partner left for this element
            }
            remaining.splice(pos, 1); // consume the matched partner
        });

        // Everything was paired off exactly when nothing is left over.
        return remaining.length === 0;
    }
    
    // Compare the two sample arrays declared before the function; for those
    // values the call evaluates to true.
    comp(a,b);
    
    0 讨论(0)
  • 2021-01-04 21:55

    You can implement the following algorithm:

    • If a and b do not have the same length:
      • Return false.
    • Otherwise:
      • Clone b,
      • For each item in a:
        • If the item exists in our clone of b:
          • Remove the item from our clone of b,
        • Otherwise:
          • Return false.
      • Return true.

    With Javascript 1.6, you can use every() and indexOf() to write:

    /**
     * Order-insensitive comparison of two arrays as multisets.
     * Each element of `a` must find a not-yet-used partner in `b`; partners
     * are located with indexOf, so matching uses strict equality.
     *
     * @param {Array} a - first array
     * @param {Array} b - second array (left untouched; a copy is consumed)
     * @returns {boolean} true when both arrays hold the same elements equally often
     */
    function sameElements(a, b)
    {
        if (a.length != b.length) return false;

        // Private copy of b from which matched items are removed.
        var pool = b.slice();

        return a.every(function(candidate) {
            var at = pool.indexOf(candidate);
            var found = at >= 0;
            if (found) {
                pool.splice(at, 1);
            }
            return found;
        });
    }
    

    Note this implementation does not completely fulfill your requirements because indexOf() uses strict equality (===) internally. If you really want non-strict equality (==), you will have to write an inner loop instead.

    0 讨论(0)
  • 2021-01-04 21:55

    I wasn't sure if "===" is OK, as the question is a bit vague. If it is, this is quite a bit faster and simpler than some other possible ways of doing it:

    /**
     * Tells whether `a` and `b` contain the same elements the same number of
     * times, in any order. Elements are matched by strict equality (indexOf).
     *
     * A plain membership test is not enough here: [1,1,2] and [1,2,2] have
     * equal lengths and each element of one occurs in the other, yet they are
     * different multisets. Every match therefore consumes one element of a
     * copy of `b`.
     *
     * @param {Array} a - first array (not modified)
     * @param {Array} b - second array (not modified)
     * @returns {boolean} true when both arrays hold the same multiset
     */
    function isSame(a,b){
      if (a.length !== b.length) {
        return false;
      }
      var rest = b.slice(); // consumed while matching; keeps `b` intact
      return a.every(function(item){
        var i = rest.indexOf(item);
        if (i === -1) {
          return false;
        }
        rest.splice(i, 1);
        return true;
      });
    }
    
    0 讨论(0)
  • 2021-01-04 21:59

    Using efficient lookup tables for the counts of the elements:

    /**
     * Compares any number of arrays as multisets, using per-array lookup
     * tables that count how often each element occurs.
     *
     * Counting rules:
     *  - NaN values are counted together;
     *  - null and undefined are counted together (null == undefined);
     *  - booleans, numbers and strings share one table keyed by their string
     *    form, with true/false first converted to 1/0;
     *  - objects, functions and symbols are counted by identity.
     * Objects are never unwrapped, so an object is never counted as equal to
     * a primitive.
     *
     * @param {Array} a - first array; further arrays are read from `arguments`
     * @returns {boolean} true when all given arrays describe the same
     *     multiset (also true when fewer than two arrays are passed)
     */
    function sameElements(a) { // can compare any number of arrays
        var map, maps = [], // counting booleans, numbers and strings
            nulls = [], // counting undefined and null
            nans = [], // counting nans
            objs, counts, objects = [],
            al = arguments.length;

        // quick escapes:
        if (al < 2)
            return true;
        var l0 = a.length;
        if ([].slice.call(arguments).some(function(s) { return s.length != l0; }))
            return false;

        for (var i=0; i<al; i++) {
            var multiset = arguments[i];
            // Prototype-less table: keys such as "toString" or "constructor"
            // must not be mistaken for entries that were already counted.
            maps.push(map = Object.create(null));
            objects.push({vals: objs=[], count: counts=[]});
            nulls[i] = 0;
            nans[i] = 0;
            for (var j=0; j<l0; j++) {
                var val = multiset[j];
                if (val !== val)
                    nans[i]++;
                else if (val == null) // matches both null and undefined
                    nulls[i]++;
                else if (Object(val) === val || typeof val == "symbol") {
                    // identity-compared values (symbols cannot serve as
                    // enumerable string keys of the primitive table)
                    var ind = objs.indexOf(val);
                    if (ind > -1)
                        counts[ind]++;
                    else {
                        objs.push(val);
                        counts.push(1);
                    }
                } else { // booleans, strings and numbers do compare together
                    if (typeof val == "boolean")
                        val = +val;
                    if (val in map)
                        map[val]++;
                    else
                        map[val] = 1;
                }
            }
        }

        // testing if nulls and nans are the same everywhere
        for (var i=1; i<al; i++)
            if (nulls[i] != nulls[0] || nans[i] != nans[0])
                return false;

        // testing if primitives were the same everywhere
        var map0 = maps[0];
        for (var el in map0)
            for (var i=1; i<al; i++) {
                if (map0[el] !== maps[i][el])
                    return false;
                // remove matched keys so that leftovers can be detected below
                delete maps[i][el];
            }
        // any key still present was missing from the first array
        for (var i=1; i<al; i++)
            for (var el in maps[i])
                return false;

        // testing if objects were the same everywhere
        var objs0 = objects[0].vals,
            ol = objs0.length,
            counts0 = objects[0].count; // declared locally, not as a global
        for (var i=1; i<al; i++)
            if (objects[i].count.length != ol)
                return false;
        for (var i=0; i<ol; i++)
            for (var j=1; j<al; j++)
                // an object missing from array j yields index -1, hence an
                // undefined count, which never equals counts0[i]
                if (objects[j].count[ objects[j].vals.indexOf(objs0[i]) ] != counts0[i])
                    return false;

        // else, the multisets are equal:
        return true;
    }
    

    It still uses an indexOf search among all objects, so if you have multisets with many different objects you might want to optimize that part as well. Have a look at Unique ID or object signature (and its duplicate questions) for how to get lookup table keys for them. And if you don't have many primitive values in the multisets, you might just store them in arrays and sort those before comparing them item by item (like @Bruno did).

    Disclaimer: This solution doesn't try to get the [[PrimitiveValue]] of objects, they will never be counted as equal to primitives (while == would do).

    Here is the update on @Bruno's jsperf test of the answers. However, I suspect that its test data (only two distinct objects, each present 500 times in the 10k array, and no duplicate primitive values) is not representative.

    0 讨论(0)
  • 2021-01-04 22:02

    UPDATE

    As @Bergi and @thg435 point out my previous implementation was flawed so here is another implementation:

    /**
     * Tells whether arrays `a` and `b` hold the same elements with the same
     * multiplicities, in any order.
     *
     * Sorted copies of both arrays are walked in parallel. Primitives at the
     * same position must be loosely equal (==), with NaN matching NaN.
     * Non-primitives are counted per array through temporary, non-enumerable
     * "__aCount" / "__bCount" properties placed on the objects themselves;
     * the two counters of every object must agree.
     *
     * The input arrays keep their order, and the temporary counters are
     * removed on every exit path, including early returns and exceptions.
     *
     * @param {Array} a - first array
     * @param {Array} b - second array
     * @returns {boolean} true when both arrays describe the same multiset
     */
    function sameElements(a, b) {
        // if length is not the same then must not be equal
        if (a.length != b.length) return false;

        // Sort copies, not the caller's arrays: Array.prototype.sort works in
        // place. The default (string) ordering lines the elements up.
        var sortedA = a.slice().sort();
        var sortedB = b.slice().sort();

        // every object that may carry a temporary counter
        var tagged = [];

        try {
            for ( var i = 0; i < sortedA.length; i++ ) {
                var left = sortedA[i];
                var right = sortedB[i];
                var leftIsPrimitive = isPrimitive(left);
                var rightIsPrimitive = isPrimitive(right);

                // NaN will not equal itself
                if( left !== left ) {
                    if( right === right ) {
                        return false;
                    }
                }
                else if (leftIsPrimitive && rightIsPrimitive) {
                    if( left != right ) return false;
                }
                // if not primitive increment the per-array counters
                else if (!leftIsPrimitive && !rightIsPrimitive) {
                    // remember the objects first so that they are cleaned up
                    // even if one of the increments throws
                    tagged.push(left, right);
                    incrementCountA(left);
                    incrementCountB(right);
                }
                // a primitive facing a non-primitive: the arrays contain
                // different numbers of primitives
                else {
                    return false;
                }
            }

            // an object occurs equally often in both arrays exactly when its
            // two counters match
            for (var j = 0; j < tagged.length; j++) {
                if( tagged[j].__aCount !== tagged[j].__bCount ) return false;
            }
            return true;
        } finally {
            // revert the objects to what they were before entering this function
            for (var k = 0; k < tagged.length; k++) {
                delete tagged[k].__aCount;
                delete tagged[k].__bCount;
            }
        }
    }

    // inspired by @Bergi's code
    /**
     * @param {*} arg - any value
     * @returns {boolean} true when `arg` is not an object or function
     */
    function isPrimitive(arg) {
        return Object(arg) !== arg;
    }

    /**
     * Adds one to the "__aCount" counter of `arg`, creating it as a
     * non-enumerable, writable, configurable own property on first use.
     *
     * @param {Object} arg - object to tag
     */
    function incrementCountA(arg) {
        // called through Object.prototype so that objects without a
        // prototype (Object.create(null)) are supported as well
        if (Object.prototype.hasOwnProperty.call(arg, "__aCount")) {
            arg.__aCount = arg.__aCount + 1;
        } else {
            Object.defineProperty(arg, "__aCount", {
                enumerable: false,
                value: 1,
                writable: true,
                configurable: true
            });
        }
    }

    /**
     * Adds one to the "__bCount" counter of `arg`, creating it as a
     * non-enumerable, writable, configurable own property on first use.
     *
     * @param {Object} arg - object to tag
     */
    function incrementCountB(arg) {
        if (Object.prototype.hasOwnProperty.call(arg, "__bCount")) {
            arg.__bCount = arg.__bCount + 1;
        } else {
            Object.defineProperty(arg, "__bCount", {
                enumerable: false,
                value: 1,
                writable: true,
                configurable: true
            });
        }
    }
    

    Then just call the function

    // Example calls; the trailing comment on each line is the call's result.

    // The string "NaN" is not loosely equal to the number NaN
    sameElements( ["NaN"], [NaN] ); // false
    
    // As "1" == 1 returns true
    sameElements( [1],["1"] ); // true
    
    // The arrays differ in length
    sameElements( [1,2], [1,2,3] ); //false
    

    The above implementation defines temporary properties called "__aCount" and "__bCount" on the non-primitive elements, which are used to keep track of how often each of them occurs in the two arrays. These are deleted before the function returns so as to leave the array elements as they were before.

    Fiddle here

    jsperf here.

    The reason I changed the jsperf test case is that, as @Bergi states, the test arrays were not representative of what we are testing for, especially because there were only 2 unique objects in the whole array.

    One other advantage of this implementation is that if you need to make it compatible with pre IE9 browsers instead of using the defineProperty to create a non-enumerable property you could just use a normal property.

    0 讨论(0)
提交回复
热议问题