Algorithm to get changes between two arrays

后端 未结 8 1392
难免孤独
难免孤独 2021-02-04 05:43

I needed to create an algorithm which will (efficiently) take an old array and a new array and give me back the changes between the two (which items added, which removed). It ha

相关标签:
8条回答
  • 2021-02-04 06:09
    // I prefer to not sort the arrays
    
    Array.prototype.diff= function(ar){
        var a= [], i= 0, L= this.length,
        ar= ar.concat(), t1, t2;
        while(i<L){
            t1= this[i++];
            t2= ar.indexOf(t1);
            if(t2!= -1){
                ar.splice(t2, 1);
            }
            else a.push(t1);
        }
        return a;
    }
    
    Array.prototype.compare= function(a2){
        return{
            r: this.diff(a2), a: a2.diff(this)
        }
    }
    
    /* 
    test
    
    var a1= [-1, 2, 3, 3, 4, 5], a2= [0, 2, 4, 3, 5, 6, 8];
    var o= a1.compare(a2);
    
    alert('added: '+o.a+'\nremoved: '+o.r);
    
    
    returned:
    
    added: 0, 6, 8
    removed: -1, 3
    
    */
    
    
    // oldbrowser code:
    
    if(![].indexOf){
        Array.prototype.indexOf= function(what, i){
            i= i || 0;
            var L= this.length;
            while(i< L){
                if(this[i]=== what) return i;
                ++i;
            }
            return -1;
        }
    }
    
    // Return common values instead of differences
    Array.prototype.incommon= function(ar){
        var a= [], i= 0, L= this.length, a2= ar.concat(), t1, t2;
        while(i<L && a2.length){
            t1= this[i++];
            t2= a2.indexOf(t1);
            if(t2 != -1){
                a.push(a2.splice(t2, 1));
            }
        }
        return a;
    }
    
    0 讨论(0)
  • 2021-02-04 06:17

    I created a speed test between two possible implementations.

    Source code:

    function diff1 (o, n) { 
      // deal with empty lists 
      if (o == undefined) o = []; 
      if (n == undefined) n = []; 
    
      // sort both arrays (or this won't work) 
      o.sort(); n.sort(); 
    
      // don't compare if either list is empty 
      if (o.length == 0 || n.length == 0) return {added: n, removed: o}; 
    
      // declare temporary variables 
      var op = 0; var np = 0; 
      var a = []; var r = []; 
    
      // compare arrays and add to add or remove lists 
      while (op < o.length && np < n.length) { 
          if (o[op] < n[np]) { 
              // push to diff? 
              r.push(o[op]); 
              op++; 
          } 
          else if (o[op] > n[np]) { 
              // push to diff? 
              a.push(n[np]); 
              np++; 
          } 
          else { 
              op++;np++; 
          } 
      } 
    
      // add remaining items 
      if( np < n.length ) 
        a = a.concat(n.slice(np, n.length)); 
      if( op < o.length ) 
        r = r.concat(o.slice(op, o.length)); 
    
      return {added: a, removed: r};  
    }
    
    function diff2 (o, n) {
            // convert array items to object members
        var objO = {}, objN = {};
        for (var i = 0; i < o.length; i++) {
            objO[o[i]] = 1;
        }
        for (var i = 0; i < n.length; i++) {
            objN[n[i]] = 1;
        }
    
        var a = []; var r = []; 
    
        for (var i in objO) {
            if (i in objN) {
                delete objN[i];
            }
            else {
                r.push (i);
            }
        }
        for (var i in objN) {
            a.push (i);
        }
        return {added: a, removed: r};
    }
    
    var o = [], n = [];
    for (var i = 0; i < 300000; i++) {
        if (i % 2 == 0) {
            o.push (i);
        }
        if (i % 3 == 0) {
            n.push (i);
        }
    }
    
    var start = new Date ();
    diff1 (o, n);
    var end1 = new Date ();
    diff2 (o, n);
    var end2 = new Date ();
    
    alert ((end1 - start) + ", " + (end2 - end1));
    

    The disadvantage of diff2 that the returned arrays (added, removed) are not sorted.

    Speed Test:

    IE7: diff1: 2578ms, diff2: 1906ms

    IE8: diff1: 1953ms, diff2: 1152ms

    Firefox: diff1: 254ms, diff2: 527ms

    Opera: diff1: 143ms, diff2: 253ms

    Safari: diff1: 466ms, diff2: 657ms

    Chrome: diff1: 734ms, diff2: 581ms

    Conclusion: diff1 is faster in Firefox, Opera and Safari, diff2 is faster in IE and Chrome.

    0 讨论(0)
  • 2021-02-04 06:17

    Your algorithm looks pretty good for having come up with it yourself! Congrats!
    Keep in mind thugh that while your algorithm reveals changes of the content of two arrays (item removal, etc), it does not reveal changes of content order (or removed items being added again later on).

    You could for example remove item 1 of array a and add it back in later on, technically changing array a from array b, however remaining unnoticed by your algorithm.

    array a: {1, 2, 3, 4, 5, 6}
    
    array b: {1, 2, 3, 4, 5, 6}
    
    array a: {2, 3, 4, 5, 6}    //after a.shift();
    array a: {2, 3, 4, 5, 6, 1} //after a.push(1);
    
    => a != b //your algorithm would return "a == b" though
    

    Your alorithm might be sufficient for your particular needs, however for a really strict array diff algorithm I would try to port a string diff algorithm. Basically changing the algorithm so instead of comparing chars/runs in a string it compares the items in your array.

    Javascript string diff algorithm (JS Code)

    0 讨论(0)
  • 2021-02-04 06:23

    There is no undefined constant. You should check the type of the variable instead:

    if (typeof o === 'undefined') o = [];
    

    Edit:

    As Tim Down showed, the property is actually defined in the standard, but as the standard doesn't define it to be constant, it's unreliable and should not be used.

    0 讨论(0)
  • 2021-02-04 06:27

    I think the implementation of the diff method is correct, the time complexity of your algorithm is O(n * log(n)) because of the sort methods. If you use arrays, you need to sort them before the comparison and the time complexity of sorting algorithms is at least O(n * log(n)).

    If the o and n arrays cannot contain the same value more than once, maybe the use of objects (associative arrays) instead of arrays is a better choice.

    Example:

    function diff(o, n) { 
        var a = {}; var r = {}; 
    
        for (var i in o) {
            if (!(i in n)) {
                r[i] = 1;
            }
        }
        for (var i in n) {
            if (!(i in o)) {
                a[i] = 1;
            }
        }
        return {added: a, removed: r};
    }
    
        // how to use
    var o = {"a":1, "b":1, "ab":1};
    var n = {"a":1, "aa":1};
    
    var diffon = diff (o, n);
    
        // display the results
    var added = "", removed = "";
    for (var i in diffon.added) {
        added += i + ", ";
    }
    for (var i in diffon.removed) {
        removed += i + ", ";
    }
    
    alert ("added: " + added);
    alert ("removed: " + removed);
    

    The time complexity in that case is O(n).

    If you need further details about arrays, associative arrays in JavaScript, I suggest you the following link: Array object

    0 讨论(0)
  • 2021-02-04 06:27

    I use this function:

    function diffArray(from, to){
        /*
        result will hold the transformations (in order) that need to 
        be done to make the from array equal to the to array
        */
        var result = [];
        var fromValue, fromIndex, fromCount, fromOffset;
        var toValue, toIndex, toCount, toMap;
    
        /*
        Buld an index for the two arrays to speed up the process. Do
        note that due to this optimization all values in the array will
        be transformed to strings. So the number 1 will be equal to the
        string '1'. Also all objects will converted to strings (via
        toString) and therefore probably considered equal.
        */
        toMap = to.reduce(function(result, value, index){
            if(value in result) result[value].push(index);
            else result[value] = [index];
            return result;
        }, {});
        toIndex = 0;
        toCount = to.length;
    
        fromOffset = 0;
        fromIndex = 0;
        fromCount = from.length;
    
        /*
        loop until we reach the end of one of the two arrays
        */
        while(fromIndex < fromCount && toIndex < toCount){
            fromValue = from[fromIndex];
            toValue = to[toIndex];
    
            /*
            when the two values are equal, no transformation is required.
            */
            if(fromValue === toValue){
                fromIndex++;
                toIndex++;
            }
            else{
                /*
                if fromValue is not in the remainder of the to array
                */
                // if(~to.indexOf(fromValue, toIndex)){ 
                if(
                    fromValue in toMap
                    && toMap[fromValue].some(function(value){
                        return toIndex <= value;
                    })
                ){
                    result.push({
                        type: 'insert'
                        , index: fromIndex + fromOffset
                        , value: toValue
                    });
                    toIndex++
                    fromOffset++;
                }
                else{
                    result.push({
                        type: 'delete'
                        , index: toIndex
                        , value: fromValue
                    });
                    fromIndex++
                    fromOffset--;
                }
    
            }
    
        }
    
        return result
        /*
        add the remainder of the from array to the result as deletions
        */
        .concat(
            from
            .slice(fromIndex)
            .map(function(value, index){
                var result = {
                    type: 'delete'
                    , index: index + fromIndex + fromOffset
                    , value: value
                };
                fromOffset--;
                return result;
            })
        )
        /*
        add the remainder of the to array to the result as insertions
        */
        .concat(
            to
            .slice(toIndex)
            .map(function(value, index){
                var result = {
                    type: 'insert'
                    , index: index + toIndex
                    , value: value
                };
                fromOffset++;
                return result;
            })
        )
        ;
    
    }//diffArray
    

    Check out the repository for updates and unit tests: https://github.com/elmerbulthuis/diff-array

    0 讨论(0)
提交回复
热议问题