Calculating median - javascript

对着背影说爱祢 提交于 2019-12-06 17:19:23

问题


I've been trying to calculate median but still I've got some mathematical issues I guess as I couldn't get the correct median value and couldn't figure out why. Here's the code;

class StatsCollector {

    constructor() {
        this.inputNumber = 0;
        this.average = 0;

        this.timeout = 19000;

        this.frequencies = new Map();
        for (let i of Array(this.timeout).keys()) {
            this.frequencies.set(i, 0);
        }
    }

    pushValue(responseTimeMs) {
        let req = responseTimeMs;
        if (req > this.timeout) {
            req = this.timeout;
        }

        this.average = (this.average * this.inputNumber + req) / (this.inputNumber + 1);

        console.log(responseTimeMs / 1000)
        let groupIndex = Math.floor(responseTimeMs / 1000);
        this.frequencies.set(groupIndex, this.frequencies.get(groupIndex) + 1);

        this.inputNumber += 1;
    }

    getMedian() {
        let medianElement = 0;
        if (this.inputNumber <= 0) {
            return 0;
        }
        if (this.inputNumber == 1) {
            return this.average
        }
        if (this.inputNumber == 2) {
            return this.average
        }
        if (this.inputNumber > 2) {
            medianElement = this.inputNumber / 2;
        }

        let minCumulativeFreq = 0;
        let maxCumulativeFreq = 0;
        let cumulativeFreq = 0;
        let freqGroup = 0;
        for (let i of Array(20).keys()) {
            if (medianElement <= cumulativeFreq + this.frequencies.get(i)) {
                minCumulativeFreq = cumulativeFreq;
                maxCumulativeFreq = cumulativeFreq + this.frequencies.get(i);
                freqGroup = i;
                break;
            }
            cumulativeFreq += this.frequencies.get(i);
        }

        return (((medianElement - minCumulativeFreq) / (maxCumulativeFreq - minCumulativeFreq)) + (freqGroup)) * 1000;
    }

    getAverage() {
        return this.average;
    }

}

Here's the snapshot of the results when I enter the values of

342,654,987,1093,2234,6243,7087,20123

The correct result should be;

Median: 1663.5


回答1:


Change your median method to this:

function median(values){
  if(values.length ===0) return 0;

  values.sort(function(a,b){
    return a-b;
  });

  var half = Math.floor(values.length / 2);

  if (values.length % 2)
    return values[half];

  return (values[half - 1] + values[half]) / 2.0;
}

fiddle




回答2:


The solutions above - sort then find middle - are fine, but slow on large data sets. Sorting the data first has a complexity of n x log(n).

There is a faster median algorithm, which consists in segregating the array in two according to a pivot, then looking for the median in the larger set. Here is some javascript code, but here is a more detailed explanation

// Trying some array
alert(quickselect_median([7,3,5])); // 2300,5,4,0,123,2,76,768,28]));

function quickselect_median(arr) {
   const L = arr.length, halfL = L/2;
   if (L % 2 == 1)
      return quickselect(arr, halfL);
   else
      return 0.5 * (quickselect(arr, halfL - 1) + quickselect(arr, halfL));
}

function quickselect(arr, k) {
   // Select the kth element in arr
   // arr: List of numerics
   // k: Index
   // return: The kth element (in numerical order) of arr
   if (arr.length == 1)
      return arr[0];
   else {
      const pivot = arr[0];
      const lows = arr.filter((e)=>(e<pivot));
      const highs = arr.filter((e)=>(e>pivot));
      const pivots = arr.filter((e)=>(e==pivot));
      if (k < lows.length) // the pivot is too high
         return quickselect(lows, k);
      else if (k < lows.length + pivots.length)// We got lucky and guessed the median
         return pivot;
      else // the pivot is too low
         return quickselect(highs, k - lows.length - pivots.length);
   }
}

Astute readers will notice a few things:

  1. I simply transliterated Russel Cohen's Python solution into JS, so all kudos to him.
  2. There are several small optimisations worth doing, but there's parallelisation worth doing, and the code as is is easier to change in either a quicker single-threaded, or quicker multi-threaded, version.
  3. This is the average linear time algorithm, there is more efficient a deterministic linear time version, see Russel's post for details, including performance data.

ADDITION 19 Sept. 2019:

One comment asks whether this is worth doing in javascript. I ran the code in JSPerf and it gives interesting results.

  • if the array has an odd number of elements (one figure to find), sorting is 20% slower that this "fast median" proposition.

  • if there is an even number of elements, the "fast" algorithm is 40% slower, because it filters through the data twice, to find elements number k and k+1 to average. It is possible to write a version of fast median that doesn't do this.

The test used rather small arrays (29 elements in the jsperf test). The effect appears to be more pronounced as arrays get larger. A more general point to make is: it shows these kinds of optimisations are worth doing in Javascript. An awful lot of computation is done in JS, including with large amounts of data (think of dashboards, spreadsheets, data visualisations), and in systems with limited resources (think of mobile and embedded computing).




回答3:


`

var arr = {  
  max: function(array) {
    return Math.max.apply(null, array);
  },

  min: function(array) {
    return Math.min.apply(null, array);
  },

  range: function(array) {
    return arr.max(array) - arr.min(array);
  },

  midrange: function(array) {
    return arr.range(array) / 2;
  },

  sum: function(array) {
    var num = 0;
    for (var i = 0, l = array.length; i < l; i++) num += array[i];
    return num;
  },

  mean: function(array) {
    return arr.sum(array) / array.length;
  },

  median: function(array) {
    array.sort(function(a, b) {
      return a - b;
    });
    var mid = array.length / 2;
    return mid % 1 ? array[mid - 0.5] : (array[mid - 1] + array[mid]) / 2;
  },

  modes: function(array) {
    if (!array.length) return [];
    var modeMap = {},
      maxCount = 1,
      modes = [array[0]];

    array.forEach(function(val) {
      if (!modeMap[val]) modeMap[val] = 1;
      else modeMap[val]++;

      if (modeMap[val] > maxCount) {
        modes = [val];
        maxCount = modeMap[val];
      }
      else if (modeMap[val] === maxCount) {
        modes.push(val);
        maxCount = modeMap[val];
      }
    });
    return modes;
  },

  variance: function(array) {
    var mean = arr.mean(array);
    return arr.mean(array.map(function(num) {
      return Math.pow(num - mean, 2);
    }));
  },

  standardDeviation: function(array) {
    return Math.sqrt(arr.variance(array));
  },

  meanAbsoluteDeviation: function(array) {
    var mean = arr.mean(array);
    return arr.mean(array.map(function(num) {
      return Math.abs(num - mean);
    }));
  },

  zScores: function(array) {
    var mean = arr.mean(array);
    var standardDeviation = arr.standardDeviation(array);
    return array.map(function(num) {
      return (num - mean) / standardDeviation;
    });
  }
};

`




回答4:


Here's another solution:

function median(numbers) {
    const sorted = numbers.slice().sort((a, b) => a - b);
    const middle = Math.floor(sorted.length / 2);

    if (sorted.length % 2 === 0) {
        return (sorted[middle - 1] + sorted[middle]) / 2;
    }

    return sorted[middle];
}

console.log(median([4, 5, 7, 1, 33]));



回答5:


For better performance in terms of time complexity, use MaxHeap - MinHeap to find the median of stream of array.



来源:https://stackoverflow.com/questions/45309447/calculating-median-javascript

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!