1D -> 2D Array W/Normal Curve Sub-Array Lengths

问题

I am trying to break a 1D array into a 2D array where the sub-arrays are of varying lengths. This variance should follow the gaussian curve [or a mound shape]. So, say the 2D array variable we make is named gaussianCurve. The array within gaussianCurve[0] & gaussianCurve[n] would be of length 1, and gaussianCurve[n/2] would be a maximum provided by a parameter "maxArrayLength". This forces the number of gaussianCurve indexes to become variable.

Say I have the following pseudo-code:

// Pseudo-code sketch: turn a flat array into sub-arrays whose lengths rise to
// a peak in the middle and fall off again towards both ends.
function (oneDimentionalArray, maxArrayLength) {
// Example input: oneDimentionalArray is ["A","B","C","D","E","F","G","H","I","J","K"]
// Example input: maxArrayLength is 5
// Current behaviour is plain fixed-size batching:
// return [["A","B","C","D","E"],["F","G","H","I","J"],["K"]]
// Desired behaviour: mound-shaped sub-array lengths (1, 2, 5, 2, 1)
    gaussianCurve = []
    gaussianCurve.push(["A"])
    gaussianCurve.push(["B", "C"])
    gaussianCurve.push(["D", "E", "F", "G", "H"])
    gaussianCurve.push(["I", "J"])
    gaussianCurve.push(["K"])

    return  gaussianCurve
}

Why would I want such a thing? Progress bars.

They don’t show I am making progress immediately
1. This is because the first job must complete before the bar can move
They slow down at 95%+ and sometimes even stick at 100%
1. Just annoying

Any suggestions are welcome. I am just not seeing the answer in my mind's eye.

EDIT: I feel it was worded poorly, so I am rewording it.

...gaussianCurve[0].length & gaussianCurve[gaussianCurve.length - 1].length would be 1, and gaussianCurve[gaussianCurve.length/2].length would be up to "maxArrayLength".

INPUT:

function gaussianRefactor(["A","B","C","D","E","F","G","H","I","J","K"], 1)
function gaussianRefactor(["A","B","C","D","E","F","G","H","I","J","K"], 2)
function gaussianRefactor(["A","B","C","D","E","F","G","H","I","J","K"], 4)
function gaussianRefactor(["A","B","C","D","E","F","G","H","I","J","K"], 8)
function gaussianRefactor(["A","B","C","D","E","F","G","H","I","J","K"], 16)

OUTPUT:

[["A"],["B"],["C"],["D"],["E"],["F"],["G"],["H"],["I"],["J"],["K"]]
[["A"],["B","C"],["D","E"],["F","G"],["H","I"],["J"],["K"]]
[["A"],["B","C","D"],["E","F","G","H"],["I","J","K"]]
[["A"],["B","C","D","E","F","G","H","I"],["J","K"]]
[["A","B","C","D","E","F","G","H","I","J","K"]]

No inner array may exceed the length of maxArrayLength

回答1:

I gave it a quick shot and it seems to work. Some potential improvements:

Input checking for the functions
It places any possible leftover values into the middle bin. For even-numbered total bins it would benefit from some balancing. After that, it might be good to attempt to sort each bin based on the original index in the input data, since right now things can end up out of order. But if this is just to have non-linearly distributed jobs for a progress bar, the order may not matter.

/**
 * Evaluates the normal (Gaussian) probability density function at a point.
 *
 * @param {number} s - Standard deviation of the distribution.
 * @param {number} m - Mean of the distribution.
 * @param {number} x - Point at which the density is evaluated.
 * @returns {number} The density at x.
 */
function probability(s, m, x) {
	const deviation = x - m;
	const exponent = -Math.pow(deviation, 2) / (2 * Math.pow(s, 2));
	// Scale factor 1 / (s * sqrt(2 * pi)) in front of the exponential term.
	const scale = 1 / (Math.sqrt(2 * Math.PI) * s);
	return scale * Math.pow(Math.E, exponent);
}

/**
 * Splits `input` into `nBins` consecutive sub-arrays whose lengths follow a
 * bell curve: short bins at both ends, the longest bins in the middle.
 *
 * Each bin's target length is the normal density at that bin index (via
 * `probability(s, m, x)`) multiplied by the number of items and rounded down.
 * Items that rounding leaves unassigned are added to the front of the middle
 * bin, so the bins always concatenate back to the original sequence.
 *
 * The input array is not modified, and a bin never receives more items than
 * are actually left (no `undefined` padding when the density overshoots).
 *
 * @param {Array} input - Items to distribute; left untouched.
 * @param {number} nBins - Number of sub-arrays to produce (positive integer).
 * @returns {Array<Array>} `nBins` sub-arrays holding every input item once.
 * @throws {RangeError} If `nBins` is not a positive integer.
 */
function gassianArray(input, nBins) {
	if (!Number.isInteger(nBins) || nBins < 1) {
		throw new RangeError('nBins must be a positive integer');
	}

	const total = input.length;
	// Mean of the curve sits on the (possibly fractional) middle bin index.
	const m = (nBins - 1) / 2.0;

	// Widen the curve in steps of 0.1 until the outermost bin would get at
	// least one item, giving up once s exceeds sMax.
	const sMax = 10;
	const outerBinMinimum = 1 / total;
	let s = 0.1;
	while (s <= sMax && probability(s, m, 0) < outerBinMinimum) {
		s += 0.1;
	}

	// Desired bin sizes based on the probability density.
	const outputLengths = [];
	for (let b = 0; b < nBins; b++) {
		outputLengths.push(Math.floor(probability(s, m, b) * total));
	}

	const output = outputLengths.map(() => []);
	const midIndex = Math.floor(m);

	// [lo, hi) is the window of items not yet assigned to a bin.
	let lo = 0;
	let hi = total;

	// Left side: fill from the front of the input.
	for (let i = 0; i < midIndex; i++) {
		const take = Math.min(outputLengths[i], hi - lo);
		output[i] = input.slice(lo, lo + take);
		lo += take;
	}

	// Right side (including the middle bin): fill from the back of the input.
	for (let i = nBins - 1; i >= midIndex; i--) {
		const take = Math.min(outputLengths[i], hi - lo);
		output[i] = input.slice(hi - take, hi);
		hi -= take;
	}

	// Whatever remains goes in front of the middle bin, keeping input order.
	output[midIndex] = input.slice(lo, hi).concat(output[midIndex]);

	return output;
}

// Demo 1: 11 items spread over 5 bins.
var input = ["A","B","C","D","E","F","G","H","I","J","K"];
var n = 5;
console.log(gassianArray(input, n));
/*
Expected output (the middle bin also receives the two items left over after
rounding, in their original order):
[ [ 'A' ],
  [ 'B', 'C' ],
  [ 'D', 'E', 'F', 'G', 'H' ],
  [ 'I', 'J' ],
  [ 'K' ] ]
*/


// Demo 2: 26 items spread over 6 bins (even bin count, so the leftovers land
// in bin index 2, the lower of the two central bins).
var input = ["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"];
var n = 6;
console.log(gassianArray(input, n));
/*
Expected output:
[ [ 'A' ],
  [ 'B', 'C', 'D', 'E' ],
  [ 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N' ],
  [ 'O', 'P', 'Q', 'R', 'S', 'T', 'U' ],
  [ 'V', 'W', 'X', 'Y' ],
  [ 'Z' ] ]
*/

回答2:

Very interesting challenge. :)

I have played a bit and here is what I came up with:

/**
 * Cuts `n` items out of `arr`, starting at index `start`, and returns them.
 * The removed items are deleted from `arr` itself.
 *
 * @param {Array} arr - Array to cut from; modified in place on success.
 * @param {number} start - Index of the first item to remove.
 * @param {number} n - Number of items to remove.
 * @returns {?Array} The removed items, or null (with `arr` untouched) when
 *   `arr` holds fewer than `n` items.
 */
function chunk(arr, start, n) {
  const tooShort = arr.length < n;
  return tooShort ? null : arr.splice(start, n);
}

/**
 * Splits `arr` into consecutive chunks with the largest chunk (at most `max`
 * items) taken from around the middle of the array.
 *
 * The items on either side of that central chunk are then cut into chunks of
 * `max` items, working outwards from the centre; the chunk size is reduced
 * only when fewer items than the current size remain on that side.
 *
 * The input array is not modified.
 *
 * @param {Array} arr - Items to split; left untouched.
 * @param {number} max - Maximum chunk length (positive integer).
 * @returns {Array<Array>} Chunks in input order. When `arr` is empty or
 *   `max` exceeds its length, a single chunk holding a copy of `arr`.
 * @throws {RangeError} If `max` is not a positive integer while `arr` has
 *   at least `max` items (such values could previously loop forever).
 */
function gaussianArray(arr, max) {
  const len = arr.length;

  if (len === 0 || max > len) {
    return [arr.slice()];
  }

  // A zero, negative, NaN or fractional size can never drain the side arrays
  // below, so reject it up front instead of looping forever.
  if (!Number.isInteger(max) || max < 1) {
    throw new RangeError('max must be a positive integer');
  }

  // Extract middle. An odd max gives a fractional position; truncate it
  // explicitly so every slice below agrees on the same boundary.
  const mid = Math.floor(len / 2);
  const startIndex = Math.trunc(mid - (max / 2) + 1);
  const highest = arr.slice(startIndex, startIndex + max);

  const curve = [highest];

  // Private copies of both sides, middle excluded; chunk() consumes them.
  const leftArr = arr.slice(0, startIndex);
  const rightArr = arr.slice(startIndex + max);

  let leftMax = max;
  let rightMax = max;

  // Adds chunks from the left side, nearest the middle first.
  while (leftArr.length) {
    const leftChunk = chunk(leftArr, leftArr.length - leftMax, leftMax);

    if (leftChunk) {
      curve.unshift(leftChunk);
    } else {
      // Not enough items left for this size; try a smaller chunk.
      leftMax--;
    }
  }

  // Adds chunks from the right side, nearest the middle first.
  while (rightArr.length) {
    const rightChunk = chunk(rightArr, 0, rightMax);

    if (rightChunk) {
      curve.push(rightChunk);
    } else {
      rightMax--;
    }
  }

  return curve;
}

// Demo: print the chunking of the same 11 letters for several maximum sizes.
// A fresh array is built for every call, as in the individual calls this replaces.
for (const maxSize of [1, 2, 4, 8, 16]) {
  const letters = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"];
  console.log(JSON.stringify(gaussianArray(letters, maxSize)));
}

It is not exactly what you want, but I think it should be close to solve your progress bar problem...

回答3:

This was more in line with what I was thinking. I greatly dislike the way I am finding sigma. I know I should just reorder the formula to calculate it, but I have yet to get that to work. Anyway, here is the "answer": although it fails for the smaller arrays I provided as examples in the question, it successfully does what I needed it to do. If anyone has improvements they'd like to add, just let me know.

/**
 * Splits a flat array into consecutive sub-arrays whose lengths follow a
 * bell curve, with no sub-array longer than `srcMaxArrayLength`.
 *
 * The peak length is first lowered until it is at most about a third of the
 * input length. The curve is then widened (sigma grows by 5% per attempt)
 * until the rounded-down lengths add up to at least the number of items.
 * Finally the items are dealt out in order; positions whose length rounds
 * down to zero produce no sub-array.
 *
 * The input array is not modified.
 *
 * @param {Array} srcOneDimentionalArray - Items to split.
 * @param {number} srcMaxArrayLength - Upper bound for any sub-array length.
 * @returns {Array<Array>|boolean} The sub-arrays in input order, or `false`
 *   when no wide-enough curve is found within the attempt limit or when
 *   `srcMaxArrayLength` is not a number >= 1 (and the input does not already
 *   fit in a single sub-array).
 */
var gaussianRefactor = function(srcOneDimentionalArray, srcMaxArrayLength) {
  const finalArray = [];
  const sourceLength = srcOneDimentionalArray.length;

  // Everything fits in one sub-array.
  if (sourceLength <= srcMaxArrayLength) {
    finalArray.push(srcOneDimentionalArray);
    return finalArray;
  }

  // Length 1 leaves no room for a curve: one item per sub-array.
  if (srcMaxArrayLength === 1) {
    for (let k = 0; k < sourceLength; k++) {
      finalArray.push([srcOneDimentionalArray[k]]);
    }
    return finalArray;
  }

  // Zero, negative or NaN maximums cannot produce a curve (zero used to hang,
  // negatives and NaN silently dropped every item); report failure the same
  // way the search below does.
  if (!(srcMaxArrayLength >= 1)) {
    return false;
  }

  // Lower the peak until it is at most ~1/3 of the input (never below 1).
  let maxArrayLength = srcMaxArrayLength;
  while (maxArrayLength > 1 && maxArrayLength / sourceLength > 0.3333) {
    maxArrayLength--;
  }

  const standardChunkSize = sourceLength / maxArrayLength;
  // Number of curve positions to evaluate, forced to be odd.
  const tripledChunkCount = 3 * Math.floor(standardChunkSize);
  const predictedSize = tripledChunkCount % 2 === 0 ? tripledChunkCount + 1 : tripledChunkCount;
  const predictedSizeCenter = Math.ceil(predictedSize / 2);

  // Empirical starting value for sigma; widened below if it proves too narrow.
  let sigma = 0.2034185 * Math.pow(standardChunkSize, 1.963449);
  // Both factors are fixed from the initial sigma, so the value at the
  // centre position is exactly maxArrayLength regardless of later widening.
  const multiplicand = 1 / (Math.sqrt(sigma) * Math.sqrt(2 * Math.PI));
  const centerGauss = maxArrayLength / multiplicand;

  // Sub-array length for a curve position under the current sigma (mean 0).
  const lengthAt = function(position) {
    const fraction = (position - predictedSizeCenter) / Math.sqrt(sigma);
    const exponent = -0.5 * Math.pow(fraction, 2);
    const full = multiplicand * Math.exp(exponent);
    return Math.floor(full * centerGauss);
  };

  // Widen the curve until it can hold every item.
  const maxAttempts = Math.max(Math.ceil(sigma), 100);
  let wideEnough = false;
  for (let attempt = 0; attempt < maxAttempts && !wideEnough; attempt++) {
    let capacity = 0;
    for (let j = 0; j < predictedSize; j++) {
      capacity += lengthAt(j);
    }
    if (capacity >= sourceLength) {
      wideEnough = true;
    } else {
      sigma = sigma + sigma * 0.05;
    }
  }
  // Test the outcome itself rather than the attempt counter, so that a
  // success on the very last attempt is not reported as a failure.
  if (!wideEnough) {
    return false;
  }

  // Deal the items out in order according to the curve.
  let offset = 0;
  for (let i = 0; i < predictedSize && offset < sourceLength; i++) {
    const subArrayLength = lengthAt(i);
    if (subArrayLength < 1) {
      continue;
    }
    finalArray.push(srcOneDimentionalArray.slice(offset, offset + subArrayLength));
    offset += subArrayLength;
  }
  return finalArray;
};

INPUT

gaussianRefactor(["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"], 1)
gaussianRefactor(["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"], 2)
gaussianRefactor(["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"], 4)
gaussianRefactor(["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"], 8)
gaussianRefactor(["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"], 16)

OUTPUT

[["A"],["B"],["C"],["D"],["E"],["F"],["G"],["H"],["I"],["J"],["K"]]
[["A"],["B"],["C"],["D"],["E"],["F","G"],["H"],["I"],["J"],["K"]]
[["A"],["B"],["C","D"],["E","F","G"],["H","I"],["J"],["K"]]
[["A"],["B"],["C","D"],["E","F","G"],["H","I"],["J"],["K"]]
[["A","B","C","D","E","F","G","H","I","J","K"]]

来源：https://stackoverflow.com/questions/55943151/1d-2d-array-w-normal-curve-sub-array-lengths

标签

javascript

arrays

gaussian

splice

batching