How to encode integers into other integers?

我的未来我决定 提交于 2019-12-01 07:36:37

问题


As a follow up to Store 2 4-bit numbers in 1 8 bit number, I am wondering if there is a generalization to it where you can store n x-bit numbers into m y-bit numbers. For example, maybe you can store 5 8-bit numbers into 3 15-bit numbers. Or maybe 2 8-bit numbers into 1 16-bit number, or 3 16-bit numbers into 2 32-bit numbers. Wondering what the implementation would be for encoding and decoding for a procedure that did this, or if it's not possible.

Something like:

function encode(i, s1, n, s2) {
  // i = array of input bytes
  // s1 = size of input bytes
  // n = number of output bytes
  // s2 = size of output bytes
}

function decode(i, s1, n, s2) {

}

Based on the answer below, I tried translating it to JavaScript but don't understand what anything really means and don't think it works.

function encode(input, inputSize, outputSize, callback) {
  var buffer = 0
  var bbits = 0
  var mask = (1 << outputSize) - 1
  while (bbits < outputSize) {
    buffer |= (input << bbits)
    bbits += inputSize
  }
  while (bbits >= outputSize) {
    callback(buffer & mask)
    buffer >>= outputSize
    bbits -= outputSize
  }
}

回答1:


You can't store 5 8-bit numbers into 3 15-bit numbers because 45 bits of information obviously don't fit in 40 bits of memory. You can only do that if the total number of variations is smaller than or equal to 2k with k is the number of bits used for encoding

If the width of every value is the same then here's my attempt, which stores the bits linearly in big endian. The encode function translates the bits in a byte array to another array that stores the full value in bitLength bits and the decode function does the reverse thing

function encode(input, bitLength) {
  // size of each array element must be greater than bitLength
  var output = new Uint16Array(Math.ceil(input.length * 8 / bitLength));
  var remainingBits = bitLength; // the remaining bits left for the current value

  // example when bitLength = 11
  //       start of current value
  //       │          next value
  //       │2345678901│
  // ...┆  ↓    ┆     ↓ ┆       ┆       ┆       ┆...      ← input bytes
  // ...₀₁₂₃₄₅₆₇⁰¹²³⁴⁵⁶⁷₀₁₂₃₄₅₆₇⁰¹²³⁴⁵⁶⁷₀₁₂₃₄₅₆₇ ...      ← bit position

  for (var inIdx = 0, outIdx = 0; inIdx < input.length; inIdx++) {
    if (remainingBits > 8) {
      output[outIdx] = (output[outIdx] << 8) | input[inIdx];
      remainingBits -= 8;               // 8 less bits to read
    } else if (remainingBits == 8) {    // finish current value
      output[outIdx] = (output[outIdx] << 8) | input[inIdx];
      remainingBits = bitLength; // next byte is the start of the next output value
      outIdx++;
    } else {
      var nextRemainingBits = 8 - remainingBits;
      output[outIdx] = (output[outIdx] << remainingBits)
                     | (input[inIdx] >>> nextRemainingBits);
      // the leftover bits (nextRemainingBits) in the input byte
      // go into the next output
      output[++outIdx] = input[inIdx] & ((1 << nextRemainingBits) - 1);
      // adjust the number of remaining bits, after we've read
      // `8 - remainingBits` bits for the current output
      remainingBits = bitLength - nextRemainingBits;
    }
  }
  return output;
}

function decode(input, bitLength) {
    const numBits = input.BYTES_PER_ELEMENT*8;
  var output = new Uint8Array(Math.ceil(input.length * bitLength / 8));
  var remainingInputBits = bitLength; // the remaining bits left for the current value
  
  // shift value to the most significant position
  for (var i = 0; i < input.length; i++)
    input[i] <<= numBits - bitLength;
  
  for (var inIdx = 0, outIdx = 0; outIdx < output.length; outIdx++) {
    if (remainingInputBits > 8) {
      output[outIdx] = input[inIdx] >>> (numBits - 8);  // get the top byte from input
      input[inIdx] <<= 8;   // shift the read bits out, leaving next bits on top
      remainingInputBits -= 8;
    } else if (remainingInputBits == 8) {
      output[outIdx] = input[inIdx] >>> (numBits - 8);
      remainingInputBits = bitLength;
      inIdx++;
    } else {
      remainingInputBits = 8 - remainingInputBits;
      output[outIdx] = input[inIdx] >>> (numBits - 8);
      inIdx++;
      output[outIdx] |= input[inIdx] >>> (numBits - remainingInputBits);
      input[inIdx] <<= remainingInputBits;
      remainingInputBits = bitLength - remainingInputBits;
    }
  }
  return output;
}

function pad(s, size) {
  s = (s >>> 0).toString(2);
  while (s.length < (size || 2)) { s = "0" + s; }
  return s;
}

function printBinaryArray(arr, padLength) {
    var str = "";
    for (var i = 0; i < arr.length; i++)
        str += pad(arr[i], padLength) + " ";
    console.log(str);
}

var inputBytes = 22;
var bitLength = 11; // each value is 11-bit long
var input = new Uint8Array(inputBytes);

window.crypto.getRandomValues(input);

var encodedData = encode(input, bitLength);
console.log("Input data", input);
printBinaryArray(input, 8);
console.log("Encoded data");
// console.log(encodedData);
printBinaryArray(encodedData, bitLength);

var decodedData = decode(encodedData, bitLength);
console.log("Decoded data", decodedData);
printBinaryArray(decodedData, 8);

for (var i = 0; i < input.length; i++)
    if (input[i] != decodedData[i])
        console.log("Wrong decoded data");
console.log("Data decoded successfully");

In fact the encoding and decoding procedures are just inverse of each other, so you can easily modify them to encode(input, inputBitWidth, outputBitWidth) that can be used for both encoding and decoding, just swap the input and output width

However for odd-sized values it's often better to pack the high bits together for easier access. For example 10-bit pixel formats often pack 4 pixels into a 5-byte group, with the 8 high bits of each pixel in the first 4 bytes, and the last byte contains the 2 low bits for them

See also

  • Extract 14-bit values from an array of bytes in C
  • anyone know 10-bit raw rgb? about omnivision
  • large array of 26-bit unsigned integers
  • packing 10 bit values into a byte stream with SIMD



回答2:


The general case is "streaming", which works no matter how badly misaligned everything gets. As usual, it pays for generality by being less efficient. It basically works by dropping input into a buffer until at least once chunk of output can be extracted from it and then extracting all the output, so something like this:

buffer = 0
bbits = 0
mask = (1 << outSize) - 1
while more input:
    while bbits < outSize:
        buffer |= in() << bbits
        bbits += inSize
    while bbits >= outSize:
        out(buffer & mask)
        buffer >>= outSize
        bbits -= outSize
if bbits != 0:
    out(buffer & mask)

Encoding and decoding is conceptually the same, but with the sizes swapped. When specialized to specific sizes of input and output chunk, one of the inner loops will not be a loop. An other packing order could be used too, outputting the high bits of a chunk of input before the low bits, whichever you like.

The size of the buffer must be at least outSize - 1 + inSize, to accommodate reading input after the maximum number of bits is left over after outputting from the buffer.

The sizes can even be changed during the procedure.



来源:https://stackoverflow.com/questions/51227279/how-to-encode-integers-into-other-integers

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!