How to encode integers into other integers?

后端 未结 2 708
情书的邮戳
情书的邮戳 2021-01-15 08:48

As a follow up to Store 2 4-bit numbers in 1 8 bit number, I am wondering if there is a generalization to it where you can store n x-bit numbers into m y-bit numbers. For ex

相关标签:
2条回答
  • 2021-01-15 09:39

    The general case is "streaming", which works no matter how badly misaligned everything gets. As usual, it pays for generality by being less efficient. It basically works by dropping input into a buffer until at least one chunk of output can be extracted from it, and then extracting all the available output, so something like this:

    buffer = 0
    bbits = 0
    mask = (1 << outSize) - 1
    while more input:
        while bbits < outSize:
            buffer |= in() << bbits
            bbits += inSize
        while bbits >= outSize:
            out(buffer & mask)
            buffer >>= outSize
            bbits -= outSize
    if bbits != 0:
        out(buffer & mask)
    

    Encoding and decoding are conceptually the same, but with the sizes swapped. When specialized to specific sizes of input and output chunks, one of the inner loops will not be a loop. Another packing order could be used too, outputting the high bits of a chunk of input before the low bits, whichever you like.

    The size of the buffer must be at least outSize - 1 + inSize, to accommodate reading input after the maximum number of bits is left over after outputting from the buffer.

    The sizes can even be changed during the procedure.

    0 讨论(0)
  • 2021-01-15 09:42

    You can't store 3 15-bit numbers into 5 8-bit numbers because 45 bits of information obviously don't fit in 40 bits of memory. You can only do that if the total number of variations is smaller than or equal to 2^k, where k is the number of bits used for encoding.

    If the width of every value is the same then here's my attempt, which stores the bits linearly in big endian. The encode function translates the bits in a byte array to another array that stores the full value in bitLength bits and the decode function does the reverse thing

    /**
     * Packs a byte sequence into `bitLength`-bit values, most significant bit first.
     *
     * The input is read as one continuous big-endian bit stream and cut into
     * consecutive groups of `bitLength` bits; each group becomes one element of
     * the result. Example for bitLength = 12: bytes AB CD EF -> values ABC DEF.
     *
     * When the stream does not end on a group boundary, the last group is
     * zero-padded on its low (right) side, so the real bits stay in the high
     * positions where `decode` expects them.
     *
     * @param {ArrayLike<number>} input - Bytes to pack; only the low 8 bits of
     *   each element are used.
     * @param {number} bitLength - Width of each output value in bits (1..16,
     *   because the result is stored in a Uint16Array).
     * @returns {Uint16Array} ceil(input.length * 8 / bitLength) packed values.
     * @throws {RangeError} If bitLength is not an integer between 1 and 16.
     */
    function encode(input, bitLength) {
      if (!Number.isInteger(bitLength) || bitLength < 1 || bitLength > 16) {
        throw new RangeError("bitLength must be an integer between 1 and 16");
      }

      const output = new Uint16Array(Math.ceil(input.length * 8 / bitLength));
      const valueMask = (1 << bitLength) - 1;
      let pending = 0;     // bits read from input but not yet emitted, right-aligned
      let pendingBits = 0; // number of valid bits in `pending`
      let outIdx = 0;

      for (let inIdx = 0; inIdx < input.length; inIdx++) {
        // Append the next byte below the bits that are still pending. At most
        // (bitLength - 1) + 8 <= 23 bits are held, so 32-bit operators are safe.
        pending = (pending << 8) | (input[inIdx] & 0xff);
        pendingBits += 8;

        // Emit every complete group; a byte can finish several groups when
        // bitLength is smaller than 8.
        while (pendingBits >= bitLength) {
          pendingBits -= bitLength;
          output[outIdx++] = (pending >>> pendingBits) & valueMask;
        }

        // Drop the bits that were just emitted.
        pending &= (1 << pendingBits) - 1;
      }

      // Flush a trailing partial group, left-aligned and zero-padded.
      if (pendingBits > 0) {
        output[outIdx] = (pending << (bitLength - pendingBits)) & valueMask;
      }
      return output;
    }
    
    /**
     * Unpacks `bitLength`-bit values back into bytes, most significant bit first.
     *
     * The low `bitLength` bits of every input element are concatenated into one
     * big-endian bit stream, which is then cut into bytes. If the stream does not
     * end on a byte boundary, the last byte is zero-padded on its low side.
     * Example for bitLength = 12: values ABC DEF -> bytes AB CD EF.
     *
     * The input array is only read; it is never modified.
     *
     * @param {ArrayLike<number>} input - Packed values; bits above `bitLength`
     *   in an element are ignored.
     * @param {number} bitLength - Width of each packed value in bits (1..32).
     * @returns {Uint8Array} ceil(input.length * bitLength / 8) bytes.
     * @throws {RangeError} If bitLength is not an integer between 1 and 32.
     */
    function decode(input, bitLength) {
      if (!Number.isInteger(bitLength) || bitLength < 1 || bitLength > 32) {
        throw new RangeError("bitLength must be an integer between 1 and 32");
      }

      const output = new Uint8Array(Math.ceil(input.length * bitLength / 8));
      let outIdx = 0;
      let filledBits = 0; // bits already written into output[outIdx]

      for (let inIdx = 0; inIdx < input.length; inIdx++) {
        const value = input[inIdx];
        let bitsLeft = bitLength; // bits of `value` not yet copied out

        while (bitsLeft > 0) {
          // Move as many bits as fit into the free part of the current byte.
          const take = Math.min(bitsLeft, 8 - filledBits);
          // Highest not-yet-consumed `take` bits of the value, right-aligned.
          const chunk = (value >>> (bitsLeft - take)) & ((1 << take) - 1);
          // Place them directly below the bits already present in the byte.
          output[outIdx] |= chunk << (8 - filledBits - take);

          bitsLeft -= take;
          filledBits += take;
          if (filledBits === 8) {
            outIdx++;
            filledBits = 0;
          }
        }
      }
      return output;
    }
    
    /**
     * Formats a number as a binary string, left-padded with zeros.
     *
     * The value is first converted to an unsigned 32-bit integer (`>>> 0`), so
     * negative numbers are shown as their 32-bit two's-complement pattern.
     *
     * @param {number} s - Value to format.
     * @param {number} [size] - Minimum width of the result; any falsy value
     *   (including 0 and undefined) falls back to 2.
     * @returns {string} Binary digits, at least `size` characters long.
     */
    function pad(s, size) {
      return (s >>> 0).toString(2).padStart(size || 2, "0");
    }
    
    /**
     * Logs every element of `arr` in binary on a single console line.
     *
     * Each element is formatted with `pad(element, padLength)` and followed by
     * one space, so the printed line ends with a trailing space.
     *
     * @param {ArrayLike<number>} arr - Values to print.
     * @param {number} padLength - Minimum number of binary digits per element.
     */
    function printBinaryArray(arr, padLength) {
      const line = Array.from(arr, (value) => pad(value, padLength) + " ").join("");
      console.log(line);
    }
    
    // Round-trip demo: pack random bytes into 11-bit values, unpack them again,
    // and report whether the decoded bytes match the original input.
    const inputBytes = 22; // 22 bytes = 176 bits = exactly 16 values of 11 bits
    const bitLength = 11;  // each value is 11 bits long
    const input = new Uint8Array(inputBytes);

    // Fill the input with random bytes (`window.crypto` is a browser API).
    window.crypto.getRandomValues(input);

    const encodedData = encode(input, bitLength);
    console.log("Input data", input);
    printBinaryArray(input, 8);
    console.log("Encoded data");
    printBinaryArray(encodedData, bitLength);

    const decodedData = decode(encodedData, bitLength);
    console.log("Decoded data", decodedData);
    printBinaryArray(decodedData, 8);

    // Print exactly one verdict: success only when every input byte survived
    // the round trip, otherwise the failure message.
    const isRoundTripExact = input.every((byte, i) => byte === decodedData[i]);
    console.log(isRoundTripExact ? "Data decoded successfully" : "Wrong decoded data");

    In fact the encoding and decoding procedures are just inverse of each other, so you can easily modify them to encode(input, inputBitWidth, outputBitWidth) that can be used for both encoding and decoding, just swap the input and output width

    However for odd-sized values it's often better to pack the high bits together for easier access. For example 10-bit pixel formats often pack 4 pixels into a 5-byte group, with the 8 high bits of each pixel in the first 4 bytes, and the last byte contains the 2 low bits for them

    See also

    • Extract 14-bit values from an array of bytes in C
    • anyone know 10-bit raw rgb? about omnivision
    • large array of 26-bit unsigned integers
    • packing 10 bit values into a byte stream with SIMD
    0 讨论(0)
提交回复
热议问题