Reading bytes from a JavaScript string

前端 未结 9 1298
悲哀的现实
悲哀的现实 2020-11-28 02:46

I have a string containing binary data in JavaScript. Now I want to read, for example, an integer from it. So I get the first 4 characters, use charCodeAt, do s

相关标签:
9条回答
  • 2020-11-28 03:18

    I believe that you can do this with relatively simple bit operations:

    /**
     * Splits every UTF-16 code unit of a string into bytes.
     *
     * A code unit of 0xFF or less contributes a single byte; a larger code
     * unit contributes two bytes, high byte first (big-endian).
     *
     * @param {string} str - String whose code units are read with charCodeAt.
     * @returns {number[]} Array of byte values (0-255); empty for "".
     */
    function stringToBytes ( str ) {
      var re = [];
      for (var i = 0; i < str.length; i++ ) {
        var ch = str.charCodeAt(i);  // always in the range 0..65535
        // Push straight onto the result instead of concatenating a temporary
        // per-character array: concat copied the whole result on every
        // iteration, which made the loop quadratic in the string length.
        if ( ch > 0xFF ) {
          re.push( ch >> 8 );        // high byte first
        }
        re.push( ch & 0xFF );        // low byte
      }
      return re;
    }
    
    // Example: "A", "B" and "C" yield one byte each; \u1242 and \u4123 yield two bytes each.
    stringToBytes( "A\u1242B\u4123C" );  // [65, 18, 66, 66, 65, 35, 67]
    

    It should be a simple matter to sum the output up by reading the byte array as if it were memory and adding it up into larger numbers:

    /**
     * Reads four consecutive array entries as a big-endian 32-bit integer.
     *
     * The top byte is shifted with <<, which works on signed 32-bit values,
     * so the result is negative when arr[offs] is 0x80 or greater.
     *
     * @param {number[]} arr - Array of byte values.
     * @param {number} offs - Index of the most significant byte.
     * @returns {number} The combined (signed) value.
     */
    function getIntAt ( arr, offs ) {
      var byte3 = arr[offs + 0] << 24;
      var byte2 = arr[offs + 1] << 16;
      var byte1 = arr[offs + 2] << 8;
      var byte0 = arr[offs + 3];
      return byte3 + byte2 + byte1 + byte0;
    }
    
    /**
     * Reads two consecutive array entries as a big-endian 16-bit value.
     *
     * @param {number[]} arr - Array of byte values.
     * @param {number} offs - Index of the high byte.
     * @returns {number} The high byte shifted left by 8 plus the low byte.
     */
    function getWordAt ( arr, offs ) {
      var highPart = arr[offs + 0] << 8;
      var lowPart = arr[offs + 1];
      return highPart + lowPart;
    }
    
    // Example: rebuild the escape-sequence text of the second character from its two bytes.
    '\\u' + getWordAt( stringToBytes( "A\u1242" ), 1 ).toString(16);  // "\\u1242" (a backslash, "u", then the hex digits)
    
    0 讨论(0)
  • 2020-11-28 03:22

    Here are two methods: one encodes a string into an array of UTF-8 bytes, and the other decodes such an array back into a string.

    // Namespace object holding the UTF-8 encode/decode helpers below.
    var utf8 = {};
    
    /**
     * Encodes a string as an array of UTF-8 byte values.
     *
     * ASCII code units are copied directly; everything else is
     * percent-encoded with encodeURIComponent and the hex bytes are parsed
     * back out of the "%XX%YY" result.
     *
     * @param {string} str - String to encode.
     * @returns {number[]} UTF-8 bytes (0-255).
     * @throws {URIError} If str contains an unpaired surrogate.
     */
    utf8.toByteArray = function(str) {
        var byteArray = [];
        for (var i = 0; i < str.length; i++) {
            var code = str.charCodeAt(i);
            if (code <= 0x7F) {
                byteArray.push(code);
                continue;
            }
            var chunk = str.charAt(i);
            // A character outside the BMP is stored as a surrogate pair.
            // encodeURIComponent throws on half a pair, so both halves
            // have to be encoded together.
            if (code >= 0xD800 && code <= 0xDBFF && i + 1 < str.length) {
                var next = str.charCodeAt(i + 1);
                if (next >= 0xDC00 && next <= 0xDFFF) {
                    chunk += str.charAt(i + 1);
                    i++;
                }
            }
            // "%D0%94" -> "D0%94" -> ["D0", "94"]
            var h = encodeURIComponent(chunk).slice(1).split('%');
            for (var j = 0; j < h.length; j++) {
                byteArray.push(parseInt(h[j], 16));
            }
        }
        return byteArray;
    };
    
    /**
     * Decodes an array of UTF-8 byte values into a string.
     *
     * Builds a percent-encoded string and hands it to decodeURIComponent.
     *
     * @param {number[]} byteArray - UTF-8 bytes (0-255).
     * @returns {string} The decoded string.
     * @throws {URIError} If the bytes are not valid UTF-8.
     */
    utf8.parse = function(byteArray) {
        var str = '';
        for (var i = 0; i < byteArray.length; i++) {
            var b = byteArray[i];
            if (b <= 0x7F) {
                // A literal "%" must itself be escaped, otherwise
                // decodeURIComponent would treat it as the start of an escape.
                str += b === 0x25 ? "%25" : String.fromCharCode(b);
            } else {
                str += "%" + b.toString(16).toUpperCase();
            }
        }
        return decodeURIComponent(str);
    };
    
    // sample
    // Sample: encode a Cyrillic string to bytes, then decode it back.
    var str = "Да!";
    var ba = utf8.toByteArray(str);
    alert(ba);             // 208, 148, 208, 176, 33  (two bytes per Cyrillic letter, one for "!")
    alert(ba.length);      // 5
    alert(utf8.parse(ba)); // Да!
    
    0 讨论(0)
  • 2020-11-28 03:28

    Borgar's solution works perfectly. In case you want a more concrete implementation, you may want to have a look at the BinaryReader class from vjeux (which, for the record, is based on the binary-parser class from Jonas Raoni Soares Silva).

    0 讨论(0)
  • 2020-11-28 03:36

    Borgar's answer seems correct.

    Just wanted to clarify one point. JavaScript performs bitwise operations on 32-bit signed integers, where the most significant (left-most) bit is the sign bit, i.e.:

    // Top bit of the first byte clear: the result is positive.
    getIntAt([0x7f,0,0,0],0).toString(16)  //  "7f000000"
    
    // Top bit of the first byte set: << 24 produces a negative 32-bit value.
    getIntAt([0x80,0,0,0],0).toString(16)  // "-80000000"
    

    However, for octet-data processing (e.g., network streams), you usually want the unsigned-int representation. This can be accomplished by appending '>>> 0' (a zero-fill right shift), which makes JavaScript reinterpret the 32-bit result as an unsigned value.

    /**
     * Reads four consecutive array entries as a big-endian unsigned 32-bit
     * integer.
     *
     * @param {number[]} arr - Array of byte values.
     * @param {number} offs - Index of the most significant byte.
     * @returns {number} Value in the range 0..4294967295.
     */
    function getUIntAt ( arr, offs ) {
      // The additions are evaluated first (+ binds tighter than >>>), giving
      // a possibly negative sum.
      var signedSum = (arr[offs + 0] << 24) +
                      (arr[offs + 1] << 16) +
                      (arr[offs + 2] << 8) +
                      arr[offs + 3];
      // Zero-fill shift by 0 reinterprets the 32-bit pattern as unsigned.
      return signedSum >>> 0;
    }
    
    // Same input that came out negative with the signed read is now positive.
    getUIntAt([0x80,0,0,0],0).toString(16)   // "80000000"
    
    0 讨论(0)
  • 2020-11-28 03:36

    I'm going to assume for a second that your objective is to read arbitrary bytes from a string. My first suggestion would be to make your string representation a hexadecimal representation of the binary data.

    You can read the values using conversions to numbers from hex:

    // Sizes used to convert a byte count into a count of hex digits.
    var BITS_PER_BYTE = 8;
    var BITS_PER_CHAR = 4;  // one hexadecimal digit encodes 4 bits
    
    /**
     * Parses the first numBytes bytes of a hex string as one number.
     *
     * @param {string} hexString - Hexadecimal digits, two per byte.
     * @param {number} numBytes - How many bytes to read from the front.
     * @returns {number} The parsed value, or NaN when no hex digits could be
     *     read (for example an empty string or numBytes of 0).
     */
    function readBytes(hexString, numBytes) {
        var numChars = numBytes * (BITS_PER_BYTE / BITS_PER_CHAR);
        // parseInt already returns a number, so no extra conversion is needed.
        return parseInt( hexString.substring(0, numChars), 16 );
    }
    
    /**
     * Returns the hex string with its first numBytes bytes dropped.
     *
     * @param {string} hexString - Hexadecimal digits, two per byte.
     * @param {number} numBytes - How many bytes to drop from the front.
     * @returns {string} The remaining hex digits.
     */
    function removeBytes(hexString, numBytes) {
        return hexString.slice( numBytes * (BITS_PER_BYTE / BITS_PER_CHAR) );
    }
    

    The functions can then be used to read whatever you want:

    // Sample: read a value from the front of a hex string, then drop what was read.
    var hex = '4ef2c3382fd';
    alert( 'We had: ' + hex );
    
    // Parse the first two bytes (hex digits "4ef2") and show the value in binary.
    var intVal = readBytes(hex,2);
    alert( 'Two bytes: ' + intVal.toString(2) );
    
    // Advance past the two bytes that were just read.
    hex = removeBytes(hex,2);
    alert( 'Now we have: ' + hex );
    

    You can then interpret the byte string however you want.

    Hope this helps! Cheers!

    0 讨论(0)
  • 2020-11-28 03:37

    How did you get the binary data into the string in the first place? How the binary data gets encoded into a string is an IMPORTANT consideration, and you need an answer to that question before you can proceed.

    One way I know of to get binary data into a string, is to use the XHR object, and set it to expect UTF-16.

    Once it's in utf-16, you can retrieve 16-bit numbers from the string using "....".charCodeAt(0)

    which will be a number between 0 and 65535

    Then, if you like, you can convert that number into two numbers between 0 and 255 like this:

    // NOTE(review): mynumber is not defined in this snippet; presumably the
    // 0..65535 value returned by charCodeAt above. Confirm against the caller.
    var leftByte = mynumber>>>8;   // everything above the low 8 bits (the high byte of a 16-bit value)
    var rightByte = mynumber&255;  // the low 8 bits
    
    0 讨论(0)
提交回复
热议问题