I have a string containing binary data in JavaScript. Now I want to read, for example, an integer from it. So I get the first 4 characters, use charCodeAt
, do s
I believe that you can do this with relatively simple bit operations:
/**
 * Converts a string into a flat array of byte values (0-255).
 *
 * Every UTF-16 code unit is emitted most-significant byte first, using only
 * as many bytes as its value needs: code units up to 0xFF produce one byte,
 * larger ones produce two. The output is therefore variable-width.
 *
 * @param {string} str - The string to convert.
 * @returns {number[]} The byte values, in string order.
 */
function stringToBytes ( str ) {
  var ch, st, re = [];
  for (var i = 0; i < str.length; i++ ) {
    ch = str.charCodeAt(i); // code unit at this position
    st = []; // per-character stack, filled low byte first
    do {
      st.push( ch & 0xFF ); // keep the lowest byte
      ch = ch >> 8; // drop it and move on to the next byte up
    }
    while ( ch );
    // Pop the stack so the high byte is written first. Appending in place
    // avoids copying the whole result array for every character, which
    // rebuilding it with concat() would do.
    while ( st.length ) {
      re.push( st.pop() );
    }
  }
  return re;
}
stringToBytes( "A\u1242B\u4123C" ); // [65, 18, 66, 66, 65, 35, 67]
It should be a simple matter to sum the output up by reading the byte array as if it were memory and adding it up into larger numbers:
/**
 * Reads four consecutive array entries, starting at `offs`, as one
 * big-endian 32-bit value (first entry is the most significant byte).
 *
 * The shifts operate on signed 32-bit integers, so a first byte of 0x80 or
 * above produces a negative result.
 *
 * @param {number[]} arr - Array of byte values.
 * @param {number} offs - Index of the most significant byte.
 * @returns {number} The combined value.
 */
function getIntAt ( arr, offs ) {
  var shifts = [24, 16, 8];
  var total = 0;
  for (var k = 0; k < shifts.length; k++) {
    total += arr[offs + k] << shifts[k];
  }
  // The least significant byte needs no shift.
  return total + arr[offs + shifts.length];
}
/**
 * Reads two consecutive array entries, starting at `offs`, as one
 * big-endian 16-bit value (first entry is the high byte).
 *
 * @param {number[]} arr - Array of byte values.
 * @param {number} offs - Index of the high byte.
 * @returns {number} The combined value.
 */
function getWordAt ( arr, offs ) {
  var high = arr[offs + 0] << 8;
  var low = arr[offs + 1];
  return high + low;
}
// Example: bytes 1-2 of the converted string hold the second character;
// recombine them and print the value as hex digits after a literal "\u".
'\\u' + getWordAt( stringToBytes( "A\u1242" ), 1 ).toString(16); // "1242"
Here are two methods for encoding a string to a UTF-8 byte array and decoding it back.
/**
 * Helpers for converting between JavaScript strings and arrays of UTF-8
 * byte values, built on encodeURIComponent / decodeURIComponent.
 */
var utf8 = {};

/**
 * Encodes a string as an array of UTF-8 byte values.
 *
 * @param {string} str - The string to encode.
 * @returns {number[]} The UTF-8 bytes (each 0-255).
 * @throws {URIError} If `str` contains an unpaired surrogate.
 */
utf8.toByteArray = function(str) {
  var byteArray = [];
  for (var i = 0; i < str.length; i++) {
    var code = str.charCodeAt(i);
    if (code <= 0x7F) {
      // ASCII is a single byte equal to its code.
      byteArray.push(code);
      continue;
    }
    var ch = str.charAt(i);
    // Characters outside the BMP occupy two UTF-16 code units.
    // encodeURIComponent throws URIError when handed only half of such a
    // pair, so a high surrogate is encoded together with its low surrogate.
    if (code >= 0xD800 && code <= 0xDBFF && i + 1 < str.length) {
      var next = str.charCodeAt(i + 1);
      if (next >= 0xDC00 && next <= 0xDFFF) {
        ch += str.charAt(i + 1);
        i++;
      }
    }
    // Non-ASCII input comes back as "%XX%YY..."; drop the leading "%" and
    // split to get one hex pair per byte.
    var h = encodeURIComponent(ch).substr(1).split('%');
    for (var j = 0; j < h.length; j++) {
      byteArray.push(parseInt(h[j], 16));
    }
  }
  return byteArray;
};

/**
 * Decodes an array of UTF-8 byte values back into a string.
 *
 * @param {number[]} byteArray - The UTF-8 bytes (each 0-255).
 * @returns {string} The decoded string.
 * @throws {URIError} If the bytes are not a valid UTF-8 sequence.
 */
utf8.parse = function(byteArray) {
  var str = '';
  for (var i = 0; i < byteArray.length; i++) {
    var b = byteArray[i];
    if (b <= 0x7F) {
      // A literal "%" must be escaped so it is not read as the start of a
      // percent sequence; every other ASCII byte passes through unchanged.
      str += b === 0x25 ? "%25" : String.fromCharCode(b);
    } else {
      str += "%" + b.toString(16).toUpperCase();
    }
  }
  return decodeURIComponent(str);
};
// Sample: round-trip a string through utf8.toByteArray and utf8.parse.
var str = "Да!";
var ba = utf8.toByteArray(str);
alert(ba); // 208, 148, 208, 176, 33 (two bytes per Cyrillic letter, one for "!")
alert(ba.length); // 5
alert(utf8.parse(ba)); // Да! (decoding restores the original text)
Borgar's solution works perfectly. In case you want a more concrete implementation, you may want to have a look at the BinaryReader class from vjeux (which, for the record, is based on the binary-parser class from Jonas Raoni Soares Silva).
Borgar's answer seems correct.
Just wanted to clarify one point. JavaScript performs bitwise operations on 32-bit signed integers, where the most significant (left-most) bit is the sign bit. For example:
// Once the first byte reaches 0x80, its top bit lands in the sign bit of the
// 32-bit result, so the value is reported as negative.
getIntAt([0x7f,0,0,0],0).toString(16) // "7f000000"
getIntAt([0x80,0,0,0],0).toString(16) // "-80000000"
However, for octet-data processing (e.g., network streams), you usually want the 'unsigned int' representation. This can be accomplished by appending '>>> 0' (a zero-fill right shift by zero bits), which makes JavaScript reinterpret the 32-bit result as unsigned.
/**
 * Reads four consecutive array entries, starting at `offs`, as one
 * big-endian unsigned 32-bit value (first entry is the most significant
 * byte).
 *
 * @param {number[]} arr - Array of byte values.
 * @param {number} offs - Index of the most significant byte.
 * @returns {number} The combined value, in the range 0 to 4294967295.
 */
function getUIntAt ( arr, offs ) {
  var top = arr[offs + 0] << 24;
  var upper = arr[offs + 1] << 16;
  var lower = arr[offs + 2] << 8;
  var signedSum = top + upper + lower + arr[offs + 3];
  // A zero-fill shift by zero bits reinterprets the 32-bit pattern as unsigned.
  return signedSum >>> 0;
}
getUIntAt([0x80,0,0,0],0).toString(16); // "80000000"
I'm going to assume for a second that your objective is to read arbitrary bytes from a string. My first suggestion would be to make your string representation a hexadecimal representation of the binary data.
You can read the values using conversions to numbers from hex:
var BITS_PER_BYTE = 8;
// One hexadecimal digit encodes four bits, so a byte spans two characters.
var BITS_PER_CHAR = 4;

/**
 * Parses the first `numBytes` bytes of a hex string as a single number.
 *
 * @param {string} hexString - Hexadecimal digits, two per byte.
 * @param {number} numBytes - How many leading bytes to read.
 * @returns {number} The parsed value, or NaN when there are no digits to read.
 */
function readBytes(hexString, numBytes) {
  return parseInt( hexString.substr(0, numBytes * (BITS_PER_BYTE/BITS_PER_CHAR) ), 16 );
}

/**
 * Returns the hex string with its first `numBytes` bytes removed.
 *
 * @param {string} hexString - Hexadecimal digits, two per byte.
 * @param {number} numBytes - How many leading bytes to drop.
 * @returns {string} The remaining hex digits.
 */
function removeBytes(hexString, numBytes) {
  return hexString.substr( numBytes * (BITS_PER_BYTE/BITS_PER_CHAR) );
}
The functions can then be used to read whatever you want:
// Usage example: read two bytes from the front of a hex string, then drop them.
var hex = '4ef2c3382fd';
alert( 'We had: ' + hex );
// The first two bytes are the first four hex digits.
var intVal = readBytes(hex,2);
// toString(2) shows the value in binary.
alert( 'Two bytes: ' + intVal.toString(2) );
// Drop the bytes that were just read so the next read starts after them.
hex = removeBytes(hex,2);
alert( 'Now we have: ' + hex );
You can then interpret the byte string however you want.
Hope this helps! Cheers!
How did you get the binary data into the string in the first place? How the binary data gets encoded into a string is an IMPORTANT consideration, and you need an answer to that question before you can proceed.
One way I know of to get binary data into a string, is to use the XHR object, and set it to expect UTF-16.
Once it's in utf-16, you can retrieve 16-bit numbers from the string using "....".charCodeAt(0)
which will be a number between 0 and 65535
Then, if you like, you can convert that number into two numbers between 0 and 255 like this:
// Upper 8 bits of the value; for a number in 0-65535 this is 0-255.
var leftByte = mynumber>>>8;
// Lower 8 bits of the value (mask with 0xFF).
var rightByte = mynumber&255;