I am working on a program that needs to convert a 32-bit number into a decimal number.
The input is a 32-bit number in floating-point representation: the first bit is the sign, the next 8 bits are the exponent, and the remaining 23 bits are the mantissa. I am writing the program in C. I receive the number as a char[]
array, and after that I am making a new int[]
array where I store the sign, the exponent and the mantissa. However, I have a problem storing the mantissa in some data type, because I need to use the mantissa as a number, not as an array: formula=sign*(1+0.mantissa)*2^(exponent-127)
.
Here is the code I use to store the mantissa, but still the program gets me wrong results:
// Accumulate the 23 entries of mantissa[] into one floating-point value.
double oMantissa=0;
int counter=0;
for(counter=0;counter<23;counter++)
{
// Only entries equal to 1 contribute to the sum.
if(mantissa[counter]==1)
{
// NOTE(review): each set bit is weighted by 10^-counter, i.e. a decimal place value starting at 10^0.
// The formula's "0.mantissa" is a binary fraction, so the bit at index `counter` would presumably
// need the weight 2^-(counter+1); this looks like the source of the wrong results.
oMantissa+=mantissa[counter]*pow(10,-counter);
}
}
mantissa[]
is an int
array where I have already converted the mantissa from a char
array. When I get the value from formula
, it has to be a binary number, and I have to convert it to decimal, so I will get the value of the number. Can you help me with storing the 23 bits of the mantissa? And, I mustn't use functions like strtoul
that convert the 32-bit number directly into binary. I have to use formula
.
Which part of the below code was hard to get right given all the formulas and sample numbers and a calculator?
#include <limits.h>
#include <math.h>
#include <stdio.h>
// Choose the integer type named uint32: plain unsigned when UINT_MAX reaches
// 0xFFFFFFFF, unsigned long otherwise.
#if UINT_MAX >= 0xFFFFFFFF
typedef unsigned uint32;
#else
typedef unsigned long uint32;
#endif
// Compile-time assertion: declares an extern char array whose size is 1 when
// expr is true and -1 (which the compiler rejects) when it is false.
#define C_ASSERT(expr) extern char CAssertExtern[(expr)?1:-1]
// Ensure uint32 is exactly 32-bit
C_ASSERT(sizeof(uint32) * CHAR_BIT == 32);
// Ensure float has the same number of bits as uint32, 32
C_ASSERT(sizeof(uint32) == sizeof(float));
// Convert 32 ASCII binary digits (most significant first) to a double by
// packing them into a uint32 and reinterpreting those bytes as a float.
// Only s[0]..s[31] are read; the digits are not validated.
double Ieee754SingleDigits2DoubleCheat(const char s[32])
{
    uint32 bits = 0;
    float result;
    const char *src = (const char *)&bits;
    char *dst = (char *)&result;
    unsigned k;

    // Shift each digit in from the right so s[0] ends up as the top bit
    for (k = 0; k != 32; ++k)
    {
        bits <<= 1;
        bits += (s[k] - '0');
    }

    // Byte-wise copy of the integer's object representation into the float
    for (k = 0; k != sizeof(result); ++k)
        dst[k] = src[k];

    return result;
}
// Convert 32 ASCII binary digits laid out as an IEEE-754 single-precision
// value (1 sign digit, 8 exponent digits, 23 fraction digits, most
// significant first) to a double, using only integer accumulation and
// repeated multiplication/division by 2.
//
// Only s[0]..s[31] are read; the digits are not validated.
// An all-ones exponent yields +/-INFINITY when the fraction is zero and NAN
// otherwise. An all-zero exponent is decoded as a subnormal (or zero).
double Ieee754SingleDigits2DoubleNoCheat(const char s[32])
{
    double f;
    int sign, exp;
    // At most 24 significant bits are needed; unsigned long is guaranteed to
    // hold at least 32, so no project-specific typedef is required here.
    unsigned long mant;
    int i;

    sign = s[0] - '0';

    // Biased exponent field
    exp = 0;
    for (i = 1; i <= 8; i++)
        exp = exp * 2 + (s[i] - '0');

    // Fraction field
    mant = 0;
    for (i = 9; i <= 31; i++)
        mant = mant * 2 + (unsigned long)(s[i] - '0');

    // Exponent 255 is reserved: infinities and NaNs are not scaled numbers
    if (exp == 255)
    {
        if (mant != 0)
            return NAN;
        return sign ? -INFINITY : INFINITY;
    }

    if (exp > 0)
    {
        // Normal(ized) numbers: restore the implicit "1." above the fraction
        mant |= 1UL << 23;
        // Remove the exponent bias
        exp -= 127;
    }
    else
    {
        // Subnormal numbers: no implicit "1.", fixed exponent of -126
        exp = -126;
    }
    // The fraction was accumulated as an integer, so the binary point sits
    // 23 places too far to the right
    exp -= 23;

    f = mant;
    while (exp > 0)
        f *= 2, exp--;
    while (exp < 0)
        f /= 2, exp++;

    if (sign)
        f = -f;
    return f;
}
// Demo driver: prints three sample bit patterns, each decoded once by the
// "cheat" routine and once by the "no cheat" routine.
// The converters consume only the first 32 characters of each literal.
int main(void)
{
    double value;

    value = Ieee754SingleDigits2DoubleCheat("110000101100010010000000000000000");
    printf("%+g\n", value);
    value = Ieee754SingleDigits2DoubleNoCheat("010000101100010010000000000000000");
    printf("%+g\n", value);

    value = Ieee754SingleDigits2DoubleCheat("000000000100000000000000000000000");
    printf("%+g\n", value);
    value = Ieee754SingleDigits2DoubleNoCheat("100000000100000000000000000000000");
    printf("%+g\n", value);

    value = Ieee754SingleDigits2DoubleCheat("000000000000000000000000000000000");
    printf("%+g\n", value);
    value = Ieee754SingleDigits2DoubleNoCheat("000000000000000000000000000000000");
    printf("%+g\n", value);

    return 0;
}
Output (ideone):
-98.25
+98.25
+5.87747e-39
-5.87747e-39
+0
+0
Not too long ago, I had the opportunity to write a similar piece of code that you, and others, may find useful. It takes a character string representing a floating point number as the first argument to the program and converts the string to its IEEE-754 Single Precision Floating Point representation along with its equivalent integer value. Take a look and let me know if you have any questions.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <errno.h>
/* BUILD_64 is defined when the compiler predefines __LP64__ or _LP64. */
#if defined(__LP64__) || defined(_LP64)
# define BUILD_64 1
#endif
/* constants for word and double-word size
 * (DWSZ sizes the static output buffer in fmt_binstr)
 */
#define WDSZ 64
#define DWSZ 128
/* forward declarations for the helpers defined after main */
inline int getmsb (unsigned long x);
char *fmt_binstr (unsigned long n, unsigned char sz, unsigned char szs, char sep);
char *binstr (unsigned long n);
char *fpfrc_bin (float fvalue);
void show_fltmem (float f);
void show_ieee754str (char *s);
void show_ieee754 (float f);
float xstrtof (char *str);
char *form_ieee754SPstr (int sign, char *exp, char *dec, char *frac);
/** program entry point.
 *  takes a decimal number as argv[1], prints its whole and fractional
 *  parts, their binary expansions, the biased exponent, and the resulting
 *  IEEE-754 single-precision bit pattern (both computed from the strings
 *  and read back from the float in memory). Returns 0 on success and 1 on
 *  missing input or allocation failure. All strings obtained from the
 *  helper functions are released before returning.
 */
int main (int argc, char** argv) {
    char *fstring = NULL;       /* fraction part in binary */
    char *bs = NULL;            /* decimal part in binary */
    char *binexp = NULL;        /* biased exponent in binary */
    char *ieee754str = NULL;    /* assembled 32-digit representation */
    int status = 1;

    if (argc < 2) {
        fprintf (stderr, "error: insufficient input. Usage: %s float\n", argv[0]);
        return 1;
    }

    char *dp = strchr (argv[1], '.');       /* pointer to decimal point */
    int dec = atoi (argv[1]);               /* integer of decimal part */
    int frc = (dp) ? atoi (dp + 1) : 0;     /* integer of fraction part */

    /* output string input values */
    printf ("\nString Values:\n");
    printf (" string : %s\n whole : %d\n fraction : %d\n\n", argv[1], dec, frc);

    float fvalue = xstrtof (argv[1]);
    float ffrc = fvalue - dec;
    int signbit = (fvalue >= 0) ? 0 : 1;

    /* output float input values */
    printf ("Float Values:\n");
    printf (" decimal : %d\n fraction : %f\n\n", dec, ffrc);

    fstring = fpfrc_bin (fvalue);
    bs = binstr ((unsigned long) dec);
    if (!fstring || !bs) {      /* both are dereferenced below */
        fprintf (stderr, "error: allocation failed.\n");
        goto done;
    }

    /* output binary values decimal part/fraction part */
    printf ("Binary Values:\n");
    printf (" decimal : %s\n fraction : %s\n sign bit : %d\n\n", bs, fstring, signbit);

    /* quick hack of exp bias, biased value, conversion to binary */
    int bias = (int) strlen (bs) - 1;
    int biasexp = 127+bias;
    binexp = binstr ((unsigned long) biasexp);
    if (!binexp) {
        fprintf (stderr, "error: allocation failed.\n");
        goto done;
    }

    /* output summary of biased IEEE-754 exponent */
    printf ("Normalization for biased exponent:\n");
    printf ("\n %s.%s => %s.%s%s\n\n", bs, fstring, "1", (bs+1), fstring);
    printf (" exponent bias: %d\n unbiased exponent: 127\n", bias);
    printf (" __________________+____\n\n");
    printf (" biased exponent: %3d\n binary exponent: %s\n\n", biasexp, binexp);

    /* output summary of IEEE-754 mantissa */
    printf ("Conversion to 'hidden bit' format to form mantissa:\n\n");
    printf (" %s.%s%s => %s%s\n\n", "1", (bs+1), fstring, (bs+1), fstring);

    /* form IEEE-754 binary representation from values computed */
    ieee754str = form_ieee754SPstr (signbit, binexp, bs, fstring);
    if (!ieee754str) {
        fprintf (stderr, "error: allocation failed.\n");
        goto done;
    }

    /* output formatted complete IEEE-754 binary - from computed values above */
    printf ("IEEE-754 Single Precision Floating Point Representation (caclulated value)\n\n");
    show_ieee754str (ieee754str);

    /* output formatted complete IEEE-754 binary - from float value in memory */
    printf ("IEEE-754 Single Precision Floating Point Representation (memory value)\n\n");
    show_ieee754 (fvalue);

    /* output float, binary and integer equivalent */
    show_fltmem (fvalue);
    status = 0;

done:
    /* free(NULL) is a no-op, so no guards are needed */
    free (fstring);
    free (bs);
    free (binexp);
    free (ieee754str);
    return status;
}
/** single-precision float in memory
 *  output the float, equivalent unsigned int, and
 *  binary representation of the number in memory.
 *  The bytes of 'f' are copied into an unsigned int with memcpy rather
 *  than read through a casted pointer, which would break the aliasing
 *  rules. Assumes unsigned int is no wider than float.
 */
void show_fltmem (float f)
{
    unsigned int i = 0;

    memcpy (&i, &f, sizeof i);

    printf ("\nRepresentations of float value in memory:\n\n");
    printf (" The float value entered : %f\n\n", f);
    printf (" binary value in memory : %s\n\n", fmt_binstr (i, 32, 8, '-'));
    printf (" bits as unsigned int : %u\n\n", i);
}
/** most significant bit.
 *  return the 0-based index of the most significant set bit of 'x'.
 *  Implemented with a plain shift loop so it builds on any target and is
 *  well defined for every input; x == 0 returns 0 (same as x == 1).
 *  The definition is deliberately not marked 'inline': an inline-only
 *  definition provides no external symbol for callers to link against.
 */
int getmsb (unsigned long x)
{
    int msb = 0;

    while (x >>= 1)     /* one step per bit above position 0 */
        msb++;

    return msb;
}
/** returns pointer to formatted binary representation of 'n' zero padded to 'sz'.
 *  returns pointer to string containing formatted binary representation of
 *  'n' zero padded to 'sz' digits with char 'sep' placed every 'szs' digits
 *  (e.g. 10001010 -> 1000-1010). szs == 0 disables the separator.
 *  The string lives in a static buffer that is overwritten by the next
 *  call; the caller must not free it. Digit positions beyond the width of
 *  unsigned long are emitted as '0', and output stops early rather than
 *  running off the front of the buffer when sz/szs ask for more than
 *  DWSZ characters.
 */
char *fmt_binstr (unsigned long n, unsigned char sz, unsigned char szs, char sep) {
    static char s[DWSZ + 1] = {0};
    const unsigned nbits = (unsigned) (sizeof n * 8);   /* assumes 8-bit bytes */
    char *p = s + DWSZ;     /* s[DWSZ] stays '\0'; digits are written right to left */
    unsigned i;

    for (i = 0; i < sz; i++) {
        int need_sep = (i > 0 && szs > 0 && i % szs == 0);

        if (p - s < (need_sep ? 2 : 1))     /* no room left in the buffer */
            break;
        if (need_sep)
            *--p = sep;
        /* shifting by >= the type width is undefined, so guard it */
        *--p = (i < nbits && (n >> i & 1)) ? '1' : '0';
    }
    return p;
}
/** returns an allocated string containing unpadded binary
 *  representation of the integer value 'n'. This value must
 *  be assigned to a pointer and freed to prevent leaks.
 *  Returns NULL when the allocation fails.
 */
char *binstr (unsigned long n)
{
    int msb = getmsb (n);                           /* index of the top digit */
    char *s = calloc ((size_t) msb + 2, sizeof *s); /* digits + terminator */
    int i;

    if (!s)
        return NULL;

    /* bit i of n belongs msb - i places from the start of the string */
    for (i = 0; i <= msb; i++)
        s[msb - i] = (n >> i & 1) ? '1' : '0';

    return s;
}
/** return string containing binary representation of fraction
 *  The function takes a float as an argument and computes the
 *  binary representation of the fractional part of the float.
 *  On success, the function returns an allocated, null-terminated
 *  string (to be freed by the caller) holding at most MAXD - 1
 *  digits, or NULL if the allocation fails. MAXD of 24
 *  (23 + null-term) for Single-Precision mantissa, 53
 *  (52 + null-term) for Double-Precision mantissa.
 *  A value whose fractional part is not positive yields an empty string.
 */
char *fpfrc_bin (float fvalue)
{
    float fv = fvalue - (int)fvalue;    /* keep only the fractional part */
    int MAXD = 24;
    char *fvs = calloc (MAXD, sizeof *fvs);
    if (!fvs) {
        fprintf (stderr, "%s()_error: allocation failed.\n", __func__);
        return NULL;
    }

    char *p = fvs;
    int it = 0;

    /* stop one short of MAXD so the last byte stays '\0' */
    while (fv > 0 && it < MAXD - 1)
    {
        fv = fv * 2.0;                  /* next binary digit moves above the point */
        int bit = (int)fv;
        *p++ = bit ? '1' : '0';
        if (bit >= 1)
            fv = fv - 1.0;              /* drop the digit just emitted */
        it++;
    }
    return fvs;
}
/** print a binary-digit string in the ieee-754 layout.
 *  each character of 's' is written preceded by a space, followed by
 *  a ruler line and a legend naming the sign, exponent and mantissa fields.
 */
void show_ieee754str (char *s)
{
    const char *digit;

    fputs (" ", stdout);
    for (digit = s; *digit != '\0'; digit++)
        printf (" %c", *digit);
    fputs ("\n", stdout);

    fputs (" |- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|\n", stdout);
    fputs (" |s| exp | mantissa |\n\n", stdout);
}
/** formatted output of ieee-754 representation of float from stored value.
 *  prints the 32 bits of 'f', most significant first, each followed by a
 *  space, then a ruler line and a field legend. The bits are obtained by
 *  copying the float into an unsigned int with memcpy (no pointer-cast
 *  aliasing, no shifting of a negative signed value). Assumes unsigned int
 *  is no wider than float.
 */
void show_ieee754 (float f)
{
    unsigned int bits = 0;
    int i;

    memcpy (&bits, &f, sizeof bits);

    printf (" ");
    for (i = 31; i >= 0; i--)
        printf ("%d ", (int) ((bits >> i) & 0x1u));
    printf ("\n");
    printf (" |- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|\n");
    printf (" |s| exp | mantissa |\n\n");
}
/** string to float with error checking.
 *  converts the leading part of 'str' with strtof and returns the value.
 *  The program is terminated with EXIT_FAILURE when strtof reports an
 *  out-of-range result in either direction (+HUGE_VALF or -HUGE_VALF),
 *  when it sets errno and yields zero, or when 'str' contains no digits.
 */
float xstrtof (char *str)
{
    char *endptr = NULL;
    errno = 0;
    float val = strtof (str, &endptr);

    /* overflow is signalled by +/-HUGE_VALF together with ERANGE */
    int overflowed = (errno == ERANGE && (val == HUGE_VALF || val == -HUGE_VALF));

    /* Check for various possible errors */
    if (overflowed || (errno != 0 && val == 0)) {
        perror ("strtof");
        exit (EXIT_FAILURE);
    }
    if (endptr == str) {
        fprintf (stderr, "No digits were found\n");
        exit (EXIT_FAILURE);
    }
    return val;
}
/** form IEEE-754 binary representation from computed values for the
 *  sign bit, biased exponent binary string, decimal binary string, and
 *  fractional binary string, forming the 23-bit mantissa from the decimal
 *  and fractional strings, filling with '0' as needed. An allocated
 *  string containing the IEEE-754 Single-Precision representation
 *  (always 32 digits + null-term) is returned; the caller frees it.
 *  Returns NULL if the allocation fails.
 *
 *  'exp' shorter than 8 digits is left-padded with '0'; if it is longer,
 *  only its last 8 digits are used. The first digit of 'dec' is the
 *  hidden bit and is skipped. Mantissa digits beyond 23 are dropped
 *  (truncated, not rounded).
 */
char *form_ieee754SPstr (int sign, char *exp, char *dec, char *frac)
{
    const size_t expbits = 8;
    const size_t manbits = 23;
    char *str = calloc (1 + expbits + manbits + 1, sizeof *str);
    if (!str)
        return NULL;

    char *p = str;
    *p++ = (sign == 0) ? '0' : '1';         /* set sign bit in string */

    /* biased exponent, right-aligned in its 8-digit field */
    size_t explen = strlen (exp);
    if (explen > expbits) {
        exp += explen - expbits;
        explen = expbits;
    }
    for (size_t pad = explen; pad < expbits; pad++)
        *p++ = '0';
    memcpy (p, exp, explen);
    p += explen;

    /* mantissa: decimal digits after the hidden bit, then fraction digits,
     * never writing more than 23 in total */
    const char *sp = (*dec) ? dec + 1 : dec;
    size_t used = 0;
    for (; *sp && used < manbits; used++)
        *p++ = *sp++;
    for (; *frac && used < manbits; used++)
        *p++ = *frac++;
    for (; used < manbits; used++)          /* fill remaining mantissa */
        *p++ = '0';

    return str;                             /* calloc left str[32] as '\0' */
}
Example Use/Output
$ ./bin/ieee754cvt 123.456
String Values:
string : 123.456
whole : 123
fraction : 456
Float Values:
decimal : 123
fraction : 0.456001
Binary Values:
decimal : 1111011
fraction : 01110100101111001
sign bit : 0
Normalization for biased exponent:
1111011.01110100101111001 => 1.11101101110100101111001
exponent bias: 6
unbiased exponent: 127
__________________+____
biased exponent: 133
binary exponent: 10000101
Conversion to 'hidden bit' format to form mantissa:
1.11101101110100101111001 => 11101101110100101111001
IEEE-754 Single Precision Floating Point Representation (caclulated value)
0 1 0 0 0 0 1 0 1 1 1 1 0 1 1 0 1 1 1 0 1 0 0 1 0 1 1 1 1 0 0 1
|- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
|s| exp | mantissa |
IEEE-754 Single Precision Floating Point Representation (memory value)
0 1 0 0 0 0 1 0 1 1 1 1 0 1 1 0 1 1 1 0 1 0 0 1 0 1 1 1 1 0 0 1
|- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
|s| exp | mantissa |
Representations of float value in memory:
The float value entered : 123.456001
binary value in memory : 01000010-11110110-11101001-01111001
bits as unsigned int : 1123477881
After scouring the internet and being unable to find similar functions, I wrote these floating-point conversion functions.
//NOTE memcpy is a more efficient way to do this
//These instructions are presented for reference only
//I Zackery Sobin created these functions
//and I release them into the public domain
//there is no warranty
//they might not work properly
//certain things like NAN or INFINITY might not be handled correctly
#include "math.h"
// Rebuild a float from the 4 bytes at S by splitting the 32-bit pattern
// into sign, exponent and mantissa fields and evaluating
// (1 + mantissa / 2^23) * 2^(exponent - 127) with pow().
// Results whose magnitude is below 10^-38 are returned as 0.
float charArray2float(charArray *S)
{
    unsigned int raw = charArray2lluint(S, 4);
    unsigned int signField = raw & 0x80000000;      //10000000 00000000 00000000 00000000
    unsigned int exponentField = raw & 0x7F800000;  //01111111 10000000 00000000 00000000
    unsigned int mantissaField = raw & 0x007FFFFF;  //00000000 01111111 11111111 11111111

    // Remove the bias; the difference is narrowed to signed char before widening to float
    float power = (float) ((signed char) ((exponentField >> 23) - 127));
    // Put the implicit leading 1 in front of the 23 fraction bits
    float significand = (float) 1 + (float) mantissaField / pow((float)2, (float)23);
    float value = significand * pow((float)2, (float)power);

    if (signField != 0)
    {
        value = value * (float) (-1);
    }
    if (fabs(value) < pow((float) 10, (float) -38))
    {
        value = 0;
    }
    return value;
}
// Combine 'length' bytes from S into one integer.
// When isBigEndian is set, S[0] supplies the most significant byte;
// otherwise S[length-1] does.
long long int charArray2lluint(char *S, int length)
{
    unsigned long long int total = 0;
    int k;

    for (k = 0; k < length; k++)
    {
        // Byte k of the walk always lands in the same slot: highest slot first
        const int shift = ((length - 1) - k) * 8;
        unsigned char byte;

        if (isBigEndian)
        {
            byte = (unsigned char) S[k];
        }
        else
        {
            byte = (unsigned char) S[length - k - 1];
        }
        total = total + ((unsigned long long int) byte << shift);
    }
    return total;
}
// Encode testVariable1 as a 32-bit IEEE-754 single-precision pattern and
// write its 4 bytes to outputArray (which must have room for 4 bytes).
//
// The exponent is found by stepping exp until value / 2^exp lies in [1, 2).
// The byte order is produced by unsigned_int_2charArray alone, so it matches
// the other *_2charArray helpers and the order charArray2lluint reads back.
// Exactly 4 bytes are written.
//
// If the search does not converge within 512 steps, or the exponent is
// outside the normal range [-126, 127] (subnormals, infinities, NaN),
// ErrorCode is set to 6 and a zero with the input's sign bit is written.
void float_2charArray(char *outputArray, float testVariable1) {
    unsigned int sign = 0;
    float mantessa = 0;
    int exp = 0;
    unsigned int theResult;

    if (testVariable1 == 0) {
        theResult = 0;
    }
    else {
        int converged = 0;
        int watchdog = 0;

        if (testVariable1 < 0) {
            sign = 0x80000000;
            testVariable1 = testVariable1 * -1.0;
        }

        while (1) {
            watchdog++;
            if (watchdog > 512) {
                break;
            }
            mantessa = testVariable1 / powf(2, exp);
            if (mantessa >= 1 && mantessa < 2) { converged = 1; break; }
            else if (mantessa >= 2.0) { exp = exp + 1; }
            else if (mantessa < 1) { exp = exp - 1; }
        }

        if (!converged || exp < -126 || exp > 127) {
            ErrorCode = 6; //This is a global variable used to see if there is a bug in this function
            theResult = sign;
        }
        else {
            // exp + 127 is in 1..254 here, so the shift cannot go negative or overflow
            unsigned int fixedExponent = ((unsigned int) (exp + 127)) << 23;
            unsigned int fixedMantessa = (float) (mantessa - 1) * pow((float)2, (float)23);
            theResult = sign + fixedExponent + fixedMantessa;
        }
    }

    unsigned_int_2charArray(outputArray, theResult);
}
// Encode testVariable2 as a 64-bit IEEE-754 double-precision pattern and
// write its 8 bytes to outputArray (which must have room for 8 bytes).
//
// The exponent is found by stepping exp until value / 2^exp lies in [1, 2).
// pow() is used rather than powf() so that powers of two outside the float
// range stay finite and non-zero. The step limit is large enough for every
// normal double (|exp| <= 1023).
// The byte order is produced by unsigned_long_long_int_2charArray alone,
// and exactly 8 bytes are written.
//
// If the search does not converge, or the exponent is outside the normal
// range [-1022, 1023] (subnormals, infinities, NaN), FlighboardErrorCode is
// set to 7 and a zero with the input's sign bit is written.
void double_2charArray(char *outputArray, double testVariable2) {
    unsigned long long int sign = 0;
    double mantessa = 0;
    long long int exp = 0;
    unsigned long long int theResult;

    if (testVariable2 == 0) {
        theResult = 0;
    }
    else {
        int converged = 0;
        long long int watchdog = 0;

        if (testVariable2 < 0) {
            sign = 0x8000000000000000;
            testVariable2 = testVariable2 * -1.0;
        }

        while (1) {
            watchdog++;
            if (watchdog > 1100) {
                break;
            }
            mantessa = testVariable2 / pow(2, (double) exp);
            if (mantessa >= 1 && mantessa < 2) { converged = 1; break; }
            else if (mantessa >= 2.0) { exp = exp + 1; }
            else if (mantessa < 1) { exp = exp - 1; }
        }

        if (!converged || exp < -1022 || exp > 1023) {
            FlighboardErrorCode = 7;
            theResult = sign;
        }
        else {
            // exp + 1023 is in 1..2046 here, so the shift cannot go negative or overflow
            unsigned long long int fixedExponent = ((unsigned long long int) (exp + 1023)) << 52;
            unsigned long long int fixedMantessa = (double) (mantessa - 1) * pow((double)2, (double)52);
            // The exponent and mantissa fields do not overlap, and neither touches the sign bit
            theResult = sign | (fixedExponent + fixedMantessa);
        }
    }

    unsigned_long_long_int_2charArray(outputArray, theResult);
}
// Write the 8 bytes of X to outputArray: least significant byte first
// unless isBigEndian is set, in which case most significant byte first.
void unsigned_long_long_int_2charArray(char *outputArray, unsigned long long int X) {
    char bytes[8];
    int k;

    // bytes[k] holds bits 8k..8k+7 of X
    for (k = 0; k < 8; k++) {
        bytes[k] = (X >> (8 * k)) & 0xFF;
    }

    if (isBigEndian) {
        for (k = 0; k < 8; k++) { outputArray[k] = bytes[7 - k]; }
    }
    else {
        for (k = 0; k < 8; k++) { outputArray[k] = bytes[k]; }
    }
}
// Write the 8 bytes of X to outputArray: least significant byte first
// unless isBigEndian is set, in which case most significant byte first.
void long_long_int_2charArray(char *outputArray, long long int X) {
    const unsigned long long int pattern = (unsigned long long int) X;
    char bytes[8];
    int k;

    // bytes[k] holds bits 8k..8k+7 of X
    for (k = 0; k < 8; k++) {
        bytes[k] = (pattern >> (8 * k)) & 0xFF;
    }

    if (isBigEndian) {
        for (k = 0; k < 8; k++) { outputArray[k] = bytes[7 - k]; }
    }
    else {
        for (k = 0; k < 8; k++) { outputArray[k] = bytes[k]; }
    }
}
// Write the 4 bytes of X to outputArray: least significant byte first
// unless isBigEndian is set, in which case most significant byte first.
void unsigned_int_2charArray(char *outputArray, unsigned int X) {
    char bytes[4];
    int k;

    // bytes[k] holds bits 8k..8k+7 of X
    for (k = 0; k < 4; k++) {
        bytes[k] = (X >> (8 * k)) & 0xFF;
    }

    if (isBigEndian) {
        for (k = 0; k < 4; k++) { outputArray[k] = bytes[3 - k]; }
    }
    else {
        for (k = 0; k < 4; k++) { outputArray[k] = bytes[k]; }
    }
}
// Write the 4 bytes of X to outputArray: least significant byte first
// unless isBigEndian is set, in which case most significant byte first.
void int_2charArray(char *outputArray, int X) {
    const unsigned int pattern = (unsigned int) X;
    char bytes[4];
    int k;

    // bytes[k] holds bits 8k..8k+7 of X
    for (k = 0; k < 4; k++) {
        bytes[k] = (pattern >> (8 * k)) & 0xFF;
    }

    if (isBigEndian) {
        for (k = 0; k < 4; k++) { outputArray[k] = bytes[3 - k]; }
    }
    else {
        for (k = 0; k < 4; k++) { outputArray[k] = bytes[k]; }
    }
}
// Write the 2 bytes of X to outputArray: low byte first unless
// isBigEndian is set, in which case high byte first.
void unsigned_short_int_2charArray(char *outputArray, unsigned short int X) {
    const char low = X & 0x00FF;
    const char high = (X & 0xFF00) >> 8;

    if (isBigEndian) {
        outputArray[0] = high;
        outputArray[1] = low;
    }
    else {
        outputArray[0] = low;
        outputArray[1] = high;
    }
}
// Write the 2 bytes of X to outputArray: low byte first unless
// isBigEndian is set, in which case high byte first.
void short_int_2charArray(char *outputArray, short int X) {
    const char low = X & 0x00FF;
    const char high = (X & 0xFF00) >> 8;

    if (isBigEndian) {
        outputArray[0] = high;
        outputArray[1] = low;
    }
    else {
        outputArray[0] = low;
        outputArray[1] = high;
    }
}
// Store the single byte X in the first element of outputArray.
void unsigned_char_2charArray(char *outputArray, unsigned char X) {
    *outputArray = X;
}
// Store the single char X in the first element of outputArray.
void char_2charArray(char *outputArray, char X) {
    *outputArray = X;
}
来源:https://stackoverflow.com/questions/16164620/how-to-convert-an-ieee-754-single-precision-binary-floating-point-to-decimal