How to convert a floating point number into a sequence of bytes so that it can be persisted in a file? Such algorithm must be fast and highly portable. It must allow also th
What do you mean, "portable"?
For portability, remember to keep the numbers within the limits defined in the Standard: use a single number outside these limits, and there goes all portability down the drain.
double planck_time = 5.39124E-44; /* second */
[...] 10 The values given in the following list shall be replaced by constant expressions with implementation-defined values [...] 11 The values given in the following list shall be replaced by constant expressions with implementation-defined values [...] 12 The values given in the following list shall be replaced by constant expressions with implementation-defined (positive) values [...] [...]
Note the implementation-defined in all these clauses.
What level of portability do you require? If the file is to be read on a computer with the same OS that it was generated on, than you using a binary file and just saving and restoring the bit pattern should work. Otherwise as boytheo said, ASCII is your friend.
Converting to an ascii representation would be the simplest, but if you need to deal with a colossal number of floats, then of course you should go binary. But this can be a tricky issue if you care about portability. Floating point numbers are represented differently in different machines.
If you don't want to use a canned library, then your float-binary serializer/deserializer will simply have to have "a contract" on where each bit lands and what it represents.
Here's a fun website to help with that: link.
sprintf, fprintf ? you don't get any more portable than that.
Here we go.
Portable IEEE 754 serialisation / deserialisation that should work regardless of the machine's internal floating point representation.
https://github.com/MalcolmMcLean/ieee754
/*
* read a double from a stream in ieee754 format regardless of host
* encoding.
* fp - the stream
* bigendian - set to if big bytes first, clear for little bytes
* first
*
*/
double freadieee754(FILE *fp, int bigendian)
{
unsigned char buff[8];
int i;
double fnorm = 0.0;
unsigned char temp;
int sign;
int exponent;
double bitval;
int maski, mask;
int expbits = 11;
int significandbits = 52;
int shift;
double answer;
/* read the data */
for (i = 0; i < 8; i++)
buff[i] = fgetc(fp);
/* just reverse if not big-endian*/
if (!bigendian)
{
for (i = 0; i < 4; i++)
{
temp = buff[i];
buff[i] = buff[8 - i - 1];
buff[8 - i - 1] = temp;
}
}
sign = buff[0] & 0x80 ? -1 : 1;
/* exponet in raw format*/
exponent = ((buff[0] & 0x7F) << 4) | ((buff[1] & 0xF0) >> 4);
/* read inthe mantissa. Top bit is 0.5, the successive bits half*/
bitval = 0.5;
maski = 1;
mask = 0x08;
for (i = 0; i < significandbits; i++)
{
if (buff[maski] & mask)
fnorm += bitval;
bitval /= 2.0;
mask >>= 1;
if (mask == 0)
{
mask = 0x80;
maski++;
}
}
/* handle zero specially */
if (exponent == 0 && fnorm == 0)
return 0.0;
shift = exponent - ((1 << (expbits - 1)) - 1); /* exponent = shift + bias */
/* nans have exp 1024 and non-zero mantissa */
if (shift == 1024 && fnorm != 0)
return sqrt(-1.0);
/*infinity*/
if (shift == 1024 && fnorm == 0)
{
#ifdef INFINITY
return sign == 1 ? INFINITY : -INFINITY;
#endif
return (sign * 1.0) / 0.0;
}
if (shift > -1023)
{
answer = ldexp(fnorm + 1.0, shift);
return answer * sign;
}
else
{
/* denormalised numbers */
if (fnorm == 0.0)
return 0.0;
shift = -1022;
while (fnorm < 1.0)
{
fnorm *= 2;
shift--;
}
answer = ldexp(fnorm, shift);
return answer * sign;
}
}
/*
* write a double to a stream in ieee754 format regardless of host
* encoding.
* x - number to write
* fp - the stream
* bigendian - set to write big bytes first, elee write litle bytes
* first
* Returns: 0 or EOF on error
* Notes: different NaN types and negative zero not preserved.
* if the number is too big to represent it will become infinity
* if it is too small to represent it will become zero.
*/
int fwriteieee754(double x, FILE *fp, int bigendian)
{
int shift;
unsigned long sign, exp, hibits, hilong, lowlong;
double fnorm, significand;
int expbits = 11;
int significandbits = 52;
/* zero (can't handle signed zero) */
if (x == 0)
{
hilong = 0;
lowlong = 0;
goto writedata;
}
/* infinity */
if (x > DBL_MAX)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
lowlong = 0;
goto writedata;
}
/* -infinity */
if (x < -DBL_MAX)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
hilong |= (1 << 31);
lowlong = 0;
goto writedata;
}
/* NaN - dodgy because many compilers optimise out this test, but
*there is no portable isnan() */
if (x != x)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
lowlong = 1234;
goto writedata;
}
/* get the sign */
if (x < 0) { sign = 1; fnorm = -x; }
else { sign = 0; fnorm = x; }
/* get the normalized form of f and track the exponent */
shift = 0;
while (fnorm >= 2.0) { fnorm /= 2.0; shift++; }
while (fnorm < 1.0) { fnorm *= 2.0; shift--; }
/* check for denormalized numbers */
if (shift < -1022)
{
while (shift < -1022) { fnorm /= 2.0; shift++; }
shift = -1023;
}
/* out of range. Set to infinity */
else if (shift > 1023)
{
hilong = 1024 + ((1 << (expbits - 1)) - 1);
hilong <<= (31 - expbits);
hilong |= (sign << 31);
lowlong = 0;
goto writedata;
}
else
fnorm = fnorm - 1.0; /* take the significant bit off mantissa */
/* calculate the integer form of the significand */
/* hold it in a double for now */
significand = fnorm * ((1LL << significandbits) + 0.5f);
/* get the biased exponent */
exp = shift + ((1 << (expbits - 1)) - 1); /* shift + bias */
/* put the data into two longs (for convenience) */
hibits = (long)(significand / 4294967296);
hilong = (sign << 31) | (exp << (31 - expbits)) | hibits;
x = significand - hibits * 4294967296;
lowlong = (unsigned long)(significand - hibits * 4294967296);
writedata:
/* write the bytes out to the stream */
if (bigendian)
{
fputc((hilong >> 24) & 0xFF, fp);
fputc((hilong >> 16) & 0xFF, fp);
fputc((hilong >> 8) & 0xFF, fp);
fputc(hilong & 0xFF, fp);
fputc((lowlong >> 24) & 0xFF, fp);
fputc((lowlong >> 16) & 0xFF, fp);
fputc((lowlong >> 8) & 0xFF, fp);
fputc(lowlong & 0xFF, fp);
}
else
{
fputc(lowlong & 0xFF, fp);
fputc((lowlong >> 8) & 0xFF, fp);
fputc((lowlong >> 16) & 0xFF, fp);
fputc((lowlong >> 24) & 0xFF, fp);
fputc(hilong & 0xFF, fp);
fputc((hilong >> 8) & 0xFF, fp);
fputc((hilong >> 16) & 0xFF, fp);
fputc((hilong >> 24) & 0xFF, fp);
}
return ferror(fp);
}
fwrite(), fread()? You will likely want binary, and you cannot pack the bytes any tighter unless you want to sacrifice precision which you would do in the program and then fwrite() fread() anyway; float a; double b; a=(float)b; fwrite(&a,1,sizeof(a),fp);
If you are carrying different floating point formats around they may not convert in a straight binary sense, so you may have to pick apart the bits and perform the math, this to the power that plus this, etc. IEEE 754 is a dreadful standard to use but widespread so it would minimize the effort.