I need a simple floating point rounding function, thus:
double round(double);
round(0.1) = 0
round(-0.1) = 0
round(-0.9) = -1
I can find
As pointed out in comments and other answers, the ISO C++ standard library did not add round()
until ISO C++11, when this function was pulled in by reference to the ISO C99 standard math library.
For positive operands in [½, ub] round(x) == floor (x + 0.5)
, where ub is 223 for float
when mapped to IEEE-754 (2008) binary32
, and 252 for double
when it is mapped to IEEE-754 (2008) binary64
. The numbers 23 and 52 correspond to the number of stored mantissa bits in these two floating-point formats. For positive operands in [+0, ½) round(x) == 0
, and for positive operands in (ub, +∞] round(x) == x
. As the function is symmetric about the x-axis, negative arguments x
can be handled according to round(-x) == -round(x)
.
This leads to the compact code below. It compiles into a reasonable number of machine instructions across various platforms. I observed the most compact code on GPUs, where my_roundf()
requires about a dozen instructions. Depending on processor architecture and toolchain, this floating-point based approach could be either faster or slower than the integer-based implementation from newlib referenced in a different answer.
I tested my_roundf()
exhaustively against the newlib roundf()
implementation using Intel compiler version 13, with both /fp:strict
and /fp:fast
. I also checked that the newlib version matches the roundf()
in the mathimf
library of the Intel compiler. Exhaustive testing is not possible for double-precision round()
, however the code is structurally identical to the single-precision implementation.
#include
#include
#include
#include
#include
float my_roundf (float x)
{
const float half = 0.5f;
const float one = 2 * half;
const float lbound = half;
const float ubound = 1L << 23;
float a, f, r, s, t;
s = (x < 0) ? (-one) : one;
a = x * s;
t = (a < lbound) ? x : s;
f = (a < lbound) ? 0 : floorf (a + half);
r = (a > ubound) ? x : (t * f);
return r;
}
double my_round (double x)
{
const double half = 0.5;
const double one = 2 * half;
const double lbound = half;
const double ubound = 1ULL << 52;
double a, f, r, s, t;
s = (x < 0) ? (-one) : one;
a = x * s;
t = (a < lbound) ? x : s;
f = (a < lbound) ? 0 : floor (a + half);
r = (a > ubound) ? x : (t * f);
return r;
}
uint32_t float_as_uint (float a)
{
uint32_t r;
memcpy (&r, &a, sizeof(r));
return r;
}
float uint_as_float (uint32_t a)
{
float r;
memcpy (&r, &a, sizeof(r));
return r;
}
float newlib_roundf (float x)
{
uint32_t w;
int exponent_less_127;
w = float_as_uint(x);
/* Extract exponent field. */
exponent_less_127 = (int)((w & 0x7f800000) >> 23) - 127;
if (exponent_less_127 < 23) {
if (exponent_less_127 < 0) {
/* Extract sign bit. */
w &= 0x80000000;
if (exponent_less_127 == -1) {
/* Result is +1.0 or -1.0. */
w |= ((uint32_t)127 << 23);
}
} else {
uint32_t exponent_mask = 0x007fffff >> exponent_less_127;
if ((w & exponent_mask) == 0) {
/* x has an integral value. */
return x;
}
w += 0x00400000 >> exponent_less_127;
w &= ~exponent_mask;
}
} else {
if (exponent_less_127 == 128) {
/* x is NaN or infinite so raise FE_INVALID by adding */
return x + x;
} else {
return x;
}
}
x = uint_as_float (w);
return x;
}
int main (void)
{
uint32_t argi, resi, refi;
float arg, res, ref;
argi = 0;
do {
arg = uint_as_float (argi);
ref = newlib_roundf (arg);
res = my_roundf (arg);
resi = float_as_uint (res);
refi = float_as_uint (ref);
if (resi != refi) { // check for identical bit pattern
printf ("!!!! arg=%08x res=%08x ref=%08x\n", argi, resi, refi);
return EXIT_FAILURE;
}
argi++;
} while (argi);
return EXIT_SUCCESS;
}