round() for float in C++

后端未结

关注

 22  1184

时光取名叫无心 2020-11-22 03:01

I need a simple floating point rounding function, thus:

double round(double);

round(0.1) = 0
round(-0.1) = 0
round(-0.9) = -1

I can find

22条回答

[愿得一人] (楼主)

2020-11-22 03:33

As pointed out in comments and other answers, the ISO C++ standard library did not add round() until ISO C++11, when this function was pulled in by reference to the ISO C99 standard math library.

For positive operands in [½, ub] round(x) == floor (x + 0.5), where ub is 2²³ for float when mapped to IEEE-754 (2008) binary32, and 2⁵² for double when it is mapped to IEEE-754 (2008) binary64. The numbers 23 and 52 correspond to the number of stored mantissa bits in these two floating-point formats. For positive operands in [+0, ½) round(x) == 0, and for positive operands in (ub, +∞] round(x) == x. As the function is symmetric about the x-axis, negative arguments x can be handled according to round(-x) == -round(x).

This leads to the compact code below. It compiles into a reasonable number of machine instructions across various platforms. I observed the most compact code on GPUs, where my_roundf() requires about a dozen instructions. Depending on processor architecture and toolchain, this floating-point based approach could be either faster or slower than the integer-based implementation from newlib referenced in a different answer.

I tested my_roundf() exhaustively against the newlib roundf() implementation using Intel compiler version 13, with both /fp:strict and /fp:fast. I also checked that the newlib version matches the roundf() in the mathimf library of the Intel compiler. Exhaustive testing is not possible for double-precision round(), however the code is structurally identical to the single-precision implementation.

#include 
#include 
#include 
#include 
#include 

float my_roundf (float x)
{
    const float half = 0.5f;
    const float one = 2 * half;
    const float lbound = half;
    const float ubound = 1L << 23;
    float a, f, r, s, t;
    s = (x < 0) ? (-one) : one;
    a = x * s;
    t = (a < lbound) ? x : s;
    f = (a < lbound) ? 0 : floorf (a + half);
    r = (a > ubound) ? x : (t * f);
    return r;
}

double my_round (double x)
{
    const double half = 0.5;
    const double one = 2 * half;
    const double lbound = half;
    const double ubound = 1ULL << 52;
    double a, f, r, s, t;
    s = (x < 0) ? (-one) : one;
    a = x * s;
    t = (a < lbound) ? x : s;
    f = (a < lbound) ? 0 : floor (a + half);
    r = (a > ubound) ? x : (t * f);
    return r;
}

uint32_t float_as_uint (float a)
{
    uint32_t r;
    memcpy (&r, &a, sizeof(r));
    return r;
}

float uint_as_float (uint32_t a)
{
    float r;
    memcpy (&r, &a, sizeof(r));
    return r;
}

float newlib_roundf (float x)
{
    uint32_t w;
    int exponent_less_127;

    w = float_as_uint(x);
    /* Extract exponent field. */
    exponent_less_127 = (int)((w & 0x7f800000) >> 23) - 127;
    if (exponent_less_127 < 23) {
        if (exponent_less_127 < 0) {
            /* Extract sign bit. */
            w &= 0x80000000;
            if (exponent_less_127 == -1) {
                /* Result is +1.0 or -1.0. */
                w |= ((uint32_t)127 << 23);
            }
        } else {
            uint32_t exponent_mask = 0x007fffff >> exponent_less_127;
            if ((w & exponent_mask) == 0) {
                /* x has an integral value. */
                return x;
            }
            w += 0x00400000 >> exponent_less_127;
            w &= ~exponent_mask;
        }
    } else {
        if (exponent_less_127 == 128) {
            /* x is NaN or infinite so raise FE_INVALID by adding */
            return x + x;
        } else {
            return x;
        }
    }
    x = uint_as_float (w);
    return x;
}

int main (void)
{
    uint32_t argi, resi, refi;
    float arg, res, ref;

    argi = 0;
    do {
        arg = uint_as_float (argi);
        ref = newlib_roundf (arg);
        res = my_roundf (arg);
        resi = float_as_uint (res);
        refi = float_as_uint (ref);
        if (resi != refi) { // check for identical bit pattern
            printf ("!!!! arg=%08x  res=%08x  ref=%08x\n", argi, resi, refi);
            return EXIT_FAILURE;
        }
        argi++;
    } while (argi);
    return EXIT_SUCCESS;
}

0 讨论(0)

查看其它22个回答