How to Perform Tuckerman Rounding for Floating Point Square Root

后端 未结 1 413
说谎
说谎 2021-01-17 02:40

I am trying to perform a Tuckerman rounding test in order to determine the correctly rounded (round-to-nearest) result.

I created a program in C++ to compare two solutions t

相关标签:
1条回答
  • 2021-01-17 03:24

    The original publication that introduced Tuckerman rounding for the square root was:

    Ramesh C. Agarwal, James W. Cooley, Fred G. Gustavson, James B. Shearer, Gordon Slishman, Bryant Tuckerman, "New scalar and vector elementary functions for the IBM System/370", IBM J. Res. Develop., Vol. 30, No. 2, March 1986, pp. 126-144.

    This paper specifically points out that the multiplications used to compute the products g*(g-ulp) and g*(g+ulp) are truncating, not rounding multiplications:

    "However, these inequalities can be shown to be equivalent to

    y- * y < x <= y * y+ ,

    where * denotes System/360/370 multiplication (which truncates the result), so that the tests are easily carried out without the need for extra precision. (Note the asymmetry: one <, one <=.) If the left inequality fails, y is too large; if the right inequality fails, y is too small."

    The following C99 code shows how Tuckerman rounding is successfully utilized to deliver correctly rounded results in a single-precision square root function.

    #include <stdio.h>
    #include <stdlib.h>
    #include <fenv.h>
    #include <math.h>
    
    #pragma STDC FENV_ACCESS ON
    float mul_fp32_rz (float a, float b)
    {
        float r;
        int orig_rnd = fegetround();
        fesetround (FE_TOWARDZERO);
        r = a * b;
        fesetround (orig_rnd);
        return r;
    }
    
    /*
     * Single-precision square root that uses Tuckerman rounding for the
     * final rounding step.
     *
     * Special cases:
     *   a < 0 (including -infinity)  -> NaN (computed as 0.0f / 0.0f)
     *   a is +/-0, +infinity or NaN  -> a + a
     *
     * For every other argument, a is split as a = b * 2**(2*f) with b in
     * [0.25,1). sqrt(b) is approximated, corrected by at most one ulp with
     * the Tuckerman test, and the result is scaled by 2**f.
     */
    float my_sqrtf (float a)
    {
        float b, r, v, w, p, s;
        int e, t, f;

        /* isinf / isnan are the standard C99 classification macros; the
           isinff / isnanf spellings are non-standard extensions */
        if ((a <= 0.0f) || isinf (a) || isnan (a)) {
            if (a < 0.0f) {
                r = 0.0f / 0.0f;
            } else {
                r = a + a;
            }
        } else {
            /* compute exponent adjustments: frexpf gives a = b * 2**e with
               b in [0.5,1). Biasing e by -2*512 makes t negative for every
               float exponent, so the truncating division leaves a remainder
               t in {-1, 0} and e == t + 2*f once the bias is added back. */
            b = frexpf (a, &e);
            t = e - 2*512;
            f = t / 2;
            t = t - 2 * f;
            f = f + 512;
            /* map argument into the primary approximation interval [0.25,1) */
            b = ldexpf (b, t);
            /* initial approximation to reciprocal square root
               (polynomial in b, evaluated with Horner's scheme) */
            r =        -6.10005470e+0f;
            r = r * b + 2.28990124e+1f;
            r = r * b - 3.48110069e+1f;
            r = r * b + 2.76135244e+1f;
            r = r * b - 1.24472151e+1f;
            r = r * b + 3.84509158e+0f;
            /* round rsqrt approximation to 11 bits */
            r = rintf (r * 2048.0f);
            r = r * (1.0f / 2048.0f);
            /* Use A. Schoenhage's coupled iteration for the square root:
               w tracks sqrt(b), v tracks 0.5 / sqrt(b) */
            v = 0.5f * r;
            w = b * r;
            w = (w * -w + b) * v + w;
            v = (r * -w + 1.0f) * v + v;
            w = (w * -w + b) * v + w;
            /* Tuckerman rounding: mul_rz (w, w-ulp) < b <= mul_rz (w, w+ulp).
               If the left inequality fails, w is too large and is stepped
               down; if the right one fails, w is too small and is stepped up. */
            p = nextafterf (w, 0.0f);
            s = nextafterf (w, 2.0f);
            if (b <= mul_fp32_rz (w, p)) {
                w = p;
            } else if (b > mul_fp32_rz (w, s)) {
                w = s;
            }
            /* map back from primary approximation interval by jamming exponent */
            r = ldexpf (w, f);
        }
        return r;
    }
    
     /*
      * Exhaustive test driver: walks every 32-bit pattern as a float argument
      * and compares the bit pattern returned by my_sqrtf() with the one
      * returned by the library's sqrtf().
      *
      * The first mismatch is printed and stops the sweep. The exit status is
      * EXIT_FAILURE if a mismatch was found and EXIT_SUCCESS otherwise.
      */
     int main (void)
     {
         /* union gives access to the raw bit pattern of each float */
         volatile union {
             float f;
             unsigned int i;
         } arg, res, ref;
         int status = EXIT_SUCCESS;

         arg.i = 0;
         do {
             res.f = my_sqrtf (arg.f);
             ref.f = sqrtf (arg.f);
             if (res.i != ref.i) {
                  printf ("!!!! error @ arg=%08x: res=%08x ref=%08x\n",
                          arg.i, res.i, ref.i);
                  /* report the failure to the caller, not only on stdout */
                  status = EXIT_FAILURE;
                  break;
             }
             arg.i++;
         } while (arg.i); /* stops when the counter wraps around to 0 */
         return status;
     }
    
    0 讨论(0)
提交回复
热议问题