Adding floats with gmp gives “correct” results, sort of

问题

In the code below I use mpf_add to add the string representation of two floating values. What I don't understand at this point is why 2.2 + 3.2 = 5.39999999999999999999999999999999999999. I would have thought that gmp was smart enough to give 5.4.

What am I not comprehending about how gmp does floats?

(BTW, when I first wrote this I wasn't sure how to insert a decimal point, thus the plus/minus digit stuff at the end)

BSTR __stdcall FBIGSUM(BSTR p1, BSTR p2 ) {
  USES_CONVERSION;

  F(n1);
  F(n2);
  F(res);

  LPSTR sNum1 = W2A( p1 );
  LPSTR sNum2 = W2A( p2 );

  mpf_set_str( n1, sNum1, 10 );
  mpf_set_str( n2, sNum2, 10 );

  mpf_add( res, n1, n2 );

  char * buff =  (char *) _alloca( 1024 );
  char expBuffer[ 20 ];
  mp_exp_t exp;

  mpf_get_str(buff, &exp, 10, 0, res);

  char * temp = ltoa( (long) exp, expBuffer, 10 );
  if (exp >= 0) {
    strcat(buff, "+" );
  }
  strcat(buff, expBuffer );

  BSTR bResult = _com_util::ConvertStringToBSTR( buff );
  return bResult;
}

回答1:

This is because of the inherent error of using floating-point arithmetic in a binary environment.

See the IEEE 754 standard for more information.

回答2:

What warren said.

You might have better results if you use binary coded decimal instead of floating point numbers, although I can't really direct you to any libraries for that.

回答3:

I eventually ended up answering this myself. The solution for me was to do in code what I used to do in school. The method works like this:

Take each number and make sure that the number of digits to the right of the decimal point are the same. So if adding 2.1 and 3.457, 'normalise' the first one to 2.100. Keep a record of the number of digits that are to the right of the decimal, in this case, three.
Now remove the decimal point and use mpz_add to add the two numbers, which have now become 2100 and 3457. The result is 5557.
Finally, reinsert the decimal point three characters (in this case) from the right, giving the correct answer of 5.557.

I prototyped the solution in VBScript (below)

function fadd( n1, n2 )
    dim s1, s2, max, mul, res
    normalise3 n1, n2, s1, s2, max
    s1 = replace( s1, ".", "" )
    s2 = replace( s2, ".", "" )
    mul = clng(s1) + clng(s2)
    res = left( mul, len(mul) - max ) & "." & mid( mul, len( mul ) - max + 1 )
    fadd = res
end function

sub normalise3( byval n1, byval n2, byref s1, byref s2, byref numOfDigits )
    dim a1, a2
    dim max
    if instr( n1, "." ) = 0 then n1 = n1 & "."
    if instr( n2, "." ) = 0 then n2 = n2 & "."
    a1 = split( n1, "." )
    a2 = split( n2, "." )
    max = len( a1(1) )
    if len( a2(1) ) > max then max = len( a2( 1 ) )
    s1 = a1(0) & "." & a1(1) & string( max - len( a1( 1 )), "0" )
    s2 = a2(0) & "." & a2(1) & string( max - len( a2( 1 )), "0" )
    numOfDigits = max
end sub

and finally in Visual C++ (below).

#define Z(x) mpz_t x; mpz_init( x );

BSTR __stdcall FADD( BSTR p1, BSTR p2 ) {
  USES_CONVERSION;

  LPSTR sP1 = W2A( p1 );
  LPSTR sP2 = W2A( p2 );

  char LeftOf1[ 1024 ];
  char RightOf1[ 1024 ];
  char LeftOf2[ 1024 ];
  char RightOf2[ 1024 ];
  char * dotPos;
  long numOfDigits;
  int i;
  int amtOfZeroes;

  dotPos = strstr( sP1, "." );
  if ( dotPos == NULL ) {
    strcpy( LeftOf1, sP1 );
    *RightOf1 = '\0';
  } else {
    *dotPos = '\0';
    strcpy( LeftOf1, sP1 );
    strcpy( RightOf1, (dotPos + 1) );
  }

  dotPos = strstr( sP2, "." );
  if ( dotPos == NULL ) {
    strcpy( LeftOf2, sP2 );
    *RightOf2 = '\0';
  } else {
    *dotPos = '\0';
    strcpy( LeftOf2, sP2 );
    strcpy( RightOf2, (dotPos + 1) );
  }

  numOfDigits = strlen( RightOf1 ) > strlen( RightOf2 ) ? strlen( RightOf1 ) : strlen( RightOf2 );

  strcpy( sP1, LeftOf1 );
  strcat( sP1, RightOf1 );
  amtOfZeroes = numOfDigits - strlen( RightOf1 );
  for ( i = 0; i < amtOfZeroes; i++ ) {
    strcat( sP1, "0" );
  }
  strcpy( sP2, LeftOf2 );
  strcat( sP2, RightOf2 );
  amtOfZeroes = numOfDigits - strlen( RightOf2 );
  for ( i = 0; i < amtOfZeroes; i++ ) {
    strcat( sP2, "0" );
  }


  Z(n1);
  Z(n2);
  Z(res);

  mpz_set_str( n1, sP1, 10 );
  mpz_set_str( n2, sP2, 10 );
  mpz_add( res, n1, n2 );

  char * buff =  (char *) _alloca( mpz_sizeinbase( res, 10 ) + 2 + 1 );

  mpz_get_str(buff, 10, res);

  char * here = buff + strlen(buff) - numOfDigits; 

  memmove( here + 1, here, strlen(buff)); // plus trailing null
  *(here) = '.';

  BSTR bResult = _com_util::ConvertStringToBSTR( buff );
  return bResult;
}

I accept that the C is a bit ... well ... dodgy, so please feel free to critique it. All helpful comments gratefully received.

I went on from here to implement FSUB and FMUL as well. FDIV was not nearly so satisfying, ending up in three versions and using rational numbers.

来源：https://stackoverflow.com/questions/178952/adding-floats-with-gmp-gives-correct-results-sort-of

标签

visual-c++

floating-point

bignum

gmp