1994-08-19 10:40:01 +01:00
|
|
|
/* s_cbrtf.c -- float version of s_cbrt.c.
|
|
|
|
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
Fixed all 502518670 errors of more than 1 ulp for cbrtf() on amd64.
The maximum error was 3.56 ulps.
The bug was another translation error. The double precision version
has a comment saying "new cbrt to 23 bits, may be implemented in
precision". This means exactly what it says -- that the 23 bit second
approximation for the double precision cbrt() may be implemented in
single (i.e., float) precision. It doesn't mean what the translation
assumed -- that this approximation, when implemented in float precision,
is good enough for the the final approximation in float precision.
First, float precision needs a 24 bit approximation. The "23 bit"
approximation is actually good to 24 bits on float precision args, but
only if it is evaluated in double precision. Second, the algorithm
requires a cleanup step to ensure its error bound.
In float precision, any reasonable algorithm works for the cleanup
step. Use the same algorithm as for double precision, although this
is much more than enough and is a significant pessimization, and don't
optimize or simplify anything using double precision to implement the
float case, so that the whole double precision algorithm can be verified
in float precision. A maximum error of 0.667 ulps is claimed for cbrt()
and the max for cbrtf() using the same algorithm shouldn't be different,
but the actual max for cbrtf() on amd64 is now 0.9834 ulps. (On i386
-O1 the max is 0.5006 (down from < 0.7) due to extra precision.)
2005-12-11 13:22:01 +00:00
|
|
|
* Debugged by Bruce D. Evans.
|
1994-08-19 10:40:01 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ====================================================
|
|
|
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
|
|
*
|
|
|
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
|
|
* Permission to use, copy, modify, and distribute this
|
1995-05-30 06:51:47 +01:00
|
|
|
* software is freely granted, provided that this notice
|
1994-08-19 10:40:01 +01:00
|
|
|
* is preserved.
|
|
|
|
* ====================================================
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef lint
|
1999-08-28 01:22:10 +01:00
|
|
|
static char rcsid[] = "$FreeBSD$";
|
1994-08-19 10:40:01 +01:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "math.h"
|
|
|
|
#include "math_private.h"
|
|
|
|
|
|
|
|
/* cbrtf(x)
|
|
|
|
* Return cube root of x
|
|
|
|
*/
|
1995-05-30 06:51:47 +01:00
|
|
|
static const unsigned
|
1994-08-19 10:40:01 +01:00
|
|
|
B1 = 709958130, /* B1 = (84+2/3-0.03306235651)*2**23 */
|
|
|
|
B2 = 642849266; /* B2 = (76+2/3-0.03306235651)*2**23 */
|
|
|
|
|
|
|
|
static const float
|
|
|
|
C = 5.4285717010e-01, /* 19/35 = 0x3f0af8b0 */
|
|
|
|
D = -7.0530611277e-01, /* -864/1225 = 0xbf348ef1 */
|
|
|
|
E = 1.4142856598e+00, /* 99/70 = 0x3fb50750 */
|
|
|
|
F = 1.6071428061e+00, /* 45/28 = 0x3fcdb6db */
|
|
|
|
G = 3.5714286566e-01; /* 5/14 = 0x3eb6db6e */
|
|
|
|
|
2002-05-28 19:15:04 +01:00
|
|
|
float
|
|
|
|
cbrtf(float x)
|
1994-08-19 10:40:01 +01:00
|
|
|
{
|
Fixed all 502518670 errors of more than 1 ulp for cbrtf() on amd64.
The maximum error was 3.56 ulps.
The bug was another translation error. The double precision version
has a comment saying "new cbrt to 23 bits, may be implemented in
precision". This means exactly what it says -- that the 23 bit second
approximation for the double precision cbrt() may be implemented in
single (i.e., float) precision. It doesn't mean what the translation
assumed -- that this approximation, when implemented in float precision,
is good enough for the the final approximation in float precision.
First, float precision needs a 24 bit approximation. The "23 bit"
approximation is actually good to 24 bits on float precision args, but
only if it is evaluated in double precision. Second, the algorithm
requires a cleanup step to ensure its error bound.
In float precision, any reasonable algorithm works for the cleanup
step. Use the same algorithm as for double precision, although this
is much more than enough and is a significant pessimization, and don't
optimize or simplify anything using double precision to implement the
float case, so that the whole double precision algorithm can be verified
in float precision. A maximum error of 0.667 ulps is claimed for cbrt()
and the max for cbrtf() using the same algorithm shouldn't be different,
but the actual max for cbrtf() on amd64 is now 0.9834 ulps. (On i386
-O1 the max is 0.5006 (down from < 0.7) due to extra precision.)
2005-12-11 13:22:01 +00:00
|
|
|
float r,s,t,w;
|
1994-08-19 10:40:01 +01:00
|
|
|
int32_t hx;
|
|
|
|
u_int32_t sign;
|
|
|
|
u_int32_t high;
|
|
|
|
|
|
|
|
GET_FLOAT_WORD(hx,x);
|
|
|
|
sign=hx&0x80000000; /* sign= sign(x) */
|
|
|
|
hx ^=sign;
|
|
|
|
if(hx>=0x7f800000) return(x+x); /* cbrt(NaN,INF) is itself */
|
1995-05-30 06:51:47 +01:00
|
|
|
if(hx==0)
|
1994-08-19 10:40:01 +01:00
|
|
|
return(x); /* cbrt(0) is itself */
|
|
|
|
|
|
|
|
SET_FLOAT_WORD(x,hx); /* x <- |x| */
|
|
|
|
/* rough cbrt to 5 bits */
|
|
|
|
if(hx<0x00800000) /* subnormal number */
|
|
|
|
{SET_FLOAT_WORD(t,0x4b800000); /* set t= 2**24 */
|
|
|
|
t*=x; GET_FLOAT_WORD(high,t); SET_FLOAT_WORD(t,high/3+B2);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
SET_FLOAT_WORD(t,hx/3+B1);
|
|
|
|
|
|
|
|
|
|
|
|
/* new cbrt to 23 bits */
|
|
|
|
r=t*t/x;
|
|
|
|
s=C+r*t;
|
1995-05-30 06:51:47 +01:00
|
|
|
t*=G+F/(s+E+D/s);
|
1994-08-19 10:40:01 +01:00
|
|
|
|
Fixed all 502518670 errors of more than 1 ulp for cbrtf() on amd64.
The maximum error was 3.56 ulps.
The bug was another translation error. The double precision version
has a comment saying "new cbrt to 23 bits, may be implemented in
precision". This means exactly what it says -- that the 23 bit second
approximation for the double precision cbrt() may be implemented in
single (i.e., float) precision. It doesn't mean what the translation
assumed -- that this approximation, when implemented in float precision,
is good enough for the the final approximation in float precision.
First, float precision needs a 24 bit approximation. The "23 bit"
approximation is actually good to 24 bits on float precision args, but
only if it is evaluated in double precision. Second, the algorithm
requires a cleanup step to ensure its error bound.
In float precision, any reasonable algorithm works for the cleanup
step. Use the same algorithm as for double precision, although this
is much more than enough and is a significant pessimization, and don't
optimize or simplify anything using double precision to implement the
float case, so that the whole double precision algorithm can be verified
in float precision. A maximum error of 0.667 ulps is claimed for cbrt()
and the max for cbrtf() using the same algorithm shouldn't be different,
but the actual max for cbrtf() on amd64 is now 0.9834 ulps. (On i386
-O1 the max is 0.5006 (down from < 0.7) due to extra precision.)
2005-12-11 13:22:01 +00:00
|
|
|
/* chop t to 12 bits and make it larger than cbrt(x) */
|
|
|
|
GET_FLOAT_WORD(high,t);
|
2005-12-11 17:58:14 +00:00
|
|
|
SET_FLOAT_WORD(t,(high&0xfffff000)+0x00001000);
|
Fixed all 502518670 errors of more than 1 ulp for cbrtf() on amd64.
The maximum error was 3.56 ulps.
The bug was another translation error. The double precision version
has a comment saying "new cbrt to 23 bits, may be implemented in
precision". This means exactly what it says -- that the 23 bit second
approximation for the double precision cbrt() may be implemented in
single (i.e., float) precision. It doesn't mean what the translation
assumed -- that this approximation, when implemented in float precision,
is good enough for the the final approximation in float precision.
First, float precision needs a 24 bit approximation. The "23 bit"
approximation is actually good to 24 bits on float precision args, but
only if it is evaluated in double precision. Second, the algorithm
requires a cleanup step to ensure its error bound.
In float precision, any reasonable algorithm works for the cleanup
step. Use the same algorithm as for double precision, although this
is much more than enough and is a significant pessimization, and don't
optimize or simplify anything using double precision to implement the
float case, so that the whole double precision algorithm can be verified
in float precision. A maximum error of 0.667 ulps is claimed for cbrt()
and the max for cbrtf() using the same algorithm shouldn't be different,
but the actual max for cbrtf() on amd64 is now 0.9834 ulps. (On i386
-O1 the max is 0.5006 (down from < 0.7) due to extra precision.)
2005-12-11 13:22:01 +00:00
|
|
|
|
2005-12-11 17:58:14 +00:00
|
|
|
/* one step Newton iteration to 24 bits with error less than 0.667 ulps */
|
Fixed all 502518670 errors of more than 1 ulp for cbrtf() on amd64.
The maximum error was 3.56 ulps.
The bug was another translation error. The double precision version
has a comment saying "new cbrt to 23 bits, may be implemented in
precision". This means exactly what it says -- that the 23 bit second
approximation for the double precision cbrt() may be implemented in
single (i.e., float) precision. It doesn't mean what the translation
assumed -- that this approximation, when implemented in float precision,
is good enough for the the final approximation in float precision.
First, float precision needs a 24 bit approximation. The "23 bit"
approximation is actually good to 24 bits on float precision args, but
only if it is evaluated in double precision. Second, the algorithm
requires a cleanup step to ensure its error bound.
In float precision, any reasonable algorithm works for the cleanup
step. Use the same algorithm as for double precision, although this
is much more than enough and is a significant pessimization, and don't
optimize or simplify anything using double precision to implement the
float case, so that the whole double precision algorithm can be verified
in float precision. A maximum error of 0.667 ulps is claimed for cbrt()
and the max for cbrtf() using the same algorithm shouldn't be different,
but the actual max for cbrtf() on amd64 is now 0.9834 ulps. (On i386
-O1 the max is 0.5006 (down from < 0.7) due to extra precision.)
2005-12-11 13:22:01 +00:00
|
|
|
s=t*t; /* t*t is exact */
|
|
|
|
r=x/s;
|
|
|
|
w=t+t;
|
|
|
|
r=(r-t)/(w+r); /* r-t is exact */
|
|
|
|
t=t+t*r;
|
|
|
|
|
1994-08-19 10:40:01 +01:00
|
|
|
/* retore the sign bit */
|
|
|
|
GET_FLOAT_WORD(high,t);
|
|
|
|
SET_FLOAT_WORD(t,high|sign);
|
|
|
|
return(t);
|
|
|
|
}
|