/* * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ /******************************************************************************* * * * File asinacos.c, * * Implementation of asin and acos for the PowerPC. * * * * Copyright © 1991-2001 Apple Computer, Inc. All rights reserved. * * * * Written and ported by Robert A. Murley (ram) for Mac OS X. * * Modified by A. Sazegari, September 2001. * * * * A MathLib v4 file. * * * * Based on the trigonometric functions from IBM/Taligent. * * * * July 20 2001: replaced __setflm with FEGETENVD/FESETENVD. * * replaced DblInHex typedef with hexdouble. * * September 07 2001: added #ifdef __ppc__. * * September 09 2001: added more comments. * * September 10 2001: added macros to detect PowerPC and correct compiler. * * September 21 2001: added to get to * * and . * * September 24 2001: used standard symbols for environment flags. * * September 25 2001: added FESETENVD(0.0) at start of acos. * * October 08 2001: removed . * * changed compiler errors to warnings. * * November 06 2001: commented out warning about Intel architectures. * * * * W A R N I N G: * * These routines require a 64-bit double precision IEEE-754 model. * * They are written for PowerPC only and are expecting the compiler * * to generate the correct sequence of multiply-add fused instructions. * * * * These routines are not intended for 32-bit Intel architectures. * * * * A version of gcc higher than 932 is required. * * * * GCC compiler options: * * optimization level 3 (-O3) * * -fschedule-insns -finline-functions -funroll-all-loops * * * *******************************************************************************/ #ifdef __APPLE_CC__ #if __APPLE_CC__ > 930 #include "fp_private.h" #include "fenv_private.h" extern const unsigned short SqrtTable[]; // Floating-point constants static const double k2ToM26 = 1.490116119384765625e-8; // 0x1.0p-26; static const double kPiBy2 = 1.570796326794896619231322; // 0x1.921fb54442d18p0 static const double kPiBy2Tail = 6.1232339957367660e-17; // 0x1.1a62633145c07p-54 static const double kMinNormal = 2.2250738585072014e-308; // 0x1.0p-1022 const static double a14 = 0.03085091303188211304259; const static double a13 =-0.02425125216179617744614; const static double a12 = 0.02273334623573271023373; const static double a11 = 0.0002983797813053653120360; const static double a10 = 0.008819738782817955152102; const static double a9 = 0.008130738583790024803650; const static double a8 = 0.009793486386035222577675; const static double a7 = 0.01154901189590083099584; const static double a6 = 0.01396501522140471126730; const static double a5 = 0.01735275722383019335276; const static double a4 = 0.02237215928757573539490; const static double a3 = 0.03038194444121688698408; const static double a2 = 0.04464285714288482921087; const static double a1 = 0.07499999999999990640378; const static double a0 = 0.1666666666666666667191; const static double aa11 = 0.0002983797813053653120360; const static double aa10 = 0.008819738782817955152102; const static double aa9 = 0.008130738583790024803650; const static double aa8 = 0.009793486386035222577675; const static double aa7 = 0.01154901189590083099584; const static double aa6 = 0.01396501522140471126730; const static double aa5 = 0.01735275722383019335276; const static double aa4 = 0.02237215928757573539490; const static double aa3 = 0.03038194444121688698408; const static double aa2 = 0.04464285714288482921087; const static double aa1 = 0.07499999999999990640378; const static double aa0 = 0.1666666666666666667191; static const hexdouble kInfinityu = HEXDOUBLE(0x7ff00000, 0x00000000); static const hexdouble kPiu = HEXDOUBLE(0x400921fb, 0x54442d18); #define INVERSE_TRIGONOMETRIC_NAN "34" /**************************************************************************** FUNCTION: double asin (double x) DESCRIPTION: returns the inverse sine of its argument in the range of -pi/2 to pi/2 (radians). CALLS: __FABS EXCEPTIONS raised: INEXACT, UNDERFLOW/INEXACT or INVALID. ACCURACY: Error is less than an ulp and usually less than half an ulp. Caller's rounding direction is honored. ****************************************************************************/ #ifdef notdef double asin (double x) { hexdouble cD, yD ,gD ,fpenv; double absx, x2, x3, x4, temp1, temp2, u, v; double g, y, d, y2, e; int index; unsigned long ghead, yhead, rnddir; if (x != x) // NaN argument return ( copysign ( nan ( INVERSE_TRIGONOMETRIC_NAN ), x ) ); absx = __FABS(x); FEGETENVD (fpenv.d); // save env, set default FESETENVD (0.0); cD.d = 0.5 - 0.5*absx; x2 = x*x; // under/overflows are masked if (absx <= 0.5) { // |x| <= 0.5 if (absx < k2ToM26) { // |x| < 2^(-26) if (absx == 0.0) { // x == 0.0 case is exact FESETENVD (fpenv.d); return (x); } rnddir = fpenv.i.lo & 0x3ul; // caller's rounding direction fpenv.i.lo |= FE_INEXACT; // set INEXACT yD.d = x; // Rounding away from zero: return NextDouble(x,x+x) if (((rnddir == 3ul) && (x < 0.0)) || ((rnddir == 2) && (x > 0.0))) { yD.i.lo += 1ul; if (yD.i.lo == 0ul) yD.i.hi += 1ul; absx = __FABS(yD.d); } // Set UNDERFLOW if result is subnormal if (absx < kMinNormal) // subnormal case fpenv.i.lo |= FE_UNDERFLOW; // set UNDERFLOW FESETENVD (fpenv.d); // restore caller environment return (yD.d); // return small result } // [|x| < 2^(-26)] temp1 = a14*x2 + a13; x4 = x2*x2; // polynomial approx for ArcSin fpenv.i.lo |= FE_INEXACT; // result is inexact temp1 = temp1*x4 + a11; temp2 = a12*x4 + a10; temp1 = temp1*x4 + a9; temp2 = temp2*x4 + a8; temp1 = temp1*x4 + a7; temp2 = temp2*x4 + a6; temp1 = temp1*x4 + a5; temp2 = temp2*x4 + a4; temp1 = temp1*x4 + a3; temp2 = temp2*x4 + a2; temp1 = temp1*x4 + a1; x3 = x*x2; temp1 = a0 + x2*(temp2*x2 + temp1); FESETENVD (fpenv.d); // restore caller's mode return (x + x3*temp1); } // [|x| <= 0.5] /**************************************************************************** For 0.5 < |x| < 1.0, a polynomial approximation is used to estimate the power series resulting from the identity: ArcSin(x) = pi/2 - 2*ArcSin(Sqrt(0.5(1.0 - x))). The square root is evaluated in-line and concurrently with the evaluation of the ArcSin. ***************************************************************************/ if (absx < 1.0) { // |x| < 1.0 x2 = cD.d; x4 = x2*x2; temp2 = (((((a14*x4+a12)*x4+a10)*x4+a8)*x4+aa6)*x4+aa4)*x4+aa2; temp1 = (((((a13*x4+a11)*x4+a9)*x4+aa7)*x4+aa5)*x4+aa3)*x4+aa1; ghead = ((cD.i.hi + 0x3ff00000ul) >> 1) & 0x7ff00000ul; index = (cD.i.hi >> 13) & 0xfful; yhead = 0x7fc00000ul - ghead; temp1 = temp1 + x2*temp2; gD.i.hi = ghead + ((0xff00ul & SqrtTable[index]) << 4); gD.i.lo = 0ul; yD.i.hi = yhead + ((0xfful & SqrtTable[index]) << 12); yD.i.lo = 0ul; d = cD.d - gD.d*gD.d; y = yD.d; // initial 0.5/Sqrt guess g = gD.d + y*d; y2 = y + y; // 16-bit g e = 0.5 - y*g; d = cD.d - g*g; y = y + e*y2; // 16-bit y g = g + y*d; // 32-bit g y2 = y + y; e = 0.5 - y*g; d = cD.d - g*g; y = y + e*y2; // 32-bit y g = g + y*d; // 64-bit Sqrt d = (cD.d - g*g)*y; // tail of 0.5*Sqrt (32 bits) u = kPiBy2 - 2.0*g; // pi/2 - 2.0*Sqrt (head) temp1 = aa0 + x2*temp1; v = (kPiBy2 - u) - 2.0*g; // pi/2 - 2.0*Sqrt (tail) g *= x2; d = d - 0.5*kPiBy2Tail; fpenv.i.lo |= FE_INEXACT; // result is inexact temp2 = g*temp1 + (d - 0.5*v); FESETENVD (fpenv.d); // restore caller's mode if (x > 0.0) { // take care of sign of result temp2 = -temp2; return (2.0*temp2 + u); } else return (2.0*temp2 - u); } // [|x| < 1.0] FESETENVD (fpenv.d); // restore caller's mode // If |x| == 1.0, return properly signed pi/2. if (absx == 1.0) { if (x > 0.0) return (kPiBy2 + x*kPiBy2Tail); else return (-kPiBy2 + x*kPiBy2Tail); } // |x| > 1.0 signals INVALID and returns a NaN fpenv.i.lo |= SET_INVALID; FESETENVD (fpenv.d); return ( nan ( INVERSE_TRIGONOMETRIC_NAN ) ); } #else static struct kBlock { const double k2ToM26 /* = 1.490116119384765625e-8 */ ; // 0x1.0p-26 */ ; const double kPiBy2 /* = 1.570796326794896619231322 */ ; // 0x1.921fb54442d18p0 const double kPiBy2Tail /* = 6.1232339957367660e-17 */ ; // 0x1.1a62633145c07p-54 const double kMinNormal /* = 2.2250738585072014e-308 */ ; // 0x1.0p-1022 const double a14 /* = 0.03085091303188211304259 */ ; const double a13 /* =-0.02425125216179617744614 */ ; const double a12 /* = 0.02273334623573271023373 */ ; const double a11 /* = 0.0002983797813053653120360 */ ; const double a10 /* = 0.008819738782817955152102 */ ; const double a9 /* = 0.008130738583790024803650 */ ; const double a8 /* = 0.009793486386035222577675 */ ; const double a7 /* = 0.01154901189590083099584 */ ; const double a6 /* = 0.01396501522140471126730 */ ; const double a5 /* = 0.01735275722383019335276 */ ; const double a4 /* = 0.02237215928757573539490 */ ; const double a3 /* = 0.03038194444121688698408 */ ; const double a2 /* = 0.04464285714288482921087 */ ; const double a1 /* = 0.07499999999999990640378 */ ; const double a0 /* = 0.1666666666666666667191 */ ; const double aa11 /* = 0.0002983797813053653120360 */ ; const double aa10 /* = 0.008819738782817955152102 */ ; const double aa9 /* = 0.008130738583790024803650 */ ; const double aa8 /* = 0.009793486386035222577675 */ ; const double aa7 /* = 0.01154901189590083099584 */ ; const double aa6 /* = 0.01396501522140471126730 */ ; const double aa5 /* = 0.01735275722383019335276 */ ; const double aa4 /* = 0.02237215928757573539490 */ ; const double aa3 /* = 0.03038194444121688698408 */ ; const double aa2 /* = 0.04464285714288482921087 */ ; const double aa1 /* = 0.07499999999999990640378 */ ; const double aa0 /* = 0.1666666666666666667191 */ ; } kBlock = { /* static const double k2ToM26 = */ 1.490116119384765625e-8, // 0x1.0p-26, /* static const double kPiBy2 = */ 1.570796326794896619231322, // 0x1.921fb54442d18p0 /* static const double kPiBy2Tail = */ 6.1232339957367660e-17, // 0x1.1a62633145c07p-54 /* static const double kMinNormal = */ 2.2250738585072014e-308, // 0x1.0p-1022 /* const static double a14 = */ 0.03085091303188211304259, /* const static double a13 = */-0.02425125216179617744614, /* const static double a12 = */ 0.02273334623573271023373, /* const static double a11 = */ 0.0002983797813053653120360, /* const static double a10 = */ 0.008819738782817955152102, /* const static double a9 = */ 0.008130738583790024803650, /* const static double a8 = */ 0.009793486386035222577675, /* const static double a7 = */ 0.01154901189590083099584, /* const static double a6 = */ 0.01396501522140471126730, /* const static double a5 = */ 0.01735275722383019335276, /* const static double a4 = */ 0.02237215928757573539490, /* const static double a3 = */ 0.03038194444121688698408, /* const static double a2 = */ 0.04464285714288482921087, /* const static double a1 = */ 0.07499999999999990640378, /* const static double a0 = */ 0.1666666666666666667191, /* const static double aa11 = */ 0.0002983797813053653120360, /* const static double aa10 = */ 0.008819738782817955152102, /* const static double aa9 = */ 0.008130738583790024803650, /* const static double aa8 = */ 0.009793486386035222577675, /* const static double aa7 = */ 0.01154901189590083099584, /* const static double aa6 = */ 0.01396501522140471126730, /* const static double aa5 = */ 0.01735275722383019335276, /* const static double aa4 = */ 0.02237215928757573539490, /* const static double aa3 = */ 0.03038194444121688698408, /* const static double aa2 = */ 0.04464285714288482921087, /* const static double aa1 = */ 0.07499999999999990640378, /* const static double aa0 = */ 0.1666666666666666667191 }; double asin (double x) { hexdouble cD, yD ,gD , fpenv; double absx, x2, x3, x4, temp1, temp2, u, v; double g, y, d, y2, e; register int index; register unsigned long ghead, yhead, rnddir; register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_k2ToM26, FPR_cD, FPR_yD, FPR_kMinNormal, FPR_kPiBy2, FPR_kPiBy2Tail; register struct kBlock *pK; pK = &kBlock; FPR_z = 0.0; FPR_half = 0.5; FPR_k2ToM26 = pK->k2ToM26; FPR_kMinNormal = pK->kMinNormal; FPR_kPiBy2 = pK->kPiBy2; FPR_one = 1.0; if (x != x) // NaN argument return ( copysign ( nan ( INVERSE_TRIGONOMETRIC_NAN ), x ) ); absx = __FABS( x ); FEGETENVD ( FPR_env ); // save env, set default __ENSURE( FPR_z, FPR_half, FPR_k2ToM26 ); __ENSURE( FPR_one, FPR_kMinNormal, FPR_kPiBy2 ); // ensure register loads FPR_cD = __FNMSUB( FPR_half, absx, FPR_half ); FESETENVD ( FPR_z ); x2 = __FMUL( x, x ); // under/overflows are masked cD.d = FPR_cD; if ( absx <= FPR_half ) { // |x| <= 0.5 if ( absx < FPR_k2ToM26 ) { // |x| < 2^(-26) if ( absx == FPR_z ) { // x == 0.0 case is exact FESETENVD ( FPR_env ); return ( x ); } fpenv.d = FPR_env; rnddir = fpenv.i.lo & 0x3ul; // caller's rounding direction FPR_yD = x; // Rounding away from zero: return NextDouble(x,x+x) if (((rnddir == 3ul) && (x < FPR_z)) || ((rnddir == 2) && (x > FPR_z))) { yD.d = FPR_yD; __ORI_NOOP; __ORI_NOOP; __ORI_NOOP; yD.i.lo += 1ul; if (yD.i.lo == 0ul) yD.i.hi += 1ul; FPR_yD = yD.d; absx = __FABS( FPR_yD ); } FESETENVD ( FPR_env ); // restore caller environment __PROG_INEXACT( FPR_kPiBy2 ); // Set UNDERFLOW if result is subnormal if (absx < FPR_kMinNormal) // subnormal case __PROG_UF_INEXACT( FPR_kMinNormal ); // set UNDERFLOW return ( FPR_yD ); // return small result } // [|x| < 2^(-26)] { register double r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; register double r10, r11, r12, r13, r14; r14 = pK->a14; r13 = pK->a13; temp1 = __FMADD( r14, x2, r13 ); x4 = __FMUL( x2, x2 ); x3 = __FMUL( x, x2 ); __ORI_NOOP; r11 = pK->a11; r12 = pK->a12; r9 = pK->a9; r10 = pK->a10; temp1 = __FMADD( temp1, x4, r11 ); __ORI_NOOP; temp1 = __FMADD( temp1, x4, r9 ); temp2 = __FMADD( r12, x4, r10 ); r7 = pK->a7; r8 = pK->a8; r5 = pK->a5; r6 = pK->a6; temp1 = __FMADD( temp1, x4, r7 ); temp2 = __FMADD( temp2, x4, r8 ); temp1 = __FMADD( temp1, x4, r5 ); temp2 = __FMADD( temp2, x4, r6 ); r3 = pK->a3; r4 = pK->a4; r1 = pK->a1; r2 = pK->a2; temp1 = __FMADD( temp1, x4, r3 ); temp2 = __FMADD( temp2, x4, r4 ); temp1 = __FMADD( temp1, x4, r1 ); temp2 = __FMADD( temp2, x4, r2 ); r0 = pK->a0; __ORI_NOOP; temp2 = __FMADD( temp2, x2, temp1 ); temp1 = __FMADD( x2, temp2, r0 ); } FESETENVD ( FPR_env ); // restore caller's mode __PROG_INEXACT( FPR_kPiBy2 ); return (__FMADD( x3, temp1, x )); } // [|x| <= 0.5] /**************************************************************************** For 0.5 < |x| < 1.0, a polynomial approximation is used to estimate the power series resulting from the identity: ArcSin(x) = pi/2 - 2*ArcSin(Sqrt(0.5(1.0 - x))). The square root is evaluated in-line and concurrently with the evaluation of the ArcSin. ***************************************************************************/ if (absx < FPR_one) { // |x| < 1.0 register double FPR_gD, FPR_two; register double r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; register double r10, r11, r12, r13, r14; register unsigned long GPR_p, GPR_q, GPR_r, GPR_s, GPR_t; x2 = FPR_cD; /* fmr */ x4 = __FMUL( FPR_cD, FPR_cD); // Was x2*x2; GPR_t = cD.i.hi; r0 = pK->aa0; GPR_s = GPR_t + 0x3ff00000ul; __ORI_NOOP; r14 = pK->a14; r13 = pK->a13; r12 = pK->a12; r11 = pK->a11; gD.i.lo = 0ul; index = (GPR_t >> 13) & 0xfful; GPR_p = SqrtTable[index]; // 3 instrs issue temp1 = __FMADD( r13, x4, r11 ); temp2 = __FMADD( r14, x4, r12 ); r10 = pK->a10; r9 = pK->a9; GPR_r = ((0xff00ul & GPR_p) << 4); ghead = (GPR_s >> 1) & 0x7ff00000ul; gD.i.hi = ghead + GPR_r; r8 = pK->a8; r7 = pK->aa7; yhead = 0x7fc00000ul - ghead; yD.i.lo = 0ul; GPR_q = ((0xfful & GPR_p) << 12); yD.i.hi = yhead + GPR_q; r6 = pK->aa6; r5 = pK->aa5; temp1 = __FMADD( temp1, x4, r9 ); temp2 = __FMADD( temp2, x4, r10 ); temp1 = __FMADD( temp1, x4, r7 ); temp2 = __FMADD( temp2, x4, r8 ); r4 = pK->aa4; r3 = pK->aa3; r2 = pK->aa2; r1 = pK->aa1; temp1 = __FMADD( temp1, x4, r5 ); temp2 = __FMADD( temp2, x4, r6 ); temp1 = __FMADD( temp1, x4, r3 ); temp2 = __FMADD( temp2, x4, r4 ); FPR_two = 2.0; FPR_gD = gD.d; y = yD.d; temp1 = __FMADD( temp1, x4, r1 ); temp2 = __FMADD( temp2, x4, r2 ); d = __FNMSUB( FPR_gD, FPR_gD, FPR_cD ); FPR_kPiBy2Tail = pK->kPiBy2Tail; g = __FMADD( y, d, FPR_gD ); y2 = y + y; // 16-bit g e = __FNMSUB( y, g, FPR_half ); d = __FNMSUB( g, g, FPR_cD ); y = __FMADD( e, y2, y); temp1 = __FMADD( x2, temp2, temp1 ); // 16-bit y g = __FMADD( y, d, g ); y2 = y + y; // 32-bit g e = __FNMSUB( y, g, FPR_half ); d = __FNMSUB( g, g, FPR_cD ); y = __FMADD( e, y2, y ); temp1 = __FMADD( x2, temp1, r0 ); // 32-bit y g = __FMADD( y, d, g); // 64-bit Sqrt d = __FNMSUB( g, g, FPR_cD ); u = __FNMSUB( FPR_two, g, FPR_kPiBy2 ); // pi/2 - 2.0*Sqrt (head) d = __FMUL( d, y ); v = __FNMSUB( FPR_two, g, (FPR_kPiBy2 - u) );// tail of 0.5*Sqrt (32 bits) g = __FMUL( g, x2 ); d = __FNMSUB( FPR_half, FPR_kPiBy2Tail, d ); temp2 = __FNMSUB( FPR_half, v, d ); temp2 = __FMADD( g, temp1, temp2 ); FESETENVD ( FPR_env ); // restore caller's mode __PROG_INEXACT( FPR_kPiBy2 ); if (x > FPR_z) { // take care of sign of result return __FNMSUB( FPR_two, temp2, u ); } else return __FMSUB( FPR_two, temp2, u ); } // [|x| < 1.0] // If |x| == 1.0, return properly signed pi/2. if (absx == FPR_one) { FPR_kPiBy2Tail = pK->kPiBy2Tail; FESETENVD (FPR_env); // restore caller's mode if (x > FPR_z) return __FMADD( x, FPR_kPiBy2Tail, FPR_kPiBy2 ); else return __FMSUB( x, FPR_kPiBy2Tail, FPR_kPiBy2 ); } // |x| > 1.0 signals INVALID and returns a NaN fpenv.d = FPR_env; fpenv.i.lo |= SET_INVALID; FESETENVD (fpenv.d); return ( nan ( INVERSE_TRIGONOMETRIC_NAN ) ); } // ArcSin(x) #endif #ifdef notdef float asinf( float x ) { return (float) asin ( x ); } #endif /**************************************************************************** FUNCTION: double ArcCos(double x) DESCRIPTION: returns the inverse cosine of its argument in the range of 0 to pi (radians). CALLS: __FABS EXCEPTIONS raised: INEXACT or INVALID. ACCURACY: Error is less than an ulp and usually less than half an ulp. Caller's rounding direction is honored. ****************************************************************************/ #ifdef notdef double acos(double x) { hexdouble cD, yD, gD, fpenv; double absx, x2, x3, x4, temp1, temp2, s, u, v, w; double g, y, d, y2, e; int index; unsigned long ghead,yhead; if (x != x) // NaN return ( copysign ( nan ( INVERSE_TRIGONOMETRIC_NAN ), x ) ); absx = __FABS(x); FEGETENVD (fpenv.d); // save env, set default FESETENVD (0.0); cD.d = 0.5 - 0.5*absx; x2 = x*x; if (absx <= 0.5) { // |x| <= 0.5 temp1 = a14*x2 + a13; x4 = x2*x2; fpenv.i.lo |= FE_INEXACT; // INEXACT result temp1 = temp1*x4 + a11; temp2 = a12*x4 + a10; temp1 = temp1*x4 + a9; temp2 = temp2*x4 + a8; temp1 = temp1*x4 + a7; temp2 = temp2*x4 + a6; temp1 = temp1*x4 + a5; temp2 = temp2*x4 + a4; temp1 = temp1*x4 + a3; temp2 = temp2*x4 + a2; temp1 = temp1*x4 + a1; // finish up ArcCos(x) = pi/2 - ArcSin(x) u = kPiBy2 - x; // pi/2 - x (high-order term) temp1 = temp1 + x2*temp2; v = kPiBy2 - u - x; // pi/2 - x (tail) x3 = x*x2; temp1 = a0 + x2*temp1; w = (v + kPiBy2Tail) - x3*temp1; // combine low order terms FESETENVD (fpenv.d); // restore caller's mode return (u+w); } // [|x| <= 0.5] /**************************************************************************** For 0.5 < |x| < 1.0, a polynomial approximation is used to estimate the power series resulting from the identity: ArcSin(x) = pi/2 - 2*ArcSin(Sqrt(0.5(1.0 - x))). The square root is evaluated in-line and concurrently with the evaluation of the ArcSin. ****************************************************************************/ if (absx < 1.0) { // 0.5 < |x| < 1.0 x2 = cD.d; // 0.5*(1.0 - |x|) x4 = x2*x2; fpenv.i.lo |= FE_INEXACT; // INEXACT result temp2 = a14*x4 + a12; temp1 = a13*x4 + aa11; temp2 = temp2*x4 + aa10; temp1 = temp1*x4 + aa9; temp2 = temp2*x4 + aa8; temp1 = temp1*x4 + aa7; temp2 = temp2*x4 + aa6; temp1 = temp1*x4 + aa5; temp2 = temp2*x4 + aa4; temp1 = temp1*x4 + aa3; temp2 = temp2*x4 + aa2; temp1 = temp1*x4 + aa1; ghead = ((cD.i.hi + 0x3ff00000ul) >> 1) & 0x7ff00000ul; index = (cD.i.hi >> 13) & 0xfful; yhead = 0x7fc00000ul - ghead; temp1 = temp1 + x2*temp2; // collect polynomial terms gD.i.hi = ghead + ((0xff00ul & SqrtTable[index]) << 4); gD.i.lo = 0ul; yD.i.hi = yhead + ((0xfful & SqrtTable[index]) << 12); yD.i.lo = 0ul; d = x2 - gD.d*gD.d; y = yD.d; // initial 0.5/Sqrt guess g = gD.d + y*d; // 16-bit g y2 = y + y; e = 0.5 - y*g; d = x2 - g*g; y = y + e*y2; // 16-bit y g = g + y*d; // 32-bit g y2 = y + y; e = 0.5 - y*g; d = x2 - g*g; y = y + e*y2; // 32-bit y g = g + y*d; // 64-bit Sqrt d = (x2 - g*g)*y; // tail of 0.5*Sqrt (32 bits) y2 = g + g; // 2*Sqrt if (x > 0.0) { // positive x s = (g*x2)*(aa0 + x2*temp1) + d; // low order terms FESETENVD (fpenv.d); // restore caller's mode return (y2 + 2.0*s); // combine terms } // [positive x] else { // negative x u = 2.0*kPiBy2 - y2; // high-order term v = 2.0*kPiBy2 - u - y2; // tail correction s = (g*x2)*(aa0 + x2*temp1) + ((d - kPiBy2Tail) - 0.5*v); // collect low-order terms FESETENVD (fpenv.d); // restore caller's mode s = -s; return (u + 2.0*s); // combine terms } // [negative x] } // [0.5 < |x| < 1.0] // |x| == 1.0: return exact +0.0 or inexact pi if (x == 1.0) { FESETENVD (fpenv.d); // restore caller's mode return (0.0); } if (x == -1.0) { fpenv.i.lo |= FE_INEXACT; FESETENVD (fpenv.d); // restore caller's mode return (kPiu.d + 2.0*kPiBy2Tail); } // |x| > 1.0 signals INVALID and returns a NaN fpenv.i.lo |= SET_INVALID; FESETENVD (fpenv.d); return ( nan ( INVERSE_TRIGONOMETRIC_NAN ) ); } #else double acos(double x) { hexdouble cD, yD, gD, fpenv; double absx, x2, x3, x4, temp1, temp2, s, u, v, w; double g, y, d, y2, e; int index; unsigned long ghead,yhead; register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_k2ToM26, FPR_cD, FPR_kMinNormal, FPR_kPiBy2, FPR_kPiBy2Tail; register struct kBlock *pK; pK = &kBlock; FPR_z = 0.0; FPR_half = 0.5; FPR_k2ToM26 = pK->k2ToM26; FPR_kMinNormal = pK->kMinNormal; FPR_kPiBy2 = pK->kPiBy2; FPR_one = 1.0; if (x != x) // NaN return ( copysign ( nan ( INVERSE_TRIGONOMETRIC_NAN ), x ) ); absx = __FABS(x); FEGETENVD ( FPR_env ); // save env, set default __ENSURE( FPR_z, FPR_half, FPR_k2ToM26 ); __ENSURE( FPR_one, FPR_kMinNormal, FPR_kPiBy2 ); // ensure register loads FPR_cD = __FNMSUB( FPR_half, absx, FPR_half ); FESETENVD ( FPR_z ); x2 = __FMUL( x, x ); // under/overflows are masked cD.d = FPR_cD; if ( absx <= FPR_half ) { // |x| <= 0.5 register double r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; register double r10, r11, r12, r13, r14; r14 = pK->a14; r13 = pK->a13; temp1 = __FMADD( r14, x2, r13 ); x4 = __FMUL( x2, x2 ); x3 = __FMUL( x, x2 ); u = FPR_kPiBy2 - x; // pi/2 - x (high-order term) r11 = pK->a11; r12 = pK->a12; r9 = pK->a9; r10 = pK->a10; temp1 = __FMADD( temp1, x4, r11 ); __ORI_NOOP; temp1 = __FMADD( temp1, x4, r9 ); temp2 = __FMADD( r12, x4, r10 ); r7 = pK->a7; r8 = pK->a8; r5 = pK->a5; r6 = pK->a6; temp1 = __FMADD( temp1, x4, r7 ); temp2 = __FMADD( temp2, x4, r8 ); temp1 = __FMADD( temp1, x4, r5 ); temp2 = __FMADD( temp2, x4, r6 ); r3 = pK->a3; r4 = pK->a4; r1 = pK->a1; r2 = pK->a2; temp1 = __FMADD( temp1, x4, r3 ); temp2 = __FMADD( temp2, x4, r4 ); temp1 = __FMADD( temp1, x4, r1 ); temp2 = __FMADD( temp2, x4, r2 ); r0 = pK->a0; FPR_kPiBy2Tail = pK->kPiBy2Tail; // finish up ArcCos(x) = pi/2 - ArcSin(x) temp1 = __FMADD( x2, temp2, temp1 ); v = FPR_kPiBy2 - u - x; // pi/2 - x (tail) temp1 = __FMADD( x2, temp1, a0 ); w = __FNMSUB( x3, temp1, (v + FPR_kPiBy2Tail) );// combine low order terms FESETENVD ( FPR_env ); // restore caller's mode __PROG_INEXACT( FPR_kPiBy2 ); return ( u + w ); } // [|x| <= 0.5] /**************************************************************************** For 0.5 < |x| < 1.0, a polynomial approximation is used to estimate the power series resulting from the identity: ArcSin(x) = pi/2 - 2*ArcSin(Sqrt(0.5(1.0 - x))). The square root is evaluated in-line and concurrently with the evaluation of the ArcSin. ****************************************************************************/ if ( absx < FPR_one ) { // 0.5 < |x| < 1.0 register double FPR_gD, FPR_two; register double r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; register double r10, r11, r12, r13, r14; register unsigned long GPR_p, GPR_q, GPR_r, GPR_s, GPR_t; x2 = FPR_cD; /* fmr */ x4 = __FMUL( x2, x2); GPR_t = cD.i.hi; r0 = pK->aa0; GPR_s = GPR_t + 0x3ff00000ul; __ORI_NOOP; r14 = pK->a14; r13 = pK->a13; r12 = pK->a12; r11 = pK->aa11; gD.i.lo = 0ul; index = (GPR_t >> 13) & 0xfful; GPR_p = SqrtTable[index]; // 3 instrs issue temp1 = __FMADD( r13, x4, r11 ); temp2 = __FMADD( r14, x4, r12 ); r10 = pK->aa10; r9 = pK->aa9; GPR_r = ((0xff00ul & GPR_p) << 4); ghead = (GPR_s >> 1) & 0x7ff00000ul; gD.i.hi = ghead + GPR_r; r8 = pK->aa8; r7 = pK->aa7; yhead = 0x7fc00000ul - ghead; yD.i.lo = 0ul; GPR_q = ((0xfful & GPR_p) << 12); yD.i.hi = yhead + GPR_q; r6 = pK->aa6; r5 = pK->aa5; temp1 = __FMADD( temp1, x4, r9 ); temp2 = __FMADD( temp2, x4, r10 ); temp1 = __FMADD( temp1, x4, r7 ); temp2 = __FMADD( temp2, x4, r8 ); r4 = pK->aa4; r3 = pK->aa3; r2 = pK->aa2; r1 = pK->aa1; temp1 = __FMADD( temp1, x4, r5 ); temp2 = __FMADD( temp2, x4, r6 ); temp1 = __FMADD( temp1, x4, r3 ); temp2 = __FMADD( temp2, x4, r4 ); FPR_two = 2.0; FPR_gD = gD.d; y = yD.d; temp1 = __FMADD( temp1, x4, r1 ); temp2 = __FMADD( temp2, x4, r2 ); d = __FNMSUB( FPR_gD, FPR_gD, x2 ); g = __FMADD( y, d, FPR_gD ); y2 = y + y; // 16-bit g e = __FNMSUB( y, g, FPR_half ); d = __FNMSUB( g, g, x2 ); y = __FMADD( e, y2, y); temp1 = __FMADD( x2, temp2, temp1 );// 16-bit y g = __FMADD( y, d, g ); y2 = y + y; // 32-bit g e = __FNMSUB( y, g, FPR_half ); d = __FNMSUB( g, g, x2 ); y = __FMADD( e, y2, y ); // 32-bit y g = __FMADD( y, d, g); // 64-bit Sqrt d = __FNMSUB( g, g, x2 ); // tail of 0.5*Sqrt (32 bits) y2 = g + g; // 2*Sqrt if (x > FPR_z) { // positive x s = __FMADD( __FMUL( g, x2 ), __FMADD( x2, temp1, aa0), d );// low order terms FESETENVD ( FPR_env ); // restore caller's mode __PROG_INEXACT( FPR_kPiBy2 ); return __FMADD( FPR_two, s, y2); // combine terms } // [positive x] else { // negative x register double q,r,ss,t; u = __FMSUB( FPR_two, FPR_kPiBy2, y2 ); // high-order term r = d - kPiBy2Tail; ss = __FMADD( x2, temp1, aa0 ); t = __FMUL( g, x2 ); v = __FMSUB( FPR_two, FPR_kPiBy2, u ) - y2; // tail correction q = __FNMSUB( FPR_half, v, r ); s = __FMADD( ss, t, q ); // collect low-order terms FESETENVD ( FPR_env ); // restore caller's mode __PROG_INEXACT( FPR_kPiBy2 ); return __FNMSUB( FPR_two, s, u); // combine terms } // [negative x] } // [0.5 < |x| < 1.0] // |x| == 1.0: return exact +0.0 or inexact pi if ( x == FPR_one ) { FESETENVD ( FPR_env ); // restore caller's mode return ( FPR_z ); } if ( x == -1.0 ) { FESETENVD ( FPR_env ); // restore caller's mode __PROG_INEXACT( FPR_kPiBy2 ); return (kPiu.d + 2.0*kPiBy2Tail); } // |x| > 1.0 signals INVALID and returns a NaN fpenv.d = FPR_env; fpenv.i.lo |= SET_INVALID; FESETENVD ( fpenv.d ); return ( nan ( INVERSE_TRIGONOMETRIC_NAN ) ); } // ArcCos(x) #endif #ifdef notdef float acosf( float x ) { return (float) acos ( x ); } #endif #else /* __APPLE_CC__ version */ #warning A higher version than gcc-932 is required. #endif /* __APPLE_CC__ version */ #endif /* __APPLE_CC__ */