/* * fpu/mathlib.h - Floating-point math support library * * Basilisk II (C) 1997-2005 Christian Bauer * * MC68881/68040 fpu emulation * * Original UAE FPU, copyright 1996 Herman ten Brugge * Rewrite for x86, copyright 1999-2001 Lauri Pesonen * New framework, copyright 2000-2001 Gwenole Beauchesne * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FPU_MATHLIB_H #define FPU_MATHLIB_H /* NOTE: this file shall be included only from fpu/fpu_*.cpp */ #undef PUBLIC #define PUBLIC extern #undef PRIVATE #define PRIVATE static #undef FFPU #define FFPU /**/ #undef FPU #define FPU fpu. // Define the following macro if branches are expensive. If so, // integer-based isnan() and isinf() functions are implemented. // TODO: move to Makefile.in #define BRANCHES_ARE_EXPENSIVE 1 // Use ISO C99 extended-precision math functions (glibc 2.1+) #define FPU_USE_ISO_C99 1 // NOTE: this is irrelevant on Win32 platforms since the MS libraries // don't support extended-precision floating-point computations #ifdef WIN32 #undef FPU_USE_ISO_C99 #endif // Use faster implementation of math functions, but this could cause // some incorrect results (?) // TODO: actually implement the slower but safer versions #define FPU_FAST_MATH 1 #if FPU_USE_ISO_C99 // NOTE: no prior shall be included at this point #define __USE_ISOC99 1 // for glibc 2.2.X and newer #define __USE_ISOC9X 1 // for glibc 2.1.X #include #else #include using namespace std; #endif /* -------------------------------------------------------------------------- */ /* --- Floating-point register types --- */ /* -------------------------------------------------------------------------- */ // Single : S 8*E 23*F #define FP_SINGLE_EXP_MAX 0xff #define FP_SINGLE_EXP_BIAS 0x7f // Double : S 11*E 52*F #define FP_DOUBLE_EXP_MAX 0x7ff #define FP_DOUBLE_EXP_BIAS 0x3ff // Extended : S 15*E 64*F #define FP_EXTENDED_EXP_MAX 0x7fff #define FP_EXTENDED_EXP_BIAS 0x3fff // Zeroes : E = 0 & F = 0 // Infinities : E = MAX & F = 0 // Not-A-Number : E = MAX & F # 0 /* -------------------------------------------------------------------------- */ /* --- Floating-point type shapes (IEEE-compliant) --- */ /* -------------------------------------------------------------------------- */ // Taken from glibc 2.2.x: ieee754.h // IEEE-754 float format union fpu_single_shape { fpu_single value; /* This is the IEEE 754 single-precision format. */ struct { #ifdef WORDS_BIGENDIAN unsigned int negative:1; unsigned int exponent:8; unsigned int mantissa:23; #else unsigned int mantissa:23; unsigned int exponent:8; unsigned int negative:1; #endif } ieee; /* This format makes it easier to see if a NaN is a signalling NaN. */ struct { #ifdef WORDS_BIGENDIAN unsigned int negative:1; unsigned int exponent:8; unsigned int quiet_nan:1; unsigned int mantissa:22; #else unsigned int mantissa:22; unsigned int quiet_nan:1; unsigned int exponent:8; unsigned int negative:1; #endif } ieee_nan; }; // IEEE-754 double format union fpu_double_shape { fpu_double value; /* This is the IEEE 754 double-precision format. */ struct { #ifdef WORDS_BIGENDIAN unsigned int negative:1; unsigned int exponent:11; /* Together these comprise the mantissa. */ unsigned int mantissa0:20; unsigned int mantissa1:32; #else # if HOST_FLOAT_WORDS_BIG_ENDIAN unsigned int mantissa0:20; unsigned int exponent:11; unsigned int negative:1; unsigned int mantissa1:32; # else /* Together these comprise the mantissa. */ unsigned int mantissa1:32; unsigned int mantissa0:20; unsigned int exponent:11; unsigned int negative:1; # endif #endif } ieee; /* This format makes it easier to see if a NaN is a signalling NaN. */ struct { #ifdef WORDS_BIGENDIAN unsigned int negative:1; unsigned int exponent:11; unsigned int quiet_nan:1; /* Together these comprise the mantissa. */ unsigned int mantissa0:19; unsigned int mantissa1:32; #else # if HOST_FLOAT_WORDS_BIG_ENDIAN unsigned int mantissa0:19; unsigned int quiet_nan:1; unsigned int exponent:11; unsigned int negative:1; unsigned int mantissa1:32; # else /* Together these comprise the mantissa. */ unsigned int mantissa1:32; unsigned int mantissa0:19; unsigned int quiet_nan:1; unsigned int exponent:11; unsigned int negative:1; # endif #endif } ieee_nan; /* This format is used to extract the sign_exponent and mantissa parts only */ struct { #if HOST_FLOAT_WORDS_BIG_ENDIAN unsigned int msw:32; unsigned int lsw:32; #else unsigned int lsw:32; unsigned int msw:32; #endif } parts; }; #ifdef USE_LONG_DOUBLE // IEEE-854 long double format union fpu_extended_shape { fpu_extended value; /* This is the IEEE 854 double-extended-precision format. */ struct { #ifdef WORDS_BIGENDIAN unsigned int negative:1; unsigned int exponent:15; unsigned int empty:16; unsigned int mantissa0:32; unsigned int mantissa1:32; #else # if HOST_FLOAT_WORDS_BIG_ENDIAN unsigned int exponent:15; unsigned int negative:1; unsigned int empty:16; unsigned int mantissa0:32; unsigned int mantissa1:32; # else unsigned int mantissa1:32; unsigned int mantissa0:32; unsigned int exponent:15; unsigned int negative:1; unsigned int empty:16; # endif #endif } ieee; /* This is for NaNs in the IEEE 854 double-extended-precision format. */ struct { #ifdef WORDS_BIGENDIAN unsigned int negative:1; unsigned int exponent:15; unsigned int empty:16; unsigned int one:1; unsigned int quiet_nan:1; unsigned int mantissa0:30; unsigned int mantissa1:32; #else # if HOST_FLOAT_WORDS_BIG_ENDIAN unsigned int exponent:15; unsigned int negative:1; unsigned int empty:16; unsigned int mantissa0:30; unsigned int quiet_nan:1; unsigned int one:1; unsigned int mantissa1:32; # else unsigned int mantissa1:32; unsigned int mantissa0:30; unsigned int quiet_nan:1; unsigned int one:1; unsigned int exponent:15; unsigned int negative:1; unsigned int empty:16; # endif #endif } ieee_nan; /* This format is used to extract the sign_exponent and mantissa parts only */ struct { #if HOST_FLOAT_WORDS_BIG_ENDIAN unsigned int sign_exponent:16; unsigned int empty:16; unsigned int msw:32; unsigned int lsw:32; #else unsigned int lsw:32; unsigned int msw:32; unsigned int sign_exponent:16; unsigned int empty:16; #endif } parts; }; #endif #ifdef USE_QUAD_DOUBLE // IEEE-854 quad double format union fpu_extended_shape { fpu_extended value; /* This is the IEEE 854 quad-precision format. */ struct { #ifdef WORDS_BIGENDIAN unsigned int negative:1; unsigned int exponent:15; unsigned int mantissa0:16; unsigned int mantissa1:32; unsigned int mantissa2:32; unsigned int mantissa3:32; #else unsigned int mantissa3:32; unsigned int mantissa2:32; unsigned int mantissa1:32; unsigned int mantissa0:16; unsigned int exponent:15; unsigned int negative:1; #endif } ieee; /* This is for NaNs in the IEEE 854 quad-precision format. */ struct { #ifdef WORDS_BIGENDIAN unsigned int negative:1; unsigned int exponent:15; unsigned int quiet_nan:1; unsigned int mantissa0:15; unsigned int mantissa1:30; unsigned int mantissa2:32; unsigned int mantissa3:32; #else unsigned int mantissa3:32; unsigned int mantissa2:32; unsigned int mantissa1:32; unsigned int mantissa0:15; unsigned int quiet_nan:1; unsigned int exponent:15; unsigned int negative:1; #endif } ieee_nan; /* This format is used to extract the sign_exponent and mantissa parts only */ #if HOST_FLOAT_WORDS_BIG_ENDIAN struct { uae_u64 msw; uae_u64 lsw; } parts64; struct { uae_u32 w0; uae_u32 w1; uae_u32 w2; uae_u32 w3; } parts32; #else struct { uae_u64 lsw; uae_u64 msw; } parts64; struct { uae_u32 w3; uae_u32 w2; uae_u32 w1; uae_u32 w0; } parts32; #endif }; #endif // Declare and initialize a pointer to a shape of the requested FP type #define fp_declare_init_shape(psvar, rfvar, ftype) \ fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar ) /* -------------------------------------------------------------------------- */ /* --- Extra Math Functions --- */ /* --- (most of them had to be defined before including ) --- */ /* -------------------------------------------------------------------------- */ #undef isnan #if 0 && defined(HAVE_ISNANL) # define isnan(x) isnanl((x)) #else # define isnan(x) fp_do_isnan((x)) #endif PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r) { #ifdef BRANCHES_ARE_EXPENSIVE #ifndef USE_LONG_DOUBLE fp_declare_init_shape(sxp, r, double); uae_s32 hx = sxp->parts.msw; uae_s32 lx = sxp->parts.lsw; hx &= 0x7fffffff; hx |= (uae_u32)(lx | (-lx)) >> 31; hx = 0x7ff00000 - hx; return (int)(((uae_u32)hx) >> 31); #elif USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); uae_s64 hx = sxp->parts64.msw; uae_s64 lx = sxp->parts64.lsw; hx &= 0x7fffffffffffffffLL; hx |= (uae_u64)(lx | (-lx)) >> 63; hx = 0x7fff000000000000LL - hx; return (int)((uae_u64)hx >> 63); #else fp_declare_init_shape(sxp, r, extended); uae_s32 se = sxp->parts.sign_exponent; uae_s32 hx = sxp->parts.msw; uae_s32 lx = sxp->parts.lsw; se = (se & 0x7fff) << 1; lx |= hx & 0x7fffffff; se |= (uae_u32)(lx | (-lx)) >> 31; se = 0xfffe - se; // TODO: check whether rshift count is 16 or 31 return (int)(((uae_u32)(se)) >> 16); #endif #else #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX) #else fp_declare_init_shape(sxp, r, double); return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX) #endif && (sxp->ieee_nan.mantissa0 != 0) && (sxp->ieee_nan.mantissa1 != 0) #ifdef USE_QUAD_DOUBLE && (sxp->ieee_nan.mantissa2 != 0) && (sxp->ieee_nan.mantissa3 != 0) #endif ; #endif } #undef isinf #if 0 && defined(HAVE_ISINFL) # define isinf(x) isinfl((x)) #else # define isinf(x) fp_do_isinf((x)) #endif PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r) { #ifdef BRANCHES_ARE_EXPENSIVE #ifndef USE_LONG_DOUBLE fp_declare_init_shape(sxp, r, double); uae_s32 hx = sxp->parts.msw; uae_s32 lx = sxp->parts.lsw; lx |= (hx & 0x7fffffff) ^ 0x7ff00000; lx |= -lx; return ~(lx >> 31) & (hx >> 30); #elif USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); uae_s64 hx = sxp->parts64.msw; uae_s64 lx = sxp->parts64.lsw; lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL; lx |= -lx; return ~(lx >> 63) & (hx >> 62); #else fp_declare_init_shape(sxp, r, extended); uae_s32 se = sxp->parts.sign_exponent; uae_s32 hx = sxp->parts.msw; uae_s32 lx = sxp->parts.lsw; /* This additional ^ 0x80000000 is necessary because in Intel's internal representation of the implicit one is explicit. NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */ #ifdef __i386__ lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff); #else lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff); #endif lx |= -lx; se &= 0x8000; return ~(lx >> 31) & (1 - (se >> 14)); #endif #else #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX) #else fp_declare_init_shape(sxp, r, double); return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX) #endif && (sxp->ieee_nan.mantissa0 == 0) && (sxp->ieee_nan.mantissa1 == 0) #ifdef USE_QUAD_DOUBLE && (sxp->ieee_nan.mantissa2 == 0) && (sxp->ieee_nan.mantissa3 == 0) #endif ; #endif } #undef isneg #define isneg(x) fp_do_isneg((x)) PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r) { #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); #else fp_declare_init_shape(sxp, r, double); #endif return sxp->ieee.negative; } #undef iszero #define iszero(x) fp_do_iszero((x)) PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r) { // TODO: BRANCHES_ARE_EXPENSIVE #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); #else fp_declare_init_shape(sxp, r, double); #endif return (sxp->ieee.exponent == 0) && (sxp->ieee.mantissa0 == 0) && (sxp->ieee.mantissa1 == 0) #ifdef USE_QUAD_DOUBLE && (sxp->ieee.mantissa2 == 0) && (sxp->ieee.mantissa3 == 0) #endif ; } PRIVATE inline void FFPU get_dest_flags(fpu_register const & r) { fl_dest.negative = isneg(r); fl_dest.zero = iszero(r); fl_dest.infinity = isinf(r); fl_dest.nan = isnan(r); fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan; } PRIVATE inline void FFPU get_source_flags(fpu_register const & r) { fl_source.negative = isneg(r); fl_source.zero = iszero(r); fl_source.infinity = isinf(r); fl_source.nan = isnan(r); fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan; } PRIVATE inline void FFPU make_nan(fpu_register & r) { // FIXME: is that correct ? #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); sxp->ieee.exponent = FP_EXTENDED_EXP_MAX; sxp->ieee.mantissa0 = 0xffffffff; #else fp_declare_init_shape(sxp, r, double); sxp->ieee.exponent = FP_DOUBLE_EXP_MAX; sxp->ieee.mantissa0 = 0xfffff; #endif sxp->ieee.mantissa1 = 0xffffffff; #ifdef USE_QUAD_DOUBLE sxp->ieee.mantissa2 = 0xffffffff; sxp->ieee.mantissa3 = 0xffffffff; #endif } PRIVATE inline void FFPU make_zero_positive(fpu_register & r) { #if 1 r = +0.0; #else #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); #else fp_declare_init_shape(sxp, r, double); #endif sxp->ieee.negative = 0; sxp->ieee.exponent = 0; sxp->ieee.mantissa0 = 0; sxp->ieee.mantissa1 = 0; #ifdef USE_QUAD_DOUBLE sxp->ieee.mantissa2 = 0; sxp->ieee.mantissa3 = 0; #endif #endif } PRIVATE inline void FFPU make_zero_negative(fpu_register & r) { #if 1 r = -0.0; #else #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); #else fp_declare_init_shape(sxp, r, double); #endif sxp->ieee.negative = 1; sxp->ieee.exponent = 0; sxp->ieee.mantissa0 = 0; sxp->ieee.mantissa1 = 0; #ifdef USE_QUAD_DOUBLE sxp->ieee.mantissa2 = 0; sxp->ieee.mantissa3 = 0; #endif #endif } PRIVATE inline void FFPU make_inf_positive(fpu_register & r) { #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; #else fp_declare_init_shape(sxp, r, double); sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX; #endif sxp->ieee_nan.negative = 0; sxp->ieee_nan.mantissa0 = 0; sxp->ieee_nan.mantissa1 = 0; #ifdef USE_QUAD_DOUBLE sxp->ieee_nan.mantissa2 = 0; sxp->ieee_nan.mantissa3 = 0; #endif } PRIVATE inline void FFPU make_inf_negative(fpu_register & r) { #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; #else fp_declare_init_shape(sxp, r, double); sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX; #endif sxp->ieee_nan.negative = 1; sxp->ieee_nan.mantissa0 = 0; sxp->ieee_nan.mantissa1 = 0; #ifdef USE_QUAD_DOUBLE sxp->ieee_nan.mantissa2 = 0; sxp->ieee_nan.mantissa3 = 0; #endif } PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r) { #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS); #else fp_declare_init_shape(sxp, r, double); return (sxp->ieee.exponent - FP_DOUBLE_EXP_BIAS); #endif } // Normalize to range 1..2 PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r) { #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sxp, r, extended); sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS; #else fp_declare_init_shape(sxp, r, double); sxp->ieee.exponent = FP_DOUBLE_EXP_BIAS; #endif } // The sign of the quotient is the exclusive-OR of the sign bits // of the source and destination operands. PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb) { #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sap, ra, extended); fp_declare_init_shape(sbp, rb, extended); #else fp_declare_init_shape(sap, ra, double); fp_declare_init_shape(sbp, rb, double); #endif return ((sap->ieee.negative ^ sbp->ieee.negative) ? FPSR_QUOTIENT_SIGN : 0); } /* -------------------------------------------------------------------------- */ /* --- Math functions --- */ /* -------------------------------------------------------------------------- */ #if FPU_USE_ISO_C99 && (USE_LONG_DOUBLE || USE_QUAD_DOUBLE) # ifdef HAVE_LOGL # define fp_log logl # endif # ifdef HAVE_LOG10L # define fp_log10 log10l # endif # ifdef HAVE_EXPL # define fp_exp expl # endif # ifdef HAVE_POWL # define fp_pow powl # endif # ifdef HAVE_FABSL # define fp_fabs fabsl # endif # ifdef HAVE_SQRTL # define fp_sqrt sqrtl # endif # ifdef HAVE_SINL # define fp_sin sinl # endif # ifdef HAVE_COSL # define fp_cos cosl # endif # ifdef HAVE_TANL # define fp_tan tanl # endif # ifdef HAVE_SINHL # define fp_sinh sinhl # endif # ifdef HAVE_COSHL # define fp_cosh coshl # endif # ifdef HAVE_TANHL # define fp_tanh tanhl # endif # ifdef HAVE_ASINL # define fp_asin asinl # endif # ifdef HAVE_ACOSL # define fp_acos acosl # endif # ifdef HAVE_ATANL # define fp_atan atanl # endif # ifdef HAVE_ASINHL # define fp_asinh asinhl # endif # ifdef HAVE_ACOSHL # define fp_acosh acoshl # endif # ifdef HAVE_ATANHL # define fp_atanh atanhl # endif # ifdef HAVE_FLOORL # define fp_floor floorl # endif # ifdef HAVE_CEILL # define fp_ceil ceill # endif #endif #ifndef fp_log # define fp_log log #endif #ifndef fp_log10 # define fp_log10 log10 #endif #ifndef fp_exp # define fp_exp exp #endif #ifndef fp_pow # define fp_pow pow #endif #ifndef fp_fabs # define fp_fabs fabs #endif #ifndef fp_sqrt # define fp_sqrt sqrt #endif #ifndef fp_sin # define fp_sin sin #endif #ifndef fp_cos # define fp_cos cos #endif #ifndef fp_tan # define fp_tan tan #endif #ifndef fp_sinh # define fp_sinh sinh #endif #ifndef fp_cosh # define fp_cosh cosh #endif #ifndef fp_tanh # define fp_tanh tanh #endif #ifndef fp_asin # define fp_asin asin #endif #ifndef fp_acos # define fp_acos acos #endif #ifndef fp_atan # define fp_atan atan #endif #ifndef fp_asinh # define fp_asinh asinh #endif #ifndef fp_acosh # define fp_acosh acosh #endif #ifndef fp_atanh # define fp_atanh atanh #endif #ifndef fp_floor # define fp_floor floor #endif #ifndef fp_ceil # define fp_ceil ceil #endif #if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY) // Assembly optimized support functions. Taken from glibc 2.2.2 #undef fp_log #define fp_log fp_do_log #ifndef FPU_FAST_MATH // FIXME: unimplemented PRIVATE fpu_extended fp_do_log(fpu_extended x); #else PRIVATE inline fpu_extended fp_do_log(fpu_extended x) { fpu_extended value; __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); return value; } #endif #undef fp_log10 #define fp_log10 fp_do_log10 #ifndef FPU_FAST_MATH // FIXME: unimplemented PRIVATE fpu_extended fp_do_log10(fpu_extended x); #else PRIVATE inline fpu_extended fp_do_log10(fpu_extended x) { fpu_extended value; __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); return value; } #endif #undef fp_exp #define fp_exp fp_do_exp #ifndef FPU_FAST_MATH // FIXME: unimplemented PRIVATE fpu_extended fp_do_exp(fpu_extended x); #else PRIVATE inline fpu_extended fp_do_exp(fpu_extended x) { fpu_extended value, exponent; __asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t" "fmul %%st(1) # x * log2(e)\n\t" "fst %%st(1)\n\t" "frndint # int(x * log2(e))\n\t" "fxch\n\t" "fsub %%st(1) # fract(x * log2(e))\n\t" "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" : "=t" (value), "=u" (exponent) : "0" (x)); value += 1.0; __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent)); return value; } #endif #undef fp_pow #define fp_pow fp_do_pow PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y); #undef fp_fabs #define fp_fabs fp_do_fabs PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x) { fpu_extended value; __asm__ __volatile__("fabs" : "=t" (value) : "0" (x)); return value; } #undef fp_sqrt #define fp_sqrt fp_do_sqrt PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x) { fpu_extended value; __asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x)); return value; } #undef fp_sin #define fp_sin fp_do_sin PRIVATE inline fpu_extended fp_do_sin(fpu_extended x) { fpu_extended value; __asm__ __volatile__("fsin" : "=t" (value) : "0" (x)); return value; } #undef fp_cos #define fp_cos fp_do_cos PRIVATE inline fpu_extended fp_do_cos(fpu_extended x) { fpu_extended value; __asm__ __volatile__("fcos" : "=t" (value) : "0" (x)); return value; } #undef fp_tan #define fp_tan fp_do_tan PRIVATE inline fpu_extended fp_do_tan(fpu_extended x) { fpu_extended value; __asm__ __volatile__("fptan" : "=t" (value) : "0" (x)); return value; } #undef fp_expm1 #define fp_expm1 fp_do_expm1 // Returns: exp(X) - 1.0 PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x) { fpu_extended value, exponent, temp; __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t" "fmul %%st(1) # x * log2(e)\n\t" "fst %%st(1)\n\t" "frndint # int(x * log2(e))\n\t" "fxch\n\t" "fsub %%st(1) # fract(x * log2(e))\n\t" "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t" : "=t" (value), "=u" (exponent) : "0" (x)); __asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent)); temp -= 1.0; return temp + value ? temp + value : x; } #undef fp_sgn1 #define fp_sgn1 fp_do_sgn1 PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x) { #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE fp_declare_init_shape(sxp, x, extended); sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; sxp->ieee_nan.one = 1; #else fp_declare_init_shape(sxp, x, double); sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX; #endif sxp->ieee_nan.quiet_nan = 0; sxp->ieee_nan.mantissa0 = 0; sxp->ieee_nan.mantissa1 = 0; return x; } #undef fp_sinh #define fp_sinh fp_do_sinh #ifndef FPU_FAST_MATH // FIXME: unimplemented PRIVATE fpu_extended fp_do_sinh(fpu_extended x); #else PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x) { fpu_extended exm1 = fp_expm1(fp_fabs(x)); return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x); } #endif #undef fp_cosh #define fp_cosh fp_do_cosh #ifndef FPU_FAST_MATH // FIXME: unimplemented PRIVATE fpu_extended fp_do_cosh(fpu_extended x); #else PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x) { fpu_extended ex = fp_exp(x); return 0.5 * (ex + 1.0 / ex); } #endif #undef fp_tanh #define fp_tanh fp_do_tanh #ifndef FPU_FAST_MATH // FIXME: unimplemented PRIVATE fpu_extended fp_do_tanh(fpu_extended x); #else PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x) { fpu_extended exm1 = fp_expm1(-fp_fabs(x + x)); return exm1 / (exm1 + 2.0) * fp_sgn1(-x); } #endif #undef fp_atan2 #define fp_atan2 fp_do_atan2 PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x) { fpu_extended value; __asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)"); return value; } #undef fp_asin #define fp_asin fp_do_asin PRIVATE inline fpu_extended fp_do_asin(fpu_extended x) { return fp_atan2(x, fp_sqrt(1.0 - x * x)); } #undef fp_acos #define fp_acos fp_do_acos PRIVATE inline fpu_extended fp_do_acos(fpu_extended x) { return fp_atan2(fp_sqrt(1.0 - x * x), x); } #undef fp_atan #define fp_atan fp_do_atan PRIVATE inline fpu_extended fp_do_atan(fpu_extended x) { fpu_extended value; __asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)"); return value; } #undef fp_log1p #define fp_log1p fp_do_log1p // Returns: ln(1.0 + X) PRIVATE fpu_extended fp_do_log1p(fpu_extended x); #undef fp_asinh #define fp_asinh fp_do_asinh PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x) { fpu_extended y = fp_fabs(x); return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x)); } #undef fp_acosh #define fp_acosh fp_do_acosh PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x) { return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0)); } #undef fp_atanh #define fp_atanh fp_do_atanh PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x) { fpu_extended y = fp_fabs(x); return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x); } #undef fp_floor #define fp_floor fp_do_floor PRIVATE inline fpu_extended fp_do_floor(fpu_extended x) { volatile unsigned int cw; __asm__ __volatile__("fnstcw %0" : "=m" (cw)); volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); fpu_extended value; __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); __asm__ __volatile__("fldcw %0" : : "m" (cw)); return value; } #undef fp_ceil #define fp_ceil fp_do_ceil PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x) { volatile unsigned int cw; __asm__ __volatile__("fnstcw %0" : "=m" (cw)); volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); fpu_extended value; __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); __asm__ __volatile__("fldcw %0" : : "m" (cw)); return value; } #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \ PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \ { \ volatile unsigned int cw; \ __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \ volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \ __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \ fpu_extended value; \ __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \ __asm__ __volatile__("fldcw %0" : : "m" (cw)); \ return value; \ } #undef fp_round_to_minus_infinity #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity DEFINE_ROUND_FUNC(minus_infinity, 0x400) #undef fp_round_to_plus_infinity #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity DEFINE_ROUND_FUNC(plus_infinity, 0x800) #undef fp_round_to_zero #define fp_round_to_zero fp_do_round_to_zero DEFINE_ROUND_FUNC(zero, 0xc00) #undef fp_round_to_nearest #define fp_round_to_nearest fp_do_round_to_nearest DEFINE_ROUND_FUNC(nearest, 0x000) #endif /* USE_X87_ASSEMBLY */ #ifndef fp_round_to_minus_infinity #define fp_round_to_minus_infinity(x) fp_floor(x) #endif #ifndef fp_round_to_plus_infinity #define fp_round_to_plus_infinity(x) fp_ceil(x) #endif #ifndef fp_round_to_zero #define fp_round_to_zero(x) ((int)(x)) #endif #ifndef fp_round_to_nearest #define fp_round_to_nearest(x) ((int)((x) + 0.5)) #endif #endif /* FPU_MATHLIB_H */