/* Copyright (C) 1997, 1998, 1999, 2000 Victor Shoup This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ***************************************************** The quad_float package is derived from the doubledouble package of Keith Briggs. However, the version employed in NTL has been extensively modified. Below, I attach the copyright notice from the original doubledouble package, which is currently available at http://www.labs.bt.com/people/briggsk2 ***************************************************** Copyright (C) 1997 Keith Martin Briggs Except where otherwise indicated, this program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include #include #include #include NTL_START_IMPL #if (defined(__GNUC__) && NTL_EXT_DOUBLE && (defined(__i386__) || defined(__i486__) || defined(__i586__))) #if (!defined(NTL_X86_FIX) && !defined(NTL_NO_X86_FIX)) #define NTL_X86_FIX #endif #endif #if (NTL_EXT_DOUBLE && !defined(NTL_X86_FIX)) #define DOUBLE volatile double #else #define DOUBLE double #endif #ifdef NTL_X86_FIX #define START_FIX \ volatile unsigned short __old_cw, __new_cw; \ asm volatile ("fnstcw %0":"=m" (__old_cw)); \ __new_cw = (__old_cw & ~0x300) | 0x200; \ asm volatile ("fldcw %0": :"m" (__new_cw)); #define END_FIX asm volatile ("fldcw %0": :"m" (__old_cw)); #else #define START_FIX #define END_FIX #endif static void normalize(quad_float& z, const double& xhi, const double& xlo) { START_FIX DOUBLE u, v; u = xhi + xlo; v = xhi - u; v = v + xlo; z.hi = u; z.lo = v; END_FIX } #if (NTL_BITS_PER_LONG >= NTL_DOUBLE_PRECISION) quad_float to_quad_float(long n) { START_FIX DOUBLE xhi, xlo; DOUBLE u, v; xhi = double(n); // Because we are assuming 2's compliment integer // arithmetic, the following prevents long(xhi) from overflowing. if (n > 0) xlo = double(n+long(-xhi)); else xlo = double(n-long(xhi)); // renormalize...just to be safe u = xhi + xlo; v = xhi - u; v = v + xlo; END_FIX return quad_float(u, v); } quad_float to_quad_float(unsigned long n) { START_FIX DOUBLE xhi, xlo, t; DOUBLE u, v; const double bnd = double(1L << (NTL_BITS_PER_LONG-2))*4.0; xhi = double(n); if (xhi >= bnd) t = xhi - bnd; else t = xhi; // we use the "to_long" function here to be as portable as possible. long llo = to_long(n - (unsigned long)(t)); xlo = double(llo); // renormalize...just to be safe u = xhi + xlo; v = xhi - u; v = v + xlo; END_FIX return quad_float(u, v); } #endif long quad_float::oprec = 10; void quad_float::SetOutputPrecision(long p) { if (p < 1) p = 1; if (NTL_OVERFLOW(p, 1, 0)) Error("quad_float: output precision too big"); oprec = p; } quad_float operator +(const quad_float& x, const quad_float& y ) { START_FIX DOUBLE H, h, T, t, S, s, e, f; DOUBLE t1; S = x.hi + y.hi; T = x.lo + y.lo; e = S - x.hi; f = T - x.lo; t1 = S-e; t1 = x.hi-t1; s = y.hi-e; s = s + t1; t1 = T-f; t1 = x.lo-t1; t = y.lo-f; t = t + t1; s = s + T; H = S + s; h = S - H; h = h + s; h = h + t; e = H + h; f = H - e; f = f + h; END_FIX return quad_float(e, f); } quad_float& operator +=(quad_float& x, const quad_float& y ) { START_FIX DOUBLE H, h, T, t, S, s, e, f; DOUBLE t1; S = x.hi + y.hi; T = x.lo + y.lo; e = S - x.hi; f = T - x.lo; t1 = S-e; t1 = x.hi-t1; s = y.hi-e; s = s + t1; t1 = T-f; t1 = x.lo-t1; t = y.lo-f; t = t + t1; s = s + T; H = S + s; h = S - H; h = h + s; h = h + t; e = H + h; f = H - e; f = f + h; x.hi = e; x.lo = f; END_FIX return x; } quad_float operator -(const quad_float& x, const quad_float& y ) { START_FIX DOUBLE H, h, T, t, S, s, e, f; DOUBLE t1, yhi, ylo; yhi = -y.hi; ylo = -y.lo; S = x.hi + yhi; T = x.lo + ylo; e = S - x.hi; f = T - x.lo; t1 = S-e; t1 = x.hi-t1; s = yhi-e; s = s + t1; t1 = T-f; t1 = x.lo-t1; t = ylo-f; t = t + t1; s = s + T; H = S + s; h = S - H; h = h + s; h = h + t; e = H + h; f = H - e; f = f + h; END_FIX return quad_float(e, f); } quad_float& operator -=(quad_float& x, const quad_float& y ) { START_FIX DOUBLE H, h, T, t, S, s, e, f; DOUBLE t1, yhi, ylo; yhi = -y.hi; ylo = -y.lo; S = x.hi + yhi; T = x.lo + ylo; e = S - x.hi; f = T - x.lo; t1 = S-e; t1 = x.hi-t1; s = yhi-e; s = s + t1; t1 = T-f; t1 = x.lo-t1; t = ylo-f; t = t + t1; s = s + T; H = S + s; h = S - H; h = h + s; h = h + t; e = H + h; f = H - e; f = f + h; x.hi = e; x.lo = f; END_FIX return x; } quad_float operator -(const quad_float& x) { START_FIX DOUBLE xhi, xlo, u, v; xhi = -x.hi; xlo = -x.lo; // it is a good idea to renormalize here, just in case // the rounding rule depends on sign, and thus we will // maintain the "normal form" for quad_float's. u = xhi + xlo; v = xhi - u; v = v + xlo; END_FIX return quad_float(u, v); } quad_float operator *(const quad_float& x,const quad_float& y ) { START_FIX DOUBLE hx, tx, hy, ty, C, c; DOUBLE t1, t2; C = NTL_QUAD_FLOAT_SPLIT*x.hi; hx = C-x.hi; c = NTL_QUAD_FLOAT_SPLIT*y.hi; hx = C-hx; tx = x.hi-hx; hy = c-y.hi; C = x.hi*y.hi; hy = c-hy; ty = y.hi-hy; // c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(x.hi*y.lo+x.lo*y.hi); t1 = hx*hy; t1 = t1-C; t2 = hx*ty; t1 = t1+t2; t2 = tx*hy; t1 = t1+t2; t2 = tx*ty; c = t1+t2; t1 = x.hi*y.lo; t2 = x.lo*y.hi; t1 = t1+t2; c = c + t1; hx = C+c; tx = C-hx; tx = tx+c; END_FIX return quad_float(hx, tx); } quad_float& operator *=(quad_float& x,const quad_float& y ) { START_FIX DOUBLE hx, tx, hy, ty, C, c; DOUBLE t1, t2; C = NTL_QUAD_FLOAT_SPLIT*x.hi; hx = C-x.hi; c = NTL_QUAD_FLOAT_SPLIT*y.hi; hx = C-hx; tx = x.hi-hx; hy = c-y.hi; C = x.hi*y.hi; hy = c-hy; ty = y.hi-hy; // c = ((((hx*hy-C)+hx*ty)+tx*hy)+tx*ty)+(x.hi*y.lo+x.lo*y.hi); t1 = hx*hy; t1 = t1-C; t2 = hx*ty; t1 = t1+t2; t2 = tx*hy; t1 = t1+t2; t2 = tx*ty; c = t1+t2; t1 = x.hi*y.lo; t2 = x.lo*y.hi; t1 = t1+t2; c = c + t1; hx = C+c; tx = C-hx; tx = tx+c; x.hi = hx; x.lo = tx; END_FIX return x; } quad_float operator /(const quad_float& x, const quad_float& y ) { START_FIX DOUBLE hc, tc, hy, ty, C, c, U, u; DOUBLE t1; C = x.hi/y.hi; c = NTL_QUAD_FLOAT_SPLIT*C; hc = c-C; u = NTL_QUAD_FLOAT_SPLIT*y.hi; hc = c-hc; tc = C-hc; hy = u-y.hi; U = C * y.hi; hy = u-hy; ty = y.hi-hy; // u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty; u = hc*hy; u = u-U; t1 = hc*ty; u = u+t1; t1 = tc*hy; u = u+t1; t1 = tc*ty; u = u+t1; // c = ((((x.hi-U)-u)+x.lo)-C*y.lo)/y.hi; c = x.hi-U; c = c-u; c = c+x.lo; t1 = C*y.lo; c = c - t1; c = c/y.hi; hy = C+c; ty = C-hy; ty = ty+c; END_FIX return quad_float(hy, ty); } quad_float& operator /=(quad_float& x, const quad_float& y ) { START_FIX DOUBLE hc, tc, hy, ty, C, c, U, u; DOUBLE t1; C = x.hi/y.hi; c = NTL_QUAD_FLOAT_SPLIT*C; hc = c-C; u = NTL_QUAD_FLOAT_SPLIT*y.hi; hc = c-hc; tc = C-hc; hy = u-y.hi; U = C * y.hi; hy = u-hy; ty = y.hi-hy; // u = (((hc*hy-U)+hc*ty)+tc*hy)+tc*ty; u = hc*hy; u = u-U; t1 = hc*ty; u = u+t1; t1 = tc*hy; u = u+t1; t1 = tc*ty; u = u+t1; // c = ((((x.hi-U)-u)+x.lo)-C*y.lo)/y.hi; c = x.hi-U; c = c-u; c = c+x.lo; t1 = C*y.lo; c = c - t1; c = c/y.hi; hy = C+c; ty = C-hy; ty = ty+c; x.hi = hy; x.lo = ty; END_FIX return x; } quad_float sqrt(const quad_float& y) { if (y.hi < 0.0) Error("Quad: attempto to take square root of negative number"); if (y.hi == 0.0) return quad_float(0.0,0.0); double c; c = sqrt(y.hi); ForceToMem(&c); // This is fairly paranoid, but it doesn't cost too much. START_FIX DOUBLE p,q,hx,tx,u,uu,cc; DOUBLE t1; p = NTL_QUAD_FLOAT_SPLIT*c; hx = (c-p); hx = hx+p; tx = c-hx; p = hx*hx; q = hx*tx; q = q+q; u = p+q; uu = p-u; uu = uu+q; t1 = tx*tx; uu = uu+t1; cc = y.hi-u; cc = cc-uu; cc = cc+y.lo; t1 = c+c; cc = cc/t1; hx = c+cc; tx = c-hx; tx = tx+cc; END_FIX return quad_float(hx, tx); } void power(quad_float& z, const quad_float& a, long e) { quad_float res, u; unsigned long k; if (e < 0) k = -((unsigned long) e); else k = e; res = 1.0; u = a; while (k) { if (k & 1) res = res * u; k = k >> 1; if (k) u = u * u; } if (e < 0) z = 1.0/res; else z = res; } void power2(quad_float& z, long e) { z.hi = _ntl_ldexp(1.0, e); z.lo = 0; } long to_long(const quad_float& x) { double fhi, flo; fhi = floor(x.hi); if (fhi == x.hi) flo = floor(x.lo); else flo = 0; // the following code helps to prevent unnecessary integer overflow, // and guarantees that to_long(to_quad_float(a)) == a, for all long a, // provided long's are not too wide. if (fhi > 0) return long(flo) - long(-fhi); else return long(fhi) + long(flo); } // This version of ZZ to quad_float coversion relies on the // precise rounding rules implemented by the ZZ to double conversion. void conv(quad_float& z, const ZZ& a) { double xhi, xlo; conv(xhi, a); if (!IsFinite(&xhi)) { z.hi = xhi; z.lo = 0; return; } static ZZ t; conv(t, xhi); sub(t, a, t); conv(xlo, t); normalize(z, xhi, xlo); // The following is just paranoia. if (fabs(z.hi) < NTL_FDOUBLE_PRECISION && z.lo != 0) Error("internal error: ZZ to quad_float conversion"); } void conv(ZZ& z, const quad_float& x) { static ZZ t1, t2, t3; double fhi, flo; fhi = floor(x.hi); if (fhi == x.hi) { flo = floor(x.lo); conv(t1, fhi); conv(t2, flo); add(z, t1, t2); } else conv(z, fhi); } ostream& operator<<(ostream& s, const quad_float& a) { quad_float aa = a; if (!IsFinite(&aa)) { s << "NaN"; return s; } long old_p = RR::precision(); long old_op = RR::OutputPrecision(); RR::SetPrecision(long(3.33*quad_float::oprec) + 10); RR::SetOutputPrecision(quad_float::oprec); static RR t; conv(t, a); s << t; RR::SetPrecision(old_p); RR::SetOutputPrecision(old_op); return s; } istream& operator>>(istream& s, quad_float& x) { long old_p = RR::precision(); RR::SetPrecision(4*NTL_DOUBLE_PRECISION); static RR t; s >> t; conv(x, t); RR::SetPrecision(old_p); return s; } void random(quad_float& x) { long old_p = RR::precision(); RR::SetPrecision(4*NTL_DOUBLE_PRECISION); static RR t; random(t); conv(x, t); RR::SetPrecision(old_p); } quad_float random_quad_float() { quad_float x; random(x); return x; } long IsFinite(quad_float *x) { return IsFinite(&x->hi) && IsFinite(&x->lo); } long PrecisionOK() { START_FIX long k; DOUBLE l1 = (double)1; DOUBLE lh = 1/(double)2; DOUBLE epsilon; DOUBLE fudge, oldfudge; epsilon = l1; fudge = l1+l1; k = 0; do { k++; epsilon = epsilon * lh; oldfudge = fudge; fudge = l1 + epsilon; } while (fudge > l1 && fudge < oldfudge); END_FIX return k == NTL_DOUBLE_PRECISION; } quad_float floor(const quad_float& x) { double fhi = floor(x.hi); if (fhi != x.hi) return quad_float(fhi, 0.0); else { double flo = floor(x.lo); quad_float z; normalize(z, fhi, flo); return z; } } quad_float ceil(const quad_float& x) { return -floor(-x); } quad_float trunc(const quad_float& x) { if (x>=0.0) return floor(x); else return -floor(-x); } long compare(const quad_float& x, const quad_float& y) { if (x.hi > y.hi) return 1; else if (x.hi < y.hi) return -1; else if (x.lo > y.lo) return 1; else if (x.lo < y.lo) return -1; else return 0; } quad_float fabs(const quad_float& x) { if (x.hi>=0.0) return x; else return -x; } quad_float to_quad_float(const char *s) { quad_float x; long old_p = RR::precision(); RR::SetPrecision(4*NTL_DOUBLE_PRECISION); static RR t; conv(t, s); conv(x, t); RR::SetPrecision(old_p); return x; } quad_float ldexp(const quad_float& x, long exp) { // x*2^exp double xhi, xlo; quad_float z; xhi = _ntl_ldexp(x.hi, exp); xlo = _ntl_ldexp(x.lo, exp); normalize(z, xhi, xlo); return z; } quad_float exp(const quad_float& x) { // New version 97 Aug 05 /* ! Calculate a quadruple-precision exponential ! Method: ! x x.log2(e) nint[x.log2(e)] + frac[x.log2(e)] ! e = 2 = 2 ! ! iy fy ! = 2 . 2 ! Then ! fy y.loge(2) ! 2 = e ! ! Now y.loge(2) will be less than 0.3466 in absolute value. ! This is halved and a Pade aproximation is used to approximate e^x over ! the region (-0.1733, +0.1733). This approximation is then squared. */ if (x.hiDBL_MAX_10_EXP*2.302585092994045684017991) { Error("exp(quad_float): overflow"); } // changed this from "const" to "static" in v5.3, since "const" // causes the initialization to be performed with *every* invocation. static quad_float Log2 = to_quad_float("0.6931471805599453094172321214581765680755"); quad_float y,temp,ysq,sum1,sum2; long iy; y=x/Log2; temp = floor(y+0.5); iy = to_long(temp); y=(y-temp)*Log2; y=ldexp(y,-1L); ysq=y*y; sum1=y*((((ysq+3960.0)*ysq+2162160.0)*ysq+302702400.0)*ysq+8821612800.0); sum2=(((90.0*ysq+110880.0)*ysq+30270240.0)*ysq+2075673600.0)*ysq+17643225600.0; /* ! sum2 + sum1 2.sum1 ! Now approximation = ----------- = 1 + ----------- = 1 + 2.temp ! sum2 - sum1 sum2 - sum1 ! ! Then (1 + 2.temp)^2 = 4.temp.(1 + temp) + 1 */ temp=sum1/(sum2-sum1); y=temp*(temp+1); y=ldexp(y,2L); return ldexp(y+1,iy); } quad_float log(const quad_float& t) { // Newton method. See Bailey, MPFUN if (t.hi <= 0.0) { Error("log(quad_float): argument must be positive"); } double s1 = log(t.hi); ForceToMem(&s1); // Again, this is fairly paranoid. quad_float s; s = s1; quad_float e; e=exp(s); return s+(t-e)/e; // Newton step } long operator> (const quad_float& x, const quad_float& y) { return (x.hi> y.hi) || (x.hi==y.hi && x.lo> y.lo); } long operator>=(const quad_float& x, const quad_float& y) { return (x.hi>y.hi) || (x.hi==y.hi && x.lo>=y.lo); } long operator< (const quad_float& x, const quad_float& y) { return (x.hi< y.hi) || (x.hi==y.hi && x.lo< y.lo); } long operator<=(const quad_float& x, const quad_float& y) { return (x.hi