/* * This is a "wrapper" layer that builds on top of the "mpn" layer of gmp. * This layer provides much of the same functionality of the "mpz" * layer of gmp, but the interface it provides is much more like * the interface provided by lip. * * This layer was written under the following assumptions about gmp: * 1) mp_limb_t is an unsigned integral type * 2) sizeof(mp_limb_t) == sizeof(long) or sizeof(mp_limb_t) == 2*sizeof(long) * 3) the number of bits of an mp_limb_t is equal to that of a long, * or twice that of a long * 4) the number of bits of a gmp radix is equal to the number of bits * of an mp_limb_t * * Except for assumption (1), these assumptions are verified in the * installation script, and they should be universally satisfied in practice, * except when gmp is built using the proposed, new "nail" fetaure * (in which some bits of an mp_limb_t are unused). * The code here will not work properly with the "nail" feature; * however, I have (attempted to) identify all such problem spots, * and any other places where assumptions (2-4) are made, * with a comment labeled "DIRT". */ #include #include #include #include #include #include typedef mp_limb_t *_ntl_limb_t_ptr; int _ntl_gmp_hack = 0; #if (__GNU_MP_VERSION < 3) #error "You have to use GNP version >= 3.1" #endif #if ((__GNU_MP_VERSION == 3) && (__GNU_MP_VERSION_MINOR < 1)) #error "You have to use GNP version >= 3.1" #endif /* v 3.1 is supposed mpn_tdiv_qr defined, but it doesn't. Here's a workaround */ #if ((__GNU_MP_VERSION == 3) && (__GNU_MP_VERSION_MINOR == 1) && (__GNU_MP_VERSION_PATCHLEVEL == 0)) #define mpn_tdiv_qr __MPN(tdiv_qr) #ifdef __cplusplus extern "C" #endif void mpn_tdiv_qr(mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t); #endif #if (defined(NTL_CXX_ONLY) && !defined(__cplusplus)) #error "CXX_ONLY flag set...must use C++ compiler" #endif union gbigint_header { long info[2]; mp_limb_t alignment; }; /* A bigint is represented as two long's, ALLOC and SIZE, followed by a * vector DATA of mp_limb_t's. * * ALLOC is of the form * (alloc << 2) | continue_flag | frozen_flag * where * - alloc is the number of allocated mp_limb_t's, * - continue flag is either 2 or 0, * - frozen_flag is either 1 or 0. * If frozen_flag is set, then the space for this bigint is *not* * managed by the _ntl_gsetlength and _ntl_gfree routines, * but are instead managed by the vec_ZZ_p and ZZVec routines. * The continue_flag is only set when the frozen_flag is set. * * SIZE is the number of mp_limb_t's actually * used by the bigint, with the sign of SIZE having * the sign of the bigint. * Note that the zero bigint is represented as SIZE=0. * * Bigint's are accessed through a handle, which is pointer to void. * A null handle logically represents the bigint zero. * This is done so that the interface presented to higher level * routines is essentially the same as that of NTL's traditional * long integer package. * * The components ALLOC, SIZE, and DATA are all accessed through * macros using pointer casts. While all of may seem a bit dirty, * it should be quite portable: objects are never referenced * through pointers of different types, and no alignmement * problems should arise. * * Actually, mp_limb_t is usually the type unsigned long. * However, on some 64-bit platforms, the type long is only 32 bits, * and gmp makes mp_limb_t unsigned long long in this case. * This is fairly rare, as the industry standard for Unix is to * have 64-bit longs on 64-bit machines. */ #if 1 #define ALLOC(p) (((long *) (p))[0]) #define SIZE(p) (((long *) (p))[1]) #define DATA(p) ((mp_limb_t *) (((long *) (p)) + 2)) #define STORAGE(len) ((long)(2*sizeof(long) + (len)*sizeof(mp_limb_t))) /* DIRT: STORAGE computes the number of bytes to allocate for a bigint * of maximal SIZE len. This should be computed so that one * can store several such bigints in a contiguous array * of memory without breaking any alignment requirements. * Currently, it is assumed (and explicitly checked in the NTL installation * script) that sizeof(mp_limb_t) is either sizeof(long) or * 2*sizeof(long), and therfore, nothing special needs to * be done to enfoce alignment requirements. If this assumption * should change, then the storage layout for bigints must be * re-designed. */ #define MustAlloc(c, len) (!(c) || (ALLOC(c) >> 2) < (len)) #define GET_SIZE_NEG(sz, neg, p) \ do \ { \ long _s; \ _s = SIZE(p); \ if (_s < 0) { \ sz = -_s; \ neg = 1; \ } \ else { \ sz = _s; \ neg = 0; \ } \ } \ while (0) #define STRIP(sz, p) \ do \ { \ long _i; \ _i = sz - 1; \ while (_i >= 0 && p[_i] == 0) _i--; \ sz = _i + 1; \ } \ while (0) #define ZEROP(p) (!p || !SIZE(p)) #define ONEP(p) (p && SIZE(p) == 1 && DATA(p)[0] == 1) #define SWAP_BIGINT(a, b) \ do \ { \ _ntl_gbigint _t; \ _t = a; \ a = b; \ b = _t; \ } \ while (0) #define SWAP_LONG(a, b) \ do \ { \ long _t; \ _t = a; \ a = b; \ b = _t; \ } \ while (0) #define SWAP_LIMB_PTR(a, b) \ do \ { \ _ntl_limb_t_ptr _t; \ _t = a; \ a = b; \ b = _t; \ } \ while (0) #define COUNT_BITS(cnt, a) \ do \ { \ long _i = 0; \ mp_limb_t _a = (a); \ \ while (_a>=256) \ _i += 8, _a >>= 8; \ if (_a >=16) \ _i += 4, _a >>= 4; \ if (_a >= 4) \ _i += 2, _a >>= 2; \ if (_a >= 2) \ _i += 2; \ else if (_a >= 1) \ _i++; \ \ cnt = _i; \ } \ while (0) #else /* These are C++ inline functions that are equivalent to the above * macros. They are mainly intended as a debugging aid. */ inline long& ALLOC(_ntl_gbigint p) { return (((long *) p)[0]); } inline long& SIZE(_ntl_gbigint p) { return (((long *) p)[1]); } inline mp_limb_t * DATA(_ntl_gbigint p) { return ((mp_limb_t *) (((long *) (p)) + 2)); } inline long STORAGE(long len) { return ((long)(2*sizeof(long) + (len)*sizeof(mp_limb_t))); } inline long MustAlloc(_ntl_gbigint c, long len) { return (!(c) || (ALLOC(c) >> 2) < (len)); } inline void GET_SIZE_NEG(long& sz, long& neg, _ntl_gbigint p) { long s; s = SIZE(p); if (s < 0) { sz = -s; neg = 1; } else { sz = s; neg = 0; } } inline void STRIP(long& sz, mp_limb_t *p) { long i; i = sz - 1; while (i >= 0 && p[i] == 0) i--; sz = i + 1; } inline long ZEROP(_ntl_gbigint p) { return !p || !SIZE(p); } inline long ONEP(_ntl_gbigint p) { return p && SIZE(p) == 1 && DATA(p)[0] == 1; } inline void SWAP_BIGINT(_ntl_gbigint& a, _ntl_gbigint& b) { _ntl_gbigint t; t = a; a = b; b = t; } inline void SWAP_LONG(long& a, long& b) { long t; t = a; a = b; b = t; } inline void SWAP_LIMB_PTR(_ntl_limb_t_ptr& a, _ntl_limb_t_ptr& b) { _ntl_limb_t_ptr t; t = a; a = b; b = t; } inline void COUNT_BITS(long& cnt, mp_limb_t a) { long i = 0; while (a>=256) i += 8, a >>= 8; if (a >=16) i += 4, a >>= 4; if (a >= 4) i += 2, a >>= 2; if (a >= 2) i += 2; else if (a >= 1) i++; cnt = i; } #endif #define STORAGE_OVF(len) NTL_OVERFLOW(len, sizeof(mp_limb_t), 2*sizeof(long)) static void ghalt(char *c) { fprintf(stderr,"fatal error:\n %s\nexit...\n",c); fflush(stderr); abort(); } #define MIN_SETL (4) /* _ntl_gsetlength allocates a multiple of MIN_SETL digits */ void _ntl_gsetlength(_ntl_gbigint *v, long len) { _ntl_gbigint x = *v; if (len < 0) ghalt("negative size allocation in _ntl_zgetlength"); if (NTL_OVERFLOW(len, NTL_ZZ_NBITS, 0)) ghalt("size too big in _ntl_gsetlength"); #ifdef NTL_SMALL_MP_SIZE_T /* this makes sure that numbers don't get too big for GMP */ if (len >= (1L << (NTL_BITS_PER_INT-4))) ghalt("size too big for GMP"); #endif if (x) { long oldlen = ALLOC(x); long fixed = oldlen & 1; oldlen = oldlen >> 2; if (fixed) { if (len > oldlen) ghalt("internal error: can't grow this _ntl_gbigint"); else return; } if (len <= oldlen) return; len++; /* always allocate at least one more than requested */ oldlen = (long) (oldlen * 1.2); /* always increase by at least 20% */ if (len < oldlen) len = oldlen; /* round up to multiple of MIN_SETL */ len = ((len+(MIN_SETL-1))/MIN_SETL)*MIN_SETL; /* test len again */ if (NTL_OVERFLOW(len, NTL_ZZ_NBITS, 0)) ghalt("size too big in _ntl_gsetlength"); if (STORAGE_OVF(len)) ghalt("reallocation failed in _ntl_gsetlength"); ALLOC(x) = len << 2; if (!(x = (_ntl_gbigint)NTL_REALLOC((void *) x, 1, STORAGE(len), 0))) { ghalt("reallocation failed in _ntl_gsetlength"); } } else { len++; len = ((len+(MIN_SETL-1))/MIN_SETL)*MIN_SETL; /* test len again */ if (NTL_OVERFLOW(len, NTL_ZZ_NBITS, 0)) ghalt("size too big in _ntl_gsetlength"); if (STORAGE_OVF(len)) ghalt("reallocation failed in _ntl_gsetlength"); if (!(x = (_ntl_gbigint)NTL_MALLOC(1, STORAGE(len), 0))) { ghalt("allocation failed in _ntl_gsetlength"); } ALLOC(x) = len << 2; SIZE(x) = 0; } *v = x; } void _ntl_gfree(_ntl_gbigint *xx) { _ntl_gbigint x = *xx; if (!x) return; if (ALLOC(x) & 1) ghalt("Internal error: can't free this _ntl_gbigint"); free((void*) x); *xx = 0; return; } void _ntl_gswap(_ntl_gbigint *a, _ntl_gbigint *b) { _ntl_gbigint c; if ((*a && (ALLOC(*a) & 1)) || (*b && (ALLOC(*b) & 1))) { static _ntl_gbigint t = 0; _ntl_gcopy(*a, &t); _ntl_gcopy(*b, a); _ntl_gcopy(t, b); return; } c = *a; *a = *b; *b = c; } void _ntl_gcopy(_ntl_gbigint a, _ntl_gbigint *bb) { _ntl_gbigint b; long sa, abs_sa, i; mp_limb_t *adata, *bdata; b = *bb; if (!a || (sa = SIZE(a)) == 0) { if (b) SIZE(b) = 0; } else { if (a != b) { if (sa >= 0) abs_sa = sa; else abs_sa = -sa; if (MustAlloc(b, abs_sa)) { _ntl_gsetlength(&b, abs_sa); *bb = b; } adata = DATA(a); bdata = DATA(b); for (i = 0; i < abs_sa; i++) bdata[i] = adata[i]; SIZE(b) = sa; } } } void _ntl_gzero(_ntl_gbigint *aa) { _ntl_gbigint a = *aa; if (a) SIZE(a) = 0; } void _ntl_gone(_ntl_gbigint *aa) { _ntl_gbigint a = *aa; if (!a) { _ntl_gsetlength(&a, 1); *aa = a; } SIZE(a) = 1; DATA(a)[0] = 1; } long _ntl_giszero(_ntl_gbigint a) { return ZEROP(a); } long _ntl_godd(_ntl_gbigint a) { if (ZEROP(a)) return 0; else return DATA(a)[0]&1; } long _ntl_gbit(_ntl_gbigint a, long p) { long bl; long sa; mp_limb_t wh; if (p < 0 || !a) return 0; bl = p/NTL_ZZ_NBITS; wh = ((mp_limb_t) 1) << (p - NTL_ZZ_NBITS*bl); sa = SIZE(a); if (sa < 0) sa = -sa; if (sa <= bl) return 0; if (DATA(a)[bl] & wh) return 1; return 0; } void _ntl_glowbits(_ntl_gbigint a, long b, _ntl_gbigint *cc) { _ntl_gbigint c; long bl; long wh; long sa; long i; mp_limb_t *adata, *cdata; if (ZEROP(a) || (b<=0)) { _ntl_gzero(cc); return; } bl = b/NTL_ZZ_NBITS; wh = b - NTL_ZZ_NBITS*bl; if (wh != 0) bl++; else wh = NTL_ZZ_NBITS; sa = SIZE(a); if (sa < 0) sa = -sa; if (sa < bl) { _ntl_gcopy(a,cc); _ntl_gabs(cc); return; } c = *cc; /* a won't move if c aliases a */ _ntl_gsetlength(&c, bl); *cc = c; adata = DATA(a); cdata = DATA(c); for (i = 0; i < bl-1; i++) cdata[i] = adata[i]; if (wh == NTL_ZZ_NBITS) cdata[bl-1] = adata[bl-1]; else cdata[bl-1] = adata[bl-1] & ((((mp_limb_t) 1) << wh) - ((mp_limb_t) 1)); STRIP(bl, cdata); SIZE(c) = bl; } long _ntl_gslowbits(_ntl_gbigint a, long p) { static _ntl_gbigint x = 0; if (p > NTL_BITS_PER_LONG) p = NTL_BITS_PER_LONG; _ntl_glowbits(a, p, &x); return _ntl_gtoint(x); } long _ntl_gsetbit(_ntl_gbigint *a, long b) { long bl; long sa, aneg; long i; mp_limb_t wh, *adata, tmp; if (b<0) ghalt("_ntl_gsetbit: negative index"); if (ZEROP(*a)) { _ntl_gintoz(1, a); _ntl_glshift(*a, b, a); return 0; } bl = (b/NTL_ZZ_NBITS); wh = ((mp_limb_t) 1) << (b - NTL_ZZ_NBITS*bl); GET_SIZE_NEG(sa, aneg, *a); if (sa > bl) { adata = DATA(*a); tmp = adata[bl] & wh; adata[bl] |= wh; if (tmp) return 1; return 0; } else { _ntl_gsetlength(a, bl+1); adata = DATA(*a); for (i = sa; i < bl; i++) adata[i] = 0; adata[bl] = wh; sa = bl+1; if (aneg) sa = -sa; SIZE(*a) = sa; return 0; } } long _ntl_gswitchbit(_ntl_gbigint *a, long b) { long bl; long sa, aneg; long i; mp_limb_t wh, *adata, tmp; if (b<0) ghalt("_ntl_gswitchbit: negative index"); if (ZEROP(*a)) { _ntl_gintoz(1, a); _ntl_glshift(*a, b, a); return 0; } bl = (b/NTL_ZZ_NBITS); wh = ((mp_limb_t) 1) << (b - NTL_ZZ_NBITS*bl); GET_SIZE_NEG(sa, aneg, *a); if (sa > bl) { adata = DATA(*a); tmp = adata[bl] & wh; adata[bl] ^= wh; if (bl == sa-1) { STRIP(sa, adata); if (aneg) sa = -sa; SIZE(*a) = sa; } if (tmp) return 1; return 0; } else { _ntl_gsetlength(a, bl+1); adata = DATA(*a); for (i = sa; i < bl; i++) adata[i] = 0; adata[bl] = wh; sa = bl+1; if (aneg) sa = -sa; SIZE(*a) = sa; return 0; } } long _ntl_gweights( long aa ) { unsigned long a; long res = 0; if (aa < 0) a = -((unsigned long) aa); else a = aa; while (a) { if (a & 1) res ++; a >>= 1; } return (res); } static long gweights_mp_limb( mp_limb_t a ) { long res = 0; while (a) { if (a & 1) res ++; a >>= 1; } return (res); } long _ntl_gweight( _ntl_gbigint a ) { long i; long sa; mp_limb_t *adata; long res; if (!a) return (0); sa = SIZE(a); if (sa < 0) sa = -sa; adata = DATA(a); res = 0; for (i = 0; i < sa; i++) res += gweights_mp_limb(adata[i]); return (res); } long _ntl_g2logs( long aa ) { long i = 0; unsigned long a; if (aa < 0) a = - ((unsigned long) aa); else a = aa; while (a>=256) i += 8, a >>= 8; if (a >=16) i += 4, a >>= 4; if (a >= 4) i += 2, a >>= 2; if (a >= 2) i += 2; else if (a >= 1) i++; return (i); } long _ntl_g2log(_ntl_gbigint a) { long la; long t; if (!a) return 0; la = SIZE(a); if (la == 0) return 0; if (la < 0) la = -la; COUNT_BITS(t, DATA(a)[la-1]); return NTL_ZZ_NBITS*(la - 1) + t; } long _ntl_gmakeodd(_ntl_gbigint *nn) { _ntl_gbigint n = *nn; long shift; mp_limb_t *ndata; mp_limb_t i; if (ZEROP(n)) return (0); shift = 0; ndata = DATA(n); while (ndata[shift] == 0) shift++; i = ndata[shift]; shift = NTL_ZZ_NBITS * shift; while ((i & 1) == 0) { shift++; i >>= 1; } _ntl_grshift(n, shift, &n); return shift; } long _ntl_gnumtwos(_ntl_gbigint n) { long shift; mp_limb_t *ndata; mp_limb_t i; if (ZEROP(n)) return (0); shift = 0; ndata = DATA(n); while (ndata[shift] == 0) shift++; i = ndata[shift]; shift = NTL_ZZ_NBITS * shift; while ((i & 1) == 0) { shift++; i >>= 1; } return shift; } void _ntl_gand(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) { _ntl_gbigint c; long sa; long sb; long sm; long i; long a_alias, b_alias; mp_limb_t *adata, *bdata, *cdata; if (ZEROP(a) || ZEROP(b)) { _ntl_gzero(cc); return; } c = *cc; a_alias = (a == c); b_alias = (b == c); sa = SIZE(a); if (sa < 0) sa = -sa; sb = SIZE(b); if (sb < 0) sb = -sb; sm = (sa > sb ? sb : sa); _ntl_gsetlength(&c, sm); if (a_alias) a = c; if (b_alias) b = c; *cc = c; adata = DATA(a); bdata = DATA(b); cdata = DATA(c); for (i = 0; i < sm; i++) cdata[i] = adata[i] & bdata[i]; STRIP(sm, cdata); SIZE(c) = sm; } void _ntl_gxor(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) { _ntl_gbigint c; long sa; long sb; long sm; long la; long i; long a_alias, b_alias; mp_limb_t *adata, *bdata, *cdata; if (ZEROP(a)) { _ntl_gcopy(b,cc); _ntl_gabs(cc); return; } if (ZEROP(b)) { _ntl_gcopy(a,cc); _ntl_gabs(cc); return; } c = *cc; a_alias = (a == c); b_alias = (b == c); sa = SIZE(a); if (sa < 0) sa = -sa; sb = SIZE(b); if (sb < 0) sb = -sb; if (sa > sb) { la = sa; sm = sb; } else { la = sb; sm = sa; } _ntl_gsetlength(&c, la); if (a_alias) a = c; if (b_alias) b = c; *cc = c; adata = DATA(a); bdata = DATA(b); cdata = DATA(c); for (i = 0; i < sm; i ++) cdata[i] = adata[i] ^ bdata[i]; if (sa > sb) for (;i < la; i++) cdata[i] = adata[i]; else for (;i < la; i++) cdata[i] = bdata[i]; STRIP(la, cdata); SIZE(c) = la; } void _ntl_gor(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) { _ntl_gbigint c; long sa; long sb; long sm; long la; long i; long a_alias, b_alias; mp_limb_t *adata, *bdata, *cdata; if (ZEROP(a)) { _ntl_gcopy(b,cc); _ntl_gabs(cc); return; } if (ZEROP(b)) { _ntl_gcopy(a,cc); _ntl_gabs(cc); return; } c = *cc; a_alias = (a == c); b_alias = (b == c); sa = SIZE(a); if (sa < 0) sa = -sa; sb = SIZE(b); if (sb < 0) sb = -sb; if (sa > sb) { la = sa; sm = sb; } else { la = sb; sm = sa; } _ntl_gsetlength(&c, la); if (a_alias) a = c; if (b_alias) b = c; *cc = c; adata = DATA(a); bdata = DATA(b); cdata = DATA(c); for (i = 0; i < sm; i ++) cdata[i] = adata[i] | bdata[i]; if (sa > sb) for (;i < la; i++) cdata[i] = adata[i]; else for (;i < la; i++) cdata[i] = bdata[i]; STRIP(la, cdata); SIZE(c) = la; } void _ntl_gnegate(_ntl_gbigint *aa) { _ntl_gbigint a = *aa; if (a) SIZE(a) = -SIZE(a); } /* * DIRT: this implementation of _ntl_gintoz relies crucially * on the assumption that the number of bits per limb_t is at least * equal to the number of bits per long. */ void _ntl_gintoz(long d, _ntl_gbigint *aa) { _ntl_gbigint a = *aa; if (d == 0) { if (a) SIZE(a) = 0; } else if (d > 0) { if (!a) { _ntl_gsetlength(&a, 1); *aa = a; } SIZE(a) = 1; DATA(a)[0] = d; } else { if (!a) { _ntl_gsetlength(&a, 1); *aa = a; } SIZE(a) = -1; DATA(a)[0] = -((mp_limb_t) d); /* careful! */ } } /* * DIRT: this implementation of _ntl_guintoz relies crucially * on the assumption that the number of bits per limb_t is at least * equal to the number of bits per long. */ void _ntl_guintoz(unsigned long d, _ntl_gbigint *aa) { _ntl_gbigint a = *aa; if (d == 0) { if (a) SIZE(a) = 0; } else { if (!a) { _ntl_gsetlength(&a, 1); *aa = a; } SIZE(a) = 1; DATA(a)[0] = d; } } long _ntl_gtoint(_ntl_gbigint a) { unsigned long res = _ntl_gtouint(a); return NTL_ULONG_TO_LONG(res); } /* * DIRT: this implementation of _ntl_gtouint relies crucially * on the assumption that the number of bits per limb_t is at least * equal to the number of bits per long. */ unsigned long _ntl_gtouint(_ntl_gbigint a) { if (ZEROP(a)) return 0; if (SIZE(a) > 0) return DATA(a)[0]; return -DATA(a)[0]; } long _ntl_gcompare(_ntl_gbigint a, _ntl_gbigint b) { long sa, sb, cmp; mp_limb_t *adata, *bdata; if (!a) sa = 0; else sa = SIZE(a); if (!b) sb = 0; else sb = SIZE(b); if (sa != sb) { if (sa > sb) return 1; else return -1; } if (sa == 0) return 0; adata = DATA(a); bdata = DATA(b); if (sa > 0) { cmp = mpn_cmp(adata, bdata, sa); if (cmp > 0) return 1; else if (cmp < 0) return -1; else return 0; } else { cmp = mpn_cmp(adata, bdata, -sa); if (cmp > 0) return -1; else if (cmp < 0) return 1; else return 0; } } long _ntl_gsign(_ntl_gbigint a) { long sa; if (!a) return 0; sa = SIZE(a); if (sa > 0) return 1; if (sa == 0) return 0; return -1; } void _ntl_gabs(_ntl_gbigint *pa) { _ntl_gbigint a = *pa; if (!a) return; if (SIZE(a) < 0) SIZE(a) = -SIZE(a); } long _ntl_gscompare(_ntl_gbigint a, long b) { if (b == 0) { long sa; if (!a) return 0; sa = SIZE(a); if (sa > 0) return 1; if (sa == 0) return 0; return -1; } else { static _ntl_gbigint B = 0; _ntl_gintoz(b, &B); return _ntl_gcompare(a, B); } } void _ntl_glshift(_ntl_gbigint n, long k, _ntl_gbigint *rres) { _ntl_gbigint res; mp_limb_t *ndata, *resdata, *resdata1; long limb_cnt, i, sn, nneg, sres; long n_alias; if (ZEROP(n)) { _ntl_gzero(rres); return; } res = *rres; n_alias = (n == res); if (!k) { if (!n_alias) _ntl_gcopy(n, rres); return; } if (k < 0) { if (k < -NTL_MAX_LONG) _ntl_gzero(rres); else _ntl_grshift(n, -k, rres); return; } GET_SIZE_NEG(sn, nneg, n); limb_cnt = k/NTL_ZZ_NBITS; sres = sn + limb_cnt + 1; if (MustAlloc(res, sres)) { _ntl_gsetlength(&res, sres); if (n_alias) n = res; *rres = res; } ndata = DATA(n); resdata = DATA(res); resdata1 = resdata + limb_cnt; k %= NTL_ZZ_NBITS; sres--; if (k != 0) { mp_limb_t t = mpn_lshift(resdata1, ndata, sn, k); if (t != 0) { resdata[sres] = t; sres++; } } else { for (i = sn-1; i >= 0; i--) resdata1[i] = ndata[i]; } for (i = 0; i < limb_cnt; i++) resdata[i] = 0; if (nneg) sres = -sres; SIZE(res) = sres; } void _ntl_grshift(_ntl_gbigint n, long k, _ntl_gbigint *rres) { _ntl_gbigint res; mp_limb_t *ndata, *resdata, *ndata1; long limb_cnt, i, sn, nneg, sres; if (ZEROP(n)) { _ntl_gzero(rres); return; } if (!k) { if (n != *rres) _ntl_gcopy(n, rres); return; } if (k < 0) { if (k < -NTL_MAX_LONG) ghalt("overflow in _ntl_glshift"); _ntl_glshift(n, -k, rres); return; } GET_SIZE_NEG(sn, nneg, n); limb_cnt = k/NTL_ZZ_NBITS; sres = sn - limb_cnt; if (sres <= 0) { _ntl_gzero(rres); return; } res = *rres; if (MustAlloc(res, sres)) { /* n won't move if res aliases n */ _ntl_gsetlength(&res, sres); *rres = res; } ndata = DATA(n); resdata = DATA(res); ndata1 = ndata + limb_cnt; k %= NTL_ZZ_NBITS; if (k != 0) { mpn_rshift(resdata, ndata1, sres, k); if (resdata[sres-1] == 0) sres--; } else { for (i = 0; i < sres; i++) resdata[i] = ndata1[i]; } if (nneg) sres = -sres; SIZE(res) = sres; } void _ntl_gadd(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) { long sa, aneg, sb, bneg, sc, cmp; mp_limb_t *adata, *bdata, *cdata, carry; _ntl_gbigint c; long a_alias, b_alias; if (ZEROP(a)) { _ntl_gcopy(b, cc); return; } if (ZEROP(b)) { _ntl_gcopy(a, cc); return; } GET_SIZE_NEG(sa, aneg, a); GET_SIZE_NEG(sb, bneg, b); if (sa < sb) { SWAP_BIGINT(a, b); SWAP_LONG(sa, sb); SWAP_LONG(aneg, bneg); } /* sa >= sb */ c = *cc; a_alias = (a == c); b_alias = (b == c); if (aneg == bneg) { /* same sign => addition */ sc = sa + 1; if (MustAlloc(c, sc)) { _ntl_gsetlength(&c, sc); if (a_alias) a = c; if (b_alias) b = c; *cc = c; } adata = DATA(a); bdata = DATA(b); cdata = DATA(c); carry = mpn_add(cdata, adata, sa, bdata, sb); if (carry) cdata[sc-1] = carry; else sc--; if (aneg) sc = -sc; SIZE(c) = sc; } else { /* opposite sign => subtraction */ sc = sa; if (MustAlloc(c, sc)) { _ntl_gsetlength(&c, sc); if (a_alias) a = c; if (b_alias) b = c; *cc = c; } adata = DATA(a); bdata = DATA(b); cdata = DATA(c); if (sa > sb) cmp = 1; else cmp = mpn_cmp(adata, bdata, sa); if (cmp == 0) { SIZE(c) = 0; } else { if (cmp < 0) cmp = 0; if (cmp > 0) cmp = 1; /* abs(a) != abs(b) && (abs(a) > abs(b) <=> cmp) */ if (cmp) mpn_sub(cdata, adata, sa, bdata, sb); else mpn_sub(cdata, bdata, sb, adata, sa); /* sa == sb */ STRIP(sc, cdata); if (aneg == cmp) sc = -sc; SIZE(c) = sc; } } } void _ntl_gsadd(_ntl_gbigint a, long b, _ntl_gbigint *cc) { static _ntl_gbigint B = 0; _ntl_gintoz(b, &B); _ntl_gadd(a, B, cc); } void _ntl_gsub(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) { long sa, aneg, sb, bneg, sc, cmp, rev; mp_limb_t *adata, *bdata, *cdata, carry; _ntl_gbigint c; long a_alias, b_alias; if (ZEROP(a)) { _ntl_gcopy(b, cc); c = *cc; if (c) SIZE(c) = -SIZE(c); return; } if (ZEROP(b)) { _ntl_gcopy(a, cc); return; } GET_SIZE_NEG(sa, aneg, a); GET_SIZE_NEG(sb, bneg, b); if (sa < sb) { SWAP_BIGINT(a, b); SWAP_LONG(sa, sb); SWAP_LONG(aneg, bneg); rev = 1; } else rev = 0; /* sa >= sb */ c = *cc; a_alias = (a == c); b_alias = (b == c); if (aneg != bneg) { /* opposite sign => addition */ sc = sa + 1; if (MustAlloc(c, sc)) { _ntl_gsetlength(&c, sc); if (a_alias) a = c; if (b_alias) b = c; *cc = c; } adata = DATA(a); bdata = DATA(b); cdata = DATA(c); carry = mpn_add(cdata, adata, sa, bdata, sb); if (carry) cdata[sc-1] = carry; else sc--; if (aneg ^ rev) sc = -sc; SIZE(c) = sc; } else { /* same sign => subtraction */ sc = sa; if (MustAlloc(c, sc)) { _ntl_gsetlength(&c, sc); if (a_alias) a = c; if (b_alias) b = c; *cc = c; } adata = DATA(a); bdata = DATA(b); cdata = DATA(c); if (sa > sb) cmp = 1; else cmp = mpn_cmp(adata, bdata, sa); if (cmp == 0) { SIZE(c) = 0; } else { if (cmp < 0) cmp = 0; if (cmp > 0) cmp = 1; /* abs(a) != abs(b) && (abs(a) > abs(b) <=> cmp) */ if (cmp) mpn_sub(cdata, adata, sa, bdata, sb); else mpn_sub(cdata, bdata, sb, adata, sa); /* sa == sb */ STRIP(sc, cdata); if ((aneg == cmp) ^ rev) sc = -sc; SIZE(c) = sc; } } } void _ntl_gsubpos(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) { long sa, sb, sc; mp_limb_t *adata, *bdata, *cdata; _ntl_gbigint c; long a_alias, b_alias; if (ZEROP(a)) { _ntl_gzero(cc); return; } if (ZEROP(b)) { _ntl_gcopy(a, cc); return; } sa = SIZE(a); sb = SIZE(b); c = *cc; a_alias = (a == c); b_alias = (b == c); sc = sa; if (MustAlloc(c, sc)) { _ntl_gsetlength(&c, sc); if (a_alias) a = c; if (b_alias) b = c; *cc = c; } adata = DATA(a); bdata = DATA(b); cdata = DATA(c); mpn_sub(cdata, adata, sa, bdata, sb); STRIP(sc, cdata); SIZE(c) = sc; } void _ntl_gmul(_ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint *cc) { static _ntl_gbigint mem = 0; long sa, aneg, sb, bneg, alias, sc; mp_limb_t *adata, *bdata, *cdata, msl; _ntl_gbigint c; if (ZEROP(a) || ZEROP(b)) { _ntl_gzero(cc); return; } GET_SIZE_NEG(sa, aneg, a); GET_SIZE_NEG(sb, bneg, b); if (a == *cc || b == *cc) { c = mem; alias = 1; } else { c = *cc; alias = 0; } sc = sa + sb; if (MustAlloc(c, sc)) _ntl_gsetlength(&c, sc); if (alias) mem = c; else *cc = c; adata = DATA(a); bdata = DATA(b); cdata = DATA(c); if (sa >= sb) msl = mpn_mul(cdata, adata, sa, bdata, sb); else msl = mpn_mul(cdata, bdata, sb, adata, sa); if (!msl) sc--; if (aneg != bneg) sc = -sc; SIZE(c) = sc; if (alias) _ntl_gcopy(mem, cc); } void _ntl_gsq(_ntl_gbigint a, _ntl_gbigint *cc) { _ntl_gmul(a, a, cc); /* this is good enough...eventually, mpn_sqr_n will be called */ } /* * DIRT: this implementation of _ntl_gsmul relies crucially * on the assumption that the number of bits per limb_t is at least * equal to the number of bits per long. */ void _ntl_gsmul(_ntl_gbigint a, long d, _ntl_gbigint *bb) { long sa, sb; long anegative, bnegative; _ntl_gbigint b; mp_limb_t *adata, *bdata; mp_limb_t dd, carry; long a_alias; if (ZEROP(a) || !d) { _ntl_gzero(bb); return; } GET_SIZE_NEG(sa, anegative, a); if (d < 0) { dd = - ((mp_limb_t) d); /* careful ! */ bnegative = 1-anegative; } else { dd = (mp_limb_t) d; bnegative = anegative; } sb = sa + 1; b = *bb; a_alias = (a == b); if (MustAlloc(b, sb)) { _ntl_gsetlength(&b, sb); if (a_alias) a = b; *bb = b; } adata = DATA(a); bdata = DATA(b); if (dd == 2) carry = mpn_lshift(bdata, adata, sa, 1); else carry = mpn_mul_1(bdata, adata, sa, dd); if (carry) bdata[sa] = carry; else sb--; if (bnegative) sb = -sb; SIZE(b) = sb; } /* * DIRT: this implementation of _ntl_gsdiv relies crucially * on the assumption that the number of bits per limb_t is at least * equal to the number of bits per long. */ long _ntl_gsdiv(_ntl_gbigint a, long d, _ntl_gbigint *bb) { long sa, aneg, sb, dneg; _ntl_gbigint b; mp_limb_t dd, *adata, *bdata; long r; if (!d) { ghalt("division by zero in _ntl_gsdiv"); } if (ZEROP(a)) { _ntl_gzero(bb); return (0); } GET_SIZE_NEG(sa, aneg, a); if (d < 0) { dd = - ((mp_limb_t) d); /* careful ! */ dneg = 1; } else { dd = (mp_limb_t) d; dneg = 0; } sb = sa; b = *bb; if (MustAlloc(b, sb)) { /* if b aliases a, then b won't move */ _ntl_gsetlength(&b, sb); *bb = b; } adata = DATA(a); bdata = DATA(b); if (dd == 2) r = mpn_rshift(bdata, adata, sa, 1) >> (NTL_ZZ_NBITS - 1); else r = mpn_divmod_1(bdata, adata, sa, dd); if (bdata[sb-1] == 0) sb--; SIZE(b) = sb; if (aneg || dneg) { if (aneg != dneg) { if (!r) { SIZE(b) = -SIZE(b); } else { _ntl_gsadd(b, 1, &b); SIZE(b) = -SIZE(b); if (dneg) r = r + d; else r = d - r; *bb = b; } } else r = -r; } return r; } /* * DIRT: this implementation of _ntl_gsmod relies crucially * on the assumption that the number of bits per limb_t is at least * equal to the number of bits per long. */ long _ntl_gsmod(_ntl_gbigint a, long d) { long sa, aneg, dneg; mp_limb_t dd, *adata; long r; if (!d) { ghalt("division by zero in _ntl_gsmod"); } if (ZEROP(a)) { return (0); } GET_SIZE_NEG(sa, aneg, a); if (d < 0) { dd = - ((mp_limb_t) d); /* careful ! */ dneg = 1; } else { dd = (mp_limb_t) d; dneg = 0; } adata = DATA(a); if (dd == 2) r = adata[0] & 1; else r = mpn_mod_1(adata, sa, dd); if (aneg || dneg) { if (aneg != dneg) { if (r) { if (dneg) r = r + d; else r = d - r; } } else r = -r; } return r; } void _ntl_gdiv(_ntl_gbigint a, _ntl_gbigint d, _ntl_gbigint *bb, _ntl_gbigint *rr) { static _ntl_gbigint b = 0, rmem = 0; _ntl_gbigint r; long sa, aneg, sb, sd, dneg, sr, in_place; mp_limb_t *adata, *ddata, *bdata, *rdata; if (ZEROP(d)) { ghalt("division by zero in _ntl_gdiv"); } if (ZEROP(a)) { if (bb) _ntl_gzero(bb); if (rr) _ntl_gzero(rr); return; } GET_SIZE_NEG(sa, aneg, a); GET_SIZE_NEG(sd, dneg, d); if (!aneg && !dneg && rr && *rr != a && *rr != d) { in_place = 1; r = *rr; } else { in_place = 0; r = rmem; } if (sa < sd) { _ntl_gzero(&b); _ntl_gcopy(a, &r); if (aneg) SIZE(r) = -SIZE(r); goto done; } sb = sa-sd+1; if (MustAlloc(b, sb)) _ntl_gsetlength(&b, sb); sr = sd; if (MustAlloc(r, sr)) _ntl_gsetlength(&r, sr); adata = DATA(a); ddata = DATA(d); bdata = DATA(b); rdata = DATA(r); mpn_tdiv_qr(bdata, rdata, 0, adata, sa, ddata, sd); if (bdata[sb-1] == 0) sb--; SIZE(b) = sb; STRIP(sr, rdata); SIZE(r) = sr; done: if (aneg || dneg) { if (aneg != dneg) { if (ZEROP(r)) { SIZE(b) = -SIZE(b); } else { if (bb) { _ntl_gsadd(b, 1, &b); SIZE(b) = -SIZE(b); } if (rr) { if (dneg) _ntl_gadd(r, d, &r); else _ntl_gsub(d, r, &r); } } } else SIZE(r) = -SIZE(r); } if (bb) _ntl_gcopy(b, bb); if (in_place) *rr = r; else { if (rr) _ntl_gcopy(r, rr); rmem = r; } } /* a simplified mod operation: assumes a >= 0, d > 0 are non-negative, * that space for the result has already been allocated, * and that inputs do not alias output. */ static void gmod_simple(_ntl_gbigint a, _ntl_gbigint d, _ntl_gbigint *rr) { static _ntl_gbigint b = 0; long sa, sb, sd, sr; mp_limb_t *adata, *ddata, *bdata, *rdata; _ntl_gbigint r; if (ZEROP(a)) { _ntl_gzero(rr); return; } sa = SIZE(a); sd = SIZE(d); if (sa < sd) { _ntl_gcopy(a, rr); return; } sb = sa-sd+1; if (MustAlloc(b, sb)) _ntl_gsetlength(&b, sb); sr = sd; r = *rr; adata = DATA(a); ddata = DATA(d); bdata = DATA(b); rdata = DATA(r); mpn_tdiv_qr(bdata, rdata, 0, adata, sa, ddata, sd); STRIP(sr, rdata); SIZE(r) = sr; } void _ntl_gmod(_ntl_gbigint a, _ntl_gbigint d, _ntl_gbigint *rr) { _ntl_gdiv(a, d, 0, rr); } void _ntl_gquickmod(_ntl_gbigint *rr, _ntl_gbigint d) { _ntl_gdiv(*rr, d, 0, rr); } void _ntl_gsqrt(_ntl_gbigint n, _ntl_gbigint *rr) { static _ntl_gbigint r = 0; long sn, sr; mp_limb_t *ndata, *rdata; if (ZEROP(n)) { _ntl_gzero(rr); return; } sn = SIZE(n); if (sn < 0) ghalt("negative argument to _ntl_sqrt"); sr = (sn+1)/2; _ntl_gsetlength(&r, sr); ndata = DATA(n); rdata = DATA(r); mpn_sqrtrem(rdata, 0, ndata, sn); STRIP(sr, rdata); SIZE(r) = sr; _ntl_gcopy(r, rr); } /* * DIRT: this implementation of _ntl_gsqrts relies crucially * on the assumption that the number of bits per limb_t is at least * equal to the number of bits per long. */ long _ntl_gsqrts(long n) { mp_limb_t ndata, rdata; if (n == 0) { return 0; } if (n < 0) ghalt("negative argument to _ntl_sqrts"); ndata = n; mpn_sqrtrem(&rdata, 0, &ndata, 1); return rdata; } void _ntl_ggcd(_ntl_gbigint m1, _ntl_gbigint m2, _ntl_gbigint *r) { static _ntl_gbigint s1 = 0, s2 = 0, res = 0; long k1, k2, k_min, l1, l2, ss1, ss2, sres; _ntl_gcopy(m1, &s1); _ntl_gabs(&s1); _ntl_gcopy(m2, &s2); _ntl_gabs(&s2); if (ZEROP(s1)) { _ntl_gcopy(s2, r); return; } if (ZEROP(s2)) { _ntl_gcopy(s1, r); return; } k1 = _ntl_gmakeodd(&s1); k2 = _ntl_gmakeodd(&s2); if (k1 <= k2) k_min = k1; else k_min = k2; l1 = _ntl_g2log(s1); l2 = _ntl_g2log(s2); ss1 = SIZE(s1); ss2 = SIZE(s2); if (ss1 >= ss2) sres = ss1; else sres = ss2; /* set to max: gmp documentation is unclear on this point */ _ntl_gsetlength(&res, sres); if (l1 >= l2) SIZE(res) = mpn_gcd(DATA(res), DATA(s1), ss1, DATA(s2), ss2); else SIZE(res) = mpn_gcd(DATA(res), DATA(s2), ss2, DATA(s1), ss1); _ntl_glshift(res, k_min, &res); _ntl_gcopy(res, r); } static long gxxeucl( _ntl_gbigint ain, _ntl_gbigint nin, _ntl_gbigint *invv, _ntl_gbigint *uu ) { static _ntl_gbigint a = 0; static _ntl_gbigint n = 0; static _ntl_gbigint q = 0; static _ntl_gbigint w = 0; static _ntl_gbigint x = 0; static _ntl_gbigint y = 0; static _ntl_gbigint z = 0; _ntl_gbigint inv = *invv; _ntl_gbigint u = *uu; long diff; long ilo; long sa; long sn; long temp; long e; long fast; long parity; long gotthem; mp_limb_t *p; long try11; long try12; long try21; long try22; long got11; long got12; long got21; long got22; double hi; double lo; double dt; double fhi, fhi1; double flo, flo1; double num; double den; double dirt; _ntl_gsetlength(&a, (e = (SIZE(ain) > SIZE(nin) ? SIZE(ain) : SIZE(nin)))); _ntl_gsetlength(&n, e); _ntl_gsetlength(&q, e); _ntl_gsetlength(&w, e); _ntl_gsetlength(&x, e); _ntl_gsetlength(&y, e); _ntl_gsetlength(&z, e); _ntl_gsetlength(&inv, e); *invv = inv; _ntl_gsetlength(&u, e); *uu = u; fhi1 = 1.0 + ((double) 32.0)/NTL_FDOUBLE_PRECISION; flo1 = 1.0 - ((double) 32.0)/NTL_FDOUBLE_PRECISION; fhi = 1.0 + ((double) 8.0)/NTL_FDOUBLE_PRECISION; flo = 1.0 - ((double) 8.0)/NTL_FDOUBLE_PRECISION; _ntl_gcopy(ain, &a); _ntl_gcopy(nin, &n); _ntl_gone(&inv); _ntl_gzero(&w); while (SIZE(n) > 0) { gotthem = 0; sa = SIZE(a); sn = SIZE(n); diff = sa - sn; if (!diff || diff == 1) { sa = SIZE(a); p = DATA(a) + (sa-1); num = (double) (*p) * NTL_ZZ_FRADIX; if (sa > 1) num += (*(--p)); num *= NTL_ZZ_FRADIX; if (sa > 2) num += (*(p - 1)); sn = SIZE(n); p = DATA(n) + (sn-1); den = (double) (*p) * NTL_ZZ_FRADIX; if (sn > 1) den += (*(--p)); den *= NTL_ZZ_FRADIX; if (sn > 2) den += (*(p - 1)); hi = fhi1 * (num + 1.0) / den; lo = flo1 * num / (den + 1.0); if (diff > 0) { hi *= NTL_ZZ_FRADIX; lo *= NTL_ZZ_FRADIX; } try11 = 1; try12 = 0; try21 = 0; try22 = 1; parity = 1; fast = 1; while (fast > 0) { parity = 1 - parity; if (hi >= NTL_SP_BOUND) fast = 0; else { ilo = (long)lo; dirt = hi - ilo; if (dirt < 1.0/NTL_FDOUBLE_PRECISION || !ilo || ilo < (long)hi) fast = 0; else { dt = lo-ilo; lo = flo / dirt; if (dt > 1.0/NTL_FDOUBLE_PRECISION) hi = fhi / dt; else hi = NTL_SP_BOUND; temp = try11; try11 = try21; if ((NTL_WSP_BOUND - temp) / ilo < try21) fast = 0; else try21 = temp + ilo * try21; temp = try12; try12 = try22; if ((NTL_WSP_BOUND - temp) / ilo < try22) fast = 0; else try22 = temp + ilo * try22; if ((fast > 0) && (parity > 0)) { gotthem = 1; got11 = try11; got12 = try12; got21 = try21; got22 = try22; } } } } } if (gotthem) { _ntl_gsmul(inv, got11, &x); _ntl_gsmul(w, got12, &y); _ntl_gsmul(inv, got21, &z); _ntl_gsmul(w, got22, &w); _ntl_gadd(x, y, &inv); _ntl_gadd(z, w, &w); _ntl_gsmul(a, got11, &x); _ntl_gsmul(n, got12, &y); _ntl_gsmul(a, got21, &z); _ntl_gsmul(n, got22, &n); _ntl_gsub(x, y, &a); _ntl_gsub(n, z, &n); } else { _ntl_gdiv(a, n, &q, &a); _ntl_gmul(q, w, &x); _ntl_gadd(inv, x, &inv); if (!ZEROP(a)) { _ntl_gdiv(n, a, &q, &n); _ntl_gmul(q, inv, &x); _ntl_gadd(w, x, &w); } else { _ntl_gcopy(n, &a); _ntl_gzero(&n); _ntl_gcopy(w, &inv); _ntl_gnegate(&inv); } } } if (_ntl_gscompare(a, 1) == 0) e = 0; else e = 1; _ntl_gcopy(a, &u); *invv = inv; *uu = u; return (e); } #if 0 void _ntl_gexteucl( _ntl_gbigint aa, _ntl_gbigint *xa, _ntl_gbigint bb, _ntl_gbigint *xb, _ntl_gbigint *d ) { static _ntl_gbigint modcon = 0; static _ntl_gbigint a=0, b=0; long anegative = 0; long bnegative = 0; _ntl_gcopy(aa, &a); _ntl_gcopy(bb, &b); if (a && SIZE(a) < 0) { anegative = 1; SIZE(a) = -SIZE(a); } else anegative = 0; if (b && SIZE(b) < 0) { bnegative = 1; SIZE(b) = -SIZE(b); } else bnegative = 0; if (ZEROP(b)) { _ntl_gone(xa); _ntl_gzero(xb); _ntl_gcopy(a, d); goto done; } if (ZEROP(a)) { _ntl_gzero(xa); _ntl_gone(xb); _ntl_gcopy(b, d); goto done; } gxxeucl(a, b, xa, d); _ntl_gmul(a, *xa, xb); _ntl_gsub(*d, *xb, xb); _ntl_gdiv(*xb, b, xb, &modcon); if (!ZEROP(modcon)) { ghalt("non-zero remainder in _ntl_gexteucl BUG"); } done: if (anegative) { _ntl_gnegate(xa); } if (bnegative) { _ntl_gnegate(xb); } } #endif void _ntl_gexteucl( _ntl_gbigint ain, _ntl_gbigint *xap, _ntl_gbigint bin, _ntl_gbigint *xbp, _ntl_gbigint *dp ) { if (ZEROP(bin)) { long asign = _ntl_gsign(ain); _ntl_gcopy(ain, dp); _ntl_gabs(dp); _ntl_gintoz( (asign >= 0 ? 1 : -1), xap); _ntl_gzero(xbp); } else if (ZEROP(ain)) { long bsign = _ntl_gsign(bin); _ntl_gcopy(bin, dp); _ntl_gabs(dp); _ntl_gzero(xap); _ntl_gintoz(bsign, xbp); } else { static _ntl_gbigint a = 0, b = 0, xa = 0, xb = 0, d = 0, tmp = 0; long sa, aneg, sb, bneg, rev; mp_limb_t *adata, *bdata, *ddata, *xadata; mp_size_t sxa, sd; GET_SIZE_NEG(sa, aneg, ain); GET_SIZE_NEG(sb, bneg, bin); _ntl_gsetlength(&a, sa+1); /* +1 because mpn_gcdext may need it */ _ntl_gcopy(ain, &a); _ntl_gsetlength(&b, sb+1); /* +1 because mpn_gcdext may need it */ _ntl_gcopy(bin, &b); adata = DATA(a); bdata = DATA(b); if (sa < sb || (sa == sb && mpn_cmp(adata, bdata, sa) < 0)) { SWAP_BIGINT(ain, bin); SWAP_LONG(sa, sb); SWAP_LONG(aneg, bneg); SWAP_LIMB_PTR(adata, bdata); rev = 1; } else rev = 0; _ntl_gsetlength(&d, sa+1); /* +1 because mpn_gcdext may need it... documentation is unclear, but this is what is done in mpz_gcdext */ _ntl_gsetlength(&xa, sa+1); /* ditto */ ddata = DATA(d); xadata = DATA(xa); sd = mpn_gcdext(ddata, xadata, &sxa, adata, sa, bdata, sb); SIZE(d) = sd; SIZE(xa) = sxa; if (aneg) _ntl_gnegate(&xa); _ntl_gmul(ain, xa, &tmp); _ntl_gsub(d, tmp, &tmp); _ntl_gdiv(tmp, bin, &xb, &tmp); if (!ZEROP(tmp)) ghalt("internal bug in _ntl_gexteucl"); if (rev) SWAP_BIGINT(xa, xb); _ntl_gcopy(xa, xap); _ntl_gcopy(xb, xbp); _ntl_gcopy(d, dp); } } long _ntl_ginv(_ntl_gbigint ain, _ntl_gbigint nin, _ntl_gbigint *invv) { static _ntl_gbigint u = 0; static _ntl_gbigint d = 0; static _ntl_gbigint a = 0; static _ntl_gbigint n = 0; long sz; long sd; mp_size_t su; if (_ntl_gscompare(nin, 1) <= 0) { ghalt("InvMod: second input <= 1"); } if (_ntl_gsign(ain) < 0) { ghalt("InvMod: first input negative"); } if (_ntl_gcompare(ain, nin) >= 0) { ghalt("InvMod: first input too big"); } sz = SIZE(nin) + 2; if (MustAlloc(a, sz)) _ntl_gsetlength(&a, sz); if (MustAlloc(n, sz)) _ntl_gsetlength(&n, sz); if (MustAlloc(d, sz)) _ntl_gsetlength(&d, sz); if (MustAlloc(u, sz)) _ntl_gsetlength(&u, sz); _ntl_gadd(ain, nin, &a); _ntl_gcopy(nin, &n); /* We apply mpn_gcdext to (a, n) = (ain+nin, nin), because that function * only computes the co-factor of the larger input. This way, we avoid * a multiplication and a division. */ sd = mpn_gcdext(DATA(d), DATA(u), &su, DATA(a), SIZE(a), DATA(n), SIZE(n)); SIZE(d) = sd; SIZE(u) = su; if (ONEP(d)) { /* * We make sure that u is in range 0..n-1, just in case * GMP is sloppy. */ while (_ntl_gsign(u) < 0) _ntl_gadd(u, nin, &u); while (_ntl_gcompare(u, nin) >= 0) _ntl_gsub(u, nin, &u); _ntl_gcopy(u, invv); return 0; } else { _ntl_gcopy(d, invv); return 1; } } void _ntl_ginvmod( _ntl_gbigint a, _ntl_gbigint n, _ntl_gbigint *c ) { if (_ntl_ginv(a, n, c)) ghalt("undefined inverse in _ntl_ginvmod"); } void _ntl_gaddmod( _ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint n, _ntl_gbigint *c ) { if (*c != n) { _ntl_gadd(a, b, c); if (_ntl_gcompare(*c, n) >= 0) _ntl_gsubpos(*c, n, c); } else { static _ntl_gbigint mem = 0; _ntl_gadd(a, b, &mem); if (_ntl_gcompare(mem, n) >= 0) _ntl_gsubpos(mem, n, c); else _ntl_gcopy(mem, c); } } void _ntl_gsubmod( _ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint n, _ntl_gbigint *c ) { static _ntl_gbigint mem = 0; long cmp; if ((cmp=_ntl_gcompare(a, b)) < 0) { _ntl_gadd(n, a, &mem); _ntl_gsubpos(mem, b, c); } else if (!cmp) _ntl_gzero(c); else _ntl_gsubpos(a, b, c); } void _ntl_gsmulmod( _ntl_gbigint a, long d, _ntl_gbigint n, _ntl_gbigint *c ) { static _ntl_gbigint mem = 0; _ntl_gsmul(a, d, &mem); _ntl_gmod(mem, n, c); } void _ntl_gmulmod( _ntl_gbigint a, _ntl_gbigint b, _ntl_gbigint n, _ntl_gbigint *c ) { static _ntl_gbigint mem = 0; _ntl_gmul(a, b, &mem); _ntl_gmod(mem, n, c); } void _ntl_gsqmod( _ntl_gbigint a, _ntl_gbigint n, _ntl_gbigint *c ) { _ntl_gmulmod(a, a, n, c); } double _ntl_gdoub_aux(_ntl_gbigint n) { double res; mp_limb_t *ndata; long i, sn, nneg; if (!n) return ((double) 0); GET_SIZE_NEG(sn, nneg, n); ndata = DATA(n); res = 0; for (i = sn-1; i >= 0; i--) res = res * NTL_ZZ_FRADIX + ((double) ndata[i]); if (nneg) res = -res; return res; } long _ntl_ground_correction(_ntl_gbigint a, long k, long residual) { long direction; long p; long sgn; long bl; mp_limb_t wh; long i; mp_limb_t *adata; if (SIZE(a) > 0) sgn = 1; else sgn = -1; adata = DATA(a); p = k - 1; bl = (p/NTL_ZZ_NBITS); wh = ((mp_limb_t) 1) << (p - NTL_ZZ_NBITS*bl); if (adata[bl] & wh) { /* bit is 1...we have to see if lower bits are all 0 in order to implement "round to even" */ if (adata[bl] & (wh - ((mp_limb_t) 1))) direction = 1; else { i = bl - 1; while (i >= 0 && adata[i] == 0) i--; if (i >= 0) direction = 1; else direction = 0; } /* use residual to break ties */ if (direction == 0 && residual != 0) { if (residual == sgn) direction = 1; else direction = -1; } if (direction == 0) { /* round to even */ wh = wh << 1; /* * DIRT: if GMP has non-empty "nails", this won't work. */ if (wh == 0) { wh = 1; bl++; } if (adata[bl] & wh) direction = 1; else direction = -1; } } else direction = -1; if (direction == 1) return sgn; return 0; } double _ntl_gdoub(_ntl_gbigint n) { static _ntl_gbigint tmp = 0; long s; long shamt; long correction; double x; s = _ntl_g2log(n); shamt = s - NTL_DOUBLE_PRECISION; if (shamt <= 0) return _ntl_gdoub_aux(n); _ntl_grshift(n, shamt, &tmp); correction = _ntl_ground_correction(n, shamt, 0); if (correction) _ntl_gsadd(tmp, correction, &tmp); x = _ntl_gdoub_aux(tmp); x = _ntl_ldexp(x, shamt); return x; } double _ntl_glog(_ntl_gbigint n) { static _ntl_gbigint tmp = 0; static double log_2; static long init = 0; long s; long shamt; long correction; double x; if (!init) { log_2 = log(2.0); init = 1; } if (_ntl_gsign(n) <= 0) ghalt("log argument <= 0"); s = _ntl_g2log(n); shamt = s - NTL_DOUBLE_PRECISION; if (shamt <= 0) return log(_ntl_gdoub_aux(n)); _ntl_grshift(n, shamt, &tmp); correction = _ntl_ground_correction(n, shamt, 0); if (correction) _ntl_gsadd(tmp, correction, &tmp); x = _ntl_gdoub_aux(tmp); return log(x) + shamt*log_2; } /* To implement _ntl_gdoubtoz, I've implemented essentially the * same algorithm as in LIP, processing in blocks of * NTL_SP_NBITS bits, rather than NTL_ZZ_NBITS. * This is conversion is rather delicate, and I don't want to * make any new assumptions about the underlying arithmetic. * This implementation should be quite portable. */ void _ntl_gdoubtoz(double a, _ntl_gbigint *xx) { static _ntl_gbigint x = 0; long neg, i, t, sz; a = floor(a); if (!_ntl_IsFinite(&a)) ghalt("_ntl_gdoubtoz: attempt to convert non-finite value"); if (a < 0) { a = -a; neg = 1; } else neg = 0; if (a == 0) { _ntl_gzero(xx); return; } sz = 0; while (a >= 1) { a = a*(1.0/NTL_SP_FBOUND); sz++; } i = 0; _ntl_gzero(&x); while (a != 0) { i++; a = a*NTL_SP_FBOUND; t = (long) a; a = a - t; if (i == 1) { _ntl_gintoz(t, &x); } else { _ntl_glshift(x, NTL_SP_NBITS, &x); _ntl_gsadd(x, t, &x); } } if (i > sz) ghalt("bug in _ntl_gdoubtoz"); _ntl_glshift(x, (sz-i)*NTL_SP_NBITS, xx); if (neg) _ntl_gnegate(xx); } /* I've adapted LIP's extended euclidean algorithm to * do rational reconstruction. -- VJS. */ long _ntl_gxxratrecon( _ntl_gbigint ain, _ntl_gbigint nin, _ntl_gbigint num_bound, _ntl_gbigint den_bound, _ntl_gbigint *num_out, _ntl_gbigint *den_out ) { static _ntl_gbigint a = 0; static _ntl_gbigint n = 0; static _ntl_gbigint q = 0; static _ntl_gbigint w = 0; static _ntl_gbigint x = 0; static _ntl_gbigint y = 0; static _ntl_gbigint z = 0; static _ntl_gbigint inv = 0; static _ntl_gbigint u = 0; static _ntl_gbigint a_bak = 0; static _ntl_gbigint n_bak = 0; static _ntl_gbigint inv_bak = 0; static _ntl_gbigint w_bak = 0; mp_limb_t *p; long diff; long ilo; long sa; long sn; long snum; long sden; long e; long fast; long temp; long parity; long gotthem; long try11; long try12; long try21; long try22; long got11; long got12; long got21; long got22; double hi; double lo; double dt; double fhi, fhi1; double flo, flo1; double num; double den; double dirt; if (_ntl_gsign(num_bound) < 0) ghalt("rational reconstruction: bad numerator bound"); if (!num_bound) snum = 0; else snum = SIZE(num_bound); if (_ntl_gsign(den_bound) <= 0) ghalt("rational reconstruction: bad denominator bound"); sden = SIZE(den_bound); if (_ntl_gsign(nin) <= 0) ghalt("rational reconstruction: bad modulus"); if (_ntl_gsign(ain) < 0 || _ntl_gcompare(ain, nin) >= 0) ghalt("rational reconstruction: bad residue"); e = SIZE(nin); _ntl_gsetlength(&a, e); _ntl_gsetlength(&n, e); _ntl_gsetlength(&q, e); _ntl_gsetlength(&w, e); _ntl_gsetlength(&x, e); _ntl_gsetlength(&y, e); _ntl_gsetlength(&z, e); _ntl_gsetlength(&inv, e); _ntl_gsetlength(&u, e); _ntl_gsetlength(&a_bak, e); _ntl_gsetlength(&n_bak, e); _ntl_gsetlength(&inv_bak, e); _ntl_gsetlength(&w_bak, e); fhi1 = 1.0 + ((double) 32.0)/NTL_FDOUBLE_PRECISION; flo1 = 1.0 - ((double) 32.0)/NTL_FDOUBLE_PRECISION; fhi = 1.0 + ((double) 8.0)/NTL_FDOUBLE_PRECISION; flo = 1.0 - ((double) 8.0)/NTL_FDOUBLE_PRECISION; _ntl_gcopy(ain, &a); _ntl_gcopy(nin, &n); _ntl_gone(&inv); _ntl_gzero(&w); while (1) { if (SIZE(w) >= sden && _ntl_gcompare(w, den_bound) > 0) break; if (SIZE(n) <= snum && _ntl_gcompare(n, num_bound) <= 0) break; _ntl_gcopy(a, &a_bak); _ntl_gcopy(n, &n_bak); _ntl_gcopy(w, &w_bak); _ntl_gcopy(inv, &inv_bak); gotthem = 0; sa = SIZE(a); sn = SIZE(n); diff = sa - sn; if (!diff || diff == 1) { sa = SIZE(a); p = DATA(a) + (sa-1); num = (double) (*p) * NTL_ZZ_FRADIX; if (sa > 1) num += (*(--p)); num *= NTL_ZZ_FRADIX; if (sa > 2) num += (*(p - 1)); sn = SIZE(n); p = DATA(n) + (sn-1); den = (double) (*p) * NTL_ZZ_FRADIX; if (sn > 1) den += (*(--p)); den *= NTL_ZZ_FRADIX; if (sn > 2) den += (*(p - 1)); hi = fhi1 * (num + 1.0) / den; lo = flo1 * num / (den + 1.0); if (diff > 0) { hi *= NTL_ZZ_FRADIX; lo *= NTL_ZZ_FRADIX; } try11 = 1; try12 = 0; try21 = 0; try22 = 1; parity = 1; fast = 1; while (fast > 0) { parity = 1 - parity; if (hi >= NTL_SP_BOUND) fast = 0; else { ilo = (long)lo; dirt = hi - ilo; if (dirt < 1.0/NTL_FDOUBLE_PRECISION || !ilo || ilo < (long)hi) fast = 0; else { dt = lo-ilo; lo = flo / dirt; if (dt > 1.0/NTL_FDOUBLE_PRECISION) hi = fhi / dt; else hi = NTL_SP_BOUND; temp = try11; try11 = try21; if ((NTL_WSP_BOUND - temp) / ilo < try21) fast = 0; else try21 = temp + ilo * try21; temp = try12; try12 = try22; if ((NTL_WSP_BOUND - temp) / ilo < try22) fast = 0; else try22 = temp + ilo * try22; if ((fast > 0) && (parity > 0)) { gotthem = 1; got11 = try11; got12 = try12; got21 = try21; got22 = try22; } } } } } if (gotthem) { _ntl_gsmul(inv, got11, &x); _ntl_gsmul(w, got12, &y); _ntl_gsmul(inv, got21, &z); _ntl_gsmul(w, got22, &w); _ntl_gadd(x, y, &inv); _ntl_gadd(z, w, &w); _ntl_gsmul(a, got11, &x); _ntl_gsmul(n, got12, &y); _ntl_gsmul(a, got21, &z); _ntl_gsmul(n, got22, &n); _ntl_gsub(x, y, &a); _ntl_gsub(n, z, &n); } else { _ntl_gdiv(a, n, &q, &a); _ntl_gmul(q, w, &x); _ntl_gadd(inv, x, &inv); if (!ZEROP(a)) { _ntl_gdiv(n, a, &q, &n); _ntl_gmul(q, inv, &x); _ntl_gadd(w, x, &w); } else { break; } } } _ntl_gcopy(a_bak, &a); _ntl_gcopy(n_bak, &n); _ntl_gcopy(w_bak, &w); _ntl_gcopy(inv_bak, &inv); _ntl_gnegate(&w); while (1) { sa = SIZE(w); if (sa < 0) SIZE(w) = -sa; if (SIZE(w) >= sden && _ntl_gcompare(w, den_bound) > 0) return 0; SIZE(w) = sa; if (SIZE(n) <= snum && _ntl_gcompare(n, num_bound) <= 0) break; fast = 0; sa = SIZE(a); sn = SIZE(n); diff = sa - sn; if (!diff || diff == 1) { sa = SIZE(a); p = DATA(a) + (sa-1); num = (double) (*p) * NTL_ZZ_FRADIX; if (sa > 1) num += (*(--p)); num *= NTL_ZZ_FRADIX; if (sa > 2) num += (*(p - 1)); sn = SIZE(n); p = DATA(n) + (sn-1); den = (double) (*p) * NTL_ZZ_FRADIX; if (sn > 1) den += (*(--p)); den *= NTL_ZZ_FRADIX; if (sn > 2) den += (*(p - 1)); hi = fhi1 * (num + 1.0) / den; lo = flo1 * num / (den + 1.0); if (diff > 0) { hi *= NTL_ZZ_FRADIX; lo *= NTL_ZZ_FRADIX; } if (hi < NTL_SP_BOUND) { ilo = (long)lo; if (ilo == (long)hi) fast = 1; } } if (fast) { if (ilo != 0) { if (ilo == 1) { _ntl_gsub(inv, w, &inv); _ntl_gsubpos(a, n, &a); } else { _ntl_gsmul(w, ilo, &x); _ntl_gsub(inv, x, &inv); _ntl_gsmul(n, ilo, &x); _ntl_gsubpos(a, x, &a); } } } else { _ntl_gdiv(a, n, &q, &a); _ntl_gmul(q, w, &x); _ntl_gsub(inv, x, &inv); } _ntl_gswap(&a, &n); _ntl_gswap(&inv, &w); } if (_ntl_gsign(w) < 0) { _ntl_gnegate(&w); _ntl_gnegate(&n); } _ntl_gcopy(n, num_out); _ntl_gcopy(w, den_out); return 1; } void _ntl_gexp( _ntl_gbigint a, long e, _ntl_gbigint *bb ) { long k; long len_a; static _ntl_gbigint res = 0; if (!e) { _ntl_gone(bb); return; } if (e < 0) ghalt("negative exponent in _ntl_gexp"); if (_ntl_giszero(a)) { _ntl_gzero(bb); return; } len_a = _ntl_g2log(a); if (len_a > (NTL_MAX_LONG-(NTL_ZZ_NBITS-1))/e) ghalt("overflow in _ntl_gexp"); _ntl_gsetlength(&res, (len_a*e+NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS); _ntl_gcopy(a, &res); k = 1; while ((k << 1) <= e) k <<= 1; while (k >>= 1) { _ntl_gsq(res, &res); if (e & k) _ntl_gmul(a, res, &res); } _ntl_gcopy(res, bb); } void _ntl_gexps( long a, long e, _ntl_gbigint *bb ) { long k; long len_a; static _ntl_gbigint res = 0; if (!e) { _ntl_gone(bb); return; } if (e < 0) ghalt("negative exponent in _ntl_zexps"); if (!a) { _ntl_gzero(bb); return; } len_a = _ntl_g2logs(a); if (len_a > (NTL_MAX_LONG-(NTL_ZZ_NBITS-1))/e) ghalt("overflow in _ntl_gexps"); _ntl_gsetlength(&res, (len_a*e+NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS); _ntl_gintoz(a, &res); k = 1; while ((k << 1) <= e) k <<= 1; while (k >>= 1) { _ntl_gsq(res, &res); if (e & k) _ntl_gsmul(res, a, &res); } _ntl_gcopy(res, bb); } static long OptWinSize(long n) /* finds k that minimizes n/(k+1) + 2^{k-1} */ { long k; double v, v_new; v = n/2.0 + 1.0; k = 1; for (;;) { v_new = n/((double)(k+2)) + ((double)(1L << k)); if (v_new >= v) break; v = v_new; k++; } return k; } /* DIRT: will not work with non-empty "nails" */ static mp_limb_t neg_inv_mod_limb(mp_limb_t m0) { mp_limb_t x; long k; x = 1; k = 1; while (k < NTL_ZZ_NBITS) { x += x * (1 - x * m0); k <<= 1; } return - x; } /* Montgomery reduction: * This computes res = T/b^m mod N, where b = 2^{NTL_ZZ_NBITS}. * It is assumed that N has n limbs, and that T has at most n + m limbs. * Also, inv should be set to -N^{-1} mod b. * Finally, it is assumed that T has space allocated for n + m limbs, * and that res has space allocated for n limbs. * Note: res should not overlap any inputs, and T is destroyed. * Note: res will have at most n limbs, but may not be fully reduced * mod N. In general, we will have res < T/b^m + N. */ /* DIRT: this routine may not work with non-empty "nails" */ static void redc(_ntl_gbigint T, _ntl_gbigint N, long m, mp_limb_t inv, _ntl_gbigint res) { long n, sT, i; mp_limb_t *Ndata, *Tdata, *resdata, q, d, t, c; n = SIZE(N); Ndata = DATA(N); sT = SIZE(T); Tdata = DATA(T); resdata = DATA(res); for (i = sT; i < m+n; i++) Tdata[i] = 0; c = 0; for (i = 0; i < m; i++) { q = Tdata[i]*inv; d = mpn_addmul_1(Tdata+i, Ndata, n, q); t = Tdata[i+n] + d; Tdata[i+n] = t + c; if (t < d || (c == 1 && t + c == 0)) c = 1; else c = 0; } if (c) { mpn_sub_n(resdata, Tdata + m, Ndata, n); } else { for (i = 0; i < n; i++) resdata[i] = Tdata[m + i]; } i = n; STRIP(i, resdata); SIZE(res) = i; SIZE(T) = 0; } #define REDC_CROSS (32) void _ntl_gpowermod(_ntl_gbigint g, _ntl_gbigint e, _ntl_gbigint F, _ntl_gbigint *h) /* h = g^e mod f using "sliding window" algorithm remark: the notation (h, g, e, F) is strange, because I copied the code from BB.c. */ { _ntl_gbigint res, gg, *v, t; long n, i, k, val, cnt, m; long use_redc, sF; mp_limb_t inv; if (_ntl_gsign(g) < 0 || _ntl_gcompare(g, F) >= 0 || _ntl_gscompare(F, 1) <= 0) ghalt("PowerMod: bad args"); if (_ntl_gscompare(e, 0) == 0) { _ntl_gone(h); return; } if (_ntl_gscompare(e, 1) == 0) { _ntl_gcopy(g, h); return; } if (_ntl_gscompare(e, -1) == 0) { _ntl_ginvmod(g, F, h); return; } if (_ntl_gscompare(e, 2) == 0) { _ntl_gsqmod(g, F, h); return; } if (_ntl_gscompare(e, -2) == 0) { res = 0; _ntl_gsqmod(g, F, &res); _ntl_ginvmod(res, F, h); _ntl_gfree(&res); return; } n = _ntl_g2log(e); sF = SIZE(F); res = 0; _ntl_gsetlength(&res, sF*2); t = 0; _ntl_gsetlength(&t, sF*2); use_redc = (DATA(F)[0] & 1) && sF < REDC_CROSS; gg = 0; if (use_redc) { _ntl_glshift(g, sF*NTL_ZZ_NBITS, &res); _ntl_gmod(res, F, &gg); inv = neg_inv_mod_limb(DATA(F)[0]); } else _ntl_gcopy(g, &gg); if (_ntl_gscompare(g, 2) == 0) { /* plain square-and-multiply algorithm, optimized for g == 2 */ _ntl_gbigint F1 = 0; if (use_redc) { long shamt; COUNT_BITS(shamt, DATA(F)[sF-1]); shamt = NTL_ZZ_NBITS - shamt; _ntl_glshift(F, shamt, &F1); } _ntl_gcopy(gg, &res); for (i = n - 2; i >= 0; i--) { _ntl_gsq(res, &t); if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); if (_ntl_gbit(e, i)) { _ntl_gadd(res, res, &res); if (use_redc) { while (SIZE(res) > sF) { _ntl_gsubpos(res, F1, &res); } } else { if (_ntl_gcompare(res, F) >= 0) _ntl_gsubpos(res, F, &res); } } } if (use_redc) { _ntl_gcopy(res, &t); redc(t, F, sF, inv, res); if (_ntl_gcompare(res, F) >= 0) { _ntl_gsub(res, F, &res); } } if (_ntl_gsign(e) < 0) _ntl_ginvmod(res, F, &res); _ntl_gcopy(res, h); _ntl_gfree(&res); _ntl_gfree(&gg); _ntl_gfree(&t); _ntl_gfree(&F1); return; } if (n < 16) { /* plain square-and-multiply algorithm */ _ntl_gcopy(gg, &res); for (i = n - 2; i >= 0; i--) { _ntl_gsq(res, &t); if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); if (_ntl_gbit(e, i)) { _ntl_gmul(res, gg, &t); if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); } } if (use_redc) { _ntl_gcopy(res, &t); redc(t, F, sF, inv, res); if (_ntl_gcompare(res, F) >= 0) { _ntl_gsub(res, F, &res); } } if (_ntl_gsign(e) < 0) _ntl_ginvmod(res, F, &res); _ntl_gcopy(res, h); _ntl_gfree(&res); _ntl_gfree(&gg); _ntl_gfree(&t); return; } k = OptWinSize(n); if (k > 5) k = 5; v = (_ntl_gbigint *) NTL_MALLOC((1L << (k-1)), sizeof(_ntl_gbigint), 0); if (!v) ghalt("out of memory"); for (i = 0; i < (1L << (k-1)); i++) { v[i] = 0; _ntl_gsetlength(&v[i], sF); } _ntl_gcopy(gg, &v[0]); if (k > 1) { _ntl_gsq(gg, &t); if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); for (i = 1; i < (1L << (k-1)); i++) { _ntl_gmul(v[i-1], res, &t); if (use_redc) redc(t, F, sF, inv, v[i]); else _ntl_gmod(t, F, &v[i]); } } _ntl_gcopy(gg, &res); val = 0; for (i = n-2; i >= 0; i--) { val = (val << 1) | _ntl_gbit(e, i); if (val == 0) { _ntl_gsq(res, &t); if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); } else if (val >= (1L << (k-1)) || i == 0) { cnt = 0; while ((val & 1) == 0) { val = val >> 1; cnt++; } m = val; while (m > 0) { _ntl_gsq(res, &t); if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); m = m >> 1; } _ntl_gmul(res, v[val >> 1], &t); if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); while (cnt > 0) { _ntl_gsq(res, &t); if (use_redc) redc(t, F, sF, inv, res); else _ntl_gmod(t, F, &res); cnt--; } val = 0; } } if (use_redc) { _ntl_gcopy(res, &t); redc(t, F, sF, inv, res); if (_ntl_gcompare(res, F) >= 0) { _ntl_gsub(res, F, &res); } } if (_ntl_gsign(e) < 0) _ntl_ginvmod(res, F, &res); _ntl_gcopy(res, h); _ntl_gfree(&res); _ntl_gfree(&gg); _ntl_gfree(&t); for (i = 0; i < (1L << (k-1)); i++) _ntl_gfree(&v[i]); free(v); } long _ntl_gsize(_ntl_gbigint rep) { if (!rep) return 0; else if (SIZE(rep) < 0) return -SIZE(rep); else return SIZE(rep); } long _ntl_gisone(_ntl_gbigint rep) { return rep != 0 && SIZE(rep) == 1 && DATA(rep)[0] == 1; } long _ntl_gsptest(_ntl_gbigint rep) { return !rep || SIZE(rep) == 0 || ((SIZE(rep) == 1 || SIZE(rep) == -1) && DATA(rep)[0] < ((mp_limb_t) NTL_SP_BOUND)); } long _ntl_gwsptest(_ntl_gbigint rep) { return !rep || SIZE(rep) == 0 || ((SIZE(rep) == 1 || SIZE(rep) == -1) && DATA(rep)[0] < ((mp_limb_t) NTL_WSP_BOUND)); } long _ntl_gcrtinrange(_ntl_gbigint g, _ntl_gbigint a) { long sa, sg, i; mp_limb_t carry, u, v; mp_limb_t *adata, *gdata; if (!a || SIZE(a) <= 0) return 0; sa = SIZE(a); if (!g) return 1; sg = SIZE(g); if (sg == 0) return 1; if (sg < 0) sg = -sg; if (sa-sg > 1) return 1; if (sa-sg < 0) return 0; adata = DATA(a); gdata = DATA(g); carry=0; if (sa-sg == 1) { if (adata[sa-1] > ((mp_limb_t) 1)) return 1; carry = 1; } i = sg-1; u = 0; v = 0; while (i >= 0 && u == v) { u = (carry << (NTL_ZZ_NBITS-1)) + (adata[i] >> 1); v = gdata[i]; carry = (adata[i] & 1); i--; } if (u == v) { if (carry) return 1; return (SIZE(g) > 0); } else return (u > v); } /* DIRT: this routine will not work with non-empty "nails" */ void _ntl_gfrombytes(_ntl_gbigint *x, const unsigned char *p, long n) { long BytesPerLimb; long lw, r, i, j; mp_limb_t *xp, t; if (n <= 0) { x = 0; return; } BytesPerLimb = NTL_ZZ_NBITS/8; lw = n/BytesPerLimb; r = n - lw*BytesPerLimb; if (r != 0) lw++; else r = BytesPerLimb; _ntl_gsetlength(x, lw); xp = DATA(*x); for (i = 0; i < lw-1; i++) { t = 0; for (j = 0; j < BytesPerLimb; j++) { t >>= 8; t += (((mp_limb_t)(*p)) & ((mp_limb_t) 255)) << ((BytesPerLimb-1)*8); p++; } xp[i] = t; } t = 0; for (j = 0; j < r; j++) { t >>= 8; t += (((mp_limb_t)(*p)) & ((mp_limb_t) 255)) << ((BytesPerLimb-1)*8); p++; } t >>= (BytesPerLimb-r)*8; xp[lw-1] = t; STRIP(lw, xp); SIZE(*x) = lw; } /* DIRT: this routine will not work with non-empty "nails" */ void _ntl_gbytesfromz(unsigned char *p, _ntl_gbigint a, long n) { long BytesPerLimb; long lbits, lbytes, min_bytes, min_words, r; long i, j; mp_limb_t *ap, t; if (n < 0) n = 0; BytesPerLimb = NTL_ZZ_NBITS/8; lbits = _ntl_g2log(a); lbytes = (lbits+7)/8; min_bytes = (lbytes < n) ? lbytes : n; min_words = min_bytes/BytesPerLimb; r = min_bytes - min_words*BytesPerLimb; if (r != 0) min_words++; else r = BytesPerLimb; if (a) ap = DATA(a); else ap = 0; for (i = 0; i < min_words-1; i++) { t = ap[i]; for (j = 0; j < BytesPerLimb; j++) { *p = t & ((mp_limb_t) 255); t >>= 8; p++; } } if (min_words > 0) { t = ap[min_words-1]; for (j = 0; j < r; j++) { *p = t & ((mp_limb_t) 255); t >>= 8; p++; } } for (j = min_bytes; j < n; j++) { *p = 0; p++; } } long _ntl_gblock_construct_alloc(_ntl_gbigint *x, long d, long n) { long d1, sz, AllocAmt, m, j, alloc; char *p; _ntl_gbigint t; /* check n value */ if (n <= 0) ghalt("block construct: n must be positive"); /* check d value */ if (d <= 0) ghalt("block construct: d must be positive"); if (NTL_OVERFLOW(d, NTL_ZZ_NBITS, NTL_ZZ_NBITS)) ghalt("block construct: d too large"); #ifdef NTL_SMALL_MP_SIZE_T /* this makes sure that numbers don't get too big for GMP */ if (d >= (1L << (NTL_BITS_PER_INT-4))) ghalt("size too big for GMP"); #endif d1 = d + 1; if (STORAGE_OVF(d1)) ghalt("block construct: d too large"); sz = STORAGE(d1); AllocAmt = NTL_MAX_ALLOC_BLOCK/sz; if (AllocAmt == 0) AllocAmt = 1; if (AllocAmt < n) m = AllocAmt; else m = n; p = (char *) NTL_MALLOC(m, sz, 0); if (!p) ghalt("out of memory in _ntl_gblock_construct"); *x = (_ntl_gbigint) p; for (j = 0; j < m; j++) { t = (_ntl_gbigint) p; alloc = (d1 << 2) | 1; if (j < m-1) alloc |= 2; ALLOC(t) = alloc; SIZE(t) = 0; p += sz; } return m; } void _ntl_gblock_construct_set(_ntl_gbigint x, _ntl_gbigint *y, long i) { long d1, sz; d1 = ALLOC(x) >> 2; sz = STORAGE(d1); *y = (_ntl_gbigint) (((char *) x) + i*sz); } long _ntl_gblock_destroy(_ntl_gbigint x) { long d1, sz, alloc, m; char *p; _ntl_gbigint t; d1 = ALLOC(x) >> 2; sz = STORAGE(d1); p = (char *) x; m = 1; for (;;) { t = (_ntl_gbigint) p; alloc = ALLOC(t); if ((alloc & 1) == 0) ghalt("corrupted memory detected in _ntl_gblock_destroy"); if ((alloc & 2) == 0) break; m++; p += sz; } free(x); return m; } long _ntl_gblock_storage(long d) { long d1, sz; d1 = d + 1; sz = STORAGE(d1) + sizeof(_ntl_gbigint); return sz; } /* * This is a completely portable MulMod routine. */ #define SP_MUL_MOD(r, a, b, n) \ { \ long l__a = (a); \ long l__b = (b); \ long l__n = (n); \ long l__q; \ unsigned long l__res; \ \ l__q = (long) ((((double) l__a) * ((double) l__b)) / ((double) l__n)); \ l__res = ((unsigned long) l__a)*((unsigned long) l__b) - \ ((unsigned long) l__q)*((unsigned long) l__n); \ if (l__res >> (NTL_BITS_PER_LONG-1)) \ l__res += l__n; \ else if (((long) l__res) >= l__n) \ l__res -= l__n; \ \ r = (long) l__res; \ } #if (NTL_ARITH_RIGHT_SHIFT && defined(NTL_AVOID_BRANCHING) && !defined(NTL_CLEAN_INT)) #define SP_MUL_MOD2(res, a, b, n, bninv) \ do { \ long _a = (a); \ long _b = (b); \ long _n = (n); \ double _bninv = (bninv); \ long _q, _res; \ \ _q = (long) (((double) _a) * _bninv); \ _res = _a*_b - _q*_n; \ \ _res += (_res >> (NTL_BITS_PER_LONG-1)) & _n; \ _res -= _n; \ _res += (_res >> (NTL_BITS_PER_LONG-1)) & _n; \ \ res = _res; \ } while (0) #else /* * This is a completely portable MulMod routine. */ #define SP_MUL_MOD2(res, a, b, n, bninv) \ do { \ long _a = (a); \ long _b = (b); \ long _n = (n); \ double _bninv = (bninv); \ long _q; \ unsigned long _res; \ \ _q = (long) (((double) _a) * _bninv); \ _res = ((unsigned long) _a)*((unsigned long) _b) - \ ((unsigned long) _q)*((unsigned long) _n); \ \ if (_res >> (NTL_BITS_PER_LONG-1)) \ _res += _n; \ else if (((long) _res) >= _n) \ _res -= _n; \ \ res = (long) _res; \ } while (0) #endif static long SpecialPower(long e, long p) { long a; long x, y; a = (long) ((((mp_limb_t) 1) << (NTL_ZZ_NBITS-2)) % ((mp_limb_t) p)); SP_MUL_MOD(a, a, 2, p); SP_MUL_MOD(a, a, 2, p); x = 1; y = a; while (e) { if (e & 1) SP_MUL_MOD(x, x, y, p); SP_MUL_MOD(y, y, y, p); e = e >> 1; } return x; } static void sp_ext_eucl(long *dd, long *ss, long *tt, long a, long b) { long u, v, u0, v0, u1, v1, u2, v2, q, r; long aneg = 0, bneg = 0; if (a < 0) { if (a < -NTL_MAX_LONG) ghalt("integer overflow"); a = -a; aneg = 1; } if (b < 0) { if (b < -NTL_MAX_LONG) ghalt("integer overflow"); b = -b; bneg = 1; } u1=1; v1=0; u2=0; v2=1; u = a; v = b; while (v != 0) { q = u / v; r = u % v; u = v; v = r; u0 = u2; v0 = v2; u2 = u1 - q*u2; v2 = v1- q*v2; u1 = u0; v1 = v0; } if (aneg) u1 = -u1; if (bneg) v1 = -v1; *dd = u; *ss = u1; *tt = v1; } static long sp_inv_mod(long a, long n) { long d, s, t; sp_ext_eucl(&d, &s, &t, a, n); if (d != 1) ghalt("inverse undefined"); if (s < 0) return s + n; else return s; } /* ------ HERE ------ */ struct crt_body_gmp { _ntl_gbigint *v; long sbuf; long n; _ntl_gbigint buf; }; struct crt_body_gmp1 { long n; long levels; long *primes; long *inv_vec; long *val_vec; long *index_vec; _ntl_gbigint *prod_vec; _ntl_gbigint *rem_vec; _ntl_gbigint *coeff_vec; _ntl_gbigint temps[2]; _ntl_gbigint modulus; }; struct crt_body { long strategy; union { struct crt_body_gmp G; struct crt_body_gmp1 G1; } U; }; void _ntl_gcrt_struct_init(void **crt_struct, long n, _ntl_gbigint p, const long *primes) { struct crt_body *c; c = (struct crt_body *) NTL_MALLOC(1, sizeof(struct crt_body), 0); if (!c) ghalt("out of memory"); if (n >= 600) { struct crt_body_gmp1 *C = &c->U.G1; long *q; long i, j; long levels, vec_len; long *val_vec, *inv_vec; long *index_vec; _ntl_gbigint *prod_vec, *rem_vec, *coeff_vec; _ntl_gbigint *temps; C->modulus = 0; _ntl_gcopy(p, &C->modulus); temps = &C->temps[0]; temps[0] = 0; temps[1] = 0; q = (long *) NTL_MALLOC(n, sizeof(long), 0); if (!q) ghalt("out of memory"); val_vec = (long *) NTL_MALLOC(n, sizeof(long), 0); if (!val_vec) ghalt("out of memory"); inv_vec = (long *) NTL_MALLOC(n, sizeof(long), 0); if (!inv_vec) ghalt("out of memory"); for (i = 0; i < n; i++) q[i] = primes[i]; levels = 0; while ((n >> levels) >= 16) levels++; vec_len = (1L << levels) - 1; index_vec = (long *) NTL_MALLOC((vec_len+1), sizeof(long), 0); if (!index_vec) ghalt("out of memory"); prod_vec = (_ntl_gbigint *) NTL_MALLOC(vec_len, sizeof(_ntl_gbigint), 0); if (!prod_vec) ghalt("out of memory"); rem_vec = (_ntl_gbigint *) NTL_MALLOC(vec_len, sizeof(_ntl_gbigint), 0); if (!rem_vec) ghalt("out of memory"); coeff_vec = (_ntl_gbigint *) NTL_MALLOC(n, sizeof(_ntl_gbigint), 0); if (!coeff_vec) ghalt("out of memory"); for (i = 0; i < vec_len; i++) prod_vec[i] = 0; for (i = 0; i < vec_len; i++) rem_vec[i] = 0; for (i = 0; i < n; i++) coeff_vec[i] = 0; index_vec[0] = 0; index_vec[1] = n; for (i = 0; i <= levels-2; i++) { long start = (1L << i) - 1; long finish = (1L << (i+1)) - 2; for (j = finish; j >= start; j--) { index_vec[2*j+2] = index_vec[j] + (index_vec[j+1] - index_vec[j])/2; index_vec[2*j+1] = index_vec[j]; } index_vec[2*finish+3] = n; } for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { /* multiply primes index_vec[i]..index_vec[i+1]-1 into * prod_vec[i] */ _ntl_gone(&prod_vec[i]); for (j = index_vec[i]; j < index_vec[i+1]; j++) _ntl_gsmul(prod_vec[i], q[j], &prod_vec[i]); } for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { for (j = index_vec[i]; j < index_vec[i+1]; j++) _ntl_gsdiv(prod_vec[i], q[j], &coeff_vec[j]); } for (i = (1L << (levels-1)) - 2; i >= 0; i--) _ntl_gmul(prod_vec[2*i+1], prod_vec[2*i+2], &prod_vec[i]); /* the following is asymptotically the bottleneck...but it * it probably doesn't matter. */ for (i = 0; i < n; i++) { long tt; _ntl_gsdiv(prod_vec[0], q[i], &temps[0]); tt = mpn_mod_1(DATA(temps[0]), SIZE(temps[0]), q[i]); inv_vec[i] = sp_inv_mod(tt, q[i]); } c->strategy = 2; C->n = n; C->primes = q; C->val_vec = val_vec; C->inv_vec = inv_vec; C->levels = levels; C->index_vec = index_vec; C->prod_vec = prod_vec; C->rem_vec = rem_vec; C->coeff_vec = coeff_vec; *crt_struct = (void *) c; return; } { struct crt_body_gmp *C = &c->U.G; long i; c->strategy = 1; C->n = n; C->v = (_ntl_gbigint *) NTL_MALLOC(n, sizeof(_ntl_gbigint), 0); if (!C->v) ghalt("out of memory"); for (i = 0; i < n; i++) C->v[i] = 0; C->sbuf = SIZE(p)+2; C->buf = 0; _ntl_gsetlength(&C->buf, C->sbuf); *crt_struct = (void *) c; return; } } void _ntl_gcrt_struct_insert(void *crt_struct, long i, _ntl_gbigint m) { struct crt_body *c = (struct crt_body *) crt_struct; switch (c->strategy) { case 1: { _ntl_gcopy(m, &c->U.G.v[i]); break; } default: ghalt("_ntl_gcrt_struct_insert: inconsistent strategy"); } /* end switch */ } void _ntl_gcrt_struct_free(void *crt_struct) { struct crt_body *c = (struct crt_body *) crt_struct; switch (c->strategy) { case 1: { struct crt_body_gmp *C = &c->U.G; long i, n; n = C->n; for (i = 0; i < n; i++) _ntl_gfree(&C->v[i]); _ntl_gfree(&C->buf); free(C->v); free(c); break; } case 2: { struct crt_body_gmp1 *C = &c->U.G1; long n = C->n; long levels = C->levels; long *primes = C->primes; long *inv_vec = C->inv_vec; long *val_vec = C->val_vec; long *index_vec = C->index_vec; _ntl_gbigint *prod_vec = C->prod_vec; _ntl_gbigint *rem_vec = C->rem_vec; _ntl_gbigint *coeff_vec = C->coeff_vec; _ntl_gbigint *temps = C->temps; _ntl_gbigint modulus = C->modulus; long vec_len = (1L << levels) - 1; long i; for (i = 0; i < vec_len; i++) _ntl_gfree(&prod_vec[i]); for (i = 0; i < vec_len; i++) _ntl_gfree(&rem_vec[i]); for (i = 0; i < n; i++) _ntl_gfree(&coeff_vec[i]); _ntl_gfree(&temps[0]); _ntl_gfree(&temps[1]); _ntl_gfree(&modulus); free(primes); free(inv_vec); free(val_vec); free(index_vec); free(prod_vec); free(rem_vec); free(coeff_vec); free(c); break; } default: ghalt("_ntl_gcrt_struct_free: inconsistent strategy"); } /* end case */ } static void gadd_mul_many(_ntl_gbigint *res, _ntl_gbigint *a, long *b, long n, long sz) { mp_limb_t *xx, *yy; long i, sx; long sy; mp_limb_t carry; sx = sz + 2; if (MustAlloc(*res, sx)) _ntl_gsetlength(res, sx); xx = DATA(*res); for (i = 0; i < sx; i++) xx[i] = 0; for (i = 0; i < n; i++) { if (!a[i]) continue; yy = DATA(a[i]); sy = SIZE(a[i]); if (!sy || !b[i]) continue; carry = mpn_addmul_1(xx, yy, sy, b[i]); yy = xx + sy; *yy += carry; if (*yy < carry) { /* unsigned comparison! */ do { yy++; *yy += 1; } while (*yy == 0); } } while (sx > 0 && xx[sx-1] == 0) sx--; SIZE(*res) = sx; } void _ntl_gcrt_struct_eval(void *crt_struct, _ntl_gbigint *x, const long *b) { struct crt_body *c = (struct crt_body *) crt_struct; switch (c->strategy) { case 1: { struct crt_body_gmp *C = &c->U.G; mp_limb_t *xx, *yy; _ntl_gbigint *a; long i, sx, n; long sy; mp_limb_t carry; n = C->n; sx = C->sbuf; xx = DATA(C->buf); for (i = 0; i < sx; i++) xx[i] = 0; a = C->v; for (i = 0; i < n; i++) { if (!a[i]) continue; yy = DATA(a[i]); sy = SIZE(a[i]); if (!sy || !b[i]) continue; carry = mpn_addmul_1(xx, yy, sy, b[i]); yy = xx + sy; *yy += carry; if (*yy < carry) { /* unsigned comparison! */ do { yy++; *yy += 1; } while (*yy == 0); } } while (sx > 0 && xx[sx-1] == 0) sx--; SIZE(C->buf) = sx; _ntl_gcopy(C->buf, x); break; } case 2: { struct crt_body_gmp1 *C = &c->U.G1; long n = C->n; long levels = C->levels; long *primes = C->primes; long *inv_vec = C->inv_vec; long *val_vec = C->val_vec; long *index_vec = C->index_vec; _ntl_gbigint *prod_vec = C->prod_vec; _ntl_gbigint *rem_vec = C->rem_vec; _ntl_gbigint *coeff_vec = C->coeff_vec; _ntl_gbigint *temps = C->temps; long vec_len = (1L << levels) - 1; long i, j; for (i = 0; i < n; i++) { SP_MUL_MOD(val_vec[i], b[i], inv_vec[i], primes[i]); } for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { long j1 = index_vec[i]; long j2 = index_vec[i+1]; gadd_mul_many(&rem_vec[i], &coeff_vec[j1], &val_vec[j1], j2-j1, SIZE(prod_vec[i])); } for (i = (1L << (levels-1)) - 2; i >= 0; i--) { _ntl_gmul(prod_vec[2*i+1], rem_vec[2*i+2], &temps[0]); _ntl_gmul(rem_vec[2*i+1], prod_vec[2*i+2], &temps[1]); _ntl_gadd(temps[0], temps[1], &rem_vec[i]); } /* temps[0] = rem_vec[0] mod prod_vec[0] (least absolute residue) */ _ntl_gmod(rem_vec[0], prod_vec[0], &temps[0]); _ntl_gsub(temps[0], prod_vec[0], &temps[1]); _ntl_gnegate(&temps[1]); if (_ntl_gcompare(temps[0], temps[1]) > 0) { _ntl_gnegate(&temps[1]); _ntl_gcopy(temps[1], &temps[0]); } _ntl_gmod(temps[0], C->modulus, &temps[1]); _ntl_gcopy(temps[1], x); break; } default: ghalt("_crt_gstruct_eval: inconsistent strategy"); } /* end case */ } long _ntl_gcrt_struct_special(void *crt_struct) { struct crt_body *c = (struct crt_body *) crt_struct; return (c->strategy == 2); } struct rem_body_lip { long n; long *primes; }; struct rem_body_gmp { long n; long levels; long *primes; long *index_vec; _ntl_gbigint *prod_vec; _ntl_gbigint *rem_vec; }; struct rem_body_gmp1 { long n; long levels; long *primes; long *index_vec; long *len_vec; mp_limb_t *inv_vec; long *corr_vec; double *corraux_vec; _ntl_gbigint *prod_vec; _ntl_gbigint *rem_vec; }; struct rem_body { long strategy; union { struct rem_body_lip L; struct rem_body_gmp G; struct rem_body_gmp1 G1; } U; }; void _ntl_grem_struct_init(void **rem_struct, long n, _ntl_gbigint modulus, const long *p) { struct rem_body *r; r = (struct rem_body *) NTL_MALLOC(1, sizeof(struct rem_body), 0); if (!r) ghalt("out of memory"); if (n >= 32 && n <= 256) { struct rem_body_gmp1 *R = &r->U.G1; long *q; long i, j; long levels, vec_len; long *index_vec; long *len_vec, *corr_vec; double *corraux_vec; mp_limb_t *inv_vec; _ntl_gbigint *prod_vec, *rem_vec; q = (long *) NTL_MALLOC(n, sizeof(long), 0); if (!q) ghalt("out of memory"); for (i = 0; i < n; i++) q[i] = p[i]; levels = 0; while ((n >> levels) >= 4) levels++; vec_len = (1L << levels) - 1; index_vec = (long *) NTL_MALLOC((vec_len+1), sizeof(long), 0); if (!index_vec) ghalt("out of memory"); len_vec = (long *) NTL_MALLOC(vec_len, sizeof(long), 0); if (!len_vec) ghalt("out of memory"); inv_vec = (mp_limb_t *) NTL_MALLOC(vec_len, sizeof(mp_limb_t), 0); if (!inv_vec) ghalt("out of memory"); corr_vec = (long *) NTL_MALLOC(n, sizeof(long), 0); if (!corr_vec) ghalt("out of memory"); corraux_vec = (double *) NTL_MALLOC(n, sizeof(double), 0); if (!corraux_vec) ghalt("out of memory"); prod_vec = (_ntl_gbigint *) NTL_MALLOC(vec_len, sizeof(_ntl_gbigint), 0); if (!prod_vec) ghalt("out of memory"); rem_vec = (_ntl_gbigint *) NTL_MALLOC(vec_len, sizeof(_ntl_gbigint), 0); if (!rem_vec) ghalt("out of memory"); for (i = 0; i < vec_len; i++) prod_vec[i] = 0; for (i = 0; i < vec_len; i++) rem_vec[i] = 0; index_vec[0] = 0; index_vec[1] = n; for (i = 0; i <= levels-2; i++) { long start = (1L << i) - 1; long finish = (1L << (i+1)) - 2; for (j = finish; j >= start; j--) { index_vec[2*j+2] = index_vec[j] + (index_vec[j+1] - index_vec[j])/2; index_vec[2*j+1] = index_vec[j]; } index_vec[2*finish+3] = n; } for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { /* multiply primes index_vec[i]..index_vec[i+1]-1 into * prod_vec[i] */ _ntl_gone(&prod_vec[i]); for (j = index_vec[i]; j < index_vec[i+1]; j++) _ntl_gsmul(prod_vec[i], q[j], &prod_vec[i]); } for (i = (1L << (levels-1)) - 2; i >= 3; i--) _ntl_gmul(prod_vec[2*i+1], prod_vec[2*i+2], &prod_vec[i]); for (i = 3; i < vec_len; i++) len_vec[i] = _ntl_gsize(prod_vec[i]); /* Set len_vec[1] = len_vec[2] = * max(_ntl_gsize(modulus), len_vec[3..6]). * This is a bit paranoid, but it makes the code * more robust. */ j = _ntl_gsize(modulus); for (i = 3; i <= 6; i++) if (len_vec[i] > j) j = len_vec[i]; len_vec[1] = len_vec[2] = j; for (i = 3; i < vec_len; i++) inv_vec[i] = neg_inv_mod_limb(DATA(prod_vec[i])[0]); for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { for (j = index_vec[i]; j < index_vec[i+1]; j++) { corr_vec[j] = SpecialPower(len_vec[1] - len_vec[i], q[j]); corraux_vec[j] = ((double) corr_vec[j])/((double) q[j]); } } /* allocate length in advance to streamline eval code */ _ntl_gsetlength(&rem_vec[0], len_vec[1]); /* a special temp */ for (i = 1; i < vec_len; i++) _ntl_gsetlength(&rem_vec[i], len_vec[i]); r->strategy = 2; R->n = n; R->primes = q; R->levels = levels; R->index_vec = index_vec; R->len_vec = len_vec; R->inv_vec = inv_vec; R->corr_vec = corr_vec; R->corraux_vec = corraux_vec; R->prod_vec = prod_vec; R->rem_vec = rem_vec; *rem_struct = (void *) r; } else if (n >= 32) { struct rem_body_gmp *R = &r->U.G; long *q; long i, j; long levels, vec_len; long *index_vec; _ntl_gbigint *prod_vec, *rem_vec; q = (long *) NTL_MALLOC(n, sizeof(long), 0); if (!q) ghalt("out of memory"); for (i = 0; i < n; i++) q[i] = p[i]; levels = 0; while ((n >> levels) >= 4) levels++; vec_len = (1L << levels) - 1; index_vec = (long *) NTL_MALLOC((vec_len+1), sizeof(long), 0); if (!index_vec) ghalt("out of memory"); prod_vec = (_ntl_gbigint *) NTL_MALLOC(vec_len, sizeof(_ntl_gbigint), 0); if (!prod_vec) ghalt("out of memory"); rem_vec = (_ntl_gbigint *) NTL_MALLOC(vec_len, sizeof(_ntl_gbigint), 0); if (!rem_vec) ghalt("out of memory"); for (i = 0; i < vec_len; i++) prod_vec[i] = 0; for (i = 0; i < vec_len; i++) rem_vec[i] = 0; index_vec[0] = 0; index_vec[1] = n; for (i = 0; i <= levels-2; i++) { long start = (1L << i) - 1; long finish = (1L << (i+1)) - 2; for (j = finish; j >= start; j--) { index_vec[2*j+2] = index_vec[j] + (index_vec[j+1] - index_vec[j])/2; index_vec[2*j+1] = index_vec[j]; } index_vec[2*finish+3] = n; } for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { /* multiply primes index_vec[i]..index_vec[i+1]-1 into * prod_vec[i] */ _ntl_gone(&prod_vec[i]); for (j = index_vec[i]; j < index_vec[i+1]; j++) _ntl_gsmul(prod_vec[i], q[j], &prod_vec[i]); } for (i = (1L << (levels-1)) - 2; i >= 3; i--) _ntl_gmul(prod_vec[2*i+1], prod_vec[2*i+2], &prod_vec[i]); /* allocate length in advance to streamline eval code */ _ntl_gsetlength(&rem_vec[1], _ntl_gsize(modulus)); _ntl_gsetlength(&rem_vec[2], _ntl_gsize(modulus)); for (i = 1; i < (1L << (levels-1)) - 1; i++) { _ntl_gsetlength(&rem_vec[2*i+1], _ntl_gsize(prod_vec[2*i+1])); _ntl_gsetlength(&rem_vec[2*i+2], _ntl_gsize(prod_vec[2*i+2])); } r->strategy = 1; R->n = n; R->primes = q; R->levels = levels; R->index_vec = index_vec; R->prod_vec = prod_vec; R->rem_vec = rem_vec; *rem_struct = (void *) r; } else { struct rem_body_lip *R = &r->U.L; long *q; long i; r->strategy = 0; R->n = n; q = (long *) NTL_MALLOC(n, sizeof(long), 0); if (!q) ghalt("out of memory"); R->primes = q; for (i = 0; i < n; i++) q[i] = p[i]; *rem_struct = (void *) r; } } void _ntl_grem_struct_free(void *rem_struct) { struct rem_body *r = (struct rem_body *) rem_struct; switch (r->strategy) { case 0: { free(r->U.L.primes); free(r); break; } case 1: { struct rem_body_gmp *R = &r->U.G; long levels = R->levels; long vec_len = (1L << levels) - 1; long i; for (i = 0; i < vec_len; i++) _ntl_gfree(&R->prod_vec[i]); for (i = 0; i < vec_len; i++) _ntl_gfree(&R->rem_vec[i]); free(R->primes); free(R->index_vec); free(R->prod_vec); free(R->rem_vec); free(r); break; } case 2: { struct rem_body_gmp1 *R = &r->U.G1; long levels = R->levels; long vec_len = (1L << levels) - 1; long i; for (i = 0; i < vec_len; i++) _ntl_gfree(&R->prod_vec[i]); for (i = 0; i < vec_len; i++) _ntl_gfree(&R->rem_vec[i]); free(R->primes); free(R->index_vec); free(R->len_vec); free(R->corr_vec); free(R->inv_vec); free(R->corraux_vec); free(R->prod_vec); free(R->rem_vec); free(r); break; } default: ghalt("_ntl_grem_struct_free: inconsistent strategy"); } /* end switch */ } void _ntl_grem_struct_eval(void *rem_struct, long *x, _ntl_gbigint a) { struct rem_body *r = (struct rem_body *) rem_struct; switch (r->strategy) { case 0: { struct rem_body_lip *R = &r->U.L; long n = R->n; long *q = R->primes; long j; mp_limb_t *adata; long sa; if (!a) sa = 0; else sa = SIZE(a); if (sa == 0) { for (j = 0; j < n; j++) x[j] = 0; break; } adata = DATA(a); for (j = 0; j < n; j++) x[j] = mpn_mod_1(adata, sa, q[j]); break; } case 1: { struct rem_body_gmp *R = &r->U.G; long n = R->n; long levels = R->levels; long *q = R->primes; long *index_vec = R->index_vec; _ntl_gbigint *prod_vec = R->prod_vec; _ntl_gbigint *rem_vec = R->rem_vec; long vec_len = (1L << levels) - 1; long i, j; if (ZEROP(a)) { for (j = 0; j < n; j++) x[j] = 0; break; } _ntl_gcopy(a, &rem_vec[1]); _ntl_gcopy(a, &rem_vec[2]); for (i = 1; i < (1L << (levels-1)) - 1; i++) { gmod_simple(rem_vec[i], prod_vec[2*i+1], &rem_vec[2*i+1]); gmod_simple(rem_vec[i], prod_vec[2*i+2], &rem_vec[2*i+2]); } for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { long lo = index_vec[i]; long hi = index_vec[i+1]; mp_limb_t *s1p = DATA(rem_vec[i]); long s1size = SIZE(rem_vec[i]); if (s1size == 0) { for (j = lo; j U.G1; long n = R->n; long levels = R->levels; long *q = R->primes; long *index_vec = R->index_vec; long *len_vec = R->len_vec; long *corr_vec = R->corr_vec; double *corraux_vec = R->corraux_vec; mp_limb_t *inv_vec = R->inv_vec; _ntl_gbigint *prod_vec = R->prod_vec; _ntl_gbigint *rem_vec = R->rem_vec; long vec_len = (1L << levels) - 1; long i, j; if (ZEROP(a)) { for (j = 0; j < n; j++) x[j] = 0; break; } _ntl_gcopy(a, &rem_vec[1]); _ntl_gcopy(a, &rem_vec[2]); for (i = 1; i < (1L << (levels-1)) - 1; i++) { _ntl_gcopy(rem_vec[i], &rem_vec[0]); redc(rem_vec[0], prod_vec[2*i+1], len_vec[i]-len_vec[2*i+1], inv_vec[2*i+1], rem_vec[2*i+1]); redc(rem_vec[i], prod_vec[2*i+2], len_vec[i]-len_vec[2*i+2], inv_vec[2*i+2], rem_vec[2*i+2]); } for (i = (1L << (levels-1)) - 1; i < vec_len; i++) { long lo = index_vec[i]; long hi = index_vec[i+1]; mp_limb_t *s1p = DATA(rem_vec[i]); long s1size = SIZE(rem_vec[i]); if (s1size == 0) { for (j = lo; j